diff --git a/lightrag/kg/qdrant_impl.py b/lightrag/kg/qdrant_impl.py index b63aadb5..b1e24de0 100644 --- a/lightrag/kg/qdrant_impl.py +++ b/lightrag/kg/qdrant_impl.py @@ -348,7 +348,16 @@ class QdrantVectorDBStorage(BaseVectorStorage): # New naming scheme with model isolation # Example: "lightrag_vdb_chunks_text_embedding_ada_002_1536d" - self.final_namespace = f"lightrag_vdb_{self.namespace}_{model_suffix}" + # Ensure model_suffix is not empty before appending + if model_suffix: + self.final_namespace = f"lightrag_vdb_{self.namespace}_{model_suffix}" + else: + # Fallback: use legacy namespace if model_suffix is unavailable + self.final_namespace = self.legacy_namespace + logger.warning( + f"Model suffix unavailable, using legacy collection name '{self.legacy_namespace}'. " + f"Ensure embedding_func has model_name for proper model isolation." + ) logger.info( f"Qdrant collection naming: " diff --git a/tests/test_empty_model_suffix.py b/tests/test_empty_model_suffix.py new file mode 100644 index 00000000..2ab9f807 --- /dev/null +++ b/tests/test_empty_model_suffix.py @@ -0,0 +1,294 @@ +""" +Tests for handling empty model suffix in PostgreSQL and Qdrant storage. + +This test module verifies that both storage backends gracefully handle +the case when _generate_collection_suffix() returns an empty string. +""" + +import pytest +from unittest.mock import Mock, patch, MagicMock +from lightrag.base import BaseVectorStorage +from lightrag.utils import EmbeddingFunc + + +def dummy_embedding_func(*args, **kwargs): + """Dummy embedding function for testing.""" + pass + + +class TestEmptyModelSuffix: + """Test suite for handling empty model suffix scenarios.""" + + def test_postgres_table_name_with_empty_suffix(self): + """ + Test PostgreSQL table name generation when model_suffix is empty. + + Bug Fix Verification: + - Before: table_name = "LIGHTRAG_VDB_CHUNKS_" (trailing underscore) + - After: table_name = "LIGHTRAG_VDB_CHUNKS" (fallback to base name) + """ + from lightrag.kg.postgres_impl import PostgresVectorDBStorage + from lightrag.kg.shared_storage import namespace_to_table_name + + # Create a mock embedding function without get_model_identifier + mock_embedding_func = Mock(spec=["embedding_dim"]) + mock_embedding_func.embedding_dim = 1536 + + # Setup global_config without embedding_func + global_config = { + "embedding_batch_num": 100, + "pgvector_precision": "hybrid", + "pg_host": "localhost", + "pg_port": 5432, + "pg_user": "user", + "pg_password": "password", + "pg_database": "lightrag", + } + + # Create PostgreSQL storage instance + storage = PostgresVectorDBStorage( + namespace="chunks", + workspace="test", + global_config=global_config, + embedding_func=mock_embedding_func, + ) + + # Verify that: + # 1. model_suffix is empty + # 2. table_name doesn't have trailing underscore + # 3. table_name equals the base table name + assert storage.model_suffix == "", "model_suffix should be empty" + assert ( + not storage.table_name.endswith("_"), + f"table_name should not have trailing underscore: {storage.table_name}", + ) + + # Expected base table name + expected_base = namespace_to_table_name("chunks") + assert storage.table_name == expected_base, ( + f"table_name should fallback to base name when model_suffix is empty. " + f"Expected: {expected_base}, Got: {storage.table_name}" + ) + + def test_qdrant_collection_name_with_empty_suffix(self): + """ + Test Qdrant collection name generation when model_suffix is empty. + + Bug Fix Verification: + - Before: final_namespace = "lightrag_vdb_chunks_" (trailing underscore) + - After: final_namespace = "lightrag_vdb_chunks" (fallback to legacy name) + """ + from lightrag.kg.qdrant_impl import QdrantVectorDBStorage + + # Create a mock embedding function without get_model_identifier + mock_embedding_func = Mock(spec=["embedding_dim"]) + mock_embedding_func.embedding_dim = 1536 + + # Setup global_config without embedding_func + global_config = { + "embedding_batch_num": 100, + "qdrant_url": "http://localhost:6333", + } + + # Create Qdrant storage instance + storage = QdrantVectorDBStorage( + namespace="chunks", + workspace="test", + global_config=global_config, + embedding_func=mock_embedding_func, + ) + + # Verify that: + # 1. model_suffix is empty + # 2. final_namespace doesn't have trailing underscore + # 3. final_namespace equals the legacy namespace + assert storage._generate_collection_suffix() == "", ( + "model_suffix should be empty" + ) + assert ( + not storage.final_namespace.endswith("_"), + f"final_namespace should not have trailing underscore: {storage.final_namespace}", + ) + assert storage.final_namespace == storage.legacy_namespace, ( + f"final_namespace should fallback to legacy_namespace when model_suffix is empty. " + f"Expected: {storage.legacy_namespace}, Got: {storage.final_namespace}" + ) + + def test_postgres_table_name_with_valid_suffix(self): + """ + Test PostgreSQL table name generation with valid model suffix. + + Verification: + - When embedding_func has get_model_identifier, use it + - table_name has proper format: base_table_model_suffix + """ + from lightrag.kg.postgres_impl import PostgresVectorDBStorage + from lightrag.kg.shared_storage import namespace_to_table_name + + # Create a proper embedding function with model_name + embedding_func = EmbeddingFunc( + embedding_dim=1536, func=dummy_embedding_func, model_name="text-embedding-ada-002" + ) + + # Setup global_config + global_config = { + "embedding_batch_num": 100, + "pgvector_precision": "hybrid", + "pg_host": "localhost", + "pg_port": 5432, + "pg_user": "user", + "pg_password": "password", + "pg_database": "lightrag", + "embedding_func": embedding_func, + } + + # Create PostgreSQL storage instance + storage = PostgresVectorDBStorage( + namespace="chunks", + workspace="test", + global_config=global_config, + embedding_func=embedding_func, + ) + + # Verify that: + # 1. model_suffix is not empty + # 2. table_name has correct format + assert storage.model_suffix != "", "model_suffix should not be empty" + assert "_" in storage.table_name, "table_name should contain underscore as separator" + + # Expected format: base_table_model_suffix + expected_base = namespace_to_table_name("chunks") + expected_model_id = embedding_func.get_model_identifier() + expected_table_name = f"{expected_base}_{expected_model_id}" + + assert storage.table_name == expected_table_name, ( + f"table_name format incorrect. Expected: {expected_table_name}, Got: {storage.table_name}" + ) + + def test_qdrant_collection_name_with_valid_suffix(self): + """ + Test Qdrant collection name generation with valid model suffix. + + Verification: + - When embedding_func has get_model_identifier, use it + - final_namespace has proper format: lightrag_vdb_namespace_model_suffix + """ + from lightrag.kg.qdrant_impl import QdrantVectorDBStorage + + # Create a proper embedding function with model_name + embedding_func = EmbeddingFunc( + embedding_dim=1536, func=dummy_embedding_func, model_name="text-embedding-ada-002" + ) + + # Setup global_config + global_config = { + "embedding_batch_num": 100, + "qdrant_url": "http://localhost:6333", + "embedding_func": embedding_func, + } + + # Create Qdrant storage instance + storage = QdrantVectorDBStorage( + namespace="chunks", + workspace="test", + global_config=global_config, + embedding_func=embedding_func, + ) + + # Verify that: + # 1. model_suffix is not empty + # 2. final_namespace has correct format + model_suffix = storage._generate_collection_suffix() + assert model_suffix != "", "model_suffix should not be empty" + assert "_" in storage.final_namespace, ( + "final_namespace should contain underscore as separator" + ) + + # Expected format: lightrag_vdb_namespace_model_suffix + expected_model_id = embedding_func.get_model_identifier() + expected_collection_name = f"lightrag_vdb_chunks_{expected_model_id}" + + assert storage.final_namespace == expected_collection_name, ( + f"final_namespace format incorrect. Expected: {expected_collection_name}, Got: {storage.final_namespace}" + ) + + def test_suffix_generation_fallback_chain(self): + """ + Test the fallback chain in _generate_collection_suffix. + + Verification: + 1. Direct method: embedding_func.get_model_identifier() + 2. Global config fallback: global_config["embedding_func"].get_model_identifier() + 3. Final fallback: return empty string + """ + from lightrag.base import BaseVectorStorage + + # Create a concrete implementation for testing + class TestStorage(BaseVectorStorage): + async def query(self, *args, **kwargs): + pass + + async def upsert(self, *args, **kwargs): + pass + + async def delete_entity(self, *args, **kwargs): + pass + + async def delete_entity_relation(self, *args, **kwargs): + pass + + async def get_by_id(self, *args, **kwargs): + pass + + async def get_by_ids(self, *args, **kwargs): + pass + + async def delete(self, *args, **kwargs): + pass + + async def get_vectors_by_ids(self, *args, **kwargs): + pass + + async def index_done_callback(self): + pass + + async def drop(self): + pass + + # Case 1: Direct method available + embedding_func = EmbeddingFunc( + embedding_dim=1536, func=dummy_embedding_func, model_name="test-model" + ) + storage = TestStorage( + namespace="test", + workspace="test", + global_config={}, + embedding_func=embedding_func, + ) + assert ( + storage._generate_collection_suffix() == "test_model_1536d" + ), "Should use direct method when available" + + # Case 2: Global config fallback + mock_embedding_func = Mock(spec=[]) # No get_model_identifier + storage = TestStorage( + namespace="test", + workspace="test", + global_config={"embedding_func": embedding_func}, + embedding_func=mock_embedding_func, + ) + assert ( + storage._generate_collection_suffix() == "test_model_1536d" + ), "Should fallback to global_config embedding_func" + + # Case 3: Final fallback (no embedding_func anywhere) + storage = TestStorage( + namespace="test", + workspace="test", + global_config={}, + embedding_func=mock_embedding_func, + ) + assert storage._generate_collection_suffix() == "", ( + "Should return empty string when no model_identifier available" + ) +