From 5d9547344ababb45df903f13f89386895b8f02c1 Mon Sep 17 00:00:00 2001
From: BukeLy
Date: Thu, 20 Nov 2025 01:08:15 +0800
Subject: [PATCH] fix: correct Qdrant legacy_namespace for data migration

Why this change is needed:
The legacy_namespace logic was incorrectly including workspace in the
collection name, causing migration to fail in E2E tests. When workspace
was set (e.g., to a temp directory path), legacy_namespace became
"/tmp/xxx_chunks" instead of "lightrag_vdb_chunks", so the migration
logic couldn't find the legacy collection.

How it solves it:
Changed legacy_namespace to always use the old naming scheme without
workspace prefix: "lightrag_vdb_{namespace}". This matches the actual
collection names from pre-migration code and aligns with PostgreSQL's
approach where legacy_table_name = base_table (without workspace).

Impact:
- Qdrant legacy data migration now works correctly in E2E tests
- All unit tests pass (6/6 for both Qdrant and PostgreSQL)
- E2E test_legacy_migration_qdrant should now pass

Testing:
- Unit tests: pytest tests/test_qdrant_migration.py -v (6/6 passed)
- Unit tests: pytest tests/test_postgres_migration.py -v (6/6 passed)
- Updated test_qdrant_collection_naming to verify new legacy_namespace
---
 lightrag/kg/qdrant_impl.py     | 14 ++++++--------
 tests/test_qdrant_migration.py |  6 +++---
 2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/lightrag/kg/qdrant_impl.py b/lightrag/kg/qdrant_impl.py
index 99b9f6f5..e4d08b71 100644
--- a/lightrag/kg/qdrant_impl.py
+++ b/lightrag/kg/qdrant_impl.py
@@ -292,15 +292,13 @@ class QdrantVectorDBStorage(BaseVectorStorage):
         # Generate model suffix
         model_suffix = self._generate_collection_suffix()
 
-        # Get legacy namespace for data migration from old version
-        # Note: Legacy namespace logic is preserved for backward compatibility
-        if effective_workspace:
-            self.legacy_namespace = f"{effective_workspace}_{self.namespace}"
-        else:
-            self.legacy_namespace = self.namespace
+        # Legacy collection name (without model suffix, for migration)
+        # This matches the old naming scheme before model isolation was implemented
+        # Example: "lightrag_vdb_chunks" (without model suffix)
+        self.legacy_namespace = f"lightrag_vdb_{self.namespace}"
 
-        # Use a shared collection with payload-based partitioning (Qdrant's recommended approach)
-        # New naming scheme: lightrag_vdb_{namespace}_{model}_{dim}d
+        # New naming scheme with model isolation
+        # Example: "lightrag_vdb_chunks_text_embedding_ada_002_1536d"
         self.final_namespace = f"lightrag_vdb_{self.namespace}_{model_suffix}"
 
         logger.info(
diff --git a/tests/test_qdrant_migration.py b/tests/test_qdrant_migration.py
index 2a343012..403dff9b 100644
--- a/tests/test_qdrant_migration.py
+++ b/tests/test_qdrant_migration.py
@@ -60,9 +60,9 @@ async def test_qdrant_collection_naming(mock_qdrant_client, mock_embedding_func)
     expected_suffix = "test_model_768d"
     assert expected_suffix in storage.final_namespace
     assert storage.final_namespace == f"lightrag_vdb_chunks_{expected_suffix}"
-    
-    # Verify legacy namespace
-    assert storage.legacy_namespace == "test_ws_chunks"
+
+    # Verify legacy namespace (should not include workspace, just the base collection name)
+    assert storage.legacy_namespace == "lightrag_vdb_chunks"
 
 @pytest.mark.asyncio
 async def test_qdrant_migration_trigger(mock_qdrant_client, mock_embedding_func):