fix: correct Qdrant legacy_namespace for data migration
Why this change is needed:
The legacy_namespace logic incorrectly included the workspace in the
collection name, which caused migration to fail in E2E tests. When a
workspace was set (e.g., to a temp directory path), legacy_namespace became
"/tmp/xxx_chunks" instead of "lightrag_vdb_chunks", so the migration
logic could not find the legacy collection.
How it solves it:
Changed legacy_namespace to always use the old naming scheme without a
workspace prefix: "lightrag_vdb_{namespace}". This matches the actual
collection names created by pre-migration code and aligns with PostgreSQL's
approach, where legacy_table_name = base_table (without workspace).
Impact:
- Qdrant legacy data migration now works correctly in E2E tests
- All unit tests pass (6/6 for both Qdrant and PostgreSQL)
- E2E test_legacy_migration_qdrant should now pass
Testing:
- Unit tests: pytest tests/test_qdrant_migration.py -v (6/6 passed)
- Unit tests: pytest tests/test_postgres_migration.py -v (6/6 passed)
- Updated test_qdrant_collection_naming to verify new legacy_namespace
This commit is contained in:
parent
fa7a43a6d2
commit
5d9547344a
2 changed files with 9 additions and 11 deletions
|
|
@ -292,15 +292,13 @@ class QdrantVectorDBStorage(BaseVectorStorage):
|
||||||
# Generate model suffix
|
# Generate model suffix
|
||||||
model_suffix = self._generate_collection_suffix()
|
model_suffix = self._generate_collection_suffix()
|
||||||
|
|
||||||
# Get legacy namespace for data migration from old version
|
# Legacy collection name (without model suffix, for migration)
|
||||||
# Note: Legacy namespace logic is preserved for backward compatibility
|
# This matches the old naming scheme before model isolation was implemented
|
||||||
if effective_workspace:
|
# Example: "lightrag_vdb_chunks" (without model suffix)
|
||||||
self.legacy_namespace = f"{effective_workspace}_{self.namespace}"
|
self.legacy_namespace = f"lightrag_vdb_{self.namespace}"
|
||||||
else:
|
|
||||||
self.legacy_namespace = self.namespace
|
|
||||||
|
|
||||||
# Use a shared collection with payload-based partitioning (Qdrant's recommended approach)
|
# New naming scheme with model isolation
|
||||||
# New naming scheme: lightrag_vdb_{namespace}_{model}_{dim}d
|
# Example: "lightrag_vdb_chunks_text_embedding_ada_002_1536d"
|
||||||
self.final_namespace = f"lightrag_vdb_{self.namespace}_{model_suffix}"
|
self.final_namespace = f"lightrag_vdb_{self.namespace}_{model_suffix}"
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
|
|
|
||||||
|
|
@ -61,8 +61,8 @@ async def test_qdrant_collection_naming(mock_qdrant_client, mock_embedding_func)
|
||||||
assert expected_suffix in storage.final_namespace
|
assert expected_suffix in storage.final_namespace
|
||||||
assert storage.final_namespace == f"lightrag_vdb_chunks_{expected_suffix}"
|
assert storage.final_namespace == f"lightrag_vdb_chunks_{expected_suffix}"
|
||||||
|
|
||||||
# Verify legacy namespace
|
# Verify legacy namespace (should not include workspace, just the base collection name)
|
||||||
assert storage.legacy_namespace == "test_ws_chunks"
|
assert storage.legacy_namespace == "lightrag_vdb_chunks"
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_qdrant_migration_trigger(mock_qdrant_client, mock_embedding_func):
|
async def test_qdrant_migration_trigger(mock_qdrant_client, mock_embedding_func):
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue