test: add Qdrant legacy migration E2E test

Why this change is needed: Complete E2E test coverage for vector model isolation feature requires testing legacy data migration for both PostgreSQL and Qdrant backends. Previously only PostgreSQL migration was tested. How it solves it: - Add test_legacy_migration_qdrant() function to test automatic migration from legacy collection (no model suffix) to model-suffixed collection - Test creates legacy "lightrag_vdb_chunks" collection with 1536d vectors - Initializes LightRAG with model_name="text-embedding-ada-002" - Verifies automatic migration to "lightrag_vdb_chunks_text_embedding_ada_002_1536d" - Validates vector count, dimension, and collection existence Impact: - Ensures Qdrant migration works correctly in real scenarios - Provides parity with PostgreSQL E2E test coverage - Will be automatically run in CI via -k "qdrant" filter Testing: - Test follows same pattern as test_legacy_migration_postgres - Uses complete LightRAG initialization with mock LLM and embedding - Includes proper cleanup via qdrant_cleanup fixture - Syntax validated with python3 -m py_compile
2025-11-20 00:19:21 +08:00 · 2025-11-20 00:19:21 +08:00 · c7e7b347e9
commit c7e7b347e9
parent dc2061583f
1 changed files with 125 additions and 4 deletions
--- a/tests/test_e2e_multi_instance.py
+++ b/tests/test_e2e_multi_instance.py
@ -2,10 +2,11 @@
 E2E Tests for Multi-Instance LightRAG with Multiple Workspaces

 These tests verify:
-1. Multiple LightRAG instances with different embedding models
-2. Multiple workspaces isolation
-3. Both PostgreSQL and Qdrant vector storage
-4. Real document insertion and query operations
+1. Legacy data migration from tables/collections without model suffix
+2. Multiple LightRAG instances with different embedding models
+3. Multiple workspaces isolation
+4. Both PostgreSQL and Qdrant vector storage
+5. Real document insertion and query operations

 Prerequisites:
 - PostgreSQL with pgvector extension
@ -106,6 +107,8 @@ def qdrant_cleanup(qdrant_config):
    )

    collections_to_delete = [
+        "lightrag_vdb_chunks",  # Legacy collection (no model suffix)
+        "lightrag_vdb_chunks_text_embedding_ada_002_1536d",  # Migrated collection
        "lightrag_vdb_chunks_model_a_768d",
        "lightrag_vdb_chunks_model_b_1024d",
    ]
@ -292,6 +295,124 @@ async def test_legacy_migration_postgres(
        shutil.rmtree(temp_dir, ignore_errors=True)


+# Test: Qdrant legacy data migration
+@pytest.mark.asyncio
+async def test_legacy_migration_qdrant(
+    qdrant_cleanup, mock_llm_func, mock_tokenizer, qdrant_config
+):
+    """
+    Test automatic migration from legacy Qdrant collection (no model suffix)
+
+    Scenario:
+    1. Create legacy collection without model suffix
+    2. Insert test vectors with 1536d
+    3. Initialize LightRAG with model_name (triggers migration)
+    4. Verify data migrated to new collection with model suffix
+    """
+    print("\n[E2E Test] Qdrant legacy data migration (1536d)")
+
+    # Create temp working dir
+    import tempfile
+    import shutil
+    temp_dir = tempfile.mkdtemp(prefix="lightrag_qdrant_legacy_")
+
+    try:
+        # Step 1: Create legacy collection and insert data
+        legacy_collection = "lightrag_vdb_chunks"
+
+        # Create legacy collection without model suffix
+        from qdrant_client.models import Distance, VectorParams
+
+        qdrant_cleanup.create_collection(
+            collection_name=legacy_collection,
+            vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
+        )
+        print(f"✅ Created legacy collection: {legacy_collection}")
+
+        # Insert 3 test records
+        from qdrant_client.models import PointStruct
+
+        test_vectors = []
+        for i in range(3):
+            vector = np.random.rand(1536).tolist()
+            point = PointStruct(
+                id=i,
+                vector=vector,
+                payload={
+                    "id": f"legacy_{i}",
+                    "content": f"Legacy content {i}",
+                    "tokens": 100,
+                    "chunk_order_index": i,
+                    "full_doc_id": "legacy_doc",
+                    "file_path": "/test/path",
+                }
+            )
+            test_vectors.append(point)
+
+        qdrant_cleanup.upsert(
+            collection_name=legacy_collection,
+            points=test_vectors
+        )
+
+        # Verify legacy data
+        legacy_count = qdrant_cleanup.count(legacy_collection).count
+        print(f"✅ Legacy collection created with {legacy_count} vectors")
+
+        # Step 2: Initialize LightRAG with model_name (triggers migration)
+        async def embed_func(texts):
+            await asyncio.sleep(0)
+            return np.random.rand(len(texts), 1536)
+
+        embedding_func = EmbeddingFunc(
+            embedding_dim=1536,
+            max_token_size=8192,
+            func=embed_func,
+            model_name="text-embedding-ada-002"
+        )
+
+        rag = LightRAG(
+            working_dir=temp_dir,
+            llm_model_func=mock_llm_func,
+            embedding_func=embedding_func,
+            tokenizer=mock_tokenizer,
+            vector_storage="QdrantVectorDBStorage",
+            vector_db_storage_cls_kwargs={
+                **qdrant_config,
+                "cosine_better_than_threshold": 0.8
+            },
+        )
+
+        print("🔄 Initializing LightRAG (triggers migration)...")
+        await rag.initialize_storages()
+
+        # Step 3: Verify migration
+        new_collection = rag.chunk_entity_relation_graph.chunk_vdb.final_namespace
+        assert "text_embedding_ada_002_1536d" in new_collection
+
+        # Verify new collection exists
+        assert qdrant_cleanup.collection_exists(new_collection), \
+            f"New collection {new_collection} should exist"
+
+        new_count = qdrant_cleanup.count(new_collection).count
+
+        assert new_count == legacy_count, \
+            f"Expected {legacy_count} vectors migrated, got {new_count}"
+        print(f"✅ Migration successful: {new_count}/{legacy_count} vectors migrated")
+        print(f"✅ New collection: {new_collection}")
+
+        # Verify vector dimension
+        collection_info = qdrant_cleanup.get_collection(new_collection)
+        assert collection_info.config.params.vectors.size == 1536, \
+            "Migrated collection should have 1536 dimensions"
+        print(f"✅ Vector dimension verified: {collection_info.config.params.vectors.size}d")
+
+        await rag.finalize_storages()
+
+    finally:
+        # Cleanup temp dir
+        shutil.rmtree(temp_dir, ignore_errors=True)
+
+
 # Test: Multiple LightRAG instances with PostgreSQL
@pytest.mark.asyncio
 async def test_multi_instance_postgres(