test: add Qdrant legacy migration E2E test

Why this change is needed: Complete E2E test coverage for vector model isolation feature requires testing legacy data migration for both PostgreSQL and Qdrant backends. Previously only PostgreSQL migration was tested. How it solves it: - Add test_legacy_migration_qdrant() function to test automatic migration from legacy collection (no model suffix) to model-suffixed collection - Test creates legacy "lightrag_vdb_chunks" collection with 1536d vectors - Initializes LightRAG with model_name="text-embedding-ada-002" - Verifies automatic migration to "lightrag_vdb_chunks_text_embedding_ada_002_1536d" - Validates vector count, dimension, and collection existence Impact: - Ensures Qdrant migration works correctly in real scenarios - Provides parity with PostgreSQL E2E test coverage - Will be automatically run in CI via -k "qdrant" filter Testing: - Test follows same pattern as test_legacy_migration_postgres - Uses complete LightRAG initialization with mock LLM and embedding - Includes proper cleanup via qdrant_cleanup fixture - Syntax validated with python3 -m py_compile
2025-11-20 00:19:21 +08:00 · 2025-11-20 00:19:21 +08:00 · c7e7b347e9
commit c7e7b347e9
parent dc2061583f
1 changed files with 125 additions and 4 deletions
--- a/tests/test_e2e_multi_instance.py
+++ b/tests/test_e2e_multi_instance.py
@ -2,10 +2,11 @@
 E2E Tests for Multi-Instance LightRAG with Multiple Workspaces
 These tests verify:
-1. Multiple LightRAG instances with different embedding models
+1. Legacy data migration from tables/collections without model suffix
-2. Multiple workspaces isolation
+2. Multiple LightRAG instances with different embedding models
-3. Both PostgreSQL and Qdrant vector storage
+3. Multiple workspaces isolation
-4. Real document insertion and query operations
+4. Both PostgreSQL and Qdrant vector storage
 5. Real document insertion and query operations
 Prerequisites:
 - PostgreSQL with pgvector extension
@ -106,6 +107,8 @@ def qdrant_cleanup(qdrant_config):
    )
    collections_to_delete = [
        "lightrag_vdb_chunks",  # Legacy collection (no model suffix)
        "lightrag_vdb_chunks_text_embedding_ada_002_1536d",  # Migrated collection
        "lightrag_vdb_chunks_model_a_768d",
        "lightrag_vdb_chunks_model_b_1024d",
    ]
@ -292,6 +295,124 @@ async def test_legacy_migration_postgres(
        shutil.rmtree(temp_dir, ignore_errors=True)
 # Test: Qdrant legacy data migration
@pytest.mark.asyncio
 async def test_legacy_migration_qdrant(
    qdrant_cleanup, mock_llm_func, mock_tokenizer, qdrant_config
 ):
    """
    Test automatic migration from legacy Qdrant collection (no model suffix)
    Scenario:
    1. Create legacy collection without model suffix
    2. Insert test vectors with 1536d
    3. Initialize LightRAG with model_name (triggers migration)
    4. Verify data migrated to new collection with model suffix
    """
    print("\n[E2E Test] Qdrant legacy data migration (1536d)")
    # Create temp working dir
    import tempfile
    import shutil
    temp_dir = tempfile.mkdtemp(prefix="lightrag_qdrant_legacy_")
    try:
        # Step 1: Create legacy collection and insert data
        legacy_collection = "lightrag_vdb_chunks"
        # Create legacy collection without model suffix
        from qdrant_client.models import Distance, VectorParams
        qdrant_cleanup.create_collection(
            collection_name=legacy_collection,
            vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
        )
        print(f"✅ Created legacy collection: {legacy_collection}")
        # Insert 3 test records
        from qdrant_client.models import PointStruct
        test_vectors = []
        for i in range(3):
            vector = np.random.rand(1536).tolist()
            point = PointStruct(
                id=i,
                vector=vector,
                payload={
                    "id": f"legacy_{i}",
                    "content": f"Legacy content {i}",
                    "tokens": 100,
                    "chunk_order_index": i,
                    "full_doc_id": "legacy_doc",
                    "file_path": "/test/path",
                }
            )
            test_vectors.append(point)
        qdrant_cleanup.upsert(
            collection_name=legacy_collection,
            points=test_vectors
        )
        # Verify legacy data
        legacy_count = qdrant_cleanup.count(legacy_collection).count
        print(f"✅ Legacy collection created with {legacy_count} vectors")
        # Step 2: Initialize LightRAG with model_name (triggers migration)
        async def embed_func(texts):
            await asyncio.sleep(0)
            return np.random.rand(len(texts), 1536)
        embedding_func = EmbeddingFunc(
            embedding_dim=1536,
            max_token_size=8192,
            func=embed_func,
            model_name="text-embedding-ada-002"
        )
        rag = LightRAG(
            working_dir=temp_dir,
            llm_model_func=mock_llm_func,
            embedding_func=embedding_func,
            tokenizer=mock_tokenizer,
            vector_storage="QdrantVectorDBStorage",
            vector_db_storage_cls_kwargs={
                **qdrant_config,
                "cosine_better_than_threshold": 0.8
            },
        )
        print("🔄 Initializing LightRAG (triggers migration)...")
        await rag.initialize_storages()
        # Step 3: Verify migration
        new_collection = rag.chunk_entity_relation_graph.chunk_vdb.final_namespace
        assert "text_embedding_ada_002_1536d" in new_collection
        # Verify new collection exists
        assert qdrant_cleanup.collection_exists(new_collection), \
            f"New collection {new_collection} should exist"
        new_count = qdrant_cleanup.count(new_collection).count
        assert new_count == legacy_count, \
            f"Expected {legacy_count} vectors migrated, got {new_count}"
        print(f"✅ Migration successful: {new_count}/{legacy_count} vectors migrated")
        print(f"✅ New collection: {new_collection}")
        # Verify vector dimension
        collection_info = qdrant_cleanup.get_collection(new_collection)
        assert collection_info.config.params.vectors.size == 1536, \
            "Migrated collection should have 1536 dimensions"
        print(f"✅ Vector dimension verified: {collection_info.config.params.vectors.size}d")
        await rag.finalize_storages()
    finally:
        # Cleanup temp dir
        shutil.rmtree(temp_dir, ignore_errors=True)
 # Test: Multiple LightRAG instances with PostgreSQL
@pytest.mark.asyncio
 async def test_multi_instance_postgres(