test: add Qdrant legacy migration E2E test
Why this change is needed:
Complete E2E test coverage for the vector model isolation feature requires testing legacy data migration for both the PostgreSQL and Qdrant backends. Previously, only the PostgreSQL migration was tested.

How it solves it:
- Add test_legacy_migration_qdrant() function to test automatic migration from the legacy collection (no model suffix) to the model-suffixed collection
- Test creates the legacy "lightrag_vdb_chunks" collection with 1536d vectors
- Initializes LightRAG with model_name="text-embedding-ada-002"
- Verifies automatic migration to "lightrag_vdb_chunks_text_embedding_ada_002_1536d"
- Validates vector count, dimension, and collection existence

Impact:
- Ensures Qdrant migration works correctly in real scenarios
- Provides parity with PostgreSQL E2E test coverage
- Will be automatically run in CI via the -k "qdrant" filter

Testing:
- Test follows the same pattern as test_legacy_migration_postgres
- Uses complete LightRAG initialization with mock LLM and embedding
- Includes proper cleanup via the qdrant_cleanup fixture
- Syntax validated with python3 -m py_compile
This commit is contained in:
parent
dc2061583f
commit
c7e7b347e9
1 changed file with 125 additions and 4 deletions
|
|
@ -2,10 +2,11 @@
|
|||
E2E Tests for Multi-Instance LightRAG with Multiple Workspaces
|
||||
|
||||
These tests verify:
|
||||
1. Multiple LightRAG instances with different embedding models
|
||||
2. Multiple workspaces isolation
|
||||
3. Both PostgreSQL and Qdrant vector storage
|
||||
4. Real document insertion and query operations
|
||||
1. Legacy data migration from tables/collections without model suffix
|
||||
2. Multiple LightRAG instances with different embedding models
|
||||
3. Multiple workspaces isolation
|
||||
4. Both PostgreSQL and Qdrant vector storage
|
||||
5. Real document insertion and query operations
|
||||
|
||||
Prerequisites:
|
||||
- PostgreSQL with pgvector extension
|
||||
|
|
@ -106,6 +107,8 @@ def qdrant_cleanup(qdrant_config):
|
|||
)
|
||||
|
||||
collections_to_delete = [
|
||||
"lightrag_vdb_chunks", # Legacy collection (no model suffix)
|
||||
"lightrag_vdb_chunks_text_embedding_ada_002_1536d", # Migrated collection
|
||||
"lightrag_vdb_chunks_model_a_768d",
|
||||
"lightrag_vdb_chunks_model_b_1024d",
|
||||
]
|
||||
|
|
@ -292,6 +295,124 @@ async def test_legacy_migration_postgres(
|
|||
shutil.rmtree(temp_dir, ignore_errors=True)
|
||||
|
||||
|
||||
# Test: Qdrant legacy data migration
|
||||
@pytest.mark.asyncio
async def test_legacy_migration_qdrant(
    qdrant_cleanup, mock_llm_func, mock_tokenizer, qdrant_config
):
    """
    Test automatic migration from legacy Qdrant collection (no model suffix)

    Scenario:
    1. Create legacy collection without model suffix
    2. Insert test vectors with 1536d
    3. Initialize LightRAG with model_name (triggers migration)
    4. Verify data migrated to new collection with model suffix

    Args:
        qdrant_cleanup: Fixture value used here as the Qdrant client
            (create_collection / upsert / count / get_collection).
        mock_llm_func: Fixture passed to LightRAG as ``llm_model_func``.
        mock_tokenizer: Fixture passed to LightRAG as ``tokenizer``.
        qdrant_config: Mapping expanded into ``vector_db_storage_cls_kwargs``.
    """
    # Kept function-local, as in the original test body.
    import shutil
    import tempfile

    from qdrant_client.models import Distance, PointStruct, VectorParams

    print("\n[E2E Test] Qdrant legacy data migration (1536d)")

    legacy_collection = "lightrag_vdb_chunks"
    embedding_dim = 1536
    legacy_record_count = 3

    # Create temp working dir
    temp_dir = tempfile.mkdtemp(prefix="lightrag_qdrant_legacy_")

    try:
        # Step 1: Create legacy collection (no model suffix) and insert data
        qdrant_cleanup.create_collection(
            collection_name=legacy_collection,
            vectors_config=VectorParams(
                size=embedding_dim, distance=Distance.COSINE
            ),
        )
        print(f"✅ Created legacy collection: {legacy_collection}")

        legacy_points = [
            PointStruct(
                id=i,
                vector=np.random.rand(embedding_dim).tolist(),
                payload={
                    "id": f"legacy_{i}",
                    "content": f"Legacy content {i}",
                    "tokens": 100,
                    "chunk_order_index": i,
                    "full_doc_id": "legacy_doc",
                    "file_path": "/test/path",
                },
            )
            for i in range(legacy_record_count)
        ]
        qdrant_cleanup.upsert(
            collection_name=legacy_collection,
            points=legacy_points,
        )

        # Verify legacy data. Without this check the later
        # ``new_count == legacy_count`` comparison could pass vacuously
        # (0 == 0) if seeding silently stored nothing.
        legacy_count = qdrant_cleanup.count(legacy_collection).count
        assert legacy_count == legacy_record_count, (
            f"Expected {legacy_record_count} legacy vectors, got {legacy_count}"
        )
        print(f"✅ Legacy collection created with {legacy_count} vectors")

        # Step 2: Initialize LightRAG with model_name (triggers migration)
        async def embed_func(texts):
            await asyncio.sleep(0)
            return np.random.rand(len(texts), embedding_dim)

        embedding_func = EmbeddingFunc(
            embedding_dim=embedding_dim,
            max_token_size=8192,
            func=embed_func,
            model_name="text-embedding-ada-002",
        )

        rag = LightRAG(
            working_dir=temp_dir,
            llm_model_func=mock_llm_func,
            embedding_func=embedding_func,
            tokenizer=mock_tokenizer,
            vector_storage="QdrantVectorDBStorage",
            vector_db_storage_cls_kwargs={
                **qdrant_config,
                "cosine_better_than_threshold": 0.8,
            },
        )

        print("🔄 Initializing LightRAG (triggers migration)...")
        await rag.initialize_storages()

        try:
            # Step 3: Verify migration
            # NOTE(review): the chunk vector storage is read from the LightRAG
            # instance (``chunks_vdb``) instead of through the graph storage
            # (``chunk_entity_relation_graph.chunk_vdb``) — confirm the
            # attribute name against the LightRAG class.
            new_collection = rag.chunks_vdb.final_namespace
            assert "text_embedding_ada_002_1536d" in new_collection

            # Verify new collection exists
            assert qdrant_cleanup.collection_exists(
                new_collection
            ), f"New collection {new_collection} should exist"

            new_count = qdrant_cleanup.count(new_collection).count
            assert (
                new_count == legacy_count
            ), f"Expected {legacy_count} vectors migrated, got {new_count}"
            print(
                f"✅ Migration successful: {new_count}/{legacy_count} vectors migrated"
            )
            print(f"✅ New collection: {new_collection}")

            # Verify vector dimension
            collection_info = qdrant_cleanup.get_collection(new_collection)
            migrated_dim = collection_info.config.params.vectors.size
            assert (
                migrated_dim == embedding_dim
            ), "Migrated collection should have 1536 dimensions"
            print(f"✅ Vector dimension verified: {migrated_dim}d")
        finally:
            # Finalize even when an assertion above fails, so storages
            # initialized by this test are not left open.
            await rag.finalize_storages()

    finally:
        # Cleanup temp dir
        shutil.rmtree(temp_dir, ignore_errors=True)
|
||||
|
||||
|
||||
# Test: Multiple LightRAG instances with PostgreSQL
|
||||
@pytest.mark.asyncio
|
||||
async def test_multi_instance_postgres(
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue