test: add Qdrant legacy migration E2E test
Why this change is needed: Complete E2E test coverage for vector model isolation feature requires testing legacy data migration for both PostgreSQL and Qdrant backends. Previously only PostgreSQL migration was tested. How it solves it: - Add test_legacy_migration_qdrant() function to test automatic migration from legacy collection (no model suffix) to model-suffixed collection - Test creates legacy "lightrag_vdb_chunks" collection with 1536d vectors - Initializes LightRAG with model_name="text-embedding-ada-002" - Verifies automatic migration to "lightrag_vdb_chunks_text_embedding_ada_002_1536d" - Validates vector count, dimension, and collection existence Impact: - Ensures Qdrant migration works correctly in real scenarios - Provides parity with PostgreSQL E2E test coverage - Will be automatically run in CI via -k "qdrant" filter Testing: - Test follows same pattern as test_legacy_migration_postgres - Uses complete LightRAG initialization with mock LLM and embedding - Includes proper cleanup via qdrant_cleanup fixture - Syntax validated with python3 -m py_compile
This commit is contained in:
parent
dc2061583f
commit
c7e7b347e9
1 changed files with 125 additions and 4 deletions
|
|
@ -2,10 +2,11 @@
|
||||||
E2E Tests for Multi-Instance LightRAG with Multiple Workspaces
|
E2E Tests for Multi-Instance LightRAG with Multiple Workspaces
|
||||||
|
|
||||||
These tests verify:
|
These tests verify:
|
||||||
1. Multiple LightRAG instances with different embedding models
|
1. Legacy data migration from tables/collections without model suffix
|
||||||
2. Multiple workspaces isolation
|
2. Multiple LightRAG instances with different embedding models
|
||||||
3. Both PostgreSQL and Qdrant vector storage
|
3. Multiple workspaces isolation
|
||||||
4. Real document insertion and query operations
|
4. Both PostgreSQL and Qdrant vector storage
|
||||||
|
5. Real document insertion and query operations
|
||||||
|
|
||||||
Prerequisites:
|
Prerequisites:
|
||||||
- PostgreSQL with pgvector extension
|
- PostgreSQL with pgvector extension
|
||||||
|
|
@ -106,6 +107,8 @@ def qdrant_cleanup(qdrant_config):
|
||||||
)
|
)
|
||||||
|
|
||||||
collections_to_delete = [
|
collections_to_delete = [
|
||||||
|
"lightrag_vdb_chunks", # Legacy collection (no model suffix)
|
||||||
|
"lightrag_vdb_chunks_text_embedding_ada_002_1536d", # Migrated collection
|
||||||
"lightrag_vdb_chunks_model_a_768d",
|
"lightrag_vdb_chunks_model_a_768d",
|
||||||
"lightrag_vdb_chunks_model_b_1024d",
|
"lightrag_vdb_chunks_model_b_1024d",
|
||||||
]
|
]
|
||||||
|
|
@ -292,6 +295,124 @@ async def test_legacy_migration_postgres(
|
||||||
shutil.rmtree(temp_dir, ignore_errors=True)
|
shutil.rmtree(temp_dir, ignore_errors=True)
|
||||||
|
|
||||||
|
|
||||||
|
# Test: Qdrant legacy data migration
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_legacy_migration_qdrant(
|
||||||
|
qdrant_cleanup, mock_llm_func, mock_tokenizer, qdrant_config
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Test automatic migration from legacy Qdrant collection (no model suffix)
|
||||||
|
|
||||||
|
Scenario:
|
||||||
|
1. Create legacy collection without model suffix
|
||||||
|
2. Insert test vectors with 1536d
|
||||||
|
3. Initialize LightRAG with model_name (triggers migration)
|
||||||
|
4. Verify data migrated to new collection with model suffix
|
||||||
|
"""
|
||||||
|
print("\n[E2E Test] Qdrant legacy data migration (1536d)")
|
||||||
|
|
||||||
|
# Create temp working dir
|
||||||
|
import tempfile
|
||||||
|
import shutil
|
||||||
|
temp_dir = tempfile.mkdtemp(prefix="lightrag_qdrant_legacy_")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Step 1: Create legacy collection and insert data
|
||||||
|
legacy_collection = "lightrag_vdb_chunks"
|
||||||
|
|
||||||
|
# Create legacy collection without model suffix
|
||||||
|
from qdrant_client.models import Distance, VectorParams
|
||||||
|
|
||||||
|
qdrant_cleanup.create_collection(
|
||||||
|
collection_name=legacy_collection,
|
||||||
|
vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
|
||||||
|
)
|
||||||
|
print(f"✅ Created legacy collection: {legacy_collection}")
|
||||||
|
|
||||||
|
# Insert 3 test records
|
||||||
|
from qdrant_client.models import PointStruct
|
||||||
|
|
||||||
|
test_vectors = []
|
||||||
|
for i in range(3):
|
||||||
|
vector = np.random.rand(1536).tolist()
|
||||||
|
point = PointStruct(
|
||||||
|
id=i,
|
||||||
|
vector=vector,
|
||||||
|
payload={
|
||||||
|
"id": f"legacy_{i}",
|
||||||
|
"content": f"Legacy content {i}",
|
||||||
|
"tokens": 100,
|
||||||
|
"chunk_order_index": i,
|
||||||
|
"full_doc_id": "legacy_doc",
|
||||||
|
"file_path": "/test/path",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
test_vectors.append(point)
|
||||||
|
|
||||||
|
qdrant_cleanup.upsert(
|
||||||
|
collection_name=legacy_collection,
|
||||||
|
points=test_vectors
|
||||||
|
)
|
||||||
|
|
||||||
|
# Verify legacy data
|
||||||
|
legacy_count = qdrant_cleanup.count(legacy_collection).count
|
||||||
|
print(f"✅ Legacy collection created with {legacy_count} vectors")
|
||||||
|
|
||||||
|
# Step 2: Initialize LightRAG with model_name (triggers migration)
|
||||||
|
async def embed_func(texts):
|
||||||
|
await asyncio.sleep(0)
|
||||||
|
return np.random.rand(len(texts), 1536)
|
||||||
|
|
||||||
|
embedding_func = EmbeddingFunc(
|
||||||
|
embedding_dim=1536,
|
||||||
|
max_token_size=8192,
|
||||||
|
func=embed_func,
|
||||||
|
model_name="text-embedding-ada-002"
|
||||||
|
)
|
||||||
|
|
||||||
|
rag = LightRAG(
|
||||||
|
working_dir=temp_dir,
|
||||||
|
llm_model_func=mock_llm_func,
|
||||||
|
embedding_func=embedding_func,
|
||||||
|
tokenizer=mock_tokenizer,
|
||||||
|
vector_storage="QdrantVectorDBStorage",
|
||||||
|
vector_db_storage_cls_kwargs={
|
||||||
|
**qdrant_config,
|
||||||
|
"cosine_better_than_threshold": 0.8
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
print("🔄 Initializing LightRAG (triggers migration)...")
|
||||||
|
await rag.initialize_storages()
|
||||||
|
|
||||||
|
# Step 3: Verify migration
|
||||||
|
new_collection = rag.chunk_entity_relation_graph.chunk_vdb.final_namespace
|
||||||
|
assert "text_embedding_ada_002_1536d" in new_collection
|
||||||
|
|
||||||
|
# Verify new collection exists
|
||||||
|
assert qdrant_cleanup.collection_exists(new_collection), \
|
||||||
|
f"New collection {new_collection} should exist"
|
||||||
|
|
||||||
|
new_count = qdrant_cleanup.count(new_collection).count
|
||||||
|
|
||||||
|
assert new_count == legacy_count, \
|
||||||
|
f"Expected {legacy_count} vectors migrated, got {new_count}"
|
||||||
|
print(f"✅ Migration successful: {new_count}/{legacy_count} vectors migrated")
|
||||||
|
print(f"✅ New collection: {new_collection}")
|
||||||
|
|
||||||
|
# Verify vector dimension
|
||||||
|
collection_info = qdrant_cleanup.get_collection(new_collection)
|
||||||
|
assert collection_info.config.params.vectors.size == 1536, \
|
||||||
|
"Migrated collection should have 1536 dimensions"
|
||||||
|
print(f"✅ Vector dimension verified: {collection_info.config.params.vectors.size}d")
|
||||||
|
|
||||||
|
await rag.finalize_storages()
|
||||||
|
|
||||||
|
finally:
|
||||||
|
# Cleanup temp dir
|
||||||
|
shutil.rmtree(temp_dir, ignore_errors=True)
|
||||||
|
|
||||||
|
|
||||||
# Test: Multiple LightRAG instances with PostgreSQL
|
# Test: Multiple LightRAG instances with PostgreSQL
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_multi_instance_postgres(
|
async def test_multi_instance_postgres(
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue