This change ensures that when the model_suffix is empty, the final_namespace falls back to the legacy_namespace, preventing potential naming issues. A warning is logged to inform users about the missing model suffix and the fallback to the legacy naming scheme. Additionally, comprehensive tests have been added to verify the behavior of both PostgreSQL and Qdrant storage when model_suffix is empty, ensuring that the naming conventions are correctly applied and that no trailing underscores are present. Impact: - Prevents crashes due to empty model_suffix - Provides clear feedback to users regarding configuration issues - Maintains backward compatibility with existing setups Testing: All new tests pass, validating the handling of empty model_suffix scenarios.
294 lines
10 KiB
Python
294 lines
10 KiB
Python
"""
|
|
Tests for handling empty model suffix in PostgreSQL and Qdrant storage.
|
|
|
|
This test module verifies that both storage backends gracefully handle
|
|
the case when _generate_collection_suffix() returns an empty string.
|
|
"""
|
|
|
|
import pytest
|
|
from unittest.mock import Mock, patch, MagicMock
|
|
from lightrag.base import BaseVectorStorage
|
|
from lightrag.utils import EmbeddingFunc
|
|
|
|
|
|
def dummy_embedding_func(*args, **kwargs):
|
|
"""Dummy embedding function for testing."""
|
|
pass
|
|
|
|
|
|
class TestEmptyModelSuffix:
|
|
"""Test suite for handling empty model suffix scenarios."""
|
|
|
|
def test_postgres_table_name_with_empty_suffix(self):
|
|
"""
|
|
Test PostgreSQL table name generation when model_suffix is empty.
|
|
|
|
Bug Fix Verification:
|
|
- Before: table_name = "LIGHTRAG_VDB_CHUNKS_" (trailing underscore)
|
|
- After: table_name = "LIGHTRAG_VDB_CHUNKS" (fallback to base name)
|
|
"""
|
|
from lightrag.kg.postgres_impl import PostgresVectorDBStorage
|
|
from lightrag.kg.shared_storage import namespace_to_table_name
|
|
|
|
# Create a mock embedding function without get_model_identifier
|
|
mock_embedding_func = Mock(spec=["embedding_dim"])
|
|
mock_embedding_func.embedding_dim = 1536
|
|
|
|
# Setup global_config without embedding_func
|
|
global_config = {
|
|
"embedding_batch_num": 100,
|
|
"pgvector_precision": "hybrid",
|
|
"pg_host": "localhost",
|
|
"pg_port": 5432,
|
|
"pg_user": "user",
|
|
"pg_password": "password",
|
|
"pg_database": "lightrag",
|
|
}
|
|
|
|
# Create PostgreSQL storage instance
|
|
storage = PostgresVectorDBStorage(
|
|
namespace="chunks",
|
|
workspace="test",
|
|
global_config=global_config,
|
|
embedding_func=mock_embedding_func,
|
|
)
|
|
|
|
# Verify that:
|
|
# 1. model_suffix is empty
|
|
# 2. table_name doesn't have trailing underscore
|
|
# 3. table_name equals the base table name
|
|
assert storage.model_suffix == "", "model_suffix should be empty"
|
|
assert (
|
|
not storage.table_name.endswith("_"),
|
|
f"table_name should not have trailing underscore: {storage.table_name}",
|
|
)
|
|
|
|
# Expected base table name
|
|
expected_base = namespace_to_table_name("chunks")
|
|
assert storage.table_name == expected_base, (
|
|
f"table_name should fallback to base name when model_suffix is empty. "
|
|
f"Expected: {expected_base}, Got: {storage.table_name}"
|
|
)
|
|
|
|
def test_qdrant_collection_name_with_empty_suffix(self):
|
|
"""
|
|
Test Qdrant collection name generation when model_suffix is empty.
|
|
|
|
Bug Fix Verification:
|
|
- Before: final_namespace = "lightrag_vdb_chunks_" (trailing underscore)
|
|
- After: final_namespace = "lightrag_vdb_chunks" (fallback to legacy name)
|
|
"""
|
|
from lightrag.kg.qdrant_impl import QdrantVectorDBStorage
|
|
|
|
# Create a mock embedding function without get_model_identifier
|
|
mock_embedding_func = Mock(spec=["embedding_dim"])
|
|
mock_embedding_func.embedding_dim = 1536
|
|
|
|
# Setup global_config without embedding_func
|
|
global_config = {
|
|
"embedding_batch_num": 100,
|
|
"qdrant_url": "http://localhost:6333",
|
|
}
|
|
|
|
# Create Qdrant storage instance
|
|
storage = QdrantVectorDBStorage(
|
|
namespace="chunks",
|
|
workspace="test",
|
|
global_config=global_config,
|
|
embedding_func=mock_embedding_func,
|
|
)
|
|
|
|
# Verify that:
|
|
# 1. model_suffix is empty
|
|
# 2. final_namespace doesn't have trailing underscore
|
|
# 3. final_namespace equals the legacy namespace
|
|
assert storage._generate_collection_suffix() == "", (
|
|
"model_suffix should be empty"
|
|
)
|
|
assert (
|
|
not storage.final_namespace.endswith("_"),
|
|
f"final_namespace should not have trailing underscore: {storage.final_namespace}",
|
|
)
|
|
assert storage.final_namespace == storage.legacy_namespace, (
|
|
f"final_namespace should fallback to legacy_namespace when model_suffix is empty. "
|
|
f"Expected: {storage.legacy_namespace}, Got: {storage.final_namespace}"
|
|
)
|
|
|
|
def test_postgres_table_name_with_valid_suffix(self):
|
|
"""
|
|
Test PostgreSQL table name generation with valid model suffix.
|
|
|
|
Verification:
|
|
- When embedding_func has get_model_identifier, use it
|
|
- table_name has proper format: base_table_model_suffix
|
|
"""
|
|
from lightrag.kg.postgres_impl import PostgresVectorDBStorage
|
|
from lightrag.kg.shared_storage import namespace_to_table_name
|
|
|
|
# Create a proper embedding function with model_name
|
|
embedding_func = EmbeddingFunc(
|
|
embedding_dim=1536, func=dummy_embedding_func, model_name="text-embedding-ada-002"
|
|
)
|
|
|
|
# Setup global_config
|
|
global_config = {
|
|
"embedding_batch_num": 100,
|
|
"pgvector_precision": "hybrid",
|
|
"pg_host": "localhost",
|
|
"pg_port": 5432,
|
|
"pg_user": "user",
|
|
"pg_password": "password",
|
|
"pg_database": "lightrag",
|
|
"embedding_func": embedding_func,
|
|
}
|
|
|
|
# Create PostgreSQL storage instance
|
|
storage = PostgresVectorDBStorage(
|
|
namespace="chunks",
|
|
workspace="test",
|
|
global_config=global_config,
|
|
embedding_func=embedding_func,
|
|
)
|
|
|
|
# Verify that:
|
|
# 1. model_suffix is not empty
|
|
# 2. table_name has correct format
|
|
assert storage.model_suffix != "", "model_suffix should not be empty"
|
|
assert "_" in storage.table_name, "table_name should contain underscore as separator"
|
|
|
|
# Expected format: base_table_model_suffix
|
|
expected_base = namespace_to_table_name("chunks")
|
|
expected_model_id = embedding_func.get_model_identifier()
|
|
expected_table_name = f"{expected_base}_{expected_model_id}"
|
|
|
|
assert storage.table_name == expected_table_name, (
|
|
f"table_name format incorrect. Expected: {expected_table_name}, Got: {storage.table_name}"
|
|
)
|
|
|
|
def test_qdrant_collection_name_with_valid_suffix(self):
|
|
"""
|
|
Test Qdrant collection name generation with valid model suffix.
|
|
|
|
Verification:
|
|
- When embedding_func has get_model_identifier, use it
|
|
- final_namespace has proper format: lightrag_vdb_namespace_model_suffix
|
|
"""
|
|
from lightrag.kg.qdrant_impl import QdrantVectorDBStorage
|
|
|
|
# Create a proper embedding function with model_name
|
|
embedding_func = EmbeddingFunc(
|
|
embedding_dim=1536, func=dummy_embedding_func, model_name="text-embedding-ada-002"
|
|
)
|
|
|
|
# Setup global_config
|
|
global_config = {
|
|
"embedding_batch_num": 100,
|
|
"qdrant_url": "http://localhost:6333",
|
|
"embedding_func": embedding_func,
|
|
}
|
|
|
|
# Create Qdrant storage instance
|
|
storage = QdrantVectorDBStorage(
|
|
namespace="chunks",
|
|
workspace="test",
|
|
global_config=global_config,
|
|
embedding_func=embedding_func,
|
|
)
|
|
|
|
# Verify that:
|
|
# 1. model_suffix is not empty
|
|
# 2. final_namespace has correct format
|
|
model_suffix = storage._generate_collection_suffix()
|
|
assert model_suffix != "", "model_suffix should not be empty"
|
|
assert "_" in storage.final_namespace, (
|
|
"final_namespace should contain underscore as separator"
|
|
)
|
|
|
|
# Expected format: lightrag_vdb_namespace_model_suffix
|
|
expected_model_id = embedding_func.get_model_identifier()
|
|
expected_collection_name = f"lightrag_vdb_chunks_{expected_model_id}"
|
|
|
|
assert storage.final_namespace == expected_collection_name, (
|
|
f"final_namespace format incorrect. Expected: {expected_collection_name}, Got: {storage.final_namespace}"
|
|
)
|
|
|
|
def test_suffix_generation_fallback_chain(self):
|
|
"""
|
|
Test the fallback chain in _generate_collection_suffix.
|
|
|
|
Verification:
|
|
1. Direct method: embedding_func.get_model_identifier()
|
|
2. Global config fallback: global_config["embedding_func"].get_model_identifier()
|
|
3. Final fallback: return empty string
|
|
"""
|
|
from lightrag.base import BaseVectorStorage
|
|
|
|
# Create a concrete implementation for testing
|
|
class TestStorage(BaseVectorStorage):
|
|
async def query(self, *args, **kwargs):
|
|
pass
|
|
|
|
async def upsert(self, *args, **kwargs):
|
|
pass
|
|
|
|
async def delete_entity(self, *args, **kwargs):
|
|
pass
|
|
|
|
async def delete_entity_relation(self, *args, **kwargs):
|
|
pass
|
|
|
|
async def get_by_id(self, *args, **kwargs):
|
|
pass
|
|
|
|
async def get_by_ids(self, *args, **kwargs):
|
|
pass
|
|
|
|
async def delete(self, *args, **kwargs):
|
|
pass
|
|
|
|
async def get_vectors_by_ids(self, *args, **kwargs):
|
|
pass
|
|
|
|
async def index_done_callback(self):
|
|
pass
|
|
|
|
async def drop(self):
|
|
pass
|
|
|
|
# Case 1: Direct method available
|
|
embedding_func = EmbeddingFunc(
|
|
embedding_dim=1536, func=dummy_embedding_func, model_name="test-model"
|
|
)
|
|
storage = TestStorage(
|
|
namespace="test",
|
|
workspace="test",
|
|
global_config={},
|
|
embedding_func=embedding_func,
|
|
)
|
|
assert (
|
|
storage._generate_collection_suffix() == "test_model_1536d"
|
|
), "Should use direct method when available"
|
|
|
|
# Case 2: Global config fallback
|
|
mock_embedding_func = Mock(spec=[]) # No get_model_identifier
|
|
storage = TestStorage(
|
|
namespace="test",
|
|
workspace="test",
|
|
global_config={"embedding_func": embedding_func},
|
|
embedding_func=mock_embedding_func,
|
|
)
|
|
assert (
|
|
storage._generate_collection_suffix() == "test_model_1536d"
|
|
), "Should fallback to global_config embedding_func"
|
|
|
|
# Case 3: Final fallback (no embedding_func anywhere)
|
|
storage = TestStorage(
|
|
namespace="test",
|
|
workspace="test",
|
|
global_config={},
|
|
embedding_func=mock_embedding_func,
|
|
)
|
|
assert storage._generate_collection_suffix() == "", (
|
|
"Should return empty string when no model_identifier available"
|
|
)
|
|
|