Why this change is needed: The test file contained assert statements using tuple syntax `assert (condition, message)`, which Python interprets as asserting a non-empty tuple (always True). This meant the tests were passing even when the actual conditions failed, creating a false sense of test coverage. Additionally, there were unused imports (pytest, patch, MagicMock) that needed cleanup. How it solves it: - Fixed assert statements on lines 61-63 and 105-109 to use correct syntax: `assert condition, message` instead of `assert (condition, message)` - Removed unused imports to satisfy linter requirements - Applied automatic formatting via ruff-format and ruff Impact: - Tests now correctly validate the empty model suffix behavior - Prevents false positive test results that could hide bugs - Passes all pre-commit hooks (F631 error resolved) Testing: - Verified with `uv run pre-commit run --all-files` - all checks pass - Assert statements now properly fail when conditions are not met
296 lines
10 KiB
Python
296 lines
10 KiB
Python
"""
|
|
Tests for handling empty model suffix in PostgreSQL and Qdrant storage.
|
|
|
|
This test module verifies that both storage backends gracefully handle
|
|
the case when _generate_collection_suffix() returns an empty string.
|
|
"""
|
|
|
|
from unittest.mock import Mock
|
|
|
|
from lightrag.base import BaseVectorStorage
|
|
from lightrag.utils import EmbeddingFunc
|
|
|
|
|
|
def dummy_embedding_func(*args, **kwargs):
|
|
"""Dummy embedding function for testing."""
|
|
pass
|
|
|
|
|
|
class TestEmptyModelSuffix:
|
|
"""Test suite for handling empty model suffix scenarios."""
|
|
|
|
def test_postgres_table_name_with_empty_suffix(self):
|
|
"""
|
|
Test PostgreSQL table name generation when model_suffix is empty.
|
|
|
|
Bug Fix Verification:
|
|
- Before: table_name = "LIGHTRAG_VDB_CHUNKS_" (trailing underscore)
|
|
- After: table_name = "LIGHTRAG_VDB_CHUNKS" (fallback to base name)
|
|
"""
|
|
from lightrag.kg.postgres_impl import PostgresVectorDBStorage
|
|
from lightrag.kg.shared_storage import namespace_to_table_name
|
|
|
|
# Create a mock embedding function without get_model_identifier
|
|
mock_embedding_func = Mock(spec=["embedding_dim"])
|
|
mock_embedding_func.embedding_dim = 1536
|
|
|
|
# Setup global_config without embedding_func
|
|
global_config = {
|
|
"embedding_batch_num": 100,
|
|
"pgvector_precision": "hybrid",
|
|
"pg_host": "localhost",
|
|
"pg_port": 5432,
|
|
"pg_user": "user",
|
|
"pg_password": "password",
|
|
"pg_database": "lightrag",
|
|
}
|
|
|
|
# Create PostgreSQL storage instance
|
|
storage = PostgresVectorDBStorage(
|
|
namespace="chunks",
|
|
workspace="test",
|
|
global_config=global_config,
|
|
embedding_func=mock_embedding_func,
|
|
)
|
|
|
|
# Verify that:
|
|
# 1. model_suffix is empty
|
|
# 2. table_name doesn't have trailing underscore
|
|
# 3. table_name equals the base table name
|
|
assert storage.model_suffix == "", "model_suffix should be empty"
|
|
assert not storage.table_name.endswith(
|
|
"_"
|
|
), f"table_name should not have trailing underscore: {storage.table_name}"
|
|
|
|
# Expected base table name
|
|
expected_base = namespace_to_table_name("chunks")
|
|
assert storage.table_name == expected_base, (
|
|
f"table_name should fallback to base name when model_suffix is empty. "
|
|
f"Expected: {expected_base}, Got: {storage.table_name}"
|
|
)
|
|
|
|
def test_qdrant_collection_name_with_empty_suffix(self):
|
|
"""
|
|
Test Qdrant collection name generation when model_suffix is empty.
|
|
|
|
Bug Fix Verification:
|
|
- Before: final_namespace = "lightrag_vdb_chunks_" (trailing underscore)
|
|
- After: final_namespace = "lightrag_vdb_chunks" (fallback to legacy name)
|
|
"""
|
|
from lightrag.kg.qdrant_impl import QdrantVectorDBStorage
|
|
|
|
# Create a mock embedding function without get_model_identifier
|
|
mock_embedding_func = Mock(spec=["embedding_dim"])
|
|
mock_embedding_func.embedding_dim = 1536
|
|
|
|
# Setup global_config without embedding_func
|
|
global_config = {
|
|
"embedding_batch_num": 100,
|
|
"qdrant_url": "http://localhost:6333",
|
|
}
|
|
|
|
# Create Qdrant storage instance
|
|
storage = QdrantVectorDBStorage(
|
|
namespace="chunks",
|
|
workspace="test",
|
|
global_config=global_config,
|
|
embedding_func=mock_embedding_func,
|
|
)
|
|
|
|
# Verify that:
|
|
# 1. model_suffix is empty
|
|
# 2. final_namespace doesn't have trailing underscore
|
|
# 3. final_namespace equals the legacy namespace
|
|
assert (
|
|
storage._generate_collection_suffix() == ""
|
|
), "model_suffix should be empty"
|
|
assert not storage.final_namespace.endswith(
|
|
"_"
|
|
), f"final_namespace should not have trailing underscore: {storage.final_namespace}"
|
|
assert storage.final_namespace == storage.legacy_namespace, (
|
|
f"final_namespace should fallback to legacy_namespace when model_suffix is empty. "
|
|
f"Expected: {storage.legacy_namespace}, Got: {storage.final_namespace}"
|
|
)
|
|
|
|
def test_postgres_table_name_with_valid_suffix(self):
|
|
"""
|
|
Test PostgreSQL table name generation with valid model suffix.
|
|
|
|
Verification:
|
|
- When embedding_func has get_model_identifier, use it
|
|
- table_name has proper format: base_table_model_suffix
|
|
"""
|
|
from lightrag.kg.postgres_impl import PostgresVectorDBStorage
|
|
from lightrag.kg.shared_storage import namespace_to_table_name
|
|
|
|
# Create a proper embedding function with model_name
|
|
embedding_func = EmbeddingFunc(
|
|
embedding_dim=1536,
|
|
func=dummy_embedding_func,
|
|
model_name="text-embedding-ada-002",
|
|
)
|
|
|
|
# Setup global_config
|
|
global_config = {
|
|
"embedding_batch_num": 100,
|
|
"pgvector_precision": "hybrid",
|
|
"pg_host": "localhost",
|
|
"pg_port": 5432,
|
|
"pg_user": "user",
|
|
"pg_password": "password",
|
|
"pg_database": "lightrag",
|
|
"embedding_func": embedding_func,
|
|
}
|
|
|
|
# Create PostgreSQL storage instance
|
|
storage = PostgresVectorDBStorage(
|
|
namespace="chunks",
|
|
workspace="test",
|
|
global_config=global_config,
|
|
embedding_func=embedding_func,
|
|
)
|
|
|
|
# Verify that:
|
|
# 1. model_suffix is not empty
|
|
# 2. table_name has correct format
|
|
assert storage.model_suffix != "", "model_suffix should not be empty"
|
|
assert (
|
|
"_" in storage.table_name
|
|
), "table_name should contain underscore as separator"
|
|
|
|
# Expected format: base_table_model_suffix
|
|
expected_base = namespace_to_table_name("chunks")
|
|
expected_model_id = embedding_func.get_model_identifier()
|
|
expected_table_name = f"{expected_base}_{expected_model_id}"
|
|
|
|
assert (
|
|
storage.table_name == expected_table_name
|
|
), f"table_name format incorrect. Expected: {expected_table_name}, Got: {storage.table_name}"
|
|
|
|
def test_qdrant_collection_name_with_valid_suffix(self):
|
|
"""
|
|
Test Qdrant collection name generation with valid model suffix.
|
|
|
|
Verification:
|
|
- When embedding_func has get_model_identifier, use it
|
|
- final_namespace has proper format: lightrag_vdb_namespace_model_suffix
|
|
"""
|
|
from lightrag.kg.qdrant_impl import QdrantVectorDBStorage
|
|
|
|
# Create a proper embedding function with model_name
|
|
embedding_func = EmbeddingFunc(
|
|
embedding_dim=1536,
|
|
func=dummy_embedding_func,
|
|
model_name="text-embedding-ada-002",
|
|
)
|
|
|
|
# Setup global_config
|
|
global_config = {
|
|
"embedding_batch_num": 100,
|
|
"qdrant_url": "http://localhost:6333",
|
|
"embedding_func": embedding_func,
|
|
}
|
|
|
|
# Create Qdrant storage instance
|
|
storage = QdrantVectorDBStorage(
|
|
namespace="chunks",
|
|
workspace="test",
|
|
global_config=global_config,
|
|
embedding_func=embedding_func,
|
|
)
|
|
|
|
# Verify that:
|
|
# 1. model_suffix is not empty
|
|
# 2. final_namespace has correct format
|
|
model_suffix = storage._generate_collection_suffix()
|
|
assert model_suffix != "", "model_suffix should not be empty"
|
|
assert (
|
|
"_" in storage.final_namespace
|
|
), "final_namespace should contain underscore as separator"
|
|
|
|
# Expected format: lightrag_vdb_namespace_model_suffix
|
|
expected_model_id = embedding_func.get_model_identifier()
|
|
expected_collection_name = f"lightrag_vdb_chunks_{expected_model_id}"
|
|
|
|
assert (
|
|
storage.final_namespace == expected_collection_name
|
|
), f"final_namespace format incorrect. Expected: {expected_collection_name}, Got: {storage.final_namespace}"
|
|
|
|
def test_suffix_generation_fallback_chain(self):
|
|
"""
|
|
Test the fallback chain in _generate_collection_suffix.
|
|
|
|
Verification:
|
|
1. Direct method: embedding_func.get_model_identifier()
|
|
2. Global config fallback: global_config["embedding_func"].get_model_identifier()
|
|
3. Final fallback: return empty string
|
|
"""
|
|
|
|
# Create a concrete implementation for testing
|
|
class TestStorage(BaseVectorStorage):
|
|
async def query(self, *args, **kwargs):
|
|
pass
|
|
|
|
async def upsert(self, *args, **kwargs):
|
|
pass
|
|
|
|
async def delete_entity(self, *args, **kwargs):
|
|
pass
|
|
|
|
async def delete_entity_relation(self, *args, **kwargs):
|
|
pass
|
|
|
|
async def get_by_id(self, *args, **kwargs):
|
|
pass
|
|
|
|
async def get_by_ids(self, *args, **kwargs):
|
|
pass
|
|
|
|
async def delete(self, *args, **kwargs):
|
|
pass
|
|
|
|
async def get_vectors_by_ids(self, *args, **kwargs):
|
|
pass
|
|
|
|
async def index_done_callback(self):
|
|
pass
|
|
|
|
async def drop(self):
|
|
pass
|
|
|
|
# Case 1: Direct method available
|
|
embedding_func = EmbeddingFunc(
|
|
embedding_dim=1536, func=dummy_embedding_func, model_name="test-model"
|
|
)
|
|
storage = TestStorage(
|
|
namespace="test",
|
|
workspace="test",
|
|
global_config={},
|
|
embedding_func=embedding_func,
|
|
)
|
|
assert (
|
|
storage._generate_collection_suffix() == "test_model_1536d"
|
|
), "Should use direct method when available"
|
|
|
|
# Case 2: Global config fallback
|
|
mock_embedding_func = Mock(spec=[]) # No get_model_identifier
|
|
storage = TestStorage(
|
|
namespace="test",
|
|
workspace="test",
|
|
global_config={"embedding_func": embedding_func},
|
|
embedding_func=mock_embedding_func,
|
|
)
|
|
assert (
|
|
storage._generate_collection_suffix() == "test_model_1536d"
|
|
), "Should fallback to global_config embedding_func"
|
|
|
|
# Case 3: Final fallback (no embedding_func anywhere)
|
|
storage = TestStorage(
|
|
namespace="test",
|
|
workspace="test",
|
|
global_config={},
|
|
embedding_func=mock_embedding_func,
|
|
)
|
|
assert (
|
|
storage._generate_collection_suffix() == ""
|
|
), "Should return empty string when no model_identifier available"
|