LightRAG/tests/test_empty_model_suffix.py
BukeLy 7d0c356702 fix: correct assert syntax in test_empty_model_suffix to prevent false positives
Why this change is needed:
The test file contained assert statements using tuple syntax `assert (condition, message)`,
which Python interprets as asserting a non-empty tuple (always True). This meant the tests
were passing even when the actual conditions failed, creating a false sense of test coverage.
Additionally, there were unused imports (pytest, patch, MagicMock) that needed cleanup.

How it solves it:
- Fixed assert statements on lines 61-63 and 105-109 to use correct syntax:
  `assert condition, message` instead of `assert (condition, message)`
- Removed unused imports to satisfy linter requirements
- Applied automatic formatting via ruff-format and ruff

Impact:
- Tests now correctly validate the empty model suffix behavior
- Prevents false positive test results that could hide bugs
- Passes all pre-commit hooks (F631 error resolved)

Testing:
- Verified with `uv run pre-commit run --all-files` - all checks pass
- Assert statements now properly fail when conditions are not met
2025-11-20 01:57:47 +08:00

296 lines
10 KiB
Python

"""
Tests for handling empty model suffix in PostgreSQL and Qdrant storage.
This test module verifies that both storage backends gracefully handle
the case when _generate_collection_suffix() returns an empty string.
"""
from unittest.mock import Mock
from lightrag.base import BaseVectorStorage
from lightrag.utils import EmbeddingFunc
def dummy_embedding_func(*args, **kwargs):
"""Dummy embedding function for testing."""
pass
class TestEmptyModelSuffix:
"""Test suite for handling empty model suffix scenarios."""
def test_postgres_table_name_with_empty_suffix(self):
"""
Test PostgreSQL table name generation when model_suffix is empty.
Bug Fix Verification:
- Before: table_name = "LIGHTRAG_VDB_CHUNKS_" (trailing underscore)
- After: table_name = "LIGHTRAG_VDB_CHUNKS" (fallback to base name)
"""
from lightrag.kg.postgres_impl import PostgresVectorDBStorage
from lightrag.kg.shared_storage import namespace_to_table_name
# Create a mock embedding function without get_model_identifier
mock_embedding_func = Mock(spec=["embedding_dim"])
mock_embedding_func.embedding_dim = 1536
# Setup global_config without embedding_func
global_config = {
"embedding_batch_num": 100,
"pgvector_precision": "hybrid",
"pg_host": "localhost",
"pg_port": 5432,
"pg_user": "user",
"pg_password": "password",
"pg_database": "lightrag",
}
# Create PostgreSQL storage instance
storage = PostgresVectorDBStorage(
namespace="chunks",
workspace="test",
global_config=global_config,
embedding_func=mock_embedding_func,
)
# Verify that:
# 1. model_suffix is empty
# 2. table_name doesn't have trailing underscore
# 3. table_name equals the base table name
assert storage.model_suffix == "", "model_suffix should be empty"
assert not storage.table_name.endswith(
"_"
), f"table_name should not have trailing underscore: {storage.table_name}"
# Expected base table name
expected_base = namespace_to_table_name("chunks")
assert storage.table_name == expected_base, (
f"table_name should fallback to base name when model_suffix is empty. "
f"Expected: {expected_base}, Got: {storage.table_name}"
)
def test_qdrant_collection_name_with_empty_suffix(self):
"""
Test Qdrant collection name generation when model_suffix is empty.
Bug Fix Verification:
- Before: final_namespace = "lightrag_vdb_chunks_" (trailing underscore)
- After: final_namespace = "lightrag_vdb_chunks" (fallback to legacy name)
"""
from lightrag.kg.qdrant_impl import QdrantVectorDBStorage
# Create a mock embedding function without get_model_identifier
mock_embedding_func = Mock(spec=["embedding_dim"])
mock_embedding_func.embedding_dim = 1536
# Setup global_config without embedding_func
global_config = {
"embedding_batch_num": 100,
"qdrant_url": "http://localhost:6333",
}
# Create Qdrant storage instance
storage = QdrantVectorDBStorage(
namespace="chunks",
workspace="test",
global_config=global_config,
embedding_func=mock_embedding_func,
)
# Verify that:
# 1. model_suffix is empty
# 2. final_namespace doesn't have trailing underscore
# 3. final_namespace equals the legacy namespace
assert (
storage._generate_collection_suffix() == ""
), "model_suffix should be empty"
assert not storage.final_namespace.endswith(
"_"
), f"final_namespace should not have trailing underscore: {storage.final_namespace}"
assert storage.final_namespace == storage.legacy_namespace, (
f"final_namespace should fallback to legacy_namespace when model_suffix is empty. "
f"Expected: {storage.legacy_namespace}, Got: {storage.final_namespace}"
)
def test_postgres_table_name_with_valid_suffix(self):
"""
Test PostgreSQL table name generation with valid model suffix.
Verification:
- When embedding_func has get_model_identifier, use it
- table_name has proper format: base_table_model_suffix
"""
from lightrag.kg.postgres_impl import PostgresVectorDBStorage
from lightrag.kg.shared_storage import namespace_to_table_name
# Create a proper embedding function with model_name
embedding_func = EmbeddingFunc(
embedding_dim=1536,
func=dummy_embedding_func,
model_name="text-embedding-ada-002",
)
# Setup global_config
global_config = {
"embedding_batch_num": 100,
"pgvector_precision": "hybrid",
"pg_host": "localhost",
"pg_port": 5432,
"pg_user": "user",
"pg_password": "password",
"pg_database": "lightrag",
"embedding_func": embedding_func,
}
# Create PostgreSQL storage instance
storage = PostgresVectorDBStorage(
namespace="chunks",
workspace="test",
global_config=global_config,
embedding_func=embedding_func,
)
# Verify that:
# 1. model_suffix is not empty
# 2. table_name has correct format
assert storage.model_suffix != "", "model_suffix should not be empty"
assert (
"_" in storage.table_name
), "table_name should contain underscore as separator"
# Expected format: base_table_model_suffix
expected_base = namespace_to_table_name("chunks")
expected_model_id = embedding_func.get_model_identifier()
expected_table_name = f"{expected_base}_{expected_model_id}"
assert (
storage.table_name == expected_table_name
), f"table_name format incorrect. Expected: {expected_table_name}, Got: {storage.table_name}"
def test_qdrant_collection_name_with_valid_suffix(self):
"""
Test Qdrant collection name generation with valid model suffix.
Verification:
- When embedding_func has get_model_identifier, use it
- final_namespace has proper format: lightrag_vdb_namespace_model_suffix
"""
from lightrag.kg.qdrant_impl import QdrantVectorDBStorage
# Create a proper embedding function with model_name
embedding_func = EmbeddingFunc(
embedding_dim=1536,
func=dummy_embedding_func,
model_name="text-embedding-ada-002",
)
# Setup global_config
global_config = {
"embedding_batch_num": 100,
"qdrant_url": "http://localhost:6333",
"embedding_func": embedding_func,
}
# Create Qdrant storage instance
storage = QdrantVectorDBStorage(
namespace="chunks",
workspace="test",
global_config=global_config,
embedding_func=embedding_func,
)
# Verify that:
# 1. model_suffix is not empty
# 2. final_namespace has correct format
model_suffix = storage._generate_collection_suffix()
assert model_suffix != "", "model_suffix should not be empty"
assert (
"_" in storage.final_namespace
), "final_namespace should contain underscore as separator"
# Expected format: lightrag_vdb_namespace_model_suffix
expected_model_id = embedding_func.get_model_identifier()
expected_collection_name = f"lightrag_vdb_chunks_{expected_model_id}"
assert (
storage.final_namespace == expected_collection_name
), f"final_namespace format incorrect. Expected: {expected_collection_name}, Got: {storage.final_namespace}"
def test_suffix_generation_fallback_chain(self):
"""
Test the fallback chain in _generate_collection_suffix.
Verification:
1. Direct method: embedding_func.get_model_identifier()
2. Global config fallback: global_config["embedding_func"].get_model_identifier()
3. Final fallback: return empty string
"""
# Create a concrete implementation for testing
class TestStorage(BaseVectorStorage):
async def query(self, *args, **kwargs):
pass
async def upsert(self, *args, **kwargs):
pass
async def delete_entity(self, *args, **kwargs):
pass
async def delete_entity_relation(self, *args, **kwargs):
pass
async def get_by_id(self, *args, **kwargs):
pass
async def get_by_ids(self, *args, **kwargs):
pass
async def delete(self, *args, **kwargs):
pass
async def get_vectors_by_ids(self, *args, **kwargs):
pass
async def index_done_callback(self):
pass
async def drop(self):
pass
# Case 1: Direct method available
embedding_func = EmbeddingFunc(
embedding_dim=1536, func=dummy_embedding_func, model_name="test-model"
)
storage = TestStorage(
namespace="test",
workspace="test",
global_config={},
embedding_func=embedding_func,
)
assert (
storage._generate_collection_suffix() == "test_model_1536d"
), "Should use direct method when available"
# Case 2: Global config fallback
mock_embedding_func = Mock(spec=[]) # No get_model_identifier
storage = TestStorage(
namespace="test",
workspace="test",
global_config={"embedding_func": embedding_func},
embedding_func=mock_embedding_func,
)
assert (
storage._generate_collection_suffix() == "test_model_1536d"
), "Should fallback to global_config embedding_func"
# Case 3: Final fallback (no embedding_func anywhere)
storage = TestStorage(
namespace="test",
workspace="test",
global_config={},
embedding_func=mock_embedding_func,
)
assert (
storage._generate_collection_suffix() == ""
), "Should return empty string when no model_identifier available"