diff --git a/tests/test_e2e_multi_instance.py b/tests/test_e2e_multi_instance.py index f859d75d..c32a473c 100644 --- a/tests/test_e2e_multi_instance.py +++ b/tests/test_e2e_multi_instance.py @@ -23,7 +23,13 @@ import shutil from lightrag import LightRAG from lightrag.utils import EmbeddingFunc from lightrag.kg.postgres_impl import PostgreSQLDB -from qdrant_client import QdrantClient + +# Conditional import for E2E dependencies +# This prevents offline tests from failing due to missing E2E dependencies +qdrant_client = pytest.importorskip( + "qdrant_client", reason="Qdrant client required for E2E tests" +) +QdrantClient = qdrant_client.QdrantClient # Configuration fixtures diff --git a/tests/test_postgres_migration.py b/tests/test_postgres_migration.py index 84cec898..501bd447 100644 --- a/tests/test_postgres_migration.py +++ b/tests/test_postgres_migration.py @@ -22,8 +22,25 @@ def mock_pg_db(): return [] # Return empty list for multirows return {"exists": False, "count": 0} + # Strict mock for execute that validates parameter types + async def mock_execute(sql, *args, **kwargs): + """ + Strict mock that mimics AsyncPG behavior: + - Positional parameters must be passed as separate arguments (*args) + - Raises TypeError if a dict/list is passed as a single argument + """ + if args and len(args) == 1: + # Check if single argument is a dict or list (wrong usage) + if isinstance(args[0], (dict, list)): + raise TypeError( + "AsyncPG execute() expects positional parameters as separate arguments, " + f"not as {type(args[0]).__name__}. Use: execute(query, val1, val2, ...) " + "or execute(query, *values)" + ) + return None + db.query = AsyncMock(side_effect=mock_query) - db.execute = AsyncMock() + db.execute = AsyncMock(side_effect=mock_execute) return db diff --git a/tests/test_postgres_migration_params.py b/tests/test_postgres_migration_params.py new file mode 100644 index 00000000..59df2d89 --- /dev/null +++ b/tests/test_postgres_migration_params.py @@ -0,0 +1,168 @@ +""" +Strict test to verify PostgreSQL migration parameter passing. + +This test specifically validates that the migration code passes parameters +to AsyncPG execute() in the correct format (positional args, not dict). +""" + +import pytest +from unittest.mock import patch, AsyncMock +from lightrag.utils import EmbeddingFunc +from lightrag.kg.postgres_impl import PGVectorStorage +from lightrag.namespace import NameSpace + + +@pytest.mark.asyncio +async def test_migration_parameter_passing(): + """ + Verify that migration passes positional parameters correctly to execute(). + + This test specifically checks that execute() is called with: + - SQL query as first argument + - Values as separate positional arguments (*values) + NOT as a dictionary or list + """ + + # Track all execute calls + execute_calls = [] + + async def strict_execute(sql, *args, **kwargs): + """Record all execute calls with their arguments""" + execute_calls.append( + { + "sql": sql, + "args": args, # Should be tuple of values + "kwargs": kwargs, + } + ) + + # Validate: if args has only one element and it's a dict/list, that's wrong + if args and len(args) == 1 and isinstance(args[0], (dict, list)): + raise TypeError( + f"BUG DETECTED: execute() called with {type(args[0]).__name__} " + "instead of positional parameters! " + f"Got: execute(sql, {args[0]!r})" + ) + return None + + # Create mocks + mock_db = AsyncMock() + mock_db.workspace = "test_workspace" + mock_db.execute = AsyncMock(side_effect=strict_execute) + + # Mock query to simulate legacy table with data + mock_rows = [ + { + "id": "row1", + "content": "content1", + "workspace": "test", + "vector": [0.1] * 1536, + }, + { + "id": "row2", + "content": "content2", + "workspace": "test", + "vector": [0.2] * 1536, + }, + ] + + async def mock_query(sql, params=None, multirows=False, **kwargs): + if "COUNT(*)" in sql: + return {"count": len(mock_rows)} + elif multirows and "SELECT *" in sql: + return mock_rows + return {} + + mock_db.query = AsyncMock(side_effect=mock_query) + + # Mock table existence: only legacy table exists + async def mock_table_exists(db, table_name): + return "test_model_1536d" not in table_name # Legacy exists, new doesn't + + # Setup embedding function + async def embed_func(texts, **kwargs): + import numpy as np + + return np.array([[0.1] * 1536 for _ in texts]) + + embedding_func = EmbeddingFunc( + embedding_dim=1536, func=embed_func, model_name="test-model" + ) + + config = { + "embedding_batch_num": 10, + "vector_db_storage_cls_kwargs": {"cosine_better_than_threshold": 0.8}, + } + + storage = PGVectorStorage( + namespace=NameSpace.VECTOR_STORE_CHUNKS, + global_config=config, + embedding_func=embedding_func, + workspace="test", + ) + + with ( + patch("lightrag.kg.postgres_impl.get_data_init_lock") as mock_lock, + patch("lightrag.kg.postgres_impl.ClientManager") as mock_manager, + patch( + "lightrag.kg.postgres_impl._pg_table_exists", side_effect=mock_table_exists + ), + patch("lightrag.kg.postgres_impl._pg_create_table", AsyncMock()), + ): + mock_lock_ctx = AsyncMock() + mock_lock.return_value = mock_lock_ctx + mock_manager.get_client = AsyncMock(return_value=mock_db) + mock_manager.release_client = AsyncMock() + + # This should trigger migration + await storage.initialize() + + # Verify execute was called (migration happened) + assert len(execute_calls) > 0, "Migration should have called execute()" + + # Verify parameter format for INSERT statements + insert_calls = [c for c in execute_calls if "INSERT INTO" in c["sql"]] + assert len(insert_calls) > 0, "Should have INSERT statements from migration" + + print(f"\n✓ Migration executed {len(insert_calls)} INSERT statements") + + # Check each INSERT call + for i, call_info in enumerate(insert_calls): + args = call_info["args"] + sql = call_info["sql"] + + print(f"\n INSERT #{i+1}:") + print(f" SQL: {sql[:100]}...") + print(f" Args count: {len(args)}") + print(f" Args types: {[type(arg).__name__ for arg in args]}") + + # Key validation: args should be a tuple of values, not a single dict/list + if args: + # Check if first (and only) arg is a dict or list - that's the bug! + if len(args) == 1 and isinstance(args[0], (dict, list)): + pytest.fail( + f"BUG: execute() called with {type(args[0]).__name__} instead of " + f"positional parameters!\n" + f" SQL: {sql}\n" + f" Args: {args[0]}\n" + f"Expected: execute(sql, val1, val2, val3, ...)\n" + f"Got: execute(sql, {type(args[0]).__name__})" + ) + + # Validate all args are primitive types (not collections) + for j, arg in enumerate(args): + if isinstance(arg, (dict, list)) and not isinstance(arg, (str, bytes)): + # Exception: vector columns might be lists, that's OK + if "vector" not in sql: + pytest.fail( + f"BUG: Parameter #{j} is {type(arg).__name__}, " + f"expected primitive type" + ) + + print( + f"\n✅ All {len(insert_calls)} INSERT statements use correct parameter format" + ) + + +if __name__ == "__main__": + pytest.main([__file__, "-v", "-s"])