Relational DB migration test search (#1752)
<!-- .github/pull_request_template.md --> ## Description Add a deterministic Cognee search test that runs after the relational DB migration. The test gathers all Customer–Invoice relationships from the migrated relational DB and then checks that Cognee search returns the same results. ## Type of Change <!-- Please check the relevant option --> - [ ] Bug fix (non-breaking change that fixes an issue) - [ ] New feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [ ] Code refactoring - [ ] Performance improvement - [ ] Other (please specify): ## Screenshots/Videos (if applicable) <!-- Add screenshots or videos to help explain your changes --> ## Pre-submission Checklist <!-- Please check all boxes that apply before submitting your PR --> - [ ] **I have tested my changes thoroughly before submitting this PR** - [ ] **This PR contains minimal changes necessary to address the issue/feature** - [ ] My code follows the project's coding standards and style guidelines - [ ] I have added tests that prove my fix is effective or that my feature works - [ ] I have added necessary documentation (if applicable) - [ ] All new and existing tests pass - [ ] I have searched existing PRs to ensure this change hasn't been submitted already - [ ] I have linked any relevant issues in the description - [ ] My commits have clear and descriptive messages ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
parent
d328f2f612
commit
a5bd504daa
1 changed files with 51 additions and 2 deletions
|
|
@ -1,6 +1,5 @@
|
|||
import pathlib
|
||||
import os
|
||||
from typing import List
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
from cognee.infrastructure.databases.relational import (
|
||||
get_migration_relational_engine,
|
||||
|
|
@ -10,7 +9,7 @@ from cognee.infrastructure.databases.vector.pgvector import (
|
|||
create_db_and_tables as create_pgvector_db_and_tables,
|
||||
)
|
||||
from cognee.tasks.ingestion import migrate_relational_database
|
||||
from cognee.modules.search.types import SearchResult, SearchType
|
||||
from cognee.modules.search.types import SearchType
|
||||
import cognee
|
||||
|
||||
|
||||
|
|
@ -274,6 +273,55 @@ async def test_schema_only_migration():
|
|||
print(f"Edge counts: {edge_counts}")
|
||||
|
||||
|
||||
async def test_search_result_quality():
    """Check that Cognee search reproduces the Customer -> Invoice links of the source DB.

    For every customer in the migrated relational database, the expected
    invoice IDs are read straight from the ``Customer``/``Invoice`` tables.
    The same question is then asked through ``cognee.search`` using the
    customer's name, and the returned invoice IDs must match exactly.

    Raises:
        AssertionError: If the search returns nothing, returns text that is
            not a Python list literal, or returns a different set of
            invoice IDs than the relational database holds.
    """
    import ast

    from sqlalchemy import text

    from cognee.infrastructure.databases.relational import (
        get_migration_relational_engine,
    )

    # Get relational database with original data
    migration_engine = get_migration_relational_engine()

    async with migration_engine.engine.connect() as conn:
        result = await conn.execute(
            text("""
                SELECT
                    c.CustomerId,
                    c.FirstName,
                    c.LastName,
                    GROUP_CONCAT(i.InvoiceId, ',') AS invoice_ids
                FROM Customer AS c
                LEFT JOIN Invoice AS i ON c.CustomerId = i.CustomerId
                GROUP BY c.CustomerId, c.FirstName, c.LastName
            """)
        )
        # Materialize the rows now so the connection is not held open while
        # the (slow) search calls below are running.
        rows = list(result)

    for row in rows:
        # Get expected invoice IDs from relational DB for each Customer
        customer_id = row.CustomerId
        invoice_ids = row.invoice_ids.split(",") if row.invoice_ids else []
        print(f"Relational DB Customer {customer_id}: {invoice_ids}")

        # Use Cognee search to get invoice IDs for the same Customer but by providing Customer name
        search_results = await cognee.search(
            query_type=SearchType.GRAPH_COMPLETION,
            query_text=f"List me all the invoices of Customer:{row.FirstName} {row.LastName}.",
            top_k=50,
            system_prompt="Just return me the invoiceID as a number without any text. This is an example output: ['1', '2', '3']. Where 1, 2, 3 are invoiceIDs of an invoice",
        )
        print(f"Cognee search result: {search_results}")

        # Fail with a readable message instead of an IndexError when the
        # search produced nothing at all.
        assert search_results, f"Cognee search returned no results for Customer:{customer_id}"

        # The answer is expected to be the text of a Python list literal;
        # report anything else as a test failure that names the customer.
        try:
            found_ids = ast.literal_eval(search_results[0])  # converts string -> Python list
        except (ValueError, SyntaxError, TypeError) as err:
            raise AssertionError(
                f"Cognee search result {search_results[0]!r} is not a list literal "
                f"for Customer:{customer_id}"
            ) from err

        # Transform both lists to int for comparison, sorting and type consistency
        found_ids = sorted(int(x) for x in found_ids)
        expected_ids = sorted(int(x) for x in invoice_ids)
        assert found_ids == expected_ids, (
            f"Search results {found_ids} do not match expected invoice IDs {expected_ids} for Customer:{customer_id}"
        )
|
||||
|
||||
|
||||
async def test_migration_sqlite():
|
||||
database_to_migrate_path = os.path.join(pathlib.Path(__file__).parent, "test_data/")
|
||||
|
||||
|
|
@ -286,6 +334,7 @@ async def test_migration_sqlite():
|
|||
)
|
||||
|
||||
await relational_db_migration()
|
||||
await test_search_result_quality()
|
||||
await test_schema_only_migration()
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue