Relational DB migration test search (#1752)

<!-- .github/pull_request_template.md -->

## Description
Add a deterministic Cognee search test that runs after the relational DB
migration. The test gathers all relationships between Customers and their
Invoices from the relational DB that was migrated, and then checks that
Cognee search returns the same results.

## Type of Change
<!-- Please check the relevant option -->
- [ ] Bug fix (non-breaking change that fixes an issue)
- [ ] New feature (non-breaking change that adds functionality)
- [ ] Breaking change (fix or feature that would cause existing
functionality to change)
- [ ] Documentation update
- [ ] Code refactoring
- [ ] Performance improvement
- [ ] Other (please specify):

## Screenshots/Videos (if applicable)
<!-- Add screenshots or videos to help explain your changes -->

## Pre-submission Checklist
<!-- Please check all boxes that apply before submitting your PR -->
- [ ] **I have tested my changes thoroughly before submitting this PR**
- [ ] **This PR contains minimal changes necessary to address the
issue/feature**
- [ ] My code follows the project's coding standards and style
guidelines
- [ ] I have added tests that prove my fix is effective or that my
feature works
- [ ] I have added necessary documentation (if applicable)
- [ ] All new and existing tests pass
- [ ] I have searched existing PRs to ensure this change hasn't been
submitted already
- [ ] I have linked any relevant issues in the description
- [ ] My commits have clear and descriptive messages

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
Igor Ilic 2025-11-12 21:32:22 +01:00 committed by GitHub
parent d328f2f612
commit a5bd504daa
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -1,6 +1,5 @@
import pathlib
import os
from typing import List
from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.infrastructure.databases.relational import (
get_migration_relational_engine,
@ -10,7 +9,7 @@ from cognee.infrastructure.databases.vector.pgvector import (
create_db_and_tables as create_pgvector_db_and_tables,
)
from cognee.tasks.ingestion import migrate_relational_database
from cognee.modules.search.types import SearchResult, SearchType
from cognee.modules.search.types import SearchType
import cognee
@ -274,6 +273,55 @@ async def test_schema_only_migration():
print(f"Edge counts: {edge_counts}")
def _parse_invoice_ids(raw_answer):
    """Turn a list literal such as "['1', '2', '3']" into a sorted list of ints.

    Raises:
        AssertionError: if ``raw_answer`` is not a Python literal that can be
            iterated and converted to integers. The offending answer is
            included in the message so a failing run is easy to diagnose.
    """
    import ast

    try:
        parsed = ast.literal_eval(raw_answer)  # converts string -> Python list
        return sorted(int(item) for item in parsed)
    except (ValueError, SyntaxError, TypeError) as error:
        raise AssertionError(
            f"Cognee search answer {raw_answer!r} is not a list of invoice IDs"
        ) from error


async def test_search_result_quality():
    """Check that Cognee search reproduces the Customer -> Invoice links of the source DB.

    For every Customer in the migrated relational database, the expected
    invoice IDs are read with SQL, then Cognee is asked (GRAPH_COMPLETION
    search, addressed by the Customer's first and last name) to list the
    invoices. Both ID lists are converted to sorted ints and must be equal.

    Raises:
        AssertionError: if a search returns no results, returns an answer that
            is not a list of invoice IDs, or returns IDs that differ from the
            relational database.
    """
    from cognee.infrastructure.databases.relational import (
        get_migration_relational_engine,
    )
    from sqlalchemy import text

    # Get relational database with original data
    migration_engine = get_migration_relational_engine()

    async with migration_engine.engine.connect() as conn:
        result = await conn.execute(
            text("""
                SELECT
                    c.CustomerId,
                    c.FirstName,
                    c.LastName,
                    GROUP_CONCAT(i.InvoiceId, ',') AS invoice_ids
                FROM Customer AS c
                LEFT JOIN Invoice AS i ON c.CustomerId = i.CustomerId
                GROUP BY c.CustomerId, c.FirstName, c.LastName
            """)
        )
        # Materialize the rows so the connection is released before the
        # (slow) search calls below are made.
        customer_rows = result.all()

    for row in customer_rows:
        # Get expected invoice IDs from relational DB for each Customer.
        # LEFT JOIN yields NULL for Customers without invoices -> empty list.
        customer_id = row.CustomerId
        invoice_ids = row.invoice_ids.split(",") if row.invoice_ids else []
        print(f"Relational DB Customer {customer_id}: {invoice_ids}")

        # Use Cognee search to get invoice IDs for the same Customer but by providing Customer name
        search_results = await cognee.search(
            query_type=SearchType.GRAPH_COMPLETION,
            query_text=f"List me all the invoices of Customer:{row.FirstName} {row.LastName}.",
            top_k=50,
            system_prompt="Just return me the invoiceID as a number without any text. This is an example output: ['1', '2', '3']. Where 1, 2, 3 are invoiceIDs of an invoice",
        )
        print(f"Cognee search result: {search_results}")

        # Fail with a readable message instead of an IndexError on empty output.
        assert search_results, f"Cognee search returned no results for Customer:{customer_id}"

        # Transform both lists to int for comparison, sorting and type consistency
        lst = _parse_invoice_ids(search_results[0])
        invoice_ids = sorted([int(x) for x in invoice_ids])
        assert lst == invoice_ids, (
            f"Search results {lst} do not match expected invoice IDs {invoice_ids} for Customer:{customer_id}"
        )
async def test_migration_sqlite():
database_to_migrate_path = os.path.join(pathlib.Path(__file__).parent, "test_data/")
@ -286,6 +334,7 @@ async def test_migration_sqlite():
)
await relational_db_migration()
await test_search_result_quality()
await test_schema_only_migration()