From 39fa0180f32edfe6815f4db7e5c40400383fbebc Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 26 Sep 2025 22:42:39 +0200 Subject: [PATCH] refactor: Make relational database search more effective --- .../ingestion/migrate_relational_database.py | 6 +-- .../relational_database_migration_example.py | 46 +++++++++++++------ 2 files changed, 36 insertions(+), 16 deletions(-) diff --git a/cognee/tasks/ingestion/migrate_relational_database.py b/cognee/tasks/ingestion/migrate_relational_database.py index 936ea59e0..82319e9f5 100644 --- a/cognee/tasks/ingestion/migrate_relational_database.py +++ b/cognee/tasks/ingestion/migrate_relational_database.py @@ -38,7 +38,7 @@ async def migrate_relational_database(graph_db, schema, migrate_column_data=True table_node = TableType( id=uuid5(NAMESPACE_OID, name=table_name), name=table_name, - description=f"Table: {table_name}", + description=f'Relational database table with the following name: "{table_name}".', ) # Add TableType node to mapping ( node will be added to the graph later based on this mapping ) @@ -75,7 +75,7 @@ async def migrate_relational_database(graph_db, schema, migrate_column_data=True name=node_id, is_a=table_node, properties=str(row_properties), - description=f"Row in {table_name} with {primary_key_col}={primary_key_value}", + description=f'Row in relational database table from the table with the name: "{table_name}" with the following row data {str(row_properties)} where the dictionary key value is the column name and the value is the column value. This row has the id of: {node_id}', ) # Store the node object in our mapping @@ -113,7 +113,7 @@ async def migrate_relational_database(graph_db, schema, migrate_column_data=True id=uuid5(NAMESPACE_OID, name=column_node_id), name=column_node_id, properties=f"{key} {value} {table_name}", - description=f"Column name={key} and value={value} from column from table={table_name}", + description=f"column from relational database table={table_name}. Column name={key} and value={value}. The value of the column is related to the following row with this id: {row_node.id}. This column has the following ID: {column_node_id}", ) node_mapping[column_node_id] = column_node diff --git a/examples/python/relational_database_migration_example.py b/examples/python/relational_database_migration_example.py index fae8cfb3d..6a5c3b78b 100644 --- a/examples/python/relational_database_migration_example.py +++ b/examples/python/relational_database_migration_example.py @@ -1,16 +1,15 @@ +from pathlib import Path import asyncio - -import cognee import os +import cognee +from cognee.infrastructure.databases.relational.config import get_migration_config from cognee.infrastructure.databases.graph import get_graph_engine from cognee.api.v1.visualize.visualize import visualize_graph from cognee.infrastructure.databases.relational import ( get_migration_relational_engine, ) - from cognee.modules.search.types import SearchType - from cognee.infrastructure.databases.relational import ( create_db_and_tables as create_relational_db_and_tables, ) @@ -32,16 +31,29 @@ from cognee.infrastructure.databases.vector.pgvector import ( async def main(): - engine = get_migration_relational_engine() - # Clean all data stored in Cognee await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) - # Needed to create appropriate tables only on the Cognee side + # Needed to create appropriate database tables only on the Cognee side await create_relational_db_and_tables() await create_vector_db_and_tables() + # In case environment variables are not set use the example database from the Cognee repo + migration_db_provider = os.environ.get("MIGRATION_DB_PROVIDER", "sqlite") + migration_db_path = os.environ.get( + "MIGRATION_DB_PATH", + os.path.join(Path(__file__).resolve().parent.parent.parent, "cognee/tests/test_data"), + ) + migration_db_name = os.environ.get("MIGRATION_DB_NAME", "migration_database.sqlite") + + migration_config = get_migration_config() + migration_config.migration_db_provider = migration_db_provider + migration_config.migration_db_path = migration_db_path + migration_config.migration_db_name = migration_db_name + + engine = get_migration_relational_engine() + print("\nExtracting schema of database to migrate.") schema = await engine.extract_schema() print(f"Migrated database schema:\n{schema}") @@ -53,10 +65,6 @@ async def main(): await migrate_relational_database(graph, schema=schema) print("Relational database migration complete.") - # Define location where to store html visualization of graph of the migrated database - home_dir = os.path.expanduser("~") - destination_file_path = os.path.join(home_dir, "graph_visualization.html") - # Make sure to set top_k at a high value for a broader search, the default value is only 10! # top_k represent the number of graph tripplets to supply to the LLM to answer your question search_results = await cognee.search( @@ -69,13 +77,25 @@ async def main(): # Having a top_k value set to too high might overwhelm the LLM context when specific questions need to be answered. # For this kind of question we've set the top_k to 30 search_results = await cognee.search( - query_type=SearchType.GRAPH_COMPLETION_COT, + query_type=SearchType.GRAPH_COMPLETION, query_text="What invoices are related to Leonie Köhler?", top_k=30, ) print(f"Search results: {search_results}") - # test.html is a file with visualized data migration + search_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, + query_text="What invoices are related to Luís Gonçalves?", + top_k=30, + ) + print(f"Search results: {search_results}") + + # If you check the relational database for this example you can see that the search results successfully found all + # the invoices related to the two customers, without any hallucinations or additional information + + # Define location where to store html visualization of graph of the migrated database + home_dir = os.path.expanduser("~") + destination_file_path = os.path.join(home_dir, "graph_visualization.html") print("Adding html visualization of graph database after migration.") await visualize_graph(destination_file_path) print(f"Visualization can be found at: {destination_file_path}")