refactor: Make relational database search more effective
This commit is contained in:
parent
726d4d8535
commit
39fa0180f3
2 changed files with 36 additions and 16 deletions
|
|
@ -38,7 +38,7 @@ async def migrate_relational_database(graph_db, schema, migrate_column_data=True
|
||||||
table_node = TableType(
|
table_node = TableType(
|
||||||
id=uuid5(NAMESPACE_OID, name=table_name),
|
id=uuid5(NAMESPACE_OID, name=table_name),
|
||||||
name=table_name,
|
name=table_name,
|
||||||
description=f"Table: {table_name}",
|
description=f'Relational database table with the following name: "{table_name}".',
|
||||||
)
|
)
|
||||||
|
|
||||||
# Add TableType node to mapping ( node will be added to the graph later based on this mapping )
|
# Add TableType node to mapping ( node will be added to the graph later based on this mapping )
|
||||||
|
|
@ -75,7 +75,7 @@ async def migrate_relational_database(graph_db, schema, migrate_column_data=True
|
||||||
name=node_id,
|
name=node_id,
|
||||||
is_a=table_node,
|
is_a=table_node,
|
||||||
properties=str(row_properties),
|
properties=str(row_properties),
|
||||||
description=f"Row in {table_name} with {primary_key_col}={primary_key_value}",
|
description=f'Row in relational database table from the table with the name: "{table_name}" with the following row data {str(row_properties)} where the dictionary key value is the column name and the value is the column value. This row has the id of: {node_id}',
|
||||||
)
|
)
|
||||||
|
|
||||||
# Store the node object in our mapping
|
# Store the node object in our mapping
|
||||||
|
|
@ -113,7 +113,7 @@ async def migrate_relational_database(graph_db, schema, migrate_column_data=True
|
||||||
id=uuid5(NAMESPACE_OID, name=column_node_id),
|
id=uuid5(NAMESPACE_OID, name=column_node_id),
|
||||||
name=column_node_id,
|
name=column_node_id,
|
||||||
properties=f"{key} {value} {table_name}",
|
properties=f"{key} {value} {table_name}",
|
||||||
description=f"Column name={key} and value={value} from column from table={table_name}",
|
description=f"column from relational database table={table_name}. Column name={key} and value={value}. The value of the column is related to the following row with this id: {row_node.id}. This column has the following ID: {column_node_id}",
|
||||||
)
|
)
|
||||||
node_mapping[column_node_id] = column_node
|
node_mapping[column_node_id] = column_node
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,16 +1,15 @@
|
||||||
|
from pathlib import Path
|
||||||
import asyncio
|
import asyncio
|
||||||
|
|
||||||
import cognee
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
import cognee
|
||||||
|
from cognee.infrastructure.databases.relational.config import get_migration_config
|
||||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||||
from cognee.api.v1.visualize.visualize import visualize_graph
|
from cognee.api.v1.visualize.visualize import visualize_graph
|
||||||
from cognee.infrastructure.databases.relational import (
|
from cognee.infrastructure.databases.relational import (
|
||||||
get_migration_relational_engine,
|
get_migration_relational_engine,
|
||||||
)
|
)
|
||||||
|
|
||||||
from cognee.modules.search.types import SearchType
|
from cognee.modules.search.types import SearchType
|
||||||
|
|
||||||
from cognee.infrastructure.databases.relational import (
|
from cognee.infrastructure.databases.relational import (
|
||||||
create_db_and_tables as create_relational_db_and_tables,
|
create_db_and_tables as create_relational_db_and_tables,
|
||||||
)
|
)
|
||||||
|
|
@ -32,16 +31,29 @@ from cognee.infrastructure.databases.vector.pgvector import (
|
||||||
|
|
||||||
|
|
||||||
async def main():
|
async def main():
|
||||||
engine = get_migration_relational_engine()
|
|
||||||
|
|
||||||
# Clean all data stored in Cognee
|
# Clean all data stored in Cognee
|
||||||
await cognee.prune.prune_data()
|
await cognee.prune.prune_data()
|
||||||
await cognee.prune.prune_system(metadata=True)
|
await cognee.prune.prune_system(metadata=True)
|
||||||
|
|
||||||
# Needed to create appropriate tables only on the Cognee side
|
# Needed to create appropriate database tables only on the Cognee side
|
||||||
await create_relational_db_and_tables()
|
await create_relational_db_and_tables()
|
||||||
await create_vector_db_and_tables()
|
await create_vector_db_and_tables()
|
||||||
|
|
||||||
|
# If the environment variables are not set, use the example database from the Cognee repo
|
||||||
|
migration_db_provider = os.environ.get("MIGRATION_DB_PROVIDER", "sqlite")
|
||||||
|
migration_db_path = os.environ.get(
|
||||||
|
"MIGRATION_DB_PATH",
|
||||||
|
os.path.join(Path(__file__).resolve().parent.parent.parent, "cognee/tests/test_data"),
|
||||||
|
)
|
||||||
|
migration_db_name = os.environ.get("MIGRATION_DB_NAME", "migration_database.sqlite")
|
||||||
|
|
||||||
|
migration_config = get_migration_config()
|
||||||
|
migration_config.migration_db_provider = migration_db_provider
|
||||||
|
migration_config.migration_db_path = migration_db_path
|
||||||
|
migration_config.migration_db_name = migration_db_name
|
||||||
|
|
||||||
|
engine = get_migration_relational_engine()
|
||||||
|
|
||||||
print("\nExtracting schema of database to migrate.")
|
print("\nExtracting schema of database to migrate.")
|
||||||
schema = await engine.extract_schema()
|
schema = await engine.extract_schema()
|
||||||
print(f"Migrated database schema:\n{schema}")
|
print(f"Migrated database schema:\n{schema}")
|
||||||
|
|
@ -53,10 +65,6 @@ async def main():
|
||||||
await migrate_relational_database(graph, schema=schema)
|
await migrate_relational_database(graph, schema=schema)
|
||||||
print("Relational database migration complete.")
|
print("Relational database migration complete.")
|
||||||
|
|
||||||
# Define location where to store html visualization of graph of the migrated database
|
|
||||||
home_dir = os.path.expanduser("~")
|
|
||||||
destination_file_path = os.path.join(home_dir, "graph_visualization.html")
|
|
||||||
|
|
||||||
# Make sure to set top_k to a high value for a broader search; the default value is only 10!
|
# Make sure to set top_k to a high value for a broader search; the default value is only 10!
|
||||||
# top_k represents the number of graph triplets to supply to the LLM to answer your question
|
# top_k represents the number of graph triplets to supply to the LLM to answer your question
|
||||||
search_results = await cognee.search(
|
search_results = await cognee.search(
|
||||||
|
|
@ -69,13 +77,25 @@ async def main():
|
||||||
# Setting top_k too high might overwhelm the LLM context when specific questions need to be answered.
|
# Setting top_k too high might overwhelm the LLM context when specific questions need to be answered.
|
||||||
# For this kind of question we've set the top_k to 30
|
# For this kind of question we've set the top_k to 30
|
||||||
search_results = await cognee.search(
|
search_results = await cognee.search(
|
||||||
query_type=SearchType.GRAPH_COMPLETION_COT,
|
query_type=SearchType.GRAPH_COMPLETION,
|
||||||
query_text="What invoices are related to Leonie Köhler?",
|
query_text="What invoices are related to Leonie Köhler?",
|
||||||
top_k=30,
|
top_k=30,
|
||||||
)
|
)
|
||||||
print(f"Search results: {search_results}")
|
print(f"Search results: {search_results}")
|
||||||
|
|
||||||
# test.html is a file with visualized data migration
|
search_results = await cognee.search(
|
||||||
|
query_type=SearchType.GRAPH_COMPLETION,
|
||||||
|
query_text="What invoices are related to Luís Gonçalves?",
|
||||||
|
top_k=30,
|
||||||
|
)
|
||||||
|
print(f"Search results: {search_results}")
|
||||||
|
|
||||||
|
# If you check the relational database for this example you can see that the search results successfully found all
|
||||||
|
# the invoices related to the two customers, without any hallucinations or additional information
|
||||||
|
|
||||||
|
# Define location where to store html visualization of graph of the migrated database
|
||||||
|
home_dir = os.path.expanduser("~")
|
||||||
|
destination_file_path = os.path.join(home_dir, "graph_visualization.html")
|
||||||
print("Adding html visualization of graph database after migration.")
|
print("Adding html visualization of graph database after migration.")
|
||||||
await visualize_graph(destination_file_path)
|
await visualize_graph(destination_file_path)
|
||||||
print(f"Visualization can be found at: {destination_file_path}")
|
print(f"Visualization can be found at: {destination_file_path}")
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue