* Fix FalkoDB tests * Add support for graph memory using Kuzu * Fix lints * Fix queries * Add tests * Add comments * Add more test coverage * Add mocked tests * Format * Add mocked tests II * Refactor community queries * Add more mocked tests * Refactor tests to always cleanup * Add more mocked tests * Update kuzu * Refactor how filters are built * Add more mocked tests * Refactor and cleanup * Fix tests * Fix lints * Refactor tests * Disable neptune * Fix * Update kuzu version * Update kuzu to latest release * Fix filter * Fix query * Fix Neptune query * Fix bulk queries * Fix lints * Fix deletes * Comments and format * Add Kuzu to the README * Fix bulk queries * Test all fields of nodes and edges * Fix lints * Update search_utils.py --------- Co-authored-by: Preston Rasmussen <109292228+prasmussen15@users.noreply.github.com>
133 lines
6.5 KiB
Python
133 lines
6.5 KiB
Python
"""
|
|
Database query utilities for different graph database backends.
|
|
|
|
This module provides database-agnostic query generation for Neo4j and FalkorDB,
|
|
supporting index creation, fulltext search, and bulk operations.
|
|
"""
|
|
|
|
from typing_extensions import LiteralString
|
|
|
|
from graphiti_core.driver.driver import GraphProvider
|
|
|
|
# Mapping from Neo4j fulltext index names to FalkorDB node labels
|
|
NEO4J_TO_FALKORDB_MAPPING = {
|
|
'node_name_and_summary': 'Entity',
|
|
'community_name': 'Community',
|
|
'episode_content': 'Episodic',
|
|
'edge_name_and_fact': 'RELATES_TO',
|
|
}
|
|
# Mapping from fulltext index names to Kuzu node labels
|
|
INDEX_TO_LABEL_KUZU_MAPPING = {
|
|
'node_name_and_summary': 'Entity',
|
|
'community_name': 'Community',
|
|
'episode_content': 'Episodic',
|
|
'edge_name_and_fact': 'RelatesToNode_',
|
|
}
|
|
|
|
|
|
def get_range_indices(provider: GraphProvider) -> list[LiteralString]:
|
|
if provider == GraphProvider.FALKORDB:
|
|
return [
|
|
# Entity node
|
|
'CREATE INDEX FOR (n:Entity) ON (n.uuid, n.group_id, n.name, n.created_at)',
|
|
# Episodic node
|
|
'CREATE INDEX FOR (n:Episodic) ON (n.uuid, n.group_id, n.created_at, n.valid_at)',
|
|
# Community node
|
|
'CREATE INDEX FOR (n:Community) ON (n.uuid)',
|
|
# RELATES_TO edge
|
|
'CREATE INDEX FOR ()-[e:RELATES_TO]-() ON (e.uuid, e.group_id, e.name, e.created_at, e.expired_at, e.valid_at, e.invalid_at)',
|
|
# MENTIONS edge
|
|
'CREATE INDEX FOR ()-[e:MENTIONS]-() ON (e.uuid, e.group_id)',
|
|
# HAS_MEMBER edge
|
|
'CREATE INDEX FOR ()-[e:HAS_MEMBER]-() ON (e.uuid)',
|
|
]
|
|
|
|
if provider == GraphProvider.KUZU:
|
|
return []
|
|
|
|
return [
|
|
'CREATE INDEX entity_uuid IF NOT EXISTS FOR (n:Entity) ON (n.uuid)',
|
|
'CREATE INDEX episode_uuid IF NOT EXISTS FOR (n:Episodic) ON (n.uuid)',
|
|
'CREATE INDEX community_uuid IF NOT EXISTS FOR (n:Community) ON (n.uuid)',
|
|
'CREATE INDEX relation_uuid IF NOT EXISTS FOR ()-[e:RELATES_TO]-() ON (e.uuid)',
|
|
'CREATE INDEX mention_uuid IF NOT EXISTS FOR ()-[e:MENTIONS]-() ON (e.uuid)',
|
|
'CREATE INDEX has_member_uuid IF NOT EXISTS FOR ()-[e:HAS_MEMBER]-() ON (e.uuid)',
|
|
'CREATE INDEX entity_group_id IF NOT EXISTS FOR (n:Entity) ON (n.group_id)',
|
|
'CREATE INDEX episode_group_id IF NOT EXISTS FOR (n:Episodic) ON (n.group_id)',
|
|
'CREATE INDEX community_group_id IF NOT EXISTS FOR (n:Community) ON (n.group_id)',
|
|
'CREATE INDEX relation_group_id IF NOT EXISTS FOR ()-[e:RELATES_TO]-() ON (e.group_id)',
|
|
'CREATE INDEX mention_group_id IF NOT EXISTS FOR ()-[e:MENTIONS]-() ON (e.group_id)',
|
|
'CREATE INDEX name_entity_index IF NOT EXISTS FOR (n:Entity) ON (n.name)',
|
|
'CREATE INDEX created_at_entity_index IF NOT EXISTS FOR (n:Entity) ON (n.created_at)',
|
|
'CREATE INDEX created_at_episodic_index IF NOT EXISTS FOR (n:Episodic) ON (n.created_at)',
|
|
'CREATE INDEX valid_at_episodic_index IF NOT EXISTS FOR (n:Episodic) ON (n.valid_at)',
|
|
'CREATE INDEX name_edge_index IF NOT EXISTS FOR ()-[e:RELATES_TO]-() ON (e.name)',
|
|
'CREATE INDEX created_at_edge_index IF NOT EXISTS FOR ()-[e:RELATES_TO]-() ON (e.created_at)',
|
|
'CREATE INDEX expired_at_edge_index IF NOT EXISTS FOR ()-[e:RELATES_TO]-() ON (e.expired_at)',
|
|
'CREATE INDEX valid_at_edge_index IF NOT EXISTS FOR ()-[e:RELATES_TO]-() ON (e.valid_at)',
|
|
'CREATE INDEX invalid_at_edge_index IF NOT EXISTS FOR ()-[e:RELATES_TO]-() ON (e.invalid_at)',
|
|
]
|
|
|
|
|
|
def get_fulltext_indices(provider: GraphProvider) -> list[LiteralString]:
|
|
if provider == GraphProvider.FALKORDB:
|
|
return [
|
|
"""CREATE FULLTEXT INDEX FOR (e:Episodic) ON (e.content, e.source, e.source_description, e.group_id)""",
|
|
"""CREATE FULLTEXT INDEX FOR (n:Entity) ON (n.name, n.summary, n.group_id)""",
|
|
"""CREATE FULLTEXT INDEX FOR (n:Community) ON (n.name, n.group_id)""",
|
|
"""CREATE FULLTEXT INDEX FOR ()-[e:RELATES_TO]-() ON (e.name, e.fact, e.group_id)""",
|
|
]
|
|
|
|
if provider == GraphProvider.KUZU:
|
|
return [
|
|
"CALL CREATE_FTS_INDEX('Episodic', 'episode_content', ['content', 'source', 'source_description']);",
|
|
"CALL CREATE_FTS_INDEX('Entity', 'node_name_and_summary', ['name', 'summary']);",
|
|
"CALL CREATE_FTS_INDEX('Community', 'community_name', ['name']);",
|
|
"CALL CREATE_FTS_INDEX('RelatesToNode_', 'edge_name_and_fact', ['name', 'fact']);",
|
|
]
|
|
|
|
return [
|
|
"""CREATE FULLTEXT INDEX episode_content IF NOT EXISTS
|
|
FOR (e:Episodic) ON EACH [e.content, e.source, e.source_description, e.group_id]""",
|
|
"""CREATE FULLTEXT INDEX node_name_and_summary IF NOT EXISTS
|
|
FOR (n:Entity) ON EACH [n.name, n.summary, n.group_id]""",
|
|
"""CREATE FULLTEXT INDEX community_name IF NOT EXISTS
|
|
FOR (n:Community) ON EACH [n.name, n.group_id]""",
|
|
"""CREATE FULLTEXT INDEX edge_name_and_fact IF NOT EXISTS
|
|
FOR ()-[e:RELATES_TO]-() ON EACH [e.name, e.fact, e.group_id]""",
|
|
]
|
|
|
|
|
|
def get_nodes_query(name: str, query: str, limit: int, provider: GraphProvider) -> str:
|
|
if provider == GraphProvider.FALKORDB:
|
|
label = NEO4J_TO_FALKORDB_MAPPING[name]
|
|
return f"CALL db.idx.fulltext.queryNodes('{label}', {query})"
|
|
|
|
if provider == GraphProvider.KUZU:
|
|
label = INDEX_TO_LABEL_KUZU_MAPPING[name]
|
|
return f"CALL QUERY_FTS_INDEX('{label}', '{name}', {query}, TOP := $limit)"
|
|
|
|
return f'CALL db.index.fulltext.queryNodes("{name}", {query}, {{limit: $limit}})'
|
|
|
|
|
|
def get_vector_cosine_func_query(vec1, vec2, provider: GraphProvider) -> str:
|
|
if provider == GraphProvider.FALKORDB:
|
|
# FalkorDB uses a different syntax for regular cosine similarity and Neo4j uses normalized cosine similarity
|
|
return f'(2 - vec.cosineDistance({vec1}, vecf32({vec2})))/2'
|
|
|
|
if provider == GraphProvider.KUZU:
|
|
return f'array_cosine_similarity({vec1}, {vec2})'
|
|
|
|
return f'vector.similarity.cosine({vec1}, {vec2})'
|
|
|
|
|
|
def get_relationships_query(name: str, limit: int, provider: GraphProvider) -> str:
|
|
if provider == GraphProvider.FALKORDB:
|
|
label = NEO4J_TO_FALKORDB_MAPPING[name]
|
|
return f"CALL db.idx.fulltext.queryRelationships('{label}', $query)"
|
|
|
|
if provider == GraphProvider.KUZU:
|
|
label = INDEX_TO_LABEL_KUZU_MAPPING[name]
|
|
return f"CALL QUERY_FTS_INDEX('{label}', '{name}', cast($query AS STRING), TOP := $limit)"
|
|
|
|
return f'CALL db.index.fulltext.queryRelationships("{name}", $query, {{limit: $limit}})'
|