add indexes

This commit is contained in:
prestonrasmussen 2025-09-05 13:51:10 -04:00
parent 8ba998b9bb
commit 67812fe3d1
5 changed files with 195 additions and 56 deletions

View file

@ -46,12 +46,25 @@ aoss_indices = [
'name': {'type': 'text'},
'summary': {'type': 'text'},
'group_id': {'type': 'text'},
'created_at': {'type': 'date', 'format': "yyyy-MM-dd'T'HH:mm:ss.SSSZ"},
'name_embedding': {
'type': 'dense_vector',
'dims': 1024,
'index': True,
'similarity': 'cosine',
},
}
}
},
'query': {
'query': {'multi_match': {'query': '', 'fields': ['name', 'summary', 'group_id']}},
'size': DEFAULT_SIZE,
'knn': {
'field': 'name_embedding',
'query_vector': [],
'k': DEFAULT_SIZE,
'num_candidates': 100,
},
},
},
{
@ -80,6 +93,8 @@ aoss_indices = [
'source': {'type': 'text'},
'source_description': {'type': 'text'},
'group_id': {'type': 'text'},
'created_at': {'type': 'date', 'format': "yyyy-MM-dd'T'HH:mm:ss.SSSZ"},
'valid_at': {'type': 'date', 'format': "yyyy-MM-dd'T'HH:mm:ss.SSSZ"},
}
}
},
@ -102,12 +117,28 @@ aoss_indices = [
'name': {'type': 'text'},
'fact': {'type': 'text'},
'group_id': {'type': 'text'},
'created_at': {'type': 'date', 'format': "yyyy-MM-dd'T'HH:mm:ss.SSSZ"},
'valid_at': {'type': 'date', 'format': "yyyy-MM-dd'T'HH:mm:ss.SSSZ"},
'expired_at': {'type': 'date', 'format': "yyyy-MM-dd'T'HH:mm:ss.SSSZ"},
'invalid_at': {'type': 'date', 'format': "yyyy-MM-dd'T'HH:mm:ss.SSSZ"},
'fact_embedding': {
'type': 'dense_vector',
'dims': 1024,
'index': True,
'similarity': 'cosine',
},
}
}
},
'query': {
'query': {'multi_match': {'query': '', 'fields': ['name', 'fact', 'group_id']}},
'size': DEFAULT_SIZE,
'knn': {
'field': 'fact_embedding',
'query_vector': [], # supply vector at runtime
'k': DEFAULT_SIZE,
'num_candidates': 100,
},
},
},
]

View file

@ -298,7 +298,7 @@ class EntityEdge(Edge):
else:
edge_data.update(self.attributes or {})
if driver.provider == GraphProvider.NEPTUNE:
if driver.aoss_client:
driver.save_to_aoss('edge_name_and_fact', [edge_data]) # pyright: ignore reportAttributeAccessIssue
result = await driver.execute_query(

View file

@ -273,7 +273,7 @@ class EpisodicNode(Node):
)
async def save(self, driver: GraphDriver):
if driver.provider == GraphProvider.NEPTUNE:
if driver.aoss_client:
driver.save_to_aoss( # pyright: ignore reportAttributeAccessIssue
'episode_content',
[
@ -470,7 +470,7 @@ class EntityNode(Node):
entity_data.update(self.attributes or {})
labels = ':'.join(self.labels + ['Entity'])
if driver.provider == GraphProvider.NEPTUNE:
if driver.aoss_client:
driver.save_to_aoss('node_name_and_summary', [entity_data]) # pyright: ignore reportAttributeAccessIssue
result = await driver.execute_query(

View file

@ -208,11 +208,11 @@ async def edge_fulltext_search(
# Match the edge ids and return the values
query = (
"""
UNWIND $ids as id
MATCH (n:Entity)-[e:RELATES_TO]->(m:Entity)
WHERE e.group_id IN $group_ids
AND id(e)=id
"""
UNWIND $ids as id
MATCH (n:Entity)-[e:RELATES_TO]->(m:Entity)
WHERE e.group_id IN $group_ids
AND id(e)=id
"""
+ filter_query
+ """
AND id(e)=id
@ -244,6 +244,41 @@ async def edge_fulltext_search(
)
else:
return []
elif driver.aoss_client:
res = driver.run_aoss_query('edge_name_and_fact', query) # pyright: ignore reportAttributeAccessIssue
if res['hits']['total']['value'] > 0:
# Collect the uuid and score of each AOSS hit
input_uuids = []
for r in res['hits']['hits']:
input_uuids.append({'uuid': r['_source']['uuid'], 'score': r['_score']})
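# Match the edges by uuid and return the values
# NOTE(review): $uuids holds {'uuid', 'score'} maps unwound as `uuid`, but the query below
# compares `e.uuid=uuid` and reads `id.score`; these look like they should be `uuid.uuid` / `uuid.score` - confirm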
query = (
"""
UNWIND $uuids as uuid
MATCH (n:Entity)-[e:RELATES_TO]->(m:Entity)
WHERE e.group_id IN $group_ids
AND e.uuid=uuid
"""
+ filter_query
+ """
AND e.uuid=uuid
WITH e, id.score as score, startNode(e) AS n, endNode(e) AS m"""
+ get_entity_edge_return_query(driver.provider)
+ """ORDER BY score DESC LIMIT $limit
"""
)
records, _, _ = await driver.execute_query(
query,
query=fuzzy_query,
uuids=input_uuids,
limit=limit,
routing_='r',
**filter_params,
)
else:
return []
else:
query = (
get_relationships_query('edge_name_and_fact', limit=limit, provider=driver.provider)
@ -318,8 +353,8 @@ async def edge_similarity_search(
if driver.provider == GraphProvider.NEPTUNE:
query = (
"""
MATCH (n:Entity)-[e:RELATES_TO]->(m:Entity)
"""
MATCH (n:Entity)-[e:RELATES_TO]->(m:Entity)
"""
+ filter_query
+ """
RETURN DISTINCT id(e) as id, e.fact_embedding as embedding
@ -571,11 +606,11 @@ async def node_fulltext_search(
# Match the node ids and return the values
query = (
"""
UNWIND $ids as i
MATCH (n:Entity)
WHERE n.uuid=i.id
RETURN
"""
UNWIND $ids as i
MATCH (n:Entity)
WHERE n.uuid=i.id
RETURN
"""
+ get_entity_node_return_query(driver.provider)
+ """
ORDER BY i.score DESC
@ -592,6 +627,38 @@ async def node_fulltext_search(
)
else:
return []
elif driver.aoss_client:
res = driver.run_aoss_query('node_name_and_summary', query, limit=limit) # pyright: ignore reportAttributeAccessIssue
if res['hits']['total']['value'] > 0:
# Collect the uuid and score of each AOSS hit
input_uuids = []
for r in res['hits']['hits']:
input_uuids.append({'uuid': r['_source']['uuid'], 'score': r['_score']})
# Match the nodes by uuid and return the values
query = (
"""
UNWIND $uuids as i
MATCH (n:Entity)
WHERE n.uuid=i.uuid
RETURN
"""
+ get_entity_node_return_query(driver.provider)
+ """
ORDER BY i.score DESC
LIMIT $limit
"""
)
records, _, _ = await driver.execute_query(
query,
uuids=input_uuids,
query=fuzzy_query,
limit=limit,
routing_='r',
**filter_params,
)
else:
return []
else:
query = (
get_nodes_query(
@ -648,8 +715,8 @@ async def node_similarity_search(
if driver.provider == GraphProvider.NEPTUNE:
query = (
"""
MATCH (n:Entity)
"""
MATCH (n:Entity)
"""
+ filter_query
+ """
RETURN DISTINCT id(n) as id, n.name_embedding as embedding
@ -678,11 +745,11 @@ async def node_similarity_search(
# Match the node ids and return the values
query = (
"""
UNWIND $ids as i
MATCH (n:Entity)
WHERE id(n)=i.id
RETURN
"""
UNWIND $ids as i
MATCH (n:Entity)
WHERE id(n)=i.id
RETURN
"""
+ get_entity_node_return_query(driver.provider)
+ """
ORDER BY i.score DESC
@ -703,8 +770,8 @@ async def node_similarity_search(
else:
query = (
"""
MATCH (n:Entity)
"""
MATCH (n:Entity)
"""
+ filter_query
+ """
WITH n, """
@ -852,7 +919,7 @@ async def episode_fulltext_search(
query = """
UNWIND $ids as i
MATCH (e:Episodic)
WHERE e.uuid=i.id
WHERE e.uuid=i.uuid
RETURN
e.content AS content,
e.created_at AS created_at,
@ -876,6 +943,42 @@ async def episode_fulltext_search(
)
else:
return []
elif driver.aoss_client:
res = driver.run_aoss_query('episode_content', query, limit=limit) # pyright: ignore reportAttributeAccessIssue
if res['hits']['total']['value'] > 0:
# Collect the uuid and score of each AOSS hit
input_uuids = []
for r in res['hits']['hits']:
input_uuids.append({'uuid': r['_source']['uuid'], 'score': r['_score']})
# Match the episodes by uuid and return the values
query = """
UNWIND $uuids as i
MATCH (e:Episodic)
WHERE e.uuid=i.uuid
RETURN
e.content AS content,
e.created_at AS created_at,
e.valid_at AS valid_at,
e.uuid AS uuid,
e.name AS name,
e.group_id AS group_id,
e.source_description AS source_description,
e.source AS source,
e.entity_edges AS entity_edges
ORDER BY i.score DESC
LIMIT $limit
"""
records, _, _ = await driver.execute_query(
query,
uuids=input_uuids,
query=fuzzy_query,
limit=limit,
routing_='r',
**filter_params,
)
else:
return []
else:
query = (
get_nodes_query('episode_content', '$query', limit=limit, provider=driver.provider)
@ -1003,8 +1106,8 @@ async def community_similarity_search(
if driver.provider == GraphProvider.NEPTUNE:
query = (
"""
MATCH (n:Community)
"""
MATCH (n:Community)
"""
+ group_filter_query
+ """
RETURN DISTINCT id(n) as id, n.name_embedding as embedding
@ -1063,8 +1166,8 @@ async def community_similarity_search(
query = (
"""
MATCH (c:Community)
"""
MATCH (c:Community)
"""
+ group_filter_query
+ """
WITH c,
@ -1206,9 +1309,9 @@ async def get_relevant_nodes(
# FIXME: Kuzu currently does not support using variables such as `node.fulltext_query` as an input to FTS, which means `get_relevant_nodes()` won't work with Kuzu as the graph driver.
query = (
"""
UNWIND $nodes AS node
MATCH (n:Entity {group_id: $group_id})
"""
UNWIND $nodes AS node
MATCH (n:Entity {group_id: $group_id})
"""
+ filter_query
+ """
WITH node, n, """
@ -1253,9 +1356,9 @@ async def get_relevant_nodes(
else:
query = (
"""
UNWIND $nodes AS node
MATCH (n:Entity {group_id: $group_id})
"""
UNWIND $nodes AS node
MATCH (n:Entity {group_id: $group_id})
"""
+ filter_query
+ """
WITH node, n, """
@ -1344,9 +1447,9 @@ async def get_relevant_edges(
if driver.provider == GraphProvider.NEPTUNE:
query = (
"""
UNWIND $edges AS edge
MATCH (n:Entity {uuid: edge.source_node_uuid})-[e:RELATES_TO {group_id: edge.group_id}]-(m:Entity {uuid: edge.target_node_uuid})
"""
UNWIND $edges AS edge
MATCH (n:Entity {uuid: edge.source_node_uuid})-[e:RELATES_TO {group_id: edge.group_id}]-(m:Entity {uuid: edge.target_node_uuid})
"""
+ filter_query
+ """
WITH e, edge
@ -1416,9 +1519,9 @@ async def get_relevant_edges(
query = (
"""
UNWIND $edges AS edge
MATCH (n:Entity {uuid: edge.source_node_uuid})-[:RELATES_TO]-(e:RelatesToNode_ {group_id: edge.group_id})-[:RELATES_TO]-(m:Entity {uuid: edge.target_node_uuid})
"""
UNWIND $edges AS edge
MATCH (n:Entity {uuid: edge.source_node_uuid})-[:RELATES_TO]-(e:RelatesToNode_ {group_id: edge.group_id})-[:RELATES_TO]-(m:Entity {uuid: edge.target_node_uuid})
"""
+ filter_query
+ """
WITH e, edge, n, m, """
@ -1454,9 +1557,9 @@ async def get_relevant_edges(
else:
query = (
"""
UNWIND $edges AS edge
MATCH (n:Entity {uuid: edge.source_node_uuid})-[e:RELATES_TO {group_id: edge.group_id}]-(m:Entity {uuid: edge.target_node_uuid})
"""
UNWIND $edges AS edge
MATCH (n:Entity {uuid: edge.source_node_uuid})-[e:RELATES_TO {group_id: edge.group_id}]-(m:Entity {uuid: edge.target_node_uuid})
"""
+ filter_query
+ """
WITH e, edge, """
@ -1529,10 +1632,10 @@ async def get_edge_invalidation_candidates(
if driver.provider == GraphProvider.NEPTUNE:
query = (
"""
UNWIND $edges AS edge
MATCH (n:Entity)-[e:RELATES_TO {group_id: edge.group_id}]->(m:Entity)
WHERE n.uuid IN [edge.source_node_uuid, edge.target_node_uuid] OR m.uuid IN [edge.target_node_uuid, edge.source_node_uuid]
"""
UNWIND $edges AS edge
MATCH (n:Entity)-[e:RELATES_TO {group_id: edge.group_id}]->(m:Entity)
WHERE n.uuid IN [edge.source_node_uuid, edge.target_node_uuid] OR m.uuid IN [edge.target_node_uuid, edge.source_node_uuid]
"""
+ filter_query
+ """
WITH e, edge
@ -1602,10 +1705,10 @@ async def get_edge_invalidation_candidates(
query = (
"""
UNWIND $edges AS edge
MATCH (n:Entity)-[:RELATES_TO]->(e:RelatesToNode_ {group_id: edge.group_id})-[:RELATES_TO]->(m:Entity)
WHERE (n.uuid IN [edge.source_node_uuid, edge.target_node_uuid] OR m.uuid IN [edge.target_node_uuid, edge.source_node_uuid])
"""
UNWIND $edges AS edge
MATCH (n:Entity)-[:RELATES_TO]->(e:RelatesToNode_ {group_id: edge.group_id})-[:RELATES_TO]->(m:Entity)
WHERE (n.uuid IN [edge.source_node_uuid, edge.target_node_uuid] OR m.uuid IN [edge.target_node_uuid, edge.source_node_uuid])
"""
+ filter_query
+ """
WITH edge, e, n, m, """
@ -1641,10 +1744,10 @@ async def get_edge_invalidation_candidates(
else:
query = (
"""
UNWIND $edges AS edge
MATCH (n:Entity)-[e:RELATES_TO {group_id: edge.group_id}]->(m:Entity)
WHERE n.uuid IN [edge.source_node_uuid, edge.target_node_uuid] OR m.uuid IN [edge.target_node_uuid, edge.source_node_uuid]
"""
UNWIND $edges AS edge
MATCH (n:Entity)-[e:RELATES_TO {group_id: edge.group_id}]->(m:Entity)
WHERE n.uuid IN [edge.source_node_uuid, edge.target_node_uuid] OR m.uuid IN [edge.target_node_uuid, edge.source_node_uuid]
"""
+ filter_query
+ """
WITH edge, e, """

View file

@ -194,6 +194,11 @@ async def add_nodes_and_edges_bulk_tx(
)
await tx.run(get_entity_edge_save_bulk_query(driver.provider), entity_edges=edges)
if driver.aoss_client:
driver.save_to_aoss('episode_content', episodes)
driver.save_to_aoss('node_name_and_summary', nodes)
driver.save_to_aoss('edge_name_and_summary', edges)
async def extract_nodes_and_edges_bulk(
clients: GraphitiClients,