From 19a6ebc6feb8fc55b71ce87f6ab29c70f3db7594 Mon Sep 17 00:00:00 2001 From: Pavlo Paliychuk Date: Mon, 16 Sep 2024 16:23:07 -0400 Subject: [PATCH] Fix groupless search (#118) * fix(search): :bug: Search across null group_ids * chore: Version bump * chore: Set group_ids to none if it's an empty list * fix: Check for group ids being a list before setting it to None if empty * fix check * chore: Simplify group_ids check * chore: Simplify the check further --- graphiti_core/search/search.py | 3 +- graphiti_core/search/search_utils.py | 50 ++++++++++++++++++++-------- pyproject.toml | 2 +- 3 files changed, 39 insertions(+), 16 deletions(-) diff --git a/graphiti_core/search/search.py b/graphiti_core/search/search.py index 210586e5..c1a8979f 100644 --- a/graphiti_core/search/search.py +++ b/graphiti_core/search/search.py @@ -61,7 +61,8 @@ async def search( ) -> SearchResults: start = time() query = query.replace('\n', ' ') - + # if group_ids is empty, set it to None + group_ids = group_ids if group_ids else None edges = ( await edge_search( driver, embedder, query, group_ids, config.edge_config, center_node_uuid, config.limit diff --git a/graphiti_core/search/search_utils.py b/graphiti_core/search/search_utils.py index 702bd6e5..ef17236c 100644 --- a/graphiti_core/search/search_utils.py +++ b/graphiti_core/search/search_utils.py @@ -65,14 +65,15 @@ async def edge_fulltext_search( group_ids: list[str | None] | None = None, limit=RELEVANT_SCHEMA_LIMIT, ) -> list[EntityEdge]: - group_ids = group_ids if group_ids is not None else [None] - # fulltext search over facts cypher_query = Query(""" CALL db.index.fulltext.queryRelationships("name_and_fact", $query) YIELD relationship AS rel, score MATCH (n:Entity {uuid: $source_uuid})-[r {uuid: rel.uuid}]-(m:Entity {uuid: $target_uuid}) - WHERE r.group_id IN $group_ids + WHERE CASE + WHEN $group_ids IS NULL THEN r.group_id IS NULL + ELSE r.group_id IN $group_ids + END RETURN r.uuid AS uuid, r.group_id AS group_id, @@ -94,7 +95,10 @@ 
async def edge_fulltext_search( CALL db.index.fulltext.queryRelationships("name_and_fact", $query) YIELD relationship AS rel, score MATCH (n:Entity)-[r {uuid: rel.uuid}]-(m:Entity) - WHERE r.group_id IN $group_ids + WHERE CASE + WHEN $group_ids IS NULL THEN r.group_id IS NULL + ELSE r.group_id IN $group_ids + END RETURN r.uuid AS uuid, r.group_id AS group_id, @@ -115,7 +119,10 @@ async def edge_fulltext_search( CALL db.index.fulltext.queryRelationships("name_and_fact", $query) YIELD relationship AS rel, score MATCH (n:Entity)-[r {uuid: rel.uuid}]-(m:Entity {uuid: $target_uuid}) - WHERE r.group_id IN $group_ids + WHERE CASE + WHEN $group_ids IS NULL THEN r.group_id IS NULL + ELSE r.group_id IN $group_ids + END RETURN r.uuid AS uuid, r.group_id AS group_id, @@ -136,7 +143,10 @@ async def edge_fulltext_search( CALL db.index.fulltext.queryRelationships("name_and_fact", $query) YIELD relationship AS rel, score MATCH (n:Entity {uuid: $source_uuid})-[r {uuid: rel.uuid}]-(m:Entity) - WHERE r.group_id IN $group_ids + WHERE CASE + WHEN $group_ids IS NULL THEN r.group_id IS NULL + ELSE r.group_id IN $group_ids + END RETURN r.uuid AS uuid, r.group_id AS group_id, @@ -177,13 +187,15 @@ async def edge_similarity_search( group_ids: list[str | None] | None = None, limit: int = RELEVANT_SCHEMA_LIMIT, ) -> list[EntityEdge]: - group_ids = group_ids if group_ids is not None else [None] # vector similarity search over embedded facts query = Query(""" CALL db.index.vector.queryRelationships("fact_embedding", $limit, $search_vector) YIELD relationship AS rel, score MATCH (n:Entity {uuid: $source_uuid})-[r {uuid: rel.uuid}]-(m:Entity {uuid: $target_uuid}) - WHERE r.group_id IN $group_ids + WHERE CASE + WHEN $group_ids IS NULL THEN r.group_id IS NULL + ELSE r.group_id IN $group_ids + END RETURN r.uuid AS uuid, r.group_id AS group_id, @@ -205,7 +217,10 @@ async def edge_similarity_search( CALL db.index.vector.queryRelationships("fact_embedding", $limit, $search_vector) YIELD relationship AS 
rel, score MATCH (n:Entity)-[r {uuid: rel.uuid}]-(m:Entity) - WHERE r.group_id IN $group_ids + WHERE CASE + WHEN $group_ids IS NULL THEN r.group_id IS NULL + ELSE r.group_id IN $group_ids + END RETURN r.uuid AS uuid, r.group_id AS group_id, @@ -226,7 +241,10 @@ async def edge_similarity_search( CALL db.index.vector.queryRelationships("fact_embedding", $limit, $search_vector) YIELD relationship AS rel, score MATCH (n:Entity)-[r {uuid: rel.uuid}]-(m:Entity {uuid: $target_uuid}) - WHERE r.group_id IN $group_ids + WHERE CASE + WHEN $group_ids IS NULL THEN r.group_id IS NULL + ELSE r.group_id IN $group_ids + END RETURN r.uuid AS uuid, r.group_id AS group_id, @@ -247,7 +265,10 @@ async def edge_similarity_search( CALL db.index.vector.queryRelationships("fact_embedding", $limit, $search_vector) YIELD relationship AS rel, score MATCH (n:Entity {uuid: $source_uuid})-[r {uuid: rel.uuid}]-(m:Entity) - WHERE r.group_id IN $group_ids + WHERE CASE + WHEN $group_ids IS NULL THEN r.group_id IS NULL + ELSE r.group_id IN $group_ids + END RETURN r.uuid AS uuid, r.group_id AS group_id, @@ -284,15 +305,16 @@ async def node_fulltext_search( group_ids: list[str | None] | None = None, limit=RELEVANT_SCHEMA_LIMIT, ) -> list[EntityNode]: - group_ids = group_ids if group_ids is not None else [None] - # BM25 search to get top nodes fuzzy_query = re.sub(r'[^\w\s]', '', query) + '~' records, _, _ = await driver.execute_query( """ CALL db.index.fulltext.queryNodes("name_and_summary", $query) YIELD node AS n, score - MATCH (n WHERE n.group_id in $group_ids) + WHERE CASE + WHEN $group_ids IS NULL THEN n.group_id IS NULL + ELSE n.group_id IN $group_ids + END RETURN n.uuid AS uuid, n.group_id AS group_id, diff --git a/pyproject.toml b/pyproject.toml index 0986a44b..64197bc5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "graphiti-core" -version = "0.3.0" +version = "0.3.1" description = "A temporal graph building library" authors = [ "Paul Paliychuk ",