From 36056ad1413ee20cffdb940fb8afe8cf81d46cca Mon Sep 17 00:00:00 2001 From: Preston Rasmussen <109292228+prasmussen15@users.noreply.github.com> Date: Tue, 23 Sep 2025 17:53:39 -0400 Subject: [PATCH] Graph quality updates (#922) duplicate_of updates --- graphiti_core/driver/driver.py | 6 ++-- graphiti_core/driver/falkordb_driver.py | 46 +++++++++++++++++++++---- graphiti_core/graph_queries.py | 21 ++++++----- pyproject.toml | 2 +- uv.lock | 2 +- 5 files changed, 57 insertions(+), 20 deletions(-) diff --git a/graphiti_core/driver/driver.py b/graphiti_core/driver/driver.py index df2195a3..52c0710c 100644 --- a/graphiti_core/driver/driver.py +++ b/graphiti_core/driver/driver.py @@ -312,9 +312,11 @@ class GraphDriver(ABC): return 0 - def build_fulltext_query(self, query: str, group_ids: list[str] | None = None, max_query_length: int = 128) -> str: + def build_fulltext_query( + self, query: str, group_ids: list[str] | None = None, max_query_length: int = 128 + ) -> str: """ Specific fulltext query builder for database providers. Only implemented by providers that need custom fulltext query building. """ - raise NotImplementedError(f"build_fulltext_query not implemented for {self.provider}") + raise NotImplementedError(f'build_fulltext_query not implemented for {self.provider}') diff --git a/graphiti_core/driver/falkordb_driver.py b/graphiti_core/driver/falkordb_driver.py index 2e855db5..09353158 100644 --- a/graphiti_core/driver/falkordb_driver.py +++ b/graphiti_core/driver/falkordb_driver.py @@ -37,11 +37,42 @@ from graphiti_core.utils.datetime_utils import convert_datetimes_to_strings logger = logging.getLogger(__name__) STOPWORDS = [ - 'a', 'is', 'the', 'an', 'and', 'are', 'as', 'at', 'be', 'but', 'by', 'for', - 'if', 'in', 'into', 'it', 'no', 'not', 'of', 'on', 'or', 'such', 'that', 'their', - 'then', 'there', 'these', 'they', 'this', 'to', 'was', 'will', 'with' + 'a', + 'is', + 'the', + 'an', + 'and', + 'are', + 'as', + 'at', + 'be', + 'but', + 'by', + 'for', + 'if', + 'in', + 'into', + 'it', + 'no', + 'not', + 'of', + 'on', + 'or', + 'such', + 'that', + 'their', + 'then', + 'there', + 'these', + 'they', + 'this', + 'to', + 'was', + 'will', + 'with', ] + class FalkorDriverSession(GraphDriverSession): provider = GraphProvider.FALKORDB @@ -173,7 +204,6 @@ class FalkorDriver(GraphDriver): return cloned - def sanitize(self, query: str) -> str: """ Replace FalkorDB special characters with whitespace. @@ -216,7 +246,9 @@ class FalkorDriver(GraphDriver): sanitized = ' '.join(sanitized.split()) return sanitized - def build_fulltext_query(self, query: str, group_ids: list[str] | None = None, max_query_length: int = 128) -> str: + def build_fulltext_query( + self, query: str, group_ids: list[str] | None = None, max_query_length: int = 128 + ) -> str: """ Build a fulltext query string for FalkorDB using RedisSearch syntax. FalkorDB uses RedisSearch-like syntax where: @@ -230,7 +262,7 @@ class FalkorDriver(GraphDriver): group_filter = '' else: group_values = '|'.join(group_ids) - group_filter = f"(@group_id:{group_values})" + group_filter = f'(@group_id:{group_values})' sanitized_query = self.sanitize(query) @@ -245,4 +277,4 @@ class FalkorDriver(GraphDriver): full_query = group_filter + ' (' + sanitized_query + ')' - return full_query \ No newline at end of file + return full_query diff --git a/graphiti_core/graph_queries.py b/graphiti_core/graph_queries.py index 42017949..8e4cca4e 100644 --- a/graphiti_core/graph_queries.py +++ b/graphiti_core/graph_queries.py @@ -72,37 +72,40 @@ def get_range_indices(provider: GraphProvider) -> list[LiteralString]: def get_fulltext_indices(provider: GraphProvider) -> list[LiteralString]: if provider == GraphProvider.FALKORDB: from typing import cast - + from graphiti_core.driver.falkordb_driver import STOPWORDS - + # Convert to string representation for embedding in queries stopwords_str = str(STOPWORDS) - + # Use type: ignore to satisfy LiteralString requirement while maintaining single source of truth - return cast(list[LiteralString], [ - f"""CALL db.idx.fulltext.createNodeIndex( + return cast( + list[LiteralString], + [ + f"""CALL db.idx.fulltext.createNodeIndex( {{ label: 'Episodic', stopwords: {stopwords_str} }}, 'content', 'source', 'source_description', 'group_id' )""", - f"""CALL db.idx.fulltext.createNodeIndex( + f"""CALL db.idx.fulltext.createNodeIndex( {{ label: 'Entity', stopwords: {stopwords_str} }}, 'name', 'summary', 'group_id' )""", - f"""CALL db.idx.fulltext.createNodeIndex( + f"""CALL db.idx.fulltext.createNodeIndex( {{ label: 'Community', stopwords: {stopwords_str} }}, 'name', 'group_id' )""", - """CREATE FULLTEXT INDEX FOR ()-[e:RELATES_TO]-() ON (e.name, e.fact, e.group_id)""", - ]) + """CREATE FULLTEXT INDEX FOR ()-[e:RELATES_TO]-() ON (e.name, e.fact, e.group_id)""", + ], + ) if provider == GraphProvider.KUZU: return [ diff --git a/pyproject.toml b/pyproject.toml index c77d9ca8..b417a328 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "graphiti-core" description = "A temporal graph building library" -version = "0.21.0pre3" +version = "0.21.0pre4" authors = [ { name = "Paul Paliychuk", email = "paul@getzep.com" }, { name = "Preston Rasmussen", email = "preston@getzep.com" }, diff --git a/uv.lock b/uv.lock index 0b75961a..0ff56ad8 100644 --- a/uv.lock +++ b/uv.lock @@ -783,7 +783,7 @@ wheels = [ [[package]] name = "graphiti-core" -version = "0.21.0rc3" +version = "0.21.0rc4" source = { editable = "." } dependencies = [ { name = "diskcache" },