Graph quality updates (#922)

duplicate_of updates
This commit is contained in:
Preston Rasmussen 2025-09-23 17:53:39 -04:00 committed by GitHub
parent d725fcdf8e
commit 36056ad141
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 57 additions and 20 deletions

View file

@ -312,9 +312,11 @@ class GraphDriver(ABC):
return 0 return 0
def build_fulltext_query(self, query: str, group_ids: list[str] | None = None, max_query_length: int = 128) -> str: def build_fulltext_query(
self, query: str, group_ids: list[str] | None = None, max_query_length: int = 128
) -> str:
""" """
Specific fulltext query builder for database providers. Specific fulltext query builder for database providers.
Only implemented by providers that need custom fulltext query building. Only implemented by providers that need custom fulltext query building.
""" """
raise NotImplementedError(f"build_fulltext_query not implemented for {self.provider}") raise NotImplementedError(f'build_fulltext_query not implemented for {self.provider}')

View file

@ -37,11 +37,42 @@ from graphiti_core.utils.datetime_utils import convert_datetimes_to_strings
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
STOPWORDS = [ STOPWORDS = [
'a', 'is', 'the', 'an', 'and', 'are', 'as', 'at', 'be', 'but', 'by', 'for', 'a',
'if', 'in', 'into', 'it', 'no', 'not', 'of', 'on', 'or', 'such', 'that', 'their', 'is',
'then', 'there', 'these', 'they', 'this', 'to', 'was', 'will', 'with' 'the',
'an',
'and',
'are',
'as',
'at',
'be',
'but',
'by',
'for',
'if',
'in',
'into',
'it',
'no',
'not',
'of',
'on',
'or',
'such',
'that',
'their',
'then',
'there',
'these',
'they',
'this',
'to',
'was',
'will',
'with',
] ]
class FalkorDriverSession(GraphDriverSession): class FalkorDriverSession(GraphDriverSession):
provider = GraphProvider.FALKORDB provider = GraphProvider.FALKORDB
@ -173,7 +204,6 @@ class FalkorDriver(GraphDriver):
return cloned return cloned
def sanitize(self, query: str) -> str: def sanitize(self, query: str) -> str:
""" """
Replace FalkorDB special characters with whitespace. Replace FalkorDB special characters with whitespace.
@ -216,7 +246,9 @@ class FalkorDriver(GraphDriver):
sanitized = ' '.join(sanitized.split()) sanitized = ' '.join(sanitized.split())
return sanitized return sanitized
def build_fulltext_query(self, query: str, group_ids: list[str] | None = None, max_query_length: int = 128) -> str: def build_fulltext_query(
self, query: str, group_ids: list[str] | None = None, max_query_length: int = 128
) -> str:
""" """
Build a fulltext query string for FalkorDB using RedisSearch syntax. Build a fulltext query string for FalkorDB using RedisSearch syntax.
FalkorDB uses RedisSearch-like syntax where: FalkorDB uses RedisSearch-like syntax where:
@ -230,7 +262,7 @@ class FalkorDriver(GraphDriver):
group_filter = '' group_filter = ''
else: else:
group_values = '|'.join(group_ids) group_values = '|'.join(group_ids)
group_filter = f"(@group_id:{group_values})" group_filter = f'(@group_id:{group_values})'
sanitized_query = self.sanitize(query) sanitized_query = self.sanitize(query)

View file

@ -79,30 +79,33 @@ def get_fulltext_indices(provider: GraphProvider) -> list[LiteralString]:
stopwords_str = str(STOPWORDS) stopwords_str = str(STOPWORDS)
# Use type: ignore to satisfy LiteralString requirement while maintaining single source of truth # Use type: ignore to satisfy LiteralString requirement while maintaining single source of truth
return cast(list[LiteralString], [ return cast(
f"""CALL db.idx.fulltext.createNodeIndex( list[LiteralString],
[
f"""CALL db.idx.fulltext.createNodeIndex(
{{ {{
label: 'Episodic', label: 'Episodic',
stopwords: {stopwords_str} stopwords: {stopwords_str}
}}, }},
'content', 'source', 'source_description', 'group_id' 'content', 'source', 'source_description', 'group_id'
)""", )""",
f"""CALL db.idx.fulltext.createNodeIndex( f"""CALL db.idx.fulltext.createNodeIndex(
{{ {{
label: 'Entity', label: 'Entity',
stopwords: {stopwords_str} stopwords: {stopwords_str}
}}, }},
'name', 'summary', 'group_id' 'name', 'summary', 'group_id'
)""", )""",
f"""CALL db.idx.fulltext.createNodeIndex( f"""CALL db.idx.fulltext.createNodeIndex(
{{ {{
label: 'Community', label: 'Community',
stopwords: {stopwords_str} stopwords: {stopwords_str}
}}, }},
'name', 'group_id' 'name', 'group_id'
)""", )""",
"""CREATE FULLTEXT INDEX FOR ()-[e:RELATES_TO]-() ON (e.name, e.fact, e.group_id)""", """CREATE FULLTEXT INDEX FOR ()-[e:RELATES_TO]-() ON (e.name, e.fact, e.group_id)""",
]) ],
)
if provider == GraphProvider.KUZU: if provider == GraphProvider.KUZU:
return [ return [

View file

@ -1,7 +1,7 @@
[project] [project]
name = "graphiti-core" name = "graphiti-core"
description = "A temporal graph building library" description = "A temporal graph building library"
version = "0.21.0pre3" version = "0.21.0pre4"
authors = [ authors = [
{ name = "Paul Paliychuk", email = "paul@getzep.com" }, { name = "Paul Paliychuk", email = "paul@getzep.com" },
{ name = "Preston Rasmussen", email = "preston@getzep.com" }, { name = "Preston Rasmussen", email = "preston@getzep.com" },

2
uv.lock generated
View file

@ -783,7 +783,7 @@ wheels = [
[[package]] [[package]]
name = "graphiti-core" name = "graphiti-core"
version = "0.21.0rc3" version = "0.21.0rc4"
source = { editable = "." } source = { editable = "." }
dependencies = [ dependencies = [
{ name = "diskcache" }, { name = "diskcache" },