diff --git a/graphiti_core/graphiti.py b/graphiti_core/graphiti.py index 860b8afa..635200af 100644 --- a/graphiti_core/graphiti.py +++ b/graphiti_core/graphiti.py @@ -33,6 +33,7 @@ from graphiti_core.nodes import CommunityNode, EntityNode, EpisodeType, Episodic from graphiti_core.search.search import SearchConfig, search from graphiti_core.search.search_config import DEFAULT_SEARCH_LIMIT, SearchResults from graphiti_core.search.search_config_recipes import ( + COMBINED_HYBRID_SEARCH_CROSS_ENCODER, EDGE_HYBRID_SEARCH_NODE_DISTANCE, EDGE_HYBRID_SEARCH_RRF, ) @@ -647,7 +648,10 @@ class Graphiti: Perform a hybrid search on the knowledge graph. This method executes a search query on the graph, combining vector and - text-based search techniques to retrieve relevant facts. + text-based search techniques to retrieve relevant facts, returning the edges as a string. + + This is our basic out-of-the-box search, for more robust results we recommend using our more advanced + search method graphiti.search_(). Parameters ---------- @@ -668,8 +672,7 @@ class Graphiti: Notes ----- This method uses a SearchConfig with num_episodes set to 0 and - num_results set to the provided num_results parameter. It then calls - the hybrid_search function to perform the actual search operation. + num_results set to the provided num_results parameter. The search is performed using the current date and time as the reference point for temporal relevance. @@ -703,6 +706,27 @@ class Graphiti: bfs_origin_node_uuids: list[str] | None = None, search_filter: SearchFilters | None = None, ) -> SearchResults: + """DEPRECATED""" + return await self.search_( + query, config, group_ids, center_node_uuid, bfs_origin_node_uuids, search_filter + ) + + async def search_( + self, + query: str, + config: SearchConfig = COMBINED_HYBRID_SEARCH_CROSS_ENCODER, + group_ids: list[str] | None = None, + center_node_uuid: str | None = None, + bfs_origin_node_uuids: list[str] | None = None, + search_filter: SearchFilters | None = None, + ) -> SearchResults: + """search_ (replaces _search) is our advanced search method that returns Graph objects (nodes and edges) rather + than a list of facts. This endpoint allows the end user to utilize more advanced features such as filters and + different search and reranker methodologies across different layers in the graph. + + For different config recipes refer to search/search_config_recipes. + """ + return await search( self.driver, self.embedder, diff --git a/graphiti_core/search/search_config.py b/graphiti_core/search/search_config.py index 9aa23daa..f5148842 100644 --- a/graphiti_core/search/search_config.py +++ b/graphiti_core/search/search_config.py @@ -26,7 +26,7 @@ from graphiti_core.search.search_utils import ( MAX_SEARCH_DEPTH, ) -DEFAULT_SEARCH_LIMIT = 10 +DEFAULT_SEARCH_LIMIT = 20 class EdgeSearchMethod(Enum): diff --git a/graphiti_core/search/search_helpers.py b/graphiti_core/search/search_helpers.py new file mode 100644 index 00000000..aae66bc3 --- /dev/null +++ b/graphiti_core/search/search_helpers.py @@ -0,0 +1,47 @@ +""" +Copyright 2024, Zep Software, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +from graphiti_core.edges import EntityEdge +from graphiti_core.search.search_config import SearchResults + + +def format_edge_date_range(edge: EntityEdge) -> str: + # return f"{datetime(edge.valid_at).strftime('%Y-%m-%d %H:%M:%S') if edge.valid_at else 'date unknown'} - {(edge.invalid_at.strftime('%Y-%m-%d %H:%M:%S') if edge.invalid_at else 'present')}" + return f'{edge.valid_at if edge.valid_at else "date unknown"} - {(edge.invalid_at if edge.invalid_at else "present")}' + + +def search_results_to_context_string(search_results: SearchResults) -> str: + """Reformats a set of SearchResults into a single string to pass directly to an LLM as context""" + context_string = """FACTS and ENTITIES represent relevant context to the current conversation. + COMMUNITIES represent a cluster of closely related entities. + + # These are the most relevant facts and their valid date ranges + # format: FACT (Date range: from - to) + """ + context_string += '\n' + for edge in search_results.edges: + context_string += f'- {edge.fact} ({format_edge_date_range(edge)})\n' + context_string += '\n' + context_string += '\n' + for node in search_results.nodes: + context_string += f'- {node.name}: {node.summary}\n' + context_string += '\n' + context_string += '\n' + for community in search_results.communities: + context_string += f'- {community.name}: {community.summary}\n' + context_string += '\n' + + return context_string diff --git a/graphiti_core/search/search_utils.py b/graphiti_core/search/search_utils.py index 10bb0d3e..3f720736 100644 --- a/graphiti_core/search/search_utils.py +++ b/graphiti_core/search/search_utils.py @@ -229,8 +229,8 @@ async def edge_similarity_search( query: LiteralString = ( """ - MATCH (n:Entity)-[r:RELATES_TO]->(m:Entity) - """ + MATCH (n:Entity)-[r:RELATES_TO]->(m:Entity) + """ + group_filter_query + filter_query + """\nWITH DISTINCT r, vector.similarity.cosine(r.fact_embedding, $search_vector) AS score diff --git a/poetry.lock b/poetry.lock index 6fb1b928..c66a98fa 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1030,6 +1030,17 @@ requests = ">=2.28.1,<3.0.0" typing-extensions = ">=4.11.0,<5.0.0" websockets = ">=13.0.0,<15.1.0" +[[package]] +name = "graph-service" +version = "1.0.0.7" +description = "project descriptions here" +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "graph_service-1.0.0.7-py3-none-any.whl", hash = "sha256:d34cd38529ff0704b83fa4e8692b4f8fcf1524dce7110f89d4ee33423eeff0bd"}, +] + [[package]] name = "groq" version = "0.11.0" @@ -3148,6 +3159,7 @@ optional = false python-versions = ">=3.8" groups = ["main", "dev"] files = [ + {file = "pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629"}, {file = "pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034"}, ] @@ -5316,4 +5328,4 @@ groq = ["groq"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<4" -content-hash = "4ffa68a8953cb18a1d2a43335cf2187e609659cd414bcd7f65ab285971ba72aa" +content-hash = "fa0608494d973b528ccad56576f42c04cff04a66c1325796d224dedab292aa16" diff --git a/pyproject.toml b/pyproject.toml index e65e381f..49ba6c53 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,11 +1,11 @@ [project] name = "graphiti-core" description = "A temporal graph building library" -version = "0.9.4" +version = "0.9.5" authors = [ - {"name" = "Paul Paliychuk", "email" = "paul@getzep.com"}, - {"name" = "Preston Rasmussen", "email" = "preston@getzep.com"}, - {"name" = "Daniel Chalef", "email" = "daniel@getzep.com"}, + { "name" = "Paul Paliychuk", "email" = "paul@getzep.com" }, + { "name" = "Preston Rasmussen", "email" = "preston@getzep.com" }, + { "name" = "Daniel Chalef", "email" = "daniel@getzep.com" }, ] readme = "README.md" license = "Apache-2.0" @@ -19,6 +19,7 @@ dependencies = [ "tenacity>=9.0.0", "numpy>=1.0.0", "python-dotenv>=1.0.1", + "graph-service (>=1.0.0.7,<2.0.0.0)", ] [project.urls]