diff --git a/graphiti_core/prompts/dedupe_nodes.py b/graphiti_core/prompts/dedupe_nodes.py index b9e9cd78..72718c40 100644 --- a/graphiti_core/prompts/dedupe_nodes.py +++ b/graphiti_core/prompts/dedupe_nodes.py @@ -77,11 +77,6 @@ def node(context: dict[str, Any]) -> list[Message]: duplicate_entity_id should be set to -1. Also return the most complete name for the entity. - - Guidelines: - 1. Entities with the same name should be considered duplicates - 2. Duplicate entities may refer to the same real-world entity even if names differ. Use context clues from the MESSAGES - to determine if the NEW ENTITY represents a duplicate entity of one of the EXISTING ENTITIES. """, ), ] diff --git a/graphiti_core/utils/maintenance/node_operations.py b/graphiti_core/utils/maintenance/node_operations.py index 8cf5df9a..b9d181da 100644 --- a/graphiti_core/utils/maintenance/node_operations.py +++ b/graphiti_core/utils/maintenance/node_operations.py @@ -34,8 +34,10 @@ from graphiti_core.prompts.extract_nodes import ( ExtractedEntity, MissedEntities, ) +from graphiti_core.search.search import search +from graphiti_core.search.search_config import SearchResults +from graphiti_core.search.search_config_recipes import NODE_HYBRID_SEARCH_RRF from graphiti_core.search.search_filters import SearchFilters -from graphiti_core.search.search_utils import get_relevant_nodes from graphiti_core.utils.datetime_utils import utc_now logger = logging.getLogger(__name__) @@ -227,13 +229,23 @@ async def resolve_extracted_nodes( entity_types: dict[str, BaseModel] | None = None, ) -> tuple[list[EntityNode], dict[str, str]]: llm_client = clients.llm_client - driver = clients.driver - # Find relevant nodes already in the graph - existing_nodes_lists: list[list[EntityNode]] = await get_relevant_nodes( - driver, extracted_nodes, SearchFilters() + search_results: list[SearchResults] = await semaphore_gather( + *[ + search( + clients=clients, + query=node.name, + query_vector=node.name_embedding, + group_ids=[node.group_id], + search_filter=SearchFilters(), + config=NODE_HYBRID_SEARCH_RRF, + ) + for node in extracted_nodes + ] ) + existing_nodes_lists: list[list[EntityNode]] = [result.nodes for result in search_results] + resolved_nodes: list[EntityNode] = await semaphore_gather( *[ resolve_extracted_node( @@ -282,6 +294,7 @@ async def resolve_extracted_node( 'id': i, 'name': node.name, 'entity_types': node.labels, + 'summary': node.summary, }, **node.attributes, } diff --git a/pyproject.toml b/pyproject.toml index e4a7f3ba..67137a87 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "graphiti-core" description = "A temporal graph building library" -version = "0.11.4" +version = "0.11.5" authors = [ { "name" = "Paul Paliychuk", "email" = "paul@getzep.com" }, { "name" = "Preston Rasmussen", "email" = "preston@getzep.com" },