be more explicit about edge type signatures (#600)

* be more explicit about edge type signatures * bump version * update
2025-06-18 16:01:00 -04:00 · 2025-06-18 16:01:00 -04:00 · 2b0bc21b21
commit 2b0bc21b21
parent 645e86094c
6 changed files with 25 additions and 3 deletions
--- a/graphiti_core/graphiti.py
+++ b/graphiti_core/graphiti.py
@@ -385,7 +385,13 @@ class Graphiti:
                    entity_types,
                ),
                extract_edges(
-                    self.clients, episode, extracted_nodes, previous_episodes, group_id, edge_types
+                    self.clients,
+                    episode,
+                    extracted_nodes,
+                    previous_episodes,
+                    edge_type_map or edge_type_map_default,
+                    group_id,
+                    edge_types,
                ),
            )

--- a/graphiti_core/prompts/extract_edges.py
+++ b/graphiti_core/prompts/extract_edges.py
@@ -97,6 +97,8 @@ Only extract facts that:
 - The FACT TYPES provide a list of the most important types of facts, make sure to extract facts of these types
 - The FACT TYPES are not an exhaustive list, extract all facts from the message even if they do not fit into one
    of the FACT TYPES
+- The FACT TYPES each contain their fact_type_signature which represents the entity types which that fact_type is defined for.
+    A Type of Entity in the signature represents any extracted entity (it is a generic universal type for all entities).

 You may use information from the PREVIOUS MESSAGES only to disambiguate references or support continuity.

--- a/graphiti_core/prompts/extract_nodes.py
+++ b/graphiti_core/prompts/extract_nodes.py
@@ -90,6 +90,8 @@ def extract_message(context: dict[str, Any]) -> list[Message]:
 Instructions:

 You are given a conversation context and a CURRENT MESSAGE. Your task is to extract **entity nodes** mentioned **explicitly or implicitly** in the CURRENT MESSAGE.
+Pronoun references such as he/she/they or this/that/those should be disambiguated to the names of the 
+reference entities.

 1. **Speaker Extraction**: Always extract the speaker (the part before the colon `:` in each dialogue line) as the first entity node.
   - If the speaker is mentioned again in the message, treat both mentions as a **single entity**.
--- a/graphiti_core/utils/bulk_utils.py
+++ b/graphiti_core/utils/bulk_utils.py
@@ -198,6 +198,7 @@ async def extract_nodes_and_edges_bulk(
                episode,
                extracted_nodes_bulk[i],
                previous_episodes_list[i],
+                {},
                episode.group_id,
            )
            for i, episode in enumerate(episodes)
--- a/graphiti_core/utils/maintenance/edge_operations.py
+++ b/graphiti_core/utils/maintenance/edge_operations.py
@@ -108,6 +108,7 @@ async def extract_edges(
    episode: EpisodicNode,
    nodes: list[EntityNode],
    previous_episodes: list[EpisodicNode],
+    edge_type_map: dict[tuple[str, str], list[str]],
    group_id: str = '',
    edge_types: dict[str, BaseModel] | None = None,
 ) -> list[EntityEdge]:
@@ -116,10 +117,17 @@ async def extract_edges(
    extract_edges_max_tokens = 16384
    llm_client = clients.llm_client

+    edge_type_signature_map: dict[str, tuple[str, str]] = {
+        edge_type: signature
+        for signature, edge_types in edge_type_map.items()
+        for edge_type in edge_types
+    }
+
    edge_types_context = (
        [
            {
                'fact_type_name': type_name,
+                'fact_type_signature': edge_type_signature_map.get(type_name, ('Entity', 'Entity')),
                'fact_type_description': type_model.__doc__,
            }
            for type_name, type_model in edge_types.items()
@@ -131,7 +139,10 @@ async def extract_edges(
    # Prepare context for LLM
    context = {
        'episode_content': episode.content,
-        'nodes': [{'id': idx, 'name': node.name} for idx, node in enumerate(nodes)],
+        'nodes': [
+            {'id': idx, 'name': node.name, 'entity_types': node.labels}
+            for idx, node in enumerate(nodes)
+        ],
        'previous_episodes': [ep.content for ep in previous_episodes],
        'reference_time': episode.valid_at,
        'edge_types': edge_types_context,
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "graphiti-core"
 description = "A temporal graph building library"
-version = "0.12.1"
+version = "0.12.2"
 authors = [
    { "name" = "Paul Paliychuk", "email" = "paul@getzep.com" },
    { "name" = "Preston Rasmussen", "email" = "preston@getzep.com" },