diff --git a/graphiti_core/prompts/dedupe_edges.py b/graphiti_core/prompts/dedupe_edges.py
index 35acb31f..c5b55427 100644
--- a/graphiti_core/prompts/dedupe_edges.py
+++ b/graphiti_core/prompts/dedupe_edges.py
@@ -25,11 +25,11 @@ from .prompt_helpers import to_prompt_json
class EdgeDuplicate(BaseModel):
duplicate_facts: list[int] = Field(
...,
- description='List of ids of any duplicate facts. If no duplicate facts are found, default to empty list.',
+ description='List of idx values of any duplicate facts. If no duplicate facts are found, default to empty list.',
)
contradicted_facts: list[int] = Field(
...,
- description='List of ids of facts that should be invalidated. If no facts should be invalidated, the list should be empty.',
+ description='List of idx values of facts that should be invalidated. If no facts should be invalidated, the list should be empty.',
)
fact_type: str = Field(..., description='One of the provided fact types or DEFAULT')
@@ -124,37 +124,48 @@ def resolve_edge(context: dict[str, Any]) -> list[Message]:
Message(
role='user',
content=f"""
-
- {context['new_edge']}
-
-
-
- {context['existing_edges']}
-
-
- {context['edge_invalidation_candidates']}
-
-
-
- {context['edge_types']}
-
-
-
Task:
- If the NEW FACT represents identical factual information of one or more in EXISTING FACTS, return the idx of the duplicate facts.
- Facts with similar information that contain key differences should not be marked as duplicates.
- If the NEW FACT is not a duplicate of any of the EXISTING FACTS, return an empty list.
-
- Given the predefined FACT TYPES, determine if the NEW FACT should be classified as one of these types.
- Return the fact type as fact_type or DEFAULT if NEW FACT is not one of the FACT TYPES.
-
- Based on the provided FACT INVALIDATION CANDIDATES and NEW FACT, determine which existing facts the new fact contradicts.
- Return a list containing all idx's of the facts that are contradicted by the NEW FACT.
- If there are no contradicted facts, return an empty list.
+ You will receive TWO separate lists of facts. Each list uses 'idx' as its index field, starting from 0.
+
+ 1. DUPLICATE DETECTION:
+ - If the NEW FACT represents identical factual information as any fact in EXISTING FACTS, return those idx values in duplicate_facts.
+ - Facts with similar information that contain key differences should NOT be marked as duplicates.
+ - Return idx values from EXISTING FACTS.
+ - If no duplicates, return an empty list for duplicate_facts.
+
+ 2. FACT TYPE CLASSIFICATION:
+ - Given the predefined FACT TYPES, determine if the NEW FACT should be classified as one of these types.
+ - Return the fact type as fact_type or DEFAULT if NEW FACT is not one of the FACT TYPES.
+
+ 3. CONTRADICTION DETECTION:
+ - Based on FACT INVALIDATION CANDIDATES and NEW FACT, determine which facts the new fact contradicts.
+ - Return idx values from FACT INVALIDATION CANDIDATES.
+ - If no contradictions, return an empty list for contradicted_facts.
+
+ IMPORTANT:
+ - duplicate_facts: Use ONLY 'idx' values from EXISTING FACTS
+ - contradicted_facts: Use ONLY 'idx' values from FACT INVALIDATION CANDIDATES
+ - These are two separate lists with independent idx ranges starting from 0
Guidelines:
1. Some facts may be very similar but will have key differences, particularly around numeric values in the facts.
Do not mark these facts as duplicates.
+
+
+ {context['edge_types']}
+
+
+
+ {context['existing_edges']}
+
+
+
+ {context['edge_invalidation_candidates']}
+
+
+
+ {context['new_edge']}
+
""",
),
]
diff --git a/graphiti_core/utils/maintenance/edge_operations.py b/graphiti_core/utils/maintenance/edge_operations.py
index a6760a40..3e8f6990 100644
--- a/graphiti_core/utils/maintenance/edge_operations.py
+++ b/graphiti_core/utils/maintenance/edge_operations.py
@@ -475,20 +475,19 @@ async def resolve_extracted_edge(
start = time()
# Prepare context for LLM
- related_edges_context = [{'id': i, 'fact': edge.fact} for i, edge in enumerate(related_edges)]
+ related_edges_context = [{'idx': i, 'fact': edge.fact} for i, edge in enumerate(related_edges)]
invalidation_edge_candidates_context = [
- {'id': i, 'fact': existing_edge.fact} for i, existing_edge in enumerate(existing_edges)
+ {'idx': i, 'fact': existing_edge.fact} for i, existing_edge in enumerate(existing_edges)
]
edge_types_context = (
[
{
- 'fact_type_id': i,
'fact_type_name': type_name,
'fact_type_description': type_model.__doc__,
}
- for i, (type_name, type_model) in enumerate(edge_type_candidates.items())
+ for type_name, type_model in edge_type_candidates.items()
]
if edge_type_candidates is not None
else []
@@ -501,6 +500,15 @@ async def resolve_extracted_edge(
'edge_types': edge_types_context,
}
+ if related_edges or existing_edges:
+ logger.debug(
+ 'Resolving edge: sent %d EXISTING FACTS%s and %d INVALIDATION CANDIDATES%s',
+ len(related_edges),
+ f' (idx 0-{len(related_edges) - 1})' if related_edges else '',
+ len(existing_edges),
+ f' (idx 0-{len(existing_edges) - 1})' if existing_edges else '',
+ )
+
llm_response = await llm_client.generate_response(
prompt_library.dedupe_edges.resolve_edge(context),
response_model=EdgeDuplicate,
@@ -509,6 +517,15 @@ async def resolve_extracted_edge(
response_object = EdgeDuplicate(**llm_response)
duplicate_facts = response_object.duplicate_facts
+ # Log idx values outside EXISTING FACTS (the filter below drops them); an empty list has no valid idx
+ invalid_duplicates = [i for i in duplicate_facts if i < 0 or i >= len(related_edges)]
+ if invalid_duplicates:
+ logger.warning(
+ 'LLM returned invalid duplicate_facts idx values %s (valid range: %s for EXISTING FACTS)',
+ invalid_duplicates,
+ f'0-{len(related_edges) - 1}' if related_edges else 'none',
+ )
+
duplicate_fact_ids: list[int] = [i for i in duplicate_facts if 0 <= i < len(related_edges)]
resolved_edge = extracted_edge
@@ -521,6 +538,15 @@ async def resolve_extracted_edge(
contradicted_facts: list[int] = response_object.contradicted_facts
+ # Log idx values outside INVALIDATION CANDIDATES (the filter below drops them); an empty list has no valid idx
+ invalid_contradictions = [i for i in contradicted_facts if i < 0 or i >= len(existing_edges)]
+ if invalid_contradictions:
+ logger.warning(
+ 'LLM returned invalid contradicted_facts idx values %s (valid range: %s for INVALIDATION CANDIDATES)',
+ invalid_contradictions,
+ f'0-{len(existing_edges) - 1}' if existing_edges else 'none',
+ )
+
invalidation_candidates: list[EntityEdge] = [
existing_edges[i] for i in contradicted_facts if 0 <= i < len(existing_edges)
]