fix: Improve edge extraction entity ID validation
Fixes invalid entity ID references in edge extraction that caused warnings like:

"WARNING: source or target node not filled WILL_FIND. source_node_uuid: 23 and target_node_uuid: 3"

Changes:
- Format ENTITIES list as proper JSON in prompt for better LLM parsing
- Clarify field descriptions to reference entity id from ENTITIES list
- Add explicit entity ID validation as #1 extraction rule with examples
- Improve error logging (removed PII, added entity count and valid range)

These changes follow patterns from extract_nodes.py and dedupe_nodes.py where entity referencing works reliably.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
b28bd92c16
commit
dc0088f936
2 changed files with 12 additions and 5 deletions
|
|
@ -24,8 +24,12 @@ from .prompt_helpers import to_prompt_json
|
|||
|
||||
class Edge(BaseModel):
|
||||
relation_type: str = Field(..., description='FACT_PREDICATE_IN_SCREAMING_SNAKE_CASE')
|
||||
source_entity_id: int = Field(..., description='The id of the source entity of the fact.')
|
||||
target_entity_id: int = Field(..., description='The id of the target entity of the fact.')
|
||||
source_entity_id: int = Field(
|
||||
..., description='The id of the source entity from the ENTITIES list'
|
||||
)
|
||||
target_entity_id: int = Field(
|
||||
..., description='The id of the target entity from the ENTITIES list'
|
||||
)
|
||||
fact: str = Field(..., description='')
|
||||
valid_at: str | None = Field(
|
||||
None,
|
||||
|
|
@ -81,7 +85,7 @@ def edge(context: dict[str, Any]) -> list[Message]:
|
|||
</CURRENT_MESSAGE>
|
||||
|
||||
<ENTITIES>
|
||||
{context['nodes']}
|
||||
{to_prompt_json(context['nodes'], ensure_ascii=context.get('ensure_ascii', False), indent=2)}
|
||||
</ENTITIES>
|
||||
|
||||
<REFERENCE_TIME>
|
||||
|
|
@ -107,7 +111,8 @@ You may use information from the PREVIOUS MESSAGES only to disambiguate referenc
|
|||
|
||||
# EXTRACTION RULES
|
||||
|
||||
1. Only emit facts where both the subject and object match IDs in ENTITIES.
|
||||
1. **Entity ID Validation**: `source_entity_id` and `target_entity_id` must use only the `id` values from the ENTITIES list provided above.
|
||||
- **CRITICAL**: Using IDs not in the list will cause the edge to be rejected
|
||||
2. Each fact must involve two **distinct** entities.
|
||||
3. Use a SCREAMING_SNAKE_CASE string as the `relation_type` (e.g., FOUNDED, WORKS_AT).
|
||||
4. Do not emit duplicate or semantically redundant facts.
|
||||
|
|
|
|||
|
|
@ -181,7 +181,9 @@ async def extract_edges(
|
|||
target_node_idx = edge_data.target_entity_id
|
||||
if not (-1 < source_node_idx < len(nodes) and -1 < target_node_idx < len(nodes)):
|
||||
logger.warning(
|
||||
f'WARNING: source or target node not filled {edge_data.relation_type}. source_node_uuid: {source_node_idx} and target_node_uuid: {target_node_idx} '
|
||||
f'Invalid entity IDs in edge extraction for {edge_data.relation_type}. '
|
||||
f'source_entity_id: {source_node_idx}, target_entity_id: {target_node_idx}, '
|
||||
f'but only {len(nodes)} entities available (valid range: 0-{len(nodes) - 1})'
|
||||
)
|
||||
continue
|
||||
source_node_uuid = nodes[source_node_idx].uuid
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue