Update dedupe prompt (#457)
* improve dedupe logic
* cut summary length
* update unit tests
This commit is contained in:
parent
8ce9b1e157
commit
fd9969b5a1
5 changed files with 20 additions and 15 deletions
|
|
@ -44,7 +44,7 @@ def node(context: dict[str, Any]) -> list[Message]:
|
|||
return [
|
||||
Message(
|
||||
role='system',
|
||||
content='You are a helpful assistant that de-duplicates entities from entity lists.',
|
||||
content='You are a helpful assistant that determines whether or not a NEW ENTITY is a duplicate of any EXISTING ENTITIES.',
|
||||
),
|
||||
Message(
|
||||
role='user',
|
||||
|
|
@ -69,14 +69,21 @@ def node(context: dict[str, Any]) -> list[Message]:
|
|||
Given the above EXISTING ENTITIES and their attributes, MESSAGE, and PREVIOUS MESSAGES; Determine if the NEW ENTITY extracted from the conversation
|
||||
is a duplicate entity of one of the EXISTING ENTITIES.
|
||||
|
||||
The ENTITY TYPE DESCRIPTION gives more insight into what the entity type means for the NEW ENTITY.
|
||||
Entities should only be considered duplicates if they refer to the *same real-world object or concept*.
|
||||
|
||||
Do NOT mark entities as duplicates if:
|
||||
- They are related but distinct.
|
||||
- They have similar names or purposes but refer to separate instances or concepts.
|
||||
|
||||
Task:
|
||||
If the NEW ENTITY represents a duplicate entity of any entity in EXISTING ENTITIES, set duplicate_entity_id to the
|
||||
id of the EXISTING ENTITY that is the duplicate. If the NEW ENTITY is not a duplicate of any of the EXISTING ENTITIES,
|
||||
id of the EXISTING ENTITY that is the duplicate.
|
||||
|
||||
If the NEW ENTITY is not a duplicate of any of the EXISTING ENTITIES,
|
||||
duplicate_entity_id should be set to -1.
|
||||
|
||||
Also return the most complete name for the entity.
|
||||
Also return the name that best describes the NEW ENTITY (whether it is the name of the NEW ENTITY, a node it
|
||||
is a duplicate of, or a combination of the two).
|
||||
""",
|
||||
),
|
||||
]
|
||||
|
|
|
|||
|
|
@ -256,7 +256,7 @@ def extract_attributes(context: dict[str, Any]) -> list[Message]:
|
|||
1. Do not hallucinate entity property values if they cannot be found in the current context.
|
||||
2. Only use the provided MESSAGES and ENTITY to set attribute values.
|
||||
3. The summary attribute represents a summary of the ENTITY, and should be updated with new information about the Entity from the MESSAGES.
|
||||
Summaries must be no longer than 500 words.
|
||||
Summaries must be no longer than 250 words.
|
||||
|
||||
<ENTITY>
|
||||
{context['node']}
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ from .models import Message, PromptFunction, PromptVersion
|
|||
class Summary(BaseModel):
|
||||
summary: str = Field(
|
||||
...,
|
||||
description='Summary containing the important information about the entity. Under 500 words',
|
||||
description='Summary containing the important information about the entity. Under 250 words',
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -56,7 +56,7 @@ def summarize_pair(context: dict[str, Any]) -> list[Message]:
|
|||
content=f"""
|
||||
Synthesize the information from the following two summaries into a single succinct summary.
|
||||
|
||||
Summaries must be under 500 words.
|
||||
Summaries must be under 250 words.
|
||||
|
||||
Summaries:
|
||||
{json.dumps(context['node_summaries'], indent=2)}
|
||||
|
|
@ -82,7 +82,7 @@ def summarize_context(context: dict[str, Any]) -> list[Message]:
|
|||
|
||||
Given the above MESSAGES and the following ENTITY name, create a summary for the ENTITY. Your summary must only use
|
||||
information from the provided MESSAGES. Your summary should also only contain information relevant to the
|
||||
provided ENTITY. Summaries must be under 500 words.
|
||||
provided ENTITY. Summaries must be under 250 words.
|
||||
|
||||
In addition, extract any values for the provided entity properties based on their descriptions.
|
||||
If the value of the entity property cannot be found in the current context, set the value of the property to the Python value None.
|
||||
|
|
@ -117,7 +117,7 @@ def summary_description(context: dict[str, Any]) -> list[Message]:
|
|||
role='user',
|
||||
content=f"""
|
||||
Create a short one sentence description of the summary that explains what kind of information is summarized.
|
||||
Summaries must be under 500 words.
|
||||
Summaries must be under 250 words.
|
||||
|
||||
Summary:
|
||||
{json.dumps(context['summary'], indent=2)}
|
||||
|
|
|
|||
|
|
@ -290,7 +290,6 @@ async def resolve_extracted_node(
|
|||
'id': i,
|
||||
'name': node.name,
|
||||
'entity_types': node.labels,
|
||||
'summary': node.summary,
|
||||
},
|
||||
**node.attributes,
|
||||
}
|
||||
|
|
@ -384,7 +383,7 @@ async def extract_attributes_from_node(
|
|||
'summary': (
|
||||
str,
|
||||
Field(
|
||||
description='Summary containing the important information about the entity. Under 500 words',
|
||||
description='Summary containing the important information about the entity. Under 250 words',
|
||||
),
|
||||
)
|
||||
}
|
||||
|
|
@ -436,10 +435,7 @@ async def dedupe_node_list(
|
|||
node_map[node.uuid] = node
|
||||
|
||||
# Prepare context for LLM
|
||||
nodes_context = [
|
||||
{'uuid': node.uuid, 'name': node.name, 'summary': node.summary, **node.attributes}
|
||||
for node in nodes
|
||||
]
|
||||
nodes_context = [{'uuid': node.uuid, 'name': node.name, **node.attributes} for node in nodes]
|
||||
|
||||
context = {
|
||||
'nodes': nodes_context,
|
||||
|
|
|
|||
|
|
@ -119,6 +119,7 @@ async def test_resolve_extracted_edge_no_changes(
|
|||
mock_extracted_edge,
|
||||
mock_related_edges,
|
||||
mock_existing_edges,
|
||||
mock_current_episode,
|
||||
)
|
||||
|
||||
assert resolved_edge.uuid == mock_extracted_edge.uuid
|
||||
|
|
@ -170,6 +171,7 @@ async def test_resolve_extracted_edge_with_invalidation(
|
|||
mock_extracted_edge,
|
||||
mock_related_edges,
|
||||
mock_existing_edges,
|
||||
mock_current_episode,
|
||||
)
|
||||
|
||||
assert resolved_edge.uuid == mock_extracted_edge.uuid
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue