dedupe fixes (#35)
This commit is contained in:
parent
57aed456fa
commit
0d2942daea
4 changed files with 11 additions and 7 deletions
|
|
@ -47,7 +47,7 @@ class OpenAIClient(LLMClient):
|
||||||
response = await self.client.chat.completions.create(
|
response = await self.client.chat.completions.create(
|
||||||
model=self.model,
|
model=self.model,
|
||||||
messages=openai_messages,
|
messages=openai_messages,
|
||||||
temperature=0.1,
|
temperature=0,
|
||||||
max_tokens=3000,
|
max_tokens=3000,
|
||||||
response_format={'type': 'json_object'},
|
response_format={'type': 'json_object'},
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -54,8 +54,9 @@ def v1(context: dict[str, Any]) -> list[Message]:
|
||||||
do not return it in the list of unique facts.
|
do not return it in the list of unique facts.
|
||||||
|
|
||||||
Guidelines:
|
Guidelines:
|
||||||
1. The facts do not have to be completely identical to be duplicates,
|
1. identical or near identical facts are duplicates
|
||||||
they just need to have similar factual content
|
2. Facts are also duplicates if they are represented by similar sentences
|
||||||
|
3. Facts will often discuss the same or similar relation between identical entities
|
||||||
|
|
||||||
Respond with a JSON object in the following format:
|
Respond with a JSON object in the following format:
|
||||||
{{
|
{{
|
||||||
|
|
@ -130,8 +131,10 @@ def edge_list(context: dict[str, Any]) -> list[Message]:
|
||||||
If any facts in Facts is a duplicate of another fact, return a new fact with one of their uuid's.
|
If any facts in Facts is a duplicate of another fact, return a new fact with one of their uuid's.
|
||||||
|
|
||||||
Guidelines:
|
Guidelines:
|
||||||
1. The facts do not have to be completely identical to be duplicates, they just need to have similar content
|
1. identical or near identical facts are duplicates
|
||||||
2. The final list should have only unique facts. If 3 facts are all duplicates of each other, only one of their
|
2. Facts are also duplicates if they are represented by similar sentences
|
||||||
|
3. Facts will often discuss the same or similar relation between identical entities
|
||||||
|
4. The final list should have only unique facts. If 3 facts are all duplicates of each other, only one of their
|
||||||
facts should be in the response
|
facts should be in the response
|
||||||
|
|
||||||
Respond with a JSON object in the following format:
|
Respond with a JSON object in the following format:
|
||||||
|
|
|
||||||
|
|
@ -122,7 +122,7 @@ def v2(context: dict[str, Any]) -> list[Message]:
|
||||||
"relation_type": "RELATION_TYPE_IN_CAPS",
|
"relation_type": "RELATION_TYPE_IN_CAPS",
|
||||||
"source_node_uuid": "uuid of the source entity node",
|
"source_node_uuid": "uuid of the source entity node",
|
||||||
"target_node_uuid": "uuid of the target entity node",
|
"target_node_uuid": "uuid of the target entity node",
|
||||||
"fact": "Detailed description of the relationship",
|
"fact": "brief description of the relationship",
|
||||||
"valid_at": "YYYY-MM-DDTHH:MM:SSZ or null if not explicitly mentioned",
|
"valid_at": "YYYY-MM-DDTHH:MM:SSZ or null if not explicitly mentioned",
|
||||||
"invalid_at": "YYYY-MM-DDTHH:MM:SSZ or null if ongoing or not explicitly mentioned"
|
"invalid_at": "YYYY-MM-DDTHH:MM:SSZ or null if ongoing or not explicitly mentioned"
|
||||||
}}
|
}}
|
||||||
|
|
|
||||||
|
|
@ -125,10 +125,11 @@ def v3(context: dict[str, Any]) -> list[Message]:
|
||||||
sys_prompt = """You are an AI assistant that extracts entity nodes from conversational text. Your primary task is to identify and extract the speaker and other significant entities mentioned in the conversation."""
|
sys_prompt = """You are an AI assistant that extracts entity nodes from conversational text. Your primary task is to identify and extract the speaker and other significant entities mentioned in the conversation."""
|
||||||
|
|
||||||
user_prompt = f"""
|
user_prompt = f"""
|
||||||
Given the following conversation, extract entity nodes that are explicitly or implicitly mentioned:
|
Given the following conversation, extract entity nodes from the CURRENT MESSAGE that are explicitly or implicitly mentioned:
|
||||||
|
|
||||||
Conversation:
|
Conversation:
|
||||||
{json.dumps([ep['content'] for ep in context['previous_episodes']], indent=2)}
|
{json.dumps([ep['content'] for ep in context['previous_episodes']], indent=2)}
|
||||||
|
<CURRENT MESSAGE>
|
||||||
{context["episode_content"]}
|
{context["episode_content"]}
|
||||||
|
|
||||||
Guidelines:
|
Guidelines:
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue