refactor string formatting to use single quotes in node operations
This commit is contained in:
parent
152deb930d
commit
859d7aee5e
1 changed file with 70 additions and 90 deletions
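The change is behavior-preserving: Python treats single- and double-quoted string literals identically, so swapping the delimiters in dict keys, f-strings, and defaults affects style only. Below is a minimal hypothetical sketch of that equivalence (not part of the diff); teams that standardize on single quotes commonly enforce it with a formatter setting such as Ruff's quote-style = "single", though the tooling used here is an assumption, not something this commit shows.

# Hypothetical illustration: quote style does not change a Python string's value.
assert 'episode_content' == "episode_content"
assert f'\n{"x"},' == f"\n{'x'},"
# Only strings containing the delimiter itself need escaping or the other style.
assert 'it\'s' == "it's"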
@@ -65,16 +65,16 @@ async def extract_nodes_reflexion(
 ) -> list[str]:
     # Prepare context for LLM
     context = {
-        "episode_content": episode.content,
-        "previous_episodes": [ep.content for ep in previous_episodes],
-        "extracted_entities": node_names,
-        "ensure_ascii": ensure_ascii,
+        'episode_content': episode.content,
+        'previous_episodes': [ep.content for ep in previous_episodes],
+        'extracted_entities': node_names,
+        'ensure_ascii': ensure_ascii,
     }

     llm_response = await llm_client.generate_response(
         prompt_library.extract_nodes.reflexion(context), MissedEntities
     )
-    missed_entities = llm_response.get("missed_entities", [])
+    missed_entities = llm_response.get('missed_entities', [])

     return missed_entities

@@ -89,24 +89,24 @@ async def extract_nodes(
     start = time()
     llm_client = clients.llm_client
     llm_response = {}
-    custom_prompt = ""
+    custom_prompt = ''
     entities_missed = True
     reflexion_iterations = 0

     entity_types_context = [
         {
-            "entity_type_id": 0,
-            "entity_type_name": "Entity",
-            "entity_type_description": "Default entity classification. Use this entity type if the entity is not one of the other listed types.",
+            'entity_type_id': 0,
+            'entity_type_name': 'Entity',
+            'entity_type_description': 'Default entity classification. Use this entity type if the entity is not one of the other listed types.',
         }
     ]

     entity_types_context += (
         [
             {
-                "entity_type_id": i + 1,
-                "entity_type_name": type_name,
-                "entity_type_description": type_model.__doc__,
+                'entity_type_id': i + 1,
+                'entity_type_name': type_name,
+                'entity_type_description': type_model.__doc__,
             }
             for i, (type_name, type_model) in enumerate(entity_types.items())
         ]

@@ -115,13 +115,13 @@ async def extract_nodes(
     )

     context = {
-        "episode_content": episode.content,
-        "episode_timestamp": episode.valid_at.isoformat(),
-        "previous_episodes": [ep.content for ep in previous_episodes],
-        "custom_prompt": custom_prompt,
-        "entity_types": entity_types_context,
-        "source_description": episode.source_description,
-        "ensure_ascii": clients.ensure_ascii,
+        'episode_content': episode.content,
+        'episode_timestamp': episode.valid_at.isoformat(),
+        'previous_episodes': [ep.content for ep in previous_episodes],
+        'custom_prompt': custom_prompt,
+        'entity_types': entity_types_context,
+        'source_description': episode.source_description,
+        'ensure_ascii': clients.ensure_ascii,
     }

     while entities_missed and reflexion_iterations <= MAX_REFLEXION_ITERATIONS:

@@ -157,48 +157,42 @@ async def extract_nodes(

         entities_missed = len(missing_entities) != 0

-        custom_prompt = "Make sure that the following entities are extracted: "
+        custom_prompt = 'Make sure that the following entities are extracted: '
         for entity in missing_entities:
-            custom_prompt += f"\n{entity},"
+            custom_prompt += f'\n{entity},'

-    filtered_extracted_entities = [
-        entity for entity in extracted_entities if entity.name.strip()
-    ]
+    filtered_extracted_entities = [entity for entity in extracted_entities if entity.name.strip()]
     end = time()
-    logger.debug(
-        f"Extracted new nodes: {filtered_extracted_entities} in {(end - start) * 1000} ms"
-    )
+    logger.debug(f'Extracted new nodes: {filtered_extracted_entities} in {(end - start) * 1000} ms')
     # Convert the extracted data into EntityNode objects
     extracted_nodes = []
     for extracted_entity in filtered_extracted_entities:
         type_id = extracted_entity.entity_type_id
         if 0 <= type_id < len(entity_types_context):
-            entity_type_name = entity_types_context[
-                extracted_entity.entity_type_id
-            ].get("entity_type_name")
+            entity_type_name = entity_types_context[extracted_entity.entity_type_id].get(
+                'entity_type_name'
+            )
         else:
-            entity_type_name = "Entity"
+            entity_type_name = 'Entity'

         # Check if this entity type should be excluded
         if excluded_entity_types and entity_type_name in excluded_entity_types:
-            logger.debug(
-                f'Excluding entity "{extracted_entity.name}" of type "{entity_type_name}"'
-            )
+            logger.debug(f'Excluding entity "{extracted_entity.name}" of type "{entity_type_name}"')
             continue

-        labels: list[str] = list({"Entity", str(entity_type_name)})
+        labels: list[str] = list({'Entity', str(entity_type_name)})

         new_node = EntityNode(
             name=extracted_entity.name,
             group_id=episode.group_id,
             labels=labels,
-            summary="",
+            summary='',
             created_at=utc_now(),
         )
         extracted_nodes.append(new_node)
-        logger.debug(f"Created new node: {new_node.name} (UUID: {new_node.uuid})")
+        logger.debug(f'Created new node: {new_node.name} (UUID: {new_node.uuid})')

-    logger.debug(f"Extracted nodes: {[(n.name, n.uuid) for n in extracted_nodes]}")
+    logger.debug(f'Extracted nodes: {[(n.name, n.uuid) for n in extracted_nodes]}')
     return extracted_nodes

@@ -221,9 +215,7 @@ async def _collect_candidate_nodes(
         ]
     )

-    candidate_nodes: list[EntityNode] = [
-        node for result in search_results for node in result.nodes
-    ]
+    candidate_nodes: list[EntityNode] = [node for result in search_results for node in result.nodes]

     if existing_nodes_override is not None:
         candidate_nodes.extend(existing_nodes_override)

@@ -253,21 +245,19 @@ async def _resolve_with_llm(
     if not state.unresolved_indices:
         return

-    entity_types_dict: dict[str, type[BaseModel]] = (
-        entity_types if entity_types is not None else {}
-    )
+    entity_types_dict: dict[str, type[BaseModel]] = entity_types if entity_types is not None else {}

     llm_extracted_nodes = [extracted_nodes[i] for i in state.unresolved_indices]

     extracted_nodes_context = [
         {
-            "id": i,
-            "name": node.name,
-            "entity_type": node.labels,
-            "entity_type_description": entity_types_dict.get(
-                next((item for item in node.labels if item != "Entity"), "")
+            'id': i,
+            'name': node.name,
+            'entity_type': node.labels,
+            'entity_type_description': entity_types_dict.get(
+                next((item for item in node.labels if item != 'Entity'), '')
             ).__doc__
-            or "Default Entity Type",
+            or 'Default Entity Type',
         }
         for i, node in enumerate(llm_extracted_nodes)
     ]

@@ -275,9 +265,9 @@ async def _resolve_with_llm(
     existing_nodes_context = [
         {
             **{
-                "idx": i,
-                "name": candidate.name,
-                "entity_types": candidate.labels,
+                'idx': i,
+                'name': candidate.name,
+                'entity_types': candidate.labels,
             },
             **candidate.attributes,
         }

@@ -285,15 +275,13 @@ async def _resolve_with_llm(
     ]

     context = {
-        "extracted_nodes": extracted_nodes_context,
-        "existing_nodes": existing_nodes_context,
-        "episode_content": episode.content if episode is not None else "",
-        "previous_episodes": (
-            [ep.content for ep in previous_episodes]
-            if previous_episodes is not None
-            else []
+        'extracted_nodes': extracted_nodes_context,
+        'existing_nodes': existing_nodes_context,
+        'episode_content': episode.content if episode is not None else '',
+        'previous_episodes': (
+            [ep.content for ep in previous_episodes] if previous_episodes is not None else []
         ),
-        "ensure_ascii": ensure_ascii,
+        'ensure_ascii': ensure_ascii,
     }

     llm_response = await llm_client.generate_response(

@@ -301,9 +289,7 @@ async def _resolve_with_llm(
         response_model=NodeResolutions,
     )

-    node_resolutions: list[NodeDuplicate] = NodeResolutions(
-        **llm_response
-    ).entity_resolutions
+    node_resolutions: list[NodeDuplicate] = NodeResolutions(**llm_response).entity_resolutions

     for resolution in node_resolutions:
         relative_id: int = resolution.id

@@ -367,13 +353,13 @@ async def resolve_extracted_nodes(
         state.uuid_map[node.uuid] = node.uuid

     logger.debug(
-        "Resolved nodes: %s",
+        'Resolved nodes: %s',
         [(node.name, node.uuid) for node in state.resolved_nodes if node is not None],
     )

-    new_node_duplicates: list[tuple[EntityNode, EntityNode]] = (
-        await filter_existing_duplicate_of_edges(driver, node_duplicates)
-    )
+    new_node_duplicates: list[
+        tuple[EntityNode, EntityNode]
+    ] = await filter_existing_duplicate_of_edges(driver, node_duplicates)

     return (
         [node for node in state.resolved_nodes if node is not None],

@@ -399,9 +385,7 @@ async def extract_attributes_from_nodes(
                 episode,
                 previous_episodes,
                 (
-                    entity_types.get(
-                        next((item for item in node.labels if item != "Entity"), "")
-                    )
+                    entity_types.get(next((item for item in node.labels if item != 'Entity'), ''))
                     if entity_types is not None
                     else None
                 ),

@@ -425,32 +409,28 @@ async def extract_attributes_from_node(
     ensure_ascii: bool = False,
 ) -> EntityNode:
     node_context: dict[str, Any] = {
-        "name": node.name,
-        "summary": node.summary,
-        "entity_types": node.labels,
-        "attributes": node.attributes,
+        'name': node.name,
+        'summary': node.summary,
+        'entity_types': node.labels,
+        'attributes': node.attributes,
     }

     attributes_context: dict[str, Any] = {
-        "node": node_context,
-        "episode_content": episode.content if episode is not None else "",
-        "previous_episodes": (
-            [ep.content for ep in previous_episodes]
-            if previous_episodes is not None
-            else []
+        'node': node_context,
+        'episode_content': episode.content if episode is not None else '',
+        'previous_episodes': (
+            [ep.content for ep in previous_episodes] if previous_episodes is not None else []
         ),
-        "ensure_ascii": ensure_ascii,
+        'ensure_ascii': ensure_ascii,
     }

     summary_context: dict[str, Any] = {
-        "node": node_context,
-        "episode_content": episode.content if episode is not None else "",
-        "previous_episodes": (
-            [ep.content for ep in previous_episodes]
-            if previous_episodes is not None
-            else []
+        'node': node_context,
+        'episode_content': episode.content if episode is not None else '',
+        'previous_episodes': (
+            [ep.content for ep in previous_episodes] if previous_episodes is not None else []
         ),
-        "ensure_ascii": ensure_ascii,
+        'ensure_ascii': ensure_ascii,
     }

     has_entity_attributes: bool = bool(

@@ -478,7 +458,7 @@ async def extract_attributes_from_node(
     if has_entity_attributes and entity_type is not None:
         entity_type(**llm_response)

-    node.summary = summary_response.get("summary", "")
+    node.summary = summary_response.get('summary', '')
     node_attributes = {key: value for key, value in llm_response.items()}

     node.attributes.update(node_attributes)
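For orientation, the extract_nodes hunks above sit inside a reflexion loop: entities are extracted, a reflexion prompt reports which ones were missed, and the missed names are folded into custom_prompt for the next attempt. The following is a condensed, self-contained sketch of that control flow with the LLM calls stubbed out; the stubs and sample names are hypothetical, and only the loop shape and the custom_prompt construction mirror the diff.

# Hypothetical sketch of the reflexion retry loop shown piecemeal in the
# extract_nodes hunks; LLM calls are replaced with stubs so it runs standalone.
MAX_REFLEXION_ITERATIONS = 2


def extract_entities(custom_prompt: str) -> list[str]:
    # Stub: the "LLM" forgets Bob unless the prompt reminds it.
    return ['Alice', 'Bob'] if 'Bob' in custom_prompt else ['Alice']


def reflexion(extracted: list[str]) -> list[str]:
    # Stub for extract_nodes_reflexion: report entities that were missed.
    return [name for name in ('Alice', 'Bob') if name not in extracted]


custom_prompt = ''
entities_missed = True
reflexion_iterations = 0
extracted: list[str] = []

while entities_missed and reflexion_iterations <= MAX_REFLEXION_ITERATIONS:
    extracted = extract_entities(custom_prompt)
    reflexion_iterations += 1

    missing_entities = reflexion(extracted)
    entities_missed = len(missing_entities) != 0

    custom_prompt = 'Make sure that the following entities are extracted: '
    for entity in missing_entities:
        custom_prompt += f'\n{entity},'

print(extracted)  # ['Alice', 'Bob'] once the reflexion pass fills in the miss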