Compare commits

...

2 commits

Author SHA1 Message Date
Daniel Chalef
c8d2f147ea format 2025-10-02 15:07:09 -07:00
Daniel Chalef
5ab8eee576 Remove ensure_ascii configuration parameter
- Changed to_prompt_json default from ensure_ascii=True to False
- Removed ensure_ascii parameter from Graphiti.__init__ and GraphitiClients
- Removed ensure_ascii from all function signatures and context dictionaries
- Removed ensure_ascii from all test files
- All JSON serialization now preserves Unicode characters by default

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-02 15:03:03 -07:00
18 changed files with 46 additions and 114 deletions
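
The net effect of the second commit is that JSON embedded in prompts is no longer ASCII-escaped. The same behavior can be seen with plain json.dumps, which to_prompt_json presumably wraps (its body is not part of this diff); the snippet below is a stand-alone illustration, not code from the change:

    import json

    payload = {'name': '김철수', 'title': 'ソフトウェアエンジニア'}

    # Old default (ensure_ascii=True): non-ASCII characters are escaped to \uXXXX sequences.
    print(json.dumps(payload, ensure_ascii=True, indent=2))

    # New default (ensure_ascii=False): characters stay readable in prompts and LLM logs.
    print(json.dumps(payload, ensure_ascii=False, indent=2))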

View file

@@ -136,7 +136,6 @@ class Graphiti:
         store_raw_episode_content: bool = True,
         graph_driver: GraphDriver | None = None,
         max_coroutines: int | None = None,
-        ensure_ascii: bool = False,
     ):
         """
         Initialize a Graphiti instance.
@@ -169,10 +168,6 @@ class Graphiti:
         max_coroutines : int | None, optional
             The maximum number of concurrent operations allowed. Overrides SEMAPHORE_LIMIT set in the environment.
             If not set, the Graphiti default is used.
-        ensure_ascii : bool, optional
-            Whether to escape non-ASCII characters in JSON serialization for prompts. Defaults to False.
-            Set as False to preserve non-ASCII characters (e.g., Korean, Japanese, Chinese) in their
-            original form, making them readable in LLM logs and improving model understanding.

         Returns
         -------
@@ -202,7 +197,6 @@ class Graphiti:
         self.store_raw_episode_content = store_raw_episode_content
         self.max_coroutines = max_coroutines
-        self.ensure_ascii = ensure_ascii

         if llm_client:
             self.llm_client = llm_client
         else:
@@ -221,7 +215,6 @@ class Graphiti:
             llm_client=self.llm_client,
             embedder=self.embedder,
             cross_encoder=self.cross_encoder,
-            ensure_ascii=self.ensure_ascii,
         )

         # Capture telemetry event
@@ -559,9 +552,7 @@ class Graphiti:
         if update_communities:
             communities, community_edges = await semaphore_gather(
                 *[
-                    update_community(
-                        self.driver, self.llm_client, self.embedder, node, self.ensure_ascii
-                    )
+                    update_community(self.driver, self.llm_client, self.embedder, node)
                     for node in nodes
                 ],
                 max_coroutines=self.max_coroutines,
@@ -1071,7 +1062,6 @@ class Graphiti:
                 ),
                 None,
                 None,
-                self.ensure_ascii,
             )

             edges: list[EntityEdge] = [resolved_edge] + invalidated_edges
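
For callers of the public constructor, ensure_ascii simply disappears from the keyword arguments; nothing else about the call changes. A minimal before/after sketch (the connection arguments are illustrative, not taken from this diff):

    # Before: the flag was accepted, though it already defaulted to False.
    # graphiti = Graphiti(uri, user, password, ensure_ascii=False)

    # After: omit the argument; non-ASCII text is preserved in prompts by default.
    graphiti = Graphiti(uri, user, password)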

View file

@@ -27,6 +27,5 @@ class GraphitiClients(BaseModel):
     llm_client: LLMClient
     embedder: EmbedderClient
     cross_encoder: CrossEncoderClient
-    ensure_ascii: bool = False

     model_config = ConfigDict(arbitrary_types_allowed=True)

View file

@@ -67,11 +67,11 @@ def edge(context: dict[str, Any]) -> list[Message]:
 Given the following context, determine whether the New Edge represents any of the edges in the list of Existing Edges.

 <EXISTING EDGES>
-{to_prompt_json(context['related_edges'], ensure_ascii=context.get('ensure_ascii', False), indent=2)}
+{to_prompt_json(context['related_edges'], indent=2)}
 </EXISTING EDGES>

 <NEW EDGE>
-{to_prompt_json(context['extracted_edges'], ensure_ascii=context.get('ensure_ascii', False), indent=2)}
+{to_prompt_json(context['extracted_edges'], indent=2)}
 </NEW EDGE>

 Task:
@@ -98,7 +98,7 @@ def edge_list(context: dict[str, Any]) -> list[Message]:
 Given the following context, find all of the duplicates in a list of facts:

 Facts:
-{to_prompt_json(context['edges'], ensure_ascii=context.get('ensure_ascii', False), indent=2)}
+{to_prompt_json(context['edges'], indent=2)}

 Task:
 If any facts in Facts is a duplicate of another fact, return a new fact with one of their uuid's.

View file

@@ -64,20 +64,20 @@ def node(context: dict[str, Any]) -> list[Message]:
             role='user',
             content=f"""
 <PREVIOUS MESSAGES>
-{to_prompt_json([ep for ep in context['previous_episodes']], ensure_ascii=context.get('ensure_ascii', False), indent=2)}
+{to_prompt_json([ep for ep in context['previous_episodes']], indent=2)}
 </PREVIOUS MESSAGES>
 <CURRENT MESSAGE>
 {context['episode_content']}
 </CURRENT MESSAGE>

 <NEW ENTITY>
-{to_prompt_json(context['extracted_node'], ensure_ascii=context.get('ensure_ascii', False), indent=2)}
+{to_prompt_json(context['extracted_node'], indent=2)}
 </NEW ENTITY>
 <ENTITY TYPE DESCRIPTION>
-{to_prompt_json(context['entity_type_description'], ensure_ascii=context.get('ensure_ascii', False), indent=2)}
+{to_prompt_json(context['entity_type_description'], indent=2)}
 </ENTITY TYPE DESCRIPTION>

 <EXISTING ENTITIES>
-{to_prompt_json(context['existing_nodes'], ensure_ascii=context.get('ensure_ascii', False), indent=2)}
+{to_prompt_json(context['existing_nodes'], indent=2)}
 </EXISTING ENTITIES>

 Given the above EXISTING ENTITIES and their attributes, MESSAGE, and PREVIOUS MESSAGES; Determine if the NEW ENTITY extracted from the conversation
@@ -125,7 +125,7 @@ def nodes(context: dict[str, Any]) -> list[Message]:
             role='user',
             content=f"""
 <PREVIOUS MESSAGES>
-{to_prompt_json([ep for ep in context['previous_episodes']], ensure_ascii=context.get('ensure_ascii', True), indent=2)}
+{to_prompt_json([ep for ep in context['previous_episodes']], indent=2)}
 </PREVIOUS MESSAGES>
 <CURRENT MESSAGE>
 {context['episode_content']}
@@ -142,11 +142,11 @@ def nodes(context: dict[str, Any]) -> list[Message]:
 }}

 <ENTITIES>
-{to_prompt_json(context['extracted_nodes'], ensure_ascii=context.get('ensure_ascii', True), indent=2)}
+{to_prompt_json(context['extracted_nodes'], indent=2)}
 </ENTITIES>

 <EXISTING ENTITIES>
-{to_prompt_json(context['existing_nodes'], ensure_ascii=context.get('ensure_ascii', True), indent=2)}
+{to_prompt_json(context['existing_nodes'], indent=2)}
 </EXISTING ENTITIES>

 Each entry in EXISTING ENTITIES is an object with the following structure:
@@ -197,7 +197,7 @@ def node_list(context: dict[str, Any]) -> list[Message]:
 Given the following context, deduplicate a list of nodes:

 Nodes:
-{to_prompt_json(context['nodes'], ensure_ascii=context.get('ensure_ascii', True), indent=2)}
+{to_prompt_json(context['nodes'], indent=2)}

 Task:
 1. Group nodes together such that all duplicate nodes are in the same list of uuids

View file

@@ -68,7 +68,7 @@ def query_expansion(context: dict[str, Any]) -> list[Message]:
 Bob is asking Alice a question, are you able to rephrase the question into a simpler one about Alice in the third person
 that maintains the relevant context?
 <QUESTION>
-{to_prompt_json(context['query'], ensure_ascii=context.get('ensure_ascii', False))}
+{to_prompt_json(context['query'])}
 </QUESTION>
 """
     return [
@@ -84,10 +84,10 @@ def qa_prompt(context: dict[str, Any]) -> list[Message]:
 Your task is to briefly answer the question in the way that you think Alice would answer the question.
 You are given the following entity summaries and facts to help you determine the answer to your question.
 <ENTITY_SUMMARIES>
-{to_prompt_json(context['entity_summaries'], ensure_ascii=context.get('ensure_ascii', False))}
+{to_prompt_json(context['entity_summaries'])}
 </ENTITY_SUMMARIES>
 <FACTS>
-{to_prompt_json(context['facts'], ensure_ascii=context.get('ensure_ascii', False))}
+{to_prompt_json(context['facts'])}
 </FACTS>
 <QUESTION>
 {context['query']}

View file

@@ -73,7 +73,7 @@ def edge(context: dict[str, Any]) -> list[Message]:
 </FACT TYPES>

 <PREVIOUS_MESSAGES>
-{to_prompt_json([ep for ep in context['previous_episodes']], ensure_ascii=context.get('ensure_ascii', False), indent=2)}
+{to_prompt_json([ep for ep in context['previous_episodes']], indent=2)}
 </PREVIOUS_MESSAGES>

 <CURRENT_MESSAGE>
@@ -133,7 +133,7 @@ def reflexion(context: dict[str, Any]) -> list[Message]:
     user_prompt = f"""
 <PREVIOUS MESSAGES>
-{to_prompt_json([ep for ep in context['previous_episodes']], ensure_ascii=context.get('ensure_ascii', False), indent=2)}
+{to_prompt_json([ep for ep in context['previous_episodes']], indent=2)}
 </PREVIOUS MESSAGES>
 <CURRENT MESSAGE>
 {context['episode_content']}
@@ -167,7 +167,7 @@ def extract_attributes(context: dict[str, Any]) -> list[Message]:
             content=f"""
 <MESSAGE>
-{to_prompt_json(context['episode_content'], ensure_ascii=context.get('ensure_ascii', False), indent=2)}
+{to_prompt_json(context['episode_content'], indent=2)}
 </MESSAGE>
 <REFERENCE TIME>
 {context['reference_time']}

View file

@@ -89,7 +89,7 @@ def extract_message(context: dict[str, Any]) -> list[Message]:
 </ENTITY TYPES>

 <PREVIOUS MESSAGES>
-{to_prompt_json([ep for ep in context['previous_episodes']], ensure_ascii=context.get('ensure_ascii', True), indent=2)}
+{to_prompt_json([ep for ep in context['previous_episodes']], indent=2)}
 </PREVIOUS MESSAGES>

 <CURRENT MESSAGE>
@@ -197,7 +197,7 @@ def reflexion(context: dict[str, Any]) -> list[Message]:
     user_prompt = f"""
 <PREVIOUS MESSAGES>
-{to_prompt_json([ep for ep in context['previous_episodes']], ensure_ascii=context.get('ensure_ascii', True), indent=2)}
+{to_prompt_json([ep for ep in context['previous_episodes']], indent=2)}
 </PREVIOUS MESSAGES>
 <CURRENT MESSAGE>
 {context['episode_content']}
@@ -221,7 +221,7 @@ def classify_nodes(context: dict[str, Any]) -> list[Message]:
     user_prompt = f"""
 <PREVIOUS MESSAGES>
-{to_prompt_json([ep for ep in context['previous_episodes']], ensure_ascii=context.get('ensure_ascii', True), indent=2)}
+{to_prompt_json([ep for ep in context['previous_episodes']], indent=2)}
 </PREVIOUS MESSAGES>
 <CURRENT MESSAGE>
 {context['episode_content']}
@@ -259,8 +259,8 @@ def extract_attributes(context: dict[str, Any]) -> list[Message]:
             content=f"""
 <MESSAGES>
-{to_prompt_json(context['previous_episodes'], ensure_ascii=context.get('ensure_ascii', True), indent=2)}
-{to_prompt_json(context['episode_content'], ensure_ascii=context.get('ensure_ascii', True), indent=2)}
+{to_prompt_json(context['previous_episodes'], indent=2)}
+{to_prompt_json(context['episode_content'], indent=2)}
 </MESSAGES>

 Given the above MESSAGES and the following ENTITY, update any of its attributes based on the information provided
@@ -289,8 +289,8 @@ def extract_summary(context: dict[str, Any]) -> list[Message]:
             content=f"""
 <MESSAGES>
-{to_prompt_json(context['previous_episodes'], ensure_ascii=context.get('ensure_ascii', True), indent=2)}
-{to_prompt_json(context['episode_content'], ensure_ascii=context.get('ensure_ascii', True), indent=2)}
+{to_prompt_json(context['previous_episodes'], indent=2)}
+{to_prompt_json(context['episode_content'], indent=2)}
 </MESSAGES>

 Given the above MESSAGES and the following ENTITY, update the summary that combines relevant information about the entity

View file

@@ -4,20 +4,20 @@ from typing import Any

 DO_NOT_ESCAPE_UNICODE = '\nDo not escape unicode characters.\n'


-def to_prompt_json(data: Any, ensure_ascii: bool = True, indent: int = 2) -> str:
+def to_prompt_json(data: Any, ensure_ascii: bool = False, indent: int = 2) -> str:
     """
     Serialize data to JSON for use in prompts.

     Args:
         data: The data to serialize
-        ensure_ascii: If True, escape non-ASCII characters. If False, preserve them.
+        ensure_ascii: If True, escape non-ASCII characters. If False (default), preserve them.
         indent: Number of spaces for indentation

     Returns:
         JSON string representation of the data

     Notes:
-        When ensure_ascii=False, non-ASCII characters (e.g., Korean, Japanese, Chinese)
+        By default (ensure_ascii=False), non-ASCII characters (e.g., Korean, Japanese, Chinese)
         are preserved in their original form in the prompt, making them readable
         in LLM logs and improving model understanding.
     """

View file

@@ -59,7 +59,7 @@ def summarize_pair(context: dict[str, Any]) -> list[Message]:
 Summaries must be under 250 words.

 Summaries:
-{to_prompt_json(context['node_summaries'], ensure_ascii=context.get('ensure_ascii', True), indent=2)}
+{to_prompt_json(context['node_summaries'], indent=2)}
 """,
         ),
     ]
@@ -76,8 +76,8 @@ def summarize_context(context: dict[str, Any]) -> list[Message]:
             content=f"""
 <MESSAGES>
-{to_prompt_json(context['previous_episodes'], ensure_ascii=context.get('ensure_ascii', True), indent=2)}
-{to_prompt_json(context['episode_content'], ensure_ascii=context.get('ensure_ascii', True), indent=2)}
+{to_prompt_json(context['previous_episodes'], indent=2)}
+{to_prompt_json(context['episode_content'], indent=2)}
 </MESSAGES>

 Given the above MESSAGES and the following ENTITY name, create a summary for the ENTITY. Your summary must only use
@@ -100,7 +100,7 @@ def summarize_context(context: dict[str, Any]) -> list[Message]:
 </ENTITY CONTEXT>

 <ATTRIBUTES>
-{to_prompt_json(context['attributes'], ensure_ascii=context.get('ensure_ascii', True), indent=2)}
+{to_prompt_json(context['attributes'], indent=2)}
 </ATTRIBUTES>
 """,
         ),
@@ -120,7 +120,7 @@ def summary_description(context: dict[str, Any]) -> list[Message]:
 Summaries must be under 250 words.

 Summary:
-{to_prompt_json(context['summary'], ensure_ascii=context.get('ensure_ascii', True), indent=2)}
+{to_prompt_json(context['summary'], indent=2)}
 """,
         ),
     ]

View file

@@ -24,9 +24,7 @@ def format_edge_date_range(edge: EntityEdge) -> str:
     return f'{edge.valid_at if edge.valid_at else "date unknown"} - {(edge.invalid_at if edge.invalid_at else "present")}'


-def search_results_to_context_string(
-    search_results: SearchResults, ensure_ascii: bool = False
-) -> str:
+def search_results_to_context_string(search_results: SearchResults) -> str:
     """Reformats a set of SearchResults into a single string to pass directly to an LLM as context"""
     fact_json = [
         {
@@ -58,16 +56,16 @@ def search_results_to_context_string(
 These are the most relevant facts and their valid and invalid dates. Facts are considered valid
 between their valid_at and invalid_at dates. Facts with an invalid_at date of "Present" are considered valid.
 <FACTS>
-{to_prompt_json(fact_json, ensure_ascii=ensure_ascii, indent=12)}
+{to_prompt_json(fact_json, indent=12)}
 </FACTS>
 <ENTITIES>
-{to_prompt_json(entity_json, ensure_ascii=ensure_ascii, indent=12)}
+{to_prompt_json(entity_json, indent=12)}
 </ENTITIES>
 <EPISODES>
-{to_prompt_json(episode_json, ensure_ascii=ensure_ascii, indent=12)}
+{to_prompt_json(episode_json, indent=12)}
 </EPISODES>
 <COMMUNITIES>
-{to_prompt_json(community_json, ensure_ascii=ensure_ascii, indent=12)}
+{to_prompt_json(community_json, indent=12)}
 </COMMUNITIES>
 """

View file

@@ -479,7 +479,6 @@ async def dedupe_edges_bulk(
             episode,
             edge_types,
             set(edge_types),
-            clients.ensure_ascii,
         )
         for episode, edge, candidates in dedupe_tuples
     ]

View file

@@ -131,13 +131,10 @@ def label_propagation(projection: dict[str, list[Neighbor]]) -> list[list[str]]:
     return clusters


-async def summarize_pair(
-    llm_client: LLMClient, summary_pair: tuple[str, str], ensure_ascii: bool = True
-) -> str:
+async def summarize_pair(llm_client: LLMClient, summary_pair: tuple[str, str]) -> str:
     # Prepare context for LLM
     context = {
         'node_summaries': [{'summary': summary} for summary in summary_pair],
-        'ensure_ascii': ensure_ascii,
     }

     llm_response = await llm_client.generate_response(
@@ -149,12 +146,9 @@ async def summarize_pair(
     return pair_summary


-async def generate_summary_description(
-    llm_client: LLMClient, summary: str, ensure_ascii: bool = True
-) -> str:
+async def generate_summary_description(llm_client: LLMClient, summary: str) -> str:
     context = {
         'summary': summary,
-        'ensure_ascii': ensure_ascii,
     }

     llm_response = await llm_client.generate_response(
@@ -168,7 +162,7 @@ async def generate_summary_description(
 async def build_community(
-    llm_client: LLMClient, community_cluster: list[EntityNode], ensure_ascii: bool = True
+    llm_client: LLMClient, community_cluster: list[EntityNode]
 ) -> tuple[CommunityNode, list[CommunityEdge]]:
     summaries = [entity.summary for entity in community_cluster]
     length = len(summaries)
@@ -180,9 +174,7 @@ async def build_community(
         new_summaries: list[str] = list(
             await semaphore_gather(
                 *[
-                    summarize_pair(
-                        llm_client, (str(left_summary), str(right_summary)), ensure_ascii
-                    )
+                    summarize_pair(llm_client, (str(left_summary), str(right_summary)))
                     for left_summary, right_summary in zip(
                         summaries[: int(length / 2)], summaries[int(length / 2) :], strict=False
                     )
@@ -195,7 +187,7 @@ async def build_community(
     length = len(summaries)

     summary = summaries[0]
-    name = await generate_summary_description(llm_client, summary, ensure_ascii)
+    name = await generate_summary_description(llm_client, summary)
     now = utc_now()
     community_node = CommunityNode(
         name=name,
@@ -215,7 +207,6 @@ async def build_communities(
     driver: GraphDriver,
     llm_client: LLMClient,
     group_ids: list[str] | None,
-    ensure_ascii: bool = True,
 ) -> tuple[list[CommunityNode], list[CommunityEdge]]:
     community_clusters = await get_community_clusters(driver, group_ids)
@@ -223,7 +214,7 @@ async def build_communities(
     async def limited_build_community(cluster):
         async with semaphore:
-            return await build_community(llm_client, cluster, ensure_ascii)
+            return await build_community(llm_client, cluster)

     communities: list[tuple[CommunityNode, list[CommunityEdge]]] = list(
         await semaphore_gather(
@@ -312,17 +303,14 @@ async def update_community(
     llm_client: LLMClient,
     embedder: EmbedderClient,
     entity: EntityNode,
-    ensure_ascii: bool = True,
 ) -> tuple[list[CommunityNode], list[CommunityEdge]]:
     community, is_new = await determine_entity_community(driver, entity)

     if community is None:
         return [], []

-    new_summary = await summarize_pair(
-        llm_client, (entity.summary, community.summary), ensure_ascii
-    )
-    new_name = await generate_summary_description(llm_client, new_summary, ensure_ascii)
+    new_summary = await summarize_pair(llm_client, (entity.summary, community.summary))
+    new_name = await generate_summary_description(llm_client, new_summary)

     community.summary = new_summary
     community.name = new_name

View file

@@ -130,7 +130,6 @@ async def extract_edges(
         'reference_time': episode.valid_at,
         'edge_types': edge_types_context,
         'custom_prompt': '',
-        'ensure_ascii': clients.ensure_ascii,
     }

     facts_missed = True
@@ -358,7 +357,6 @@ async def resolve_extracted_edges(
                 episode,
                 extracted_edge_types,
                 custom_type_names,
-                clients.ensure_ascii,
             )
             for extracted_edge, related_edges, existing_edges, extracted_edge_types in zip(
                 extracted_edges,
@@ -431,7 +429,6 @@ async def resolve_extracted_edge(
     episode: EpisodicNode,
     edge_type_candidates: dict[str, type[BaseModel]] | None = None,
     custom_edge_type_names: set[str] | None = None,
-    ensure_ascii: bool = True,
 ) -> tuple[EntityEdge, list[EntityEdge], list[EntityEdge]]:
     """Resolve an extracted edge against existing graph context.
@@ -453,8 +450,6 @@ async def resolve_extracted_edge(
         Full catalog of registered custom edge names. Used to distinguish
         between disallowed custom types (which fall back to the default label)
         and ad-hoc labels emitted by the LLM.
-    ensure_ascii : bool
-        Whether prompt payloads should coerce ASCII output.

     Returns
     -------
@@ -504,7 +499,6 @@ async def resolve_extracted_edge(
         'new_edge': extracted_edge.fact,
         'edge_invalidation_candidates': invalidation_edge_candidates_context,
         'edge_types': edge_types_context,
-        'ensure_ascii': ensure_ascii,
     }

     llm_response = await llm_client.generate_response(
@@ -548,7 +542,6 @@ async def resolve_extracted_edge(
             'episode_content': episode.content,
             'reference_time': episode.valid_at,
             'fact': resolved_edge.fact,
-            'ensure_ascii': ensure_ascii,
         }

         edge_model = edge_type_candidates.get(fact_type) if edge_type_candidates else None

View file

@@ -64,14 +64,12 @@ async def extract_nodes_reflexion(
     episode: EpisodicNode,
     previous_episodes: list[EpisodicNode],
     node_names: list[str],
-    ensure_ascii: bool = False,
 ) -> list[str]:
     # Prepare context for LLM
     context = {
         'episode_content': episode.content,
         'previous_episodes': [ep.content for ep in previous_episodes],
         'extracted_entities': node_names,
-        'ensure_ascii': ensure_ascii,
     }

     llm_response = await llm_client.generate_response(
@@ -124,7 +122,6 @@ async def extract_nodes(
         'custom_prompt': custom_prompt,
         'entity_types': entity_types_context,
         'source_description': episode.source_description,
-        'ensure_ascii': clients.ensure_ascii,
     }

     while entities_missed and reflexion_iterations <= MAX_REFLEXION_ITERATIONS:
@@ -155,7 +152,6 @@ async def extract_nodes(
                 episode,
                 previous_episodes,
                 [entity.name for entity in extracted_entities],
-                clients.ensure_ascii,
             )

             entities_missed = len(missing_entities) != 0
@@ -239,7 +235,6 @@ async def _resolve_with_llm(
     extracted_nodes: list[EntityNode],
     indexes: DedupCandidateIndexes,
     state: DedupResolutionState,
-    ensure_ascii: bool,
     episode: EpisodicNode | None,
     previous_episodes: list[EpisodicNode] | None,
     entity_types: dict[str, type[BaseModel]] | None,
@@ -309,7 +304,6 @@ async def _resolve_with_llm(
         'previous_episodes': (
             [ep.content for ep in previous_episodes] if previous_episodes is not None else []
         ),
-        'ensure_ascii': ensure_ascii,
     }

     llm_response = await llm_client.generate_response(
@@ -416,7 +410,6 @@ async def resolve_extracted_nodes(
         extracted_nodes,
         indexes,
         state,
-        clients.ensure_ascii,
         episode,
         previous_episodes,
         entity_types,
@@ -465,7 +458,6 @@ async def extract_attributes_from_nodes(
                 if entity_types is not None
                 else None
             ),
-            clients.ensure_ascii,
             should_summarize_node,
         )
         for node in nodes
@@ -483,7 +475,6 @@ async def extract_attributes_from_node(
     episode: EpisodicNode | None = None,
     previous_episodes: list[EpisodicNode] | None = None,
     entity_type: type[BaseModel] | None = None,
-    ensure_ascii: bool = False,
     should_summarize_node: NodeSummaryFilter | None = None,
 ) -> EntityNode:
     node_context: dict[str, Any] = {
@@ -499,7 +490,6 @@ async def extract_attributes_from_node(
         'previous_episodes': (
             [ep.content for ep in previous_episodes] if previous_episodes is not None else []
         ),
-        'ensure_ascii': ensure_ascii,
     }

     summary_context: dict[str, Any] = {
@@ -508,7 +498,6 @@ async def extract_attributes_from_node(
         'previous_episodes': (
             [ep.content for ep in previous_episodes] if previous_episodes is not None else []
         ),
-        'ensure_ascii': ensure_ascii,
     }

     has_entity_attributes: bool = bool(

View file

@@ -35,14 +35,12 @@ async def extract_edge_dates(
     edge: EntityEdge,
     current_episode: EpisodicNode,
     previous_episodes: list[EpisodicNode],
-    ensure_ascii: bool = False,
 ) -> tuple[datetime | None, datetime | None]:
     context = {
         'edge_fact': edge.fact,
         'current_episode': current_episode.content,
         'previous_episodes': [ep.content for ep in previous_episodes],
         'reference_timestamp': current_episode.valid_at.isoformat(),
-        'ensure_ascii': ensure_ascii,
     }
     llm_response = await llm_client.generate_response(
         prompt_library.extract_edge_dates.v1(context), response_model=EdgeDates
@@ -75,7 +73,6 @@ async def get_edge_contradictions(
     llm_client: LLMClient,
     new_edge: EntityEdge,
     existing_edges: list[EntityEdge],
-    ensure_ascii: bool = False,
 ) -> list[EntityEdge]:
     start = time()
@@ -87,7 +84,6 @@ async def get_edge_contradictions(
     context = {
         'new_edge': new_edge_context,
         'existing_edges': existing_edge_context,
-        'ensure_ascii': ensure_ascii,
     }

     llm_response = await llm_client.generate_response(

View file

@@ -34,7 +34,6 @@ def _make_clients() -> GraphitiClients:
         embedder=embedder,
         cross_encoder=cross_encoder,
         llm_client=llm_client,
-        ensure_ascii=False,
     )
@@ -260,7 +259,6 @@ async def test_dedupe_edges_bulk_deduplicates_within_episode(monkeypatch):
         episode,
         edge_type_candidates=None,
         custom_edge_type_names=None,
-        ensure_ascii=False,
     ):
         # Track that this edge was compared against the related_edges
         comparisons_made.append((extracted_edge.uuid, [r.uuid for r in related_edges]))

View file

@@ -143,7 +143,6 @@ async def test_resolve_extracted_edge_exact_fact_short_circuit(
         mock_existing_edges,
         mock_current_episode,
         edge_type_candidates=None,
-        ensure_ascii=True,
     )

     assert resolved_edge is related_edges[0]
@@ -184,7 +183,6 @@ async def test_resolve_extracted_edges_resets_unmapped_names(monkeypatch):
         llm_client=llm_client,
         embedder=MagicMock(),
         cross_encoder=MagicMock(),
-        ensure_ascii=True,
     )

     source_node = EntityNode(
@@ -265,7 +263,6 @@ async def test_resolve_extracted_edges_keeps_unknown_names(monkeypatch):
         llm_client=llm_client,
         embedder=MagicMock(),
         cross_encoder=MagicMock(),
-        ensure_ascii=True,
     )

     source_node = EntityNode(
@@ -369,7 +366,6 @@ async def test_resolve_extracted_edge_rejects_unmapped_fact_type(mock_llm_client
         episode,
         edge_type_candidates={},
         custom_edge_type_names={'OCCURRED_AT'},
-        ensure_ascii=True,
     )

     assert resolved_edge.name == DEFAULT_EDGE_NAME
@@ -427,7 +423,6 @@ async def test_resolve_extracted_edge_accepts_unknown_fact_type(mock_llm_client)
         episode,
         edge_type_candidates={'OCCURRED_AT': OccurredAtEdge},
         custom_edge_type_names={'OCCURRED_AT'},
-        ensure_ascii=True,
     )

     assert resolved_edge.name == 'INTERACTED_WITH'
@@ -515,7 +510,6 @@ async def test_resolve_extracted_edge_uses_integer_indices_for_duplicates(mock_l
         episode,
         edge_type_candidates=None,
         custom_edge_type_names=set(),
-        ensure_ascii=True,
     )

     # Verify LLM was called
@@ -553,7 +547,6 @@ async def test_resolve_extracted_edges_fast_path_deduplication(monkeypatch):
         episode,
         edge_type_candidates=None,
         custom_edge_type_names=None,
-        ensure_ascii=False,
     ):
         nonlocal resolve_call_count
         resolve_call_count += 1
@@ -576,7 +569,6 @@ async def test_resolve_extracted_edges_fast_path_deduplication(monkeypatch):
         llm_client=llm_client,
         embedder=MagicMock(),
         cross_encoder=MagicMock(),
-        ensure_ascii=True,
     )

     source_node = EntityNode(

View file

@@ -46,7 +46,6 @@ def _make_clients():
         embedder=embedder,
         cross_encoder=cross_encoder,
         llm_client=llm_client,
-        ensure_ascii=False,
     )

     return clients, llm_generate
@@ -335,7 +334,6 @@ async def test_resolve_with_llm_updates_unresolved(monkeypatch):
         [extracted],
         indexes,
         state,
-        ensure_ascii=False,
         episode=_make_episode(),
         previous_episodes=[],
         entity_types=None,
@@ -380,7 +378,6 @@ async def test_resolve_with_llm_ignores_out_of_range_relative_ids(monkeypatch, c
         [extracted],
         indexes,
         state,
-        ensure_ascii=False,
         episode=_make_episode(),
         previous_episodes=[],
         entity_types=None,
@@ -428,7 +425,6 @@ async def test_resolve_with_llm_ignores_duplicate_relative_ids(monkeypatch):
         [extracted],
         indexes,
         state,
-        ensure_ascii=False,
         episode=_make_episode(),
         previous_episodes=[],
         entity_types=None,
@@ -470,7 +466,6 @@ async def test_resolve_with_llm_invalid_duplicate_idx_defaults_to_extracted(monk
         [extracted],
         indexes,
         state,
-        ensure_ascii=False,
         episode=_make_episode(),
         previous_episodes=[],
         entity_types=None,
@@ -498,7 +493,6 @@ async def test_extract_attributes_without_callback_generates_summary():
         episode=episode,
         previous_episodes=[],
         entity_type=None,
-        ensure_ascii=False,
         should_summarize_node=None,  # No callback provided
     )
@@ -529,7 +523,6 @@ async def test_extract_attributes_with_callback_skip_summary():
         episode=episode,
         previous_episodes=[],
         entity_type=None,
-        ensure_ascii=False,
         should_summarize_node=skip_summary_filter,
     )
@@ -560,7 +553,6 @@ async def test_extract_attributes_with_callback_generate_summary():
         episode=episode,
         previous_episodes=[],
         entity_type=None,
-        ensure_ascii=False,
         should_summarize_node=generate_summary_filter,
     )
@@ -595,7 +587,6 @@ async def test_extract_attributes_with_selective_callback():
         episode=episode,
         previous_episodes=[],
         entity_type=None,
-        ensure_ascii=False,
         should_summarize_node=selective_filter,
     )
@@ -605,7 +596,6 @@ async def test_extract_attributes_with_selective_callback():
         episode=episode,
         previous_episodes=[],
         entity_type=None,
-        ensure_ascii=False,
         should_summarize_node=selective_filter,
     )