graphiti/graphiti_core/search/search_helpers.py
HUGO SON ce9ef3ca79
Add support for non-ASCII characters in LLM prompts (#805)
* Add support for non-ASCII characters in LLM prompts

- Add ensure_ascii parameter to Graphiti class (default: True)
- Create to_prompt_json helper function for consistent JSON serialization
- Update all prompt files to use new helper function
- Preserve Korean/Japanese/Chinese characters when ensure_ascii=False
- Maintain backward compatibility with existing behavior

Fixes issue where non-ASCII characters were escaped as unicode sequences
in prompts, making them unreadable in LLM logs and potentially affecting
model understanding.

* Remove unused json imports after replacing with to_prompt_json helper

- Fix ruff lint errors (F401) for unused json imports
- All prompt files now use to_prompt_json helper instead of json.dumps
- Maintains clean code style and passes lint checks

* Fix ensure_ascii propagation to all LLM calls

- Add ensure_ascii parameter to maintenance operation functions that were missing it
- Update function signatures in node_operations, community_operations, temporal_operations, and edge_operations
- Ensure all llm_client.generate_response calls receive proper ensure_ascii context
- Fix hardcoded ensure_ascii: True values that prevented non-ASCII character preservation
- Maintain backward compatibility with default ensure_ascii=True
- Complete the fix for issue #804 ensuring Korean/Japanese/Chinese characters are properly handled in LLM prompts
2025-08-08 11:07:32 -04:00

74 lines
2.8 KiB
Python

"""
Copyright 2024, Zep Software, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from graphiti_core.edges import EntityEdge
from graphiti_core.prompts.prompt_helpers import to_prompt_json
from graphiti_core.search.search_config import SearchResults
def format_edge_date_range(edge: EntityEdge) -> str:
    """Return the edge's validity window as ``"<valid_at> - <invalid_at>"``.

    A falsy ``valid_at`` is shown as ``date unknown`` and a falsy
    ``invalid_at`` as ``present``. Any other value is interpolated using its
    default string formatting.
    """
    start = edge.valid_at or 'date unknown'
    end = edge.invalid_at or 'present'
    return f'{start} - {end}'
def search_results_to_context_string(
    search_results: SearchResults, ensure_ascii: bool = True
) -> str:
    """Render a SearchResults object as one context string for an LLM prompt.

    Edges, nodes, episodes and communities are each reduced to a list of small
    dicts, serialized with ``to_prompt_json``, and placed inside the
    ``<FACTS>``, ``<ENTITIES>``, ``<EPISODES>`` and ``<COMMUNITIES>`` sections
    of a fixed template.

    Args:
        search_results: Results whose ``edges``, ``nodes``, ``episodes`` and
            ``communities`` collections are rendered.
        ensure_ascii: Forwarded unchanged to ``to_prompt_json`` for every
            section.

    Returns:
        The assembled context string.
    """
    facts = []
    for edge in search_results.edges:
        facts.append(
            {
                'fact': edge.fact,
                # NOTE(review): if valid_at can be None this renders the
                # literal string "None" -- confirm that is intended.
                'valid_at': str(edge.valid_at),
                # A falsy invalid_at is reported as "Present", matching the
                # wording used in the template below.
                'invalid_at': str(edge.invalid_at or 'Present'),
            }
        )

    entities = []
    for node in search_results.nodes:
        entities.append({'entity_name': node.name, 'summary': node.summary})

    episodes = []
    for episode in search_results.episodes:
        episodes.append(
            {
                'source_description': episode.source_description,
                'content': episode.content,
            }
        )

    communities = []
    for community in search_results.communities:
        communities.append({'community_name': community.name, 'summary': community.summary})

    # Serialize the sections in the same order they appear in the template.
    facts_text = to_prompt_json(facts, ensure_ascii=ensure_ascii, indent=12)
    entities_text = to_prompt_json(entities, ensure_ascii=ensure_ascii, indent=12)
    episodes_text = to_prompt_json(episodes, ensure_ascii=ensure_ascii, indent=12)
    communities_text = to_prompt_json(communities, ensure_ascii=ensure_ascii, indent=12)

    return f"""
FACTS and ENTITIES represent relevant context to the current conversation.
COMMUNITIES represent a cluster of closely related entities.
These are the most relevant facts and their valid and invalid dates. Facts are considered valid
between their valid_at and invalid_at dates. Facts with an invalid_at date of "Present" are considered valid.
<FACTS>
{facts_text}
</FACTS>
<ENTITIES>
{entities_text}
</ENTITIES>
<EPISODES>
{episodes_text}
</EPISODES>
<COMMUNITIES>
{communities_text}
</COMMUNITIES>
"""