* Add support for non-ASCII characters in LLM prompts - Add ensure_ascii parameter to Graphiti class (default: True) - Create to_prompt_json helper function for consistent JSON serialization - Update all prompt files to use new helper function - Preserve Korean/Japanese/Chinese characters when ensure_ascii=False - Maintain backward compatibility with existing behavior Fixes issue where non-ASCII characters were escaped as unicode sequences in prompts, making them unreadable in LLM logs and potentially affecting model understanding. * Remove unused json imports after replacing with to_prompt_json helper - Fix ruff lint errors (F401) for unused json imports - All prompt files now use to_prompt_json helper instead of json.dumps - Maintains clean code style and passes lint checks * Fix ensure_ascii propagation to all LLM calls - Add ensure_ascii parameter to maintenance operation functions that were missing it - Update function signatures in node_operations, community_operations, temporal_operations, and edge_operations - Ensure all llm_client.generate_response calls receive proper ensure_ascii context - Fix hardcoded ensure_ascii: True values that prevented non-ASCII character preservation - Maintain backward compatibility with default ensure_ascii=True - Complete the fix for issue #804 ensuring Korean/Japanese/Chinese characters are properly handled in LLM prompts
74 lines
2.8 KiB
Python
74 lines
2.8 KiB
Python
"""
|
|
Copyright 2024, Zep Software, Inc.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
"""
|
|
|
|
from graphiti_core.edges import EntityEdge
|
|
from graphiti_core.prompts.prompt_helpers import to_prompt_json
|
|
from graphiti_core.search.search_config import SearchResults
|
|
|
|
|
|
def format_edge_date_range(edge: EntityEdge) -> str:
    """Render an edge's validity window as ``'<valid_at> - <invalid_at>'``.

    A falsy ``valid_at`` is shown as ``date unknown`` and a falsy
    ``invalid_at`` as ``present``. Any other value is interpolated using its
    default string form.
    """
    window_start = edge.valid_at or 'date unknown'
    window_end = edge.invalid_at or 'present'
    return f'{window_start} - {window_end}'
|
|
|
|
|
|
def search_results_to_context_string(
    search_results: SearchResults, ensure_ascii: bool = True
) -> str:
    """Build the context block handed to an LLM for a set of search results.

    The edges, nodes, episodes and communities of ``search_results`` are each
    reduced to a list of small dicts, serialized with ``to_prompt_json``
    (``indent=12``, forwarding ``ensure_ascii``) and wrapped in ``<FACTS>``,
    ``<ENTITIES>``, ``<EPISODES>`` and ``<COMMUNITIES>`` tags beneath a short
    explanatory preamble.

    Args:
        search_results: Object exposing ``edges``, ``nodes``, ``episodes`` and
            ``communities`` iterables.
        ensure_ascii: Passed through unchanged to ``to_prompt_json``.

    Returns:
        The assembled context string.
    """

    def _dump(rows):
        # One place for the serialization settings shared by all four sections.
        return to_prompt_json(rows, ensure_ascii=ensure_ascii, indent=12)

    # Build every section's rows before serializing anything, so attribute
    # access on the results happens up front, exactly as before.
    facts = [
        {
            'fact': edge.fact,
            'valid_at': str(edge.valid_at),
            # A falsy invalid_at means the fact still holds; the preamble
            # below tells the model how to read "Present".
            'invalid_at': str(edge.invalid_at) if edge.invalid_at else 'Present',
        }
        for edge in search_results.edges
    ]
    entities = [
        {'entity_name': node.name, 'summary': node.summary}
        for node in search_results.nodes
    ]
    episodes = [
        {'source_description': episode.source_description, 'content': episode.content}
        for episode in search_results.episodes
    ]
    communities = [
        {'community_name': community.name, 'summary': community.summary}
        for community in search_results.communities
    ]

    facts_text = _dump(facts)
    entities_text = _dump(entities)
    episodes_text = _dump(episodes)
    communities_text = _dump(communities)

    # NOTE(review): the template lines are reproduced as they appear in the
    # reviewed copy, which carried no leading whitespace — confirm against the
    # canonical file before merging if prompt whitespace matters.
    return f"""
FACTS and ENTITIES represent relevant context to the current conversation.
COMMUNITIES represent a cluster of closely related entities.

These are the most relevant facts and their valid and invalid dates. Facts are considered valid
between their valid_at and invalid_at dates. Facts with an invalid_at date of "Present" are considered valid.
<FACTS>
{facts_text}
</FACTS>
<ENTITIES>
{entities_text}
</ENTITIES>
<EPISODES>
{episodes_text}
</EPISODES>
<COMMUNITIES>
{communities_text}
</COMMUNITIES>
"""
|