Refactor prompt structure: move MESSAGES after instructions (#980)

* Refactor prompt structure: move MESSAGES after instructions

Reordered prompt structure in extract_nodes.py to place MESSAGES section
after instructions/guidelines in both extract_attributes and extract_summary
functions for improved prompt clarity.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

* Add sentence-aware text truncator for entity summaries

- Created truncate_at_sentence() utility function that truncates text at
  sentence boundaries while respecting max character limits
- Added MAX_SUMMARY_CHARS constant (250 chars) for entity summaries
- Applied truncator to entity summaries in prompts (extract_nodes.py)
- Applied truncator to LLM-generated summaries (node_operations.py)
- Added comprehensive test suite for truncation logic

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

* Clean up formatting in extract_attributes prompt

- Remove extra blank lines
- Fix indentation of MESSAGES tag

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

* Bump version to 0.22.0pre3

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

---------

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
Daniel Chalef 2025-10-04 19:06:32 -07:00 committed by GitHub
parent 896cb4e990
commit 8770012745
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 176 additions and 16 deletions

View file

@ -18,6 +18,8 @@ from typing import Any, Protocol, TypedDict
from pydantic import BaseModel, Field
from graphiti_core.utils.text_utils import MAX_SUMMARY_CHARS
from .models import Message, PromptFunction, PromptVersion
from .prompt_helpers import to_prompt_json
from .snippets import summary_instructions
@ -57,7 +59,7 @@ class EntityClassification(BaseModel):
class EntitySummary(BaseModel):
summary: str = Field(
...,
description='Summary containing the important information about the entity. Under 250 characters.',
description=f'Summary containing the important information about the entity. Under {MAX_SUMMARY_CHARS} characters.',
)
@ -259,18 +261,17 @@ def extract_attributes(context: dict[str, Any]) -> list[Message]:
Message(
role='user',
content=f"""
<MESSAGES>
{to_prompt_json(context['previous_episodes'], indent=2)}
{to_prompt_json(context['episode_content'], indent=2)}
</MESSAGES>
Given the above MESSAGES and the following ENTITY, update any of its attributes based on the information provided
Given the MESSAGES and the following ENTITY, update any of its attributes based on the information provided
in MESSAGES. Use the provided attribute descriptions to better understand how each attribute should be determined.
Guidelines:
1. Do not hallucinate entity property values if they cannot be found in the current context.
2. Only use the provided MESSAGES and ENTITY to set attribute values.
<MESSAGES>
{to_prompt_json(context['previous_episodes'], indent=2)}
{to_prompt_json(context['episode_content'], indent=2)}
</MESSAGES>
<ENTITY>
{context['node']}
@ -289,17 +290,16 @@ def extract_summary(context: dict[str, Any]) -> list[Message]:
Message(
role='user',
content=f"""
Given the MESSAGES and the ENTITY, update the summary that combines relevant information about the entity
from the messages and relevant information from the existing summary.
{summary_instructions}
<MESSAGES>
{to_prompt_json(context['previous_episodes'], indent=2)}
{to_prompt_json(context['episode_content'], indent=2)}
</MESSAGES>
Given the above MESSAGES and the following ENTITY, update the summary that combines relevant information about the entity
from the messages and relevant information from the existing summary.
{summary_instructions}
<ENTITY>
{context['node']}
</ENTITY>

View file

@ -53,6 +53,7 @@ from graphiti_core.utils.maintenance.dedup_helpers import (
from graphiti_core.utils.maintenance.edge_operations import (
filter_existing_duplicate_of_edges,
)
from graphiti_core.utils.text_utils import MAX_SUMMARY_CHARS, truncate_at_sentence
logger = logging.getLogger(__name__)
@ -547,7 +548,7 @@ async def _extract_entity_summary(
summary_context = _build_episode_context(
node_data={
'name': node.name,
'summary': node.summary,
'summary': truncate_at_sentence(node.summary, MAX_SUMMARY_CHARS),
'entity_types': node.labels,
'attributes': node.attributes,
},
@ -562,7 +563,7 @@ async def _extract_entity_summary(
group_id=node.group_id,
)
node.summary = summary_response.get('summary', '')
node.summary = truncate_at_sentence(summary_response.get('summary', ''), MAX_SUMMARY_CHARS)
def _build_episode_context(

View file

@ -0,0 +1,53 @@
"""
Copyright 2024, Zep Software, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import re
# Maximum length for entity/node summaries
MAX_SUMMARY_CHARS = 250
def truncate_at_sentence(text: str, max_chars: int) -> str:
    """
    Truncate text to at most max_chars characters, preferring a sentence boundary.

    The result ends at the last sentence terminator (., ! or ?) that lies within
    the first max_chars characters AND is followed by whitespace in the original
    text. If no such terminator exists, the text is cut at max_chars and trailing
    whitespace is removed.

    Args:
        text: The text to truncate. Falsy values (empty string, None) are
            returned unchanged.
        max_chars: Maximum number of characters in the result. Values <= 0
            yield an empty string for non-empty text.

    Returns:
        The original text if it already fits, otherwise the truncated text.
    """
    if not text or len(text) <= max_chars:
        return text

    # A negative limit would make the slices below count from the end of the string.
    if max_chars <= 0:
        return ''

    # Keep one character beyond the limit so the lookahead can inspect what really
    # follows punctuation sitting exactly on the cut. Without it, the cut itself
    # looks like end-of-text and a mid-token period (e.g. the "." in "3.14") is
    # mistaken for the end of a sentence. Since len(text) > max_chars here, that
    # extra character always exists.
    window = text[: max_chars + 1]

    cut = 0
    for boundary in re.finditer(r'[.!?](?=\s)', window):
        # The lookahead consumes nothing, so end() is right after the punctuation
        # and never exceeds max_chars.
        cut = boundary.end()

    if cut:
        return text[:cut]

    # No sentence boundary found, hard-truncate at max_chars
    return text[:max_chars].rstrip()

View file

@ -1,7 +1,7 @@
[project]
name = "graphiti-core"
description = "A temporal graph building library"
version = "0.22.0pre2"
version = "0.22.0pre3"
authors = [
{ name = "Paul Paliychuk", email = "paul@getzep.com" },
{ name = "Preston Rasmussen", email = "preston@getzep.com" },

106
tests/test_text_utils.py Normal file
View file

@ -0,0 +1,106 @@
"""
Copyright 2024, Zep Software, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from graphiti_core.utils.text_utils import MAX_SUMMARY_CHARS, truncate_at_sentence
def test_truncate_at_sentence_short_text():
    """Text shorter than the limit must come back untouched."""
    original = 'This is a short sentence.'
    assert truncate_at_sentence(original, 100) == original
def test_truncate_at_sentence_empty():
    """Falsy inputs (None and the empty string) are passed through as-is."""
    assert truncate_at_sentence(None, 100) is None
    assert truncate_at_sentence('', 100) == ''
def test_truncate_at_sentence_exact_length():
    """Text whose length equals max_chars is returned unchanged."""
    limit = 100
    sample = 'A' * limit
    assert truncate_at_sentence(sample, limit) == sample
def test_truncate_at_sentence_with_period():
    """A period followed by a space is used as the cut point."""
    limit = 40
    shortened = truncate_at_sentence(
        'First sentence. Second sentence. Third sentence. Fourth sentence.',
        limit,
    )
    assert shortened == 'First sentence. Second sentence.'
    assert len(shortened) <= limit
def test_truncate_at_sentence_with_question():
    """Test truncation at sentence boundary with question mark."""
    max_chars = 30
    text = 'What is this? This is a test. More text here.'
    result = truncate_at_sentence(text, max_chars)
    assert result == 'What is this? This is a test.'
    # Check against the limit actually passed in, so the test cannot accept
    # output that is longer than max_chars.
    assert len(result) <= max_chars
def test_truncate_at_sentence_with_exclamation():
    """Test truncation at sentence boundary with exclamation mark."""
    max_chars = 30
    text = 'Hello world! This is exciting. And more text.'
    result = truncate_at_sentence(text, max_chars)
    assert result == 'Hello world! This is exciting.'
    # Check against the limit actually passed in, so the test cannot accept
    # output that is longer than max_chars.
    assert len(result) <= max_chars
def test_truncate_at_sentence_no_boundary():
    """Without any sentence terminator in range, the result is a hard cut within the limit."""
    limit = 30
    unpunctuated = 'This is a very long sentence without any punctuation marks near the beginning'
    shortened = truncate_at_sentence(unpunctuated, limit)
    assert len(shortened) <= limit
    assert shortened.startswith('This is a very long sentence')
def test_truncate_at_sentence_multiple_periods():
    """With many short sentences, the last one that fits decides the cut."""
    limit = 10
    shortened = truncate_at_sentence('A. B. C. D. E. F. G. H.', limit)
    assert shortened == 'A. B. C.'
    assert len(shortened) <= limit
def test_truncate_at_sentence_strips_trailing_whitespace():
    """The truncated result must not end with a space."""
    shortened = truncate_at_sentence('First sentence. Second sentence.', 20)
    assert shortened == 'First sentence.'
    assert not shortened.endswith(' ')
def test_max_summary_chars_constant():
    """Pin the summary length limit so changes to it are deliberate."""
    expected_limit = 250
    assert MAX_SUMMARY_CHARS == expected_limit
def test_truncate_at_sentence_realistic_summary():
    """Run the truncator on a multi-sentence entity summary longer than the limit."""
    summary = (
        'John is a software engineer who works at a tech company in San Francisco. '
        'He has been programming for over 10 years and specializes in Python and distributed systems. '
        'John enjoys hiking on weekends and is learning to play guitar. '
        'He graduated from MIT with a degree in computer science.'
    )
    shortened = truncate_at_sentence(summary, MAX_SUMMARY_CHARS)

    # Never longer than the configured limit
    assert len(shortened) <= MAX_SUMMARY_CHARS
    # Ends on a complete sentence
    assert shortened.endswith('.')
    # The opening sentence survives truncation
    assert 'John is a software engineer' in shortened