Compare commits
5 commits
main
...
move-messa
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b0529f50c8 | ||
|
|
ad7b296c68 | ||
|
|
9f53ba221c | ||
|
|
9647c0b6f3 | ||
|
|
78699b0139 |
6 changed files with 177 additions and 17 deletions
|
|
@ -18,6 +18,8 @@ from typing import Any, Protocol, TypedDict
|
|||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from graphiti_core.utils.text_utils import MAX_SUMMARY_CHARS
|
||||
|
||||
from .models import Message, PromptFunction, PromptVersion
|
||||
from .prompt_helpers import to_prompt_json
|
||||
from .snippets import summary_instructions
|
||||
|
|
@ -57,7 +59,7 @@ class EntityClassification(BaseModel):
|
|||
class EntitySummary(BaseModel):
|
||||
summary: str = Field(
|
||||
...,
|
||||
description='Summary containing the important information about the entity. Under 250 characters.',
|
||||
description=f'Summary containing the important information about the entity. Under {MAX_SUMMARY_CHARS} characters.',
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -259,19 +261,18 @@ def extract_attributes(context: dict[str, Any]) -> list[Message]:
|
|||
Message(
|
||||
role='user',
|
||||
content=f"""
|
||||
|
||||
<MESSAGES>
|
||||
{to_prompt_json(context['previous_episodes'], indent=2)}
|
||||
{to_prompt_json(context['episode_content'], indent=2)}
|
||||
</MESSAGES>
|
||||
|
||||
Given the above MESSAGES and the following ENTITY, update any of its attributes based on the information provided
|
||||
Given the MESSAGES and the following ENTITY, update any of its attributes based on the information provided
|
||||
in MESSAGES. Use the provided attribute descriptions to better understand how each attribute should be determined.
|
||||
|
||||
Guidelines:
|
||||
1. Do not hallucinate entity property values if they cannot be found in the current context.
|
||||
2. Only use the provided MESSAGES and ENTITY to set attribute values.
|
||||
|
||||
<MESSAGES>
|
||||
{to_prompt_json(context['previous_episodes'], indent=2)}
|
||||
{to_prompt_json(context['episode_content'], indent=2)}
|
||||
</MESSAGES>
|
||||
|
||||
<ENTITY>
|
||||
{context['node']}
|
||||
</ENTITY>
|
||||
|
|
@ -289,17 +290,16 @@ def extract_summary(context: dict[str, Any]) -> list[Message]:
|
|||
Message(
|
||||
role='user',
|
||||
content=f"""
|
||||
Given the MESSAGES and the ENTITY, update the summary that combines relevant information about the entity
|
||||
from the messages and relevant information from the existing summary.
|
||||
|
||||
{summary_instructions}
|
||||
|
||||
<MESSAGES>
|
||||
{to_prompt_json(context['previous_episodes'], indent=2)}
|
||||
{to_prompt_json(context['episode_content'], indent=2)}
|
||||
</MESSAGES>
|
||||
|
||||
Given the above MESSAGES and the following ENTITY, update the summary that combines relevant information about the entity
|
||||
from the messages and relevant information from the existing summary.
|
||||
|
||||
{summary_instructions}
|
||||
|
||||
<ENTITY>
|
||||
{context['node']}
|
||||
</ENTITY>
|
||||
|
|
|
|||
|
|
@ -53,6 +53,7 @@ from graphiti_core.utils.maintenance.dedup_helpers import (
|
|||
from graphiti_core.utils.maintenance.edge_operations import (
|
||||
filter_existing_duplicate_of_edges,
|
||||
)
|
||||
from graphiti_core.utils.text_utils import MAX_SUMMARY_CHARS, truncate_at_sentence
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -547,7 +548,7 @@ async def _extract_entity_summary(
|
|||
summary_context = _build_episode_context(
|
||||
node_data={
|
||||
'name': node.name,
|
||||
'summary': node.summary,
|
||||
'summary': truncate_at_sentence(node.summary, MAX_SUMMARY_CHARS),
|
||||
'entity_types': node.labels,
|
||||
'attributes': node.attributes,
|
||||
},
|
||||
|
|
@ -562,7 +563,7 @@ async def _extract_entity_summary(
|
|||
group_id=node.group_id,
|
||||
)
|
||||
|
||||
node.summary = summary_response.get('summary', '')
|
||||
node.summary = truncate_at_sentence(summary_response.get('summary', ''), MAX_SUMMARY_CHARS)
|
||||
|
||||
|
||||
def _build_episode_context(
|
||||
|
|
|
|||
53
graphiti_core/utils/text_utils.py
Normal file
53
graphiti_core/utils/text_utils.py
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
"""
|
||||
Copyright 2024, Zep Software, Inc.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
# Maximum length for entity/node summaries
|
||||
MAX_SUMMARY_CHARS = 250
|
||||
|
||||
|
||||
def truncate_at_sentence(text: str, max_chars: int) -> str:
    """
    Truncate text to at most max_chars characters, preferring a sentence boundary.

    The cut is made right after the last sentence-ending punctuation mark
    (., ! or ?) that is followed by whitespace in the original text and lies
    within the first max_chars characters. If no such boundary exists, the text
    is cut at max_chars and trailing whitespace is stripped.

    Args:
        text: The text to truncate. Falsy values (empty string, None) are
            returned unchanged.
        max_chars: Maximum number of characters in the result

    Returns:
        The original text if it already fits, otherwise the truncated text
    """
    if not text or len(text) <= max_chars:
        return text

    # Nothing can fit into a non-positive budget.
    if max_chars <= 0:
        return ''

    # Look one character past the limit so that punctuation sitting exactly at
    # the cut point only counts when the original text really has whitespace
    # after it. Anchoring on the end of the slice instead would mistake a cut
    # inside "3.14" or "example.com" for the end of a sentence.
    window = text[: max_chars + 1]

    # The text is longer than max_chars here, so its real end is never inside
    # the window; "punctuation followed by whitespace" is the only boundary form.
    boundary_end = None
    for match in re.finditer(r'[.!?](?=\s)', window):
        boundary_end = match.end()

    if boundary_end is not None:
        # The slice ends on the punctuation mark itself, so nothing to strip.
        return text[:boundary_end]

    # No sentence boundary found, truncate at max_chars
    return text[:max_chars].rstrip()
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
[project]
|
||||
name = "graphiti-core"
|
||||
description = "A temporal graph building library"
|
||||
version = "0.22.0pre2"
|
||||
version = "0.22.0pre3"
|
||||
authors = [
|
||||
{ name = "Paul Paliychuk", email = "paul@getzep.com" },
|
||||
{ name = "Preston Rasmussen", email = "preston@getzep.com" },
|
||||
|
|
|
|||
106
tests/test_text_utils.py
Normal file
106
tests/test_text_utils.py
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
"""
|
||||
Copyright 2024, Zep Software, Inc.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
|
||||
from graphiti_core.utils.text_utils import MAX_SUMMARY_CHARS, truncate_at_sentence
|
||||
|
||||
|
||||
def test_truncate_at_sentence_short_text():
    """Text shorter than the limit must come back untouched."""
    short = 'This is a short sentence.'
    assert truncate_at_sentence(short, 100) == short
|
||||
|
||||
|
||||
def test_truncate_at_sentence_empty():
    """An empty string and None are both returned as-is."""
    empty_result = truncate_at_sentence('', 100)
    assert empty_result == ''

    none_result = truncate_at_sentence(None, 100)
    assert none_result is None
|
||||
|
||||
|
||||
def test_truncate_at_sentence_exact_length():
    """Text whose length equals max_chars is not truncated."""
    limit = 100
    exact = 'A' * limit
    assert truncate_at_sentence(exact, limit) == exact
|
||||
|
||||
|
||||
def test_truncate_at_sentence_with_period():
    """The cut lands on the last period-terminated sentence that fits."""
    limit = 40
    source = 'First sentence. Second sentence. Third sentence. Fourth sentence.'

    shortened = truncate_at_sentence(source, limit)

    assert shortened == 'First sentence. Second sentence.'
    assert len(shortened) <= limit
|
||||
|
||||
|
||||
def test_truncate_at_sentence_with_question():
    """Test truncation at sentence boundary with question mark."""
    max_chars = 30
    text = 'What is this? This is a test. More text here.'
    result = truncate_at_sentence(text, max_chars)
    assert result == 'What is this? This is a test.'
    # Bound the length by the limit that was actually requested; a looser
    # bound would let an overshoot slip through unnoticed.
    assert len(result) <= max_chars
|
||||
|
||||
|
||||
def test_truncate_at_sentence_with_exclamation():
    """Test truncation at sentence boundary with exclamation mark."""
    max_chars = 30
    text = 'Hello world! This is exciting. And more text.'
    result = truncate_at_sentence(text, max_chars)
    assert result == 'Hello world! This is exciting.'
    # Bound the length by the limit that was actually requested; a looser
    # bound would let an overshoot slip through unnoticed.
    assert len(result) <= max_chars
|
||||
|
||||
|
||||
def test_truncate_at_sentence_no_boundary():
    """Without any sentence boundary inside the limit, the text is hard-cut."""
    limit = 30
    source = 'This is a very long sentence without any punctuation marks near the beginning'

    shortened = truncate_at_sentence(source, limit)

    assert len(shortened) <= limit
    assert shortened.startswith('This is a very long sentence')
|
||||
|
||||
|
||||
def test_truncate_at_sentence_multiple_periods():
    """With many short sentences, the last one that fits wins."""
    limit = 10
    source = 'A. B. C. D. E. F. G. H.'

    shortened = truncate_at_sentence(source, limit)

    assert shortened == 'A. B. C.'
    assert len(shortened) <= limit
|
||||
|
||||
|
||||
def test_truncate_at_sentence_strips_trailing_whitespace():
    """The truncated result never ends with a space."""
    source = 'First sentence. Second sentence.'

    shortened = truncate_at_sentence(source, 20)

    assert shortened == 'First sentence.'
    assert not shortened.endswith(' ')
|
||||
|
||||
|
||||
def test_max_summary_chars_constant():
    """Guard against accidental changes to the summary length limit."""
    expected_limit = 250
    assert MAX_SUMMARY_CHARS == expected_limit
|
||||
|
||||
|
||||
def test_truncate_at_sentence_realistic_summary():
    """Exercise truncation on a multi-sentence, summary-like paragraph."""
    sentences = (
        'John is a software engineer who works at a tech company in San Francisco. ',
        'He has been programming for over 10 years and specializes in Python and distributed systems. ',
        'John enjoys hiking on weekends and is learning to play guitar. ',
        'He graduated from MIT with a degree in computer science.',
    )
    summary = ''.join(sentences)

    shortened = truncate_at_sentence(summary, MAX_SUMMARY_CHARS)

    # Never longer than the configured limit
    assert len(shortened) <= MAX_SUMMARY_CHARS
    # Ends on a complete sentence
    assert shortened.endswith('.')
    # The first sentence always survives
    assert 'John is a software engineer' in shortened
|
||||
2
uv.lock
generated
2
uv.lock
generated
|
|
@ -783,7 +783,7 @@ wheels = [
|
|||
|
||||
[[package]]
|
||||
name = "graphiti-core"
|
||||
version = "0.22.0rc2"
|
||||
version = "0.22.0rc3"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "diskcache" },
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue