Compare commits
5 commits
main
...
move-messa
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b0529f50c8 | ||
|
|
ad7b296c68 | ||
|
|
9f53ba221c | ||
|
|
9647c0b6f3 | ||
|
|
78699b0139 |
6 changed files with 177 additions and 17 deletions
|
|
@ -18,6 +18,8 @@ from typing import Any, Protocol, TypedDict
|
||||||
|
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from graphiti_core.utils.text_utils import MAX_SUMMARY_CHARS
|
||||||
|
|
||||||
from .models import Message, PromptFunction, PromptVersion
|
from .models import Message, PromptFunction, PromptVersion
|
||||||
from .prompt_helpers import to_prompt_json
|
from .prompt_helpers import to_prompt_json
|
||||||
from .snippets import summary_instructions
|
from .snippets import summary_instructions
|
||||||
|
|
@ -57,7 +59,7 @@ class EntityClassification(BaseModel):
|
||||||
class EntitySummary(BaseModel):
|
class EntitySummary(BaseModel):
|
||||||
summary: str = Field(
|
summary: str = Field(
|
||||||
...,
|
...,
|
||||||
description='Summary containing the important information about the entity. Under 250 characters.',
|
description=f'Summary containing the important information about the entity. Under {MAX_SUMMARY_CHARS} characters.',
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -259,19 +261,18 @@ def extract_attributes(context: dict[str, Any]) -> list[Message]:
|
||||||
Message(
|
Message(
|
||||||
role='user',
|
role='user',
|
||||||
content=f"""
|
content=f"""
|
||||||
|
Given the MESSAGES and the following ENTITY, update any of its attributes based on the information provided
|
||||||
<MESSAGES>
|
|
||||||
{to_prompt_json(context['previous_episodes'], indent=2)}
|
|
||||||
{to_prompt_json(context['episode_content'], indent=2)}
|
|
||||||
</MESSAGES>
|
|
||||||
|
|
||||||
Given the above MESSAGES and the following ENTITY, update any of its attributes based on the information provided
|
|
||||||
in MESSAGES. Use the provided attribute descriptions to better understand how each attribute should be determined.
|
in MESSAGES. Use the provided attribute descriptions to better understand how each attribute should be determined.
|
||||||
|
|
||||||
Guidelines:
|
Guidelines:
|
||||||
1. Do not hallucinate entity property values if they cannot be found in the current context.
|
1. Do not hallucinate entity property values if they cannot be found in the current context.
|
||||||
2. Only use the provided MESSAGES and ENTITY to set attribute values.
|
2. Only use the provided MESSAGES and ENTITY to set attribute values.
|
||||||
|
|
||||||
|
<MESSAGES>
|
||||||
|
{to_prompt_json(context['previous_episodes'], indent=2)}
|
||||||
|
{to_prompt_json(context['episode_content'], indent=2)}
|
||||||
|
</MESSAGES>
|
||||||
|
|
||||||
<ENTITY>
|
<ENTITY>
|
||||||
{context['node']}
|
{context['node']}
|
||||||
</ENTITY>
|
</ENTITY>
|
||||||
|
|
@ -289,17 +290,16 @@ def extract_summary(context: dict[str, Any]) -> list[Message]:
|
||||||
Message(
|
Message(
|
||||||
role='user',
|
role='user',
|
||||||
content=f"""
|
content=f"""
|
||||||
|
Given the MESSAGES and the ENTITY, update the summary that combines relevant information about the entity
|
||||||
|
from the messages and relevant information from the existing summary.
|
||||||
|
|
||||||
|
{summary_instructions}
|
||||||
|
|
||||||
<MESSAGES>
|
<MESSAGES>
|
||||||
{to_prompt_json(context['previous_episodes'], indent=2)}
|
{to_prompt_json(context['previous_episodes'], indent=2)}
|
||||||
{to_prompt_json(context['episode_content'], indent=2)}
|
{to_prompt_json(context['episode_content'], indent=2)}
|
||||||
</MESSAGES>
|
</MESSAGES>
|
||||||
|
|
||||||
Given the above MESSAGES and the following ENTITY, update the summary that combines relevant information about the entity
|
|
||||||
from the messages and relevant information from the existing summary.
|
|
||||||
|
|
||||||
{summary_instructions}
|
|
||||||
|
|
||||||
<ENTITY>
|
<ENTITY>
|
||||||
{context['node']}
|
{context['node']}
|
||||||
</ENTITY>
|
</ENTITY>
|
||||||
|
|
|
||||||
|
|
@ -53,6 +53,7 @@ from graphiti_core.utils.maintenance.dedup_helpers import (
|
||||||
from graphiti_core.utils.maintenance.edge_operations import (
|
from graphiti_core.utils.maintenance.edge_operations import (
|
||||||
filter_existing_duplicate_of_edges,
|
filter_existing_duplicate_of_edges,
|
||||||
)
|
)
|
||||||
|
from graphiti_core.utils.text_utils import MAX_SUMMARY_CHARS, truncate_at_sentence
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -547,7 +548,7 @@ async def _extract_entity_summary(
|
||||||
summary_context = _build_episode_context(
|
summary_context = _build_episode_context(
|
||||||
node_data={
|
node_data={
|
||||||
'name': node.name,
|
'name': node.name,
|
||||||
'summary': node.summary,
|
'summary': truncate_at_sentence(node.summary, MAX_SUMMARY_CHARS),
|
||||||
'entity_types': node.labels,
|
'entity_types': node.labels,
|
||||||
'attributes': node.attributes,
|
'attributes': node.attributes,
|
||||||
},
|
},
|
||||||
|
|
@ -562,7 +563,7 @@ async def _extract_entity_summary(
|
||||||
group_id=node.group_id,
|
group_id=node.group_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
node.summary = summary_response.get('summary', '')
|
node.summary = truncate_at_sentence(summary_response.get('summary', ''), MAX_SUMMARY_CHARS)
|
||||||
|
|
||||||
|
|
||||||
def _build_episode_context(
|
def _build_episode_context(
|
||||||
|
|
|
||||||
53
graphiti_core/utils/text_utils.py
Normal file
53
graphiti_core/utils/text_utils.py
Normal file
|
|
@ -0,0 +1,53 @@
|
||||||
|
"""
|
||||||
|
Copyright 2024, Zep Software, Inc.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Maximum length, in characters, for entity/node summaries
|
||||||
|
MAX_SUMMARY_CHARS = 250
|
||||||
|
|
||||||
|
|
||||||
|
def truncate_at_sentence(text: str, max_chars: int) -> str:
    """
    Truncate text to at most max_chars characters, preferring a sentence boundary.

    A sentence boundary is a '.', '!' or '?' that is immediately followed by
    whitespace in the original text. The result ends at the last such boundary
    that fits within max_chars. If no boundary fits, the text is hard-cut at
    max_chars and trailing whitespace is stripped.

    Args:
        text: The text to truncate. Falsy values ('' or None) are returned unchanged.
        max_chars: Maximum number of characters to keep. Values <= 0 yield ''
            for any non-empty text.

    Returns:
        The original text when it already fits, otherwise the truncated text.
    """
    if not text or len(text) <= max_chars:
        return text

    # A negative limit would otherwise act as a from-the-end slice index and
    # return nearly the whole text instead of nothing.
    if max_chars <= 0:
        return ''

    # text is known to be longer than max_chars here, so every punctuation mark
    # inside the limit has a following character. Keeping one extra character of
    # lookahead distinguishes a real sentence end ("done. Next") from punctuation
    # inside a token that merely happens to sit at the cut ("3.14", "example.com").
    window = text[: max_chars + 1]
    boundary_end = None
    for match in re.finditer(r'[.!?](?=\s)', window):
        # The lookahead is zero-width, so end() is just past the punctuation
        # mark and can never exceed max_chars.
        boundary_end = match.end()

    if boundary_end is not None:
        return text[:boundary_end]

    # No sentence boundary found, truncate at max_chars
    return text[:max_chars].rstrip()
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
[project]
|
[project]
|
||||||
name = "graphiti-core"
|
name = "graphiti-core"
|
||||||
description = "A temporal graph building library"
|
description = "A temporal graph building library"
|
||||||
version = "0.22.0pre2"
|
version = "0.22.0pre3"
|
||||||
authors = [
|
authors = [
|
||||||
{ name = "Paul Paliychuk", email = "paul@getzep.com" },
|
{ name = "Paul Paliychuk", email = "paul@getzep.com" },
|
||||||
{ name = "Preston Rasmussen", email = "preston@getzep.com" },
|
{ name = "Preston Rasmussen", email = "preston@getzep.com" },
|
||||||
|
|
|
||||||
106
tests/test_text_utils.py
Normal file
106
tests/test_text_utils.py
Normal file
|
|
@ -0,0 +1,106 @@
|
||||||
|
"""
|
||||||
|
Copyright 2024, Zep Software, Inc.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from graphiti_core.utils.text_utils import MAX_SUMMARY_CHARS, truncate_at_sentence
|
||||||
|
|
||||||
|
|
||||||
|
def test_truncate_at_sentence_short_text():
    """Text well under the limit must come back untouched."""
    short_input = 'This is a short sentence.'
    assert truncate_at_sentence(short_input, 100) == short_input
|
||||||
|
|
||||||
|
|
||||||
|
def test_truncate_at_sentence_empty():
    """Falsy inputs ('' and None) are passed straight through."""
    limit = 100
    empty_result = truncate_at_sentence('', limit)
    none_result = truncate_at_sentence(None, limit)
    assert empty_result == ''
    assert none_result is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_truncate_at_sentence_exact_length():
    """A string whose length equals max_chars is not truncated."""
    limit = 100
    exact_fit = 'A' * limit
    assert truncate_at_sentence(exact_fit, limit) == exact_fit
|
||||||
|
|
||||||
|
|
||||||
|
def test_truncate_at_sentence_with_period():
    """The cut lands on the last period-terminated sentence that fits."""
    limit = 40
    paragraph = 'First sentence. Second sentence. Third sentence. Fourth sentence.'
    shortened = truncate_at_sentence(paragraph, limit)
    assert shortened == 'First sentence. Second sentence.'
    assert len(shortened) <= limit
|
||||||
|
|
||||||
|
|
||||||
|
def test_truncate_at_sentence_with_question():
    """Test truncation at sentence boundary with question mark."""
    max_chars = 30
    text = 'What is this? This is a test. More text here.'
    result = truncate_at_sentence(text, max_chars)
    assert result == 'What is this? This is a test.'
    # Bound the length by the limit actually passed in; a looser bound would
    # let an overshoot slip through unnoticed.
    assert len(result) <= max_chars
|
||||||
|
|
||||||
|
|
||||||
|
def test_truncate_at_sentence_with_exclamation():
    """Test truncation at sentence boundary with exclamation mark."""
    max_chars = 30
    text = 'Hello world! This is exciting. And more text.'
    result = truncate_at_sentence(text, max_chars)
    assert result == 'Hello world! This is exciting.'
    # Bound the length by the limit actually passed in; a looser bound would
    # let an overshoot slip through unnoticed.
    assert len(result) <= max_chars
|
||||||
|
|
||||||
|
|
||||||
|
def test_truncate_at_sentence_no_boundary():
    """Without any sentence punctuation inside the limit, a hard cut is used."""
    unpunctuated = 'This is a very long sentence without any punctuation marks near the beginning'
    shortened = truncate_at_sentence(unpunctuated, 30)
    assert len(shortened) <= 30
    assert shortened.startswith('This is a very long sentence')
|
||||||
|
|
||||||
|
|
||||||
|
def test_truncate_at_sentence_multiple_periods():
    """With many short sentences, the last one that fits wins."""
    limit = 10
    many_sentences = 'A. B. C. D. E. F. G. H.'
    shortened = truncate_at_sentence(many_sentences, limit)
    assert shortened == 'A. B. C.'
    assert len(shortened) <= limit
|
||||||
|
|
||||||
|
|
||||||
|
def test_truncate_at_sentence_strips_trailing_whitespace():
    """The whitespace after the final kept sentence is not part of the result."""
    two_sentences = 'First sentence. Second sentence.'
    shortened = truncate_at_sentence(two_sentences, 20)
    assert shortened == 'First sentence.'
    assert not shortened.endswith(' ')
|
||||||
|
|
||||||
|
|
||||||
|
def test_max_summary_chars_constant():
    """Pin the shared summary limit so an accidental change is caught."""
    expected_limit = 250
    assert MAX_SUMMARY_CHARS == expected_limit
|
||||||
|
|
||||||
|
|
||||||
|
def test_truncate_at_sentence_realistic_summary():
    """Exercise truncation on a multi-sentence, summary-like paragraph."""
    long_summary = (
        'John is a software engineer who works at a tech company in San Francisco. '
        'He has been programming for over 10 years and specializes in Python and distributed systems. '
        'John enjoys hiking on weekends and is learning to play guitar. '
        'He graduated from MIT with a degree in computer science.'
    )
    shortened = truncate_at_sentence(long_summary, MAX_SUMMARY_CHARS)

    # The result must respect the shared limit.
    assert len(shortened) <= MAX_SUMMARY_CHARS
    # Only whole sentences are kept, so the result ends on a period.
    assert shortened.endswith('.')
    # The opening sentence always fits and must survive.
    assert 'John is a software engineer' in shortened
|
||||||
2
uv.lock
generated
2
uv.lock
generated
|
|
@ -783,7 +783,7 @@ wheels = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "graphiti-core"
|
name = "graphiti-core"
|
||||||
version = "0.22.0rc2"
|
version = "0.22.0rc3"
|
||||||
source = { editable = "." }
|
source = { editable = "." }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "diskcache" },
|
{ name = "diskcache" },
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue