* Refactor prompt structure: move MESSAGES after instructions Reordered prompt structure in extract_nodes.py to place MESSAGES section after instructions/guidelines in both extract_attributes and extract_summary functions for improved prompt clarity. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com> * Add sentence-aware text truncator for entity summaries - Created truncate_at_sentence() utility function that truncates text at sentence boundaries while respecting max character limits - Added MAX_SUMMARY_CHARS constant (250 chars) for entity summaries - Applied truncator to entity summaries in prompts (extract_nodes.py) - Applied truncator to LLM-generated summaries (node_operations.py) - Added comprehensive test suite for truncation logic 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com> * Clean up formatting in extract_attributes prompt - Remove extra blank lines - Fix indentation of MESSAGES tag 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com> * Bump version to 0.22.0pre3 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com> --------- Co-authored-by: Claude <noreply@anthropic.com>
53 lines
1.6 KiB
Python
53 lines
1.6 KiB
Python
"""
|
|
Copyright 2024, Zep Software, Inc.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
"""
|
|
|
|
import re
|
|
|
|
# Maximum length for entity/node summaries
|
|
MAX_SUMMARY_CHARS = 250  # unit is characters; presumably passed as max_chars to truncate_at_sentence - confirm at call sites
|
|
|
|
|
|
def truncate_at_sentence(text: str, max_chars: int) -> str:
    """
    Truncate text to at most max_chars characters, preferring a sentence boundary.

    A sentence boundary is a period, exclamation mark, or question mark that is
    immediately followed by whitespace in the original text. The text is cut
    right after the last such boundary that fits within max_chars. If no
    boundary fits, the text is cut at max_chars and trailing whitespace is
    stripped.

    Punctuation that merely happens to be the last character inside the limit
    (for example the dot in "1.5") is not treated as a sentence end unless the
    original text actually has whitespace after it.

    Args:
        text: The text to truncate
        max_chars: Maximum number of characters

    Returns:
        The text unchanged when it is empty or already within max_chars,
        otherwise the truncated text. An empty string is returned when
        max_chars is zero or negative.
    """
    if not text or len(text) <= max_chars:
        return text

    # A negative limit would make the slices below count from the end of the
    # string and return almost the whole text; nothing fits, so return nothing.
    if max_chars <= 0:
        return ''

    # Look one character past the limit so punctuation sitting exactly at the
    # limit can be checked against the character that really follows it.
    # len(text) > max_chars here, so that extra character always exists.
    window = text[: max_chars + 1]

    # The lookahead keeps the whitespace out of the match, so every match end
    # is the position right after the punctuation and never exceeds max_chars.
    boundary_ends = [match.end() for match in re.finditer(r'[.!?](?=\s)', window)]

    if boundary_ends:
        # Cut right after the last complete sentence that fits.
        return text[: boundary_ends[-1]]

    # No sentence boundary found, truncate at max_chars
    return text[:max_chars].rstrip()
|