Make natural language extraction configurable (#943)

Replace the MULTILINGUAL_EXTRACTION_RESPONSES constant with a configurable
get_extraction_language_instruction() function to improve determinism
and allow customization.

Changes:
- Replace the constant with a function in client.py
- Update all LLM client implementations to use the new function
- Maintain backward compatibility; the default behavior is unchanged
- Enable users to override the function for custom language requirements

Users can now customize extraction behavior by monkey-patching:
```python
import graphiti_core.llm_client.client as client
client.get_extraction_language_instruction = lambda: "Custom instruction"
```
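
Per the new function's docstring, returning an empty string disables the appended instruction entirely; for example:
```python
import graphiti_core.llm_client.client as client

# Disable the multilingual instruction: nothing is appended to the
# system message before each LLM call.
client.get_extraction_language_instruction = lambda: ''
```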

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-authored-by: Claude <noreply@anthropic.com>
Daniel Chalef · 2025-09-30 08:09:03 -07:00 · committed by GitHub
parent f632a8ae9e · commit b2ff050e57
4 changed files with 20 additions and 10 deletions

client.py:

```diff
@@ -32,9 +32,19 @@ from .errors import RateLimitError
 
 DEFAULT_TEMPERATURE = 0
 DEFAULT_CACHE_DIR = './llm_cache'
 
-MULTILINGUAL_EXTRACTION_RESPONSES = (
-    '\n\nAny extracted information should be returned in the same language as it was written in.'
-)
+
+def get_extraction_language_instruction() -> str:
+    """Returns instruction for language extraction behavior.
+
+    Override this function to customize language extraction:
+    - Return empty string to disable multilingual instructions
+    - Return custom instructions for specific language requirements
+
+    Returns:
+        str: Language instruction to append to system messages
+    """
+    return '\n\nAny extracted information should be returned in the same language as it was written in.'
+
 
 logger = logging.getLogger(__name__)
@@ -145,7 +155,7 @@ class LLMClient(ABC):
         )
 
         # Add multilingual extraction instructions
-        messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
+        messages[0].content += get_extraction_language_instruction()
 
         if self.cache_enabled and self.cache_dir is not None:
             cache_key = self._get_cache_key(messages)
```
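
Every client appends the function's return value to the first (system) message, so an override steers the output language for all extraction calls. A minimal sketch of a custom override, with the target language chosen purely for illustration:
```python
import graphiti_core.llm_client.client as client


def pinned_language_instruction() -> str:
    # Hypothetical replacement for the default behavior: pin extraction
    # output to one language instead of mirroring the source text.
    return '\n\nAny extracted information should be returned in Spanish.'


client.get_extraction_language_instruction = pinned_language_instruction
```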

Gemini client (GeminiClient):

```diff
@@ -23,7 +23,7 @@ from typing import TYPE_CHECKING, ClassVar
 from pydantic import BaseModel
 
 from ..prompts.models import Message
-from .client import MULTILINGUAL_EXTRACTION_RESPONSES, LLMClient
+from .client import LLMClient, get_extraction_language_instruction
 from .config import LLMConfig, ModelSize
 from .errors import RateLimitError
@@ -376,7 +376,7 @@ class GeminiClient(LLMClient):
         last_output = None
 
         # Add multilingual extraction instructions
-        messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
+        messages[0].content += get_extraction_language_instruction()
 
         while retry_count < self.MAX_RETRIES:
             try:
```

OpenAI base client (BaseOpenAIClient):

```diff
@@ -25,7 +25,7 @@ from openai.types.chat import ChatCompletionMessageParam
 from pydantic import BaseModel
 
 from ..prompts.models import Message
-from .client import MULTILINGUAL_EXTRACTION_RESPONSES, LLMClient
+from .client import LLMClient, get_extraction_language_instruction
 from .config import DEFAULT_MAX_TOKENS, LLMConfig, ModelSize
 from .errors import RateLimitError, RefusalError
@@ -184,7 +184,7 @@ class BaseOpenAIClient(LLMClient):
         last_error = None
 
         # Add multilingual extraction instructions
-        messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
+        messages[0].content += get_extraction_language_instruction()
 
         while retry_count <= self.MAX_RETRIES:
             try:
```

OpenAI generic client (OpenAIGenericClient):

```diff
@@ -25,7 +25,7 @@ from openai.types.chat import ChatCompletionMessageParam
 from pydantic import BaseModel
 
 from ..prompts.models import Message
-from .client import MULTILINGUAL_EXTRACTION_RESPONSES, LLMClient
+from .client import LLMClient, get_extraction_language_instruction
 from .config import DEFAULT_MAX_TOKENS, LLMConfig, ModelSize
 from .errors import RateLimitError, RefusalError
@@ -136,7 +136,7 @@ class OpenAIGenericClient(LLMClient):
         )
 
         # Add multilingual extraction instructions
-        messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
+        messages[0].content += get_extraction_language_instruction()
 
         while retry_count <= self.MAX_RETRIES:
             try:
```
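
One caveat: the subclasses above bind the function at import time (`from .client import get_extraction_language_instruction`), so patching only the `graphiti_core.llm_client.client` module rebinds the name for the base `LLMClient` (which resolves it at call time) but not for modules that already imported it by name. A sketch of covering both, with the Gemini module path assumed from the class name above:
```python
import graphiti_core.llm_client.client as base_client
import graphiti_core.llm_client.gemini_client as gemini_client  # path assumed


def override() -> str:
    # For example, disable the language instruction everywhere.
    return ''


# Rebind in the base module and in every client module that imported
# the function by name.
base_client.get_extraction_language_instruction = override
gemini_client.get_extraction_language_instruction = override
```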