Make natural language extraction configurable (#943)
Replace MULTILINGUAL_EXTRACTION_RESPONSES constant with configurable get_extraction_language_instruction() function to improve determinism and allow customization.

Changes:
- Replace constant with function in client.py
- Update all LLM client implementations to use new function
- Maintain backward compatibility with same default behavior
- Enable users to override function for custom language requirements

Users can now customize extraction behavior by monkey-patching:
```python
import graphiti_core.llm_client.client as client
client.get_extraction_language_instruction = lambda: "Custom instruction"
```

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
parent
f632a8ae9e
commit
b2ff050e57
4 changed files with 20 additions and 10 deletions
|
|
@@ -32,9 +32,19 @@ from .errors import RateLimitError
|
||||||
DEFAULT_TEMPERATURE = 0
|
DEFAULT_TEMPERATURE = 0
|
||||||
DEFAULT_CACHE_DIR = './llm_cache'
|
DEFAULT_CACHE_DIR = './llm_cache'
|
||||||
|
|
||||||
MULTILINGUAL_EXTRACTION_RESPONSES = (
|
|
||||||
'\n\nAny extracted information should be returned in the same language as it was written in.'
|
def get_extraction_language_instruction() -> str:
|
||||||
)
|
"""Returns instruction for language extraction behavior.
|
||||||
|
|
||||||
|
Override this function to customize language extraction:
|
||||||
|
- Return empty string to disable multilingual instructions
|
||||||
|
- Return custom instructions for specific language requirements
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str: Language instruction to append to system messages
|
||||||
|
"""
|
||||||
|
return '\n\nAny extracted information should be returned in the same language as it was written in.'
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@@ -145,7 +155,7 @@ class LLMClient(ABC):
|
||||||
)
|
)
|
||||||
|
|
||||||
# Add multilingual extraction instructions
|
# Add multilingual extraction instructions
|
||||||
messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
|
messages[0].content += get_extraction_language_instruction()
|
||||||
|
|
||||||
if self.cache_enabled and self.cache_dir is not None:
|
if self.cache_enabled and self.cache_dir is not None:
|
||||||
cache_key = self._get_cache_key(messages)
|
cache_key = self._get_cache_key(messages)
|
||||||
|
|
|
||||||
|
|
@@ -23,7 +23,7 @@ from typing import TYPE_CHECKING, ClassVar
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from ..prompts.models import Message
|
from ..prompts.models import Message
|
||||||
from .client import MULTILINGUAL_EXTRACTION_RESPONSES, LLMClient
|
from .client import LLMClient, get_extraction_language_instruction
|
||||||
from .config import LLMConfig, ModelSize
|
from .config import LLMConfig, ModelSize
|
||||||
from .errors import RateLimitError
|
from .errors import RateLimitError
|
||||||
|
|
||||||
|
|
@@ -376,7 +376,7 @@ class GeminiClient(LLMClient):
|
||||||
last_output = None
|
last_output = None
|
||||||
|
|
||||||
# Add multilingual extraction instructions
|
# Add multilingual extraction instructions
|
||||||
messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
|
messages[0].content += get_extraction_language_instruction()
|
||||||
|
|
||||||
while retry_count < self.MAX_RETRIES:
|
while retry_count < self.MAX_RETRIES:
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@@ -25,7 +25,7 @@ from openai.types.chat import ChatCompletionMessageParam
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from ..prompts.models import Message
|
from ..prompts.models import Message
|
||||||
from .client import MULTILINGUAL_EXTRACTION_RESPONSES, LLMClient
|
from .client import LLMClient, get_extraction_language_instruction
|
||||||
from .config import DEFAULT_MAX_TOKENS, LLMConfig, ModelSize
|
from .config import DEFAULT_MAX_TOKENS, LLMConfig, ModelSize
|
||||||
from .errors import RateLimitError, RefusalError
|
from .errors import RateLimitError, RefusalError
|
||||||
|
|
||||||
|
|
@@ -184,7 +184,7 @@ class BaseOpenAIClient(LLMClient):
|
||||||
last_error = None
|
last_error = None
|
||||||
|
|
||||||
# Add multilingual extraction instructions
|
# Add multilingual extraction instructions
|
||||||
messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
|
messages[0].content += get_extraction_language_instruction()
|
||||||
|
|
||||||
while retry_count <= self.MAX_RETRIES:
|
while retry_count <= self.MAX_RETRIES:
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@@ -25,7 +25,7 @@ from openai.types.chat import ChatCompletionMessageParam
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from ..prompts.models import Message
|
from ..prompts.models import Message
|
||||||
from .client import MULTILINGUAL_EXTRACTION_RESPONSES, LLMClient
|
from .client import LLMClient, get_extraction_language_instruction
|
||||||
from .config import DEFAULT_MAX_TOKENS, LLMConfig, ModelSize
|
from .config import DEFAULT_MAX_TOKENS, LLMConfig, ModelSize
|
||||||
from .errors import RateLimitError, RefusalError
|
from .errors import RateLimitError, RefusalError
|
||||||
|
|
||||||
|
|
@@ -136,7 +136,7 @@ class OpenAIGenericClient(LLMClient):
|
||||||
)
|
)
|
||||||
|
|
||||||
# Add multilingual extraction instructions
|
# Add multilingual extraction instructions
|
||||||
messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
|
messages[0].content += get_extraction_language_instruction()
|
||||||
|
|
||||||
while retry_count <= self.MAX_RETRIES:
|
while retry_count <= self.MAX_RETRIES:
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue