From b2ff050e57a093be78a68021aa73ac1670ae2dc6 Mon Sep 17 00:00:00 2001
From: Daniel Chalef <131175+danielchalef@users.noreply.github.com>
Date: Tue, 30 Sep 2025 08:09:03 -0700
Subject: [PATCH] Make natural language extraction configurable (#943)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the MULTILINGUAL_EXTRACTION_RESPONSES constant with a configurable
get_extraction_language_instruction() function to improve determinism and
allow customization.

Changes:
- Replace the constant with a function in client.py
- Update all LLM client implementations to use the new function
- Maintain backward compatibility: the default behavior is unchanged
- Enable users to override the function for custom language requirements

Users can now customize extraction behavior by monkey-patching:

```python
import graphiti_core.llm_client.client as client
client.get_extraction_language_instruction = lambda: "Custom instruction"
```
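For instance, a deployment that always wants English output could install an
override like the sketch below. The function name and instruction text are
illustrative, not part of the library. Note that the individual clients bind
the function by name at import time (`from .client import
get_extraction_language_instruction`), so the override should be installed
before those client modules are imported:

```python
import graphiti_core.llm_client.client as client

def english_only_instruction() -> str:
    # Illustrative policy: always extract into English.
    return '\n\nAlways return extracted information in English, regardless of the source language.'

client.get_extraction_language_instruction = english_only_instruction

# Returning an empty string disables the language instruction entirely:
# client.get_extraction_language_instruction = lambda: ''
```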
🤖 Generated with [Claude Code](https://claude.ai/code)

Co-authored-by: Claude
---
 graphiti_core/llm_client/client.py             | 18 ++++++++++++++----
 graphiti_core/llm_client/gemini_client.py      |  4 ++--
 graphiti_core/llm_client/openai_base_client.py |  4 ++--
 .../llm_client/openai_generic_client.py        |  4 ++--
 4 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/graphiti_core/llm_client/client.py b/graphiti_core/llm_client/client.py
index 9f7558c1..b06f870d 100644
--- a/graphiti_core/llm_client/client.py
+++ b/graphiti_core/llm_client/client.py
@@ -32,9 +32,19 @@ from .errors import RateLimitError
 
 DEFAULT_TEMPERATURE = 0
 DEFAULT_CACHE_DIR = './llm_cache'
-MULTILINGUAL_EXTRACTION_RESPONSES = (
-    '\n\nAny extracted information should be returned in the same language as it was written in.'
-)
+
+def get_extraction_language_instruction() -> str:
+    """Returns instruction for language extraction behavior.
+
+    Override this function to customize language extraction:
+    - Return empty string to disable multilingual instructions
+    - Return custom instructions for specific language requirements
+
+    Returns:
+        str: Language instruction to append to system messages
+    """
+    return '\n\nAny extracted information should be returned in the same language as it was written in.'
+
 
 
 logger = logging.getLogger(__name__)
@@ -145,7 +155,7 @@ class LLMClient(ABC):
         )
 
         # Add multilingual extraction instructions
-        messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
+        messages[0].content += get_extraction_language_instruction()
 
         if self.cache_enabled and self.cache_dir is not None:
             cache_key = self._get_cache_key(messages)
diff --git a/graphiti_core/llm_client/gemini_client.py b/graphiti_core/llm_client/gemini_client.py
index a8422dfd..f80b2f7e 100644
--- a/graphiti_core/llm_client/gemini_client.py
+++ b/graphiti_core/llm_client/gemini_client.py
@@ -23,7 +23,7 @@ from typing import TYPE_CHECKING, ClassVar
 from pydantic import BaseModel
 
 from ..prompts.models import Message
-from .client import MULTILINGUAL_EXTRACTION_RESPONSES, LLMClient
+from .client import LLMClient, get_extraction_language_instruction
 from .config import LLMConfig, ModelSize
 from .errors import RateLimitError
 
@@ -376,7 +376,7 @@ class GeminiClient(LLMClient):
         last_output = None
 
         # Add multilingual extraction instructions
-        messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
+        messages[0].content += get_extraction_language_instruction()
 
         while retry_count < self.MAX_RETRIES:
             try:
diff --git a/graphiti_core/llm_client/openai_base_client.py b/graphiti_core/llm_client/openai_base_client.py
index e3332bc2..cdda179c 100644
--- a/graphiti_core/llm_client/openai_base_client.py
+++ b/graphiti_core/llm_client/openai_base_client.py
@@ -25,7 +25,7 @@ from openai.types.chat import ChatCompletionMessageParam
 from pydantic import BaseModel
 
 from ..prompts.models import Message
-from .client import MULTILINGUAL_EXTRACTION_RESPONSES, LLMClient
+from .client import LLMClient, get_extraction_language_instruction
 from .config import DEFAULT_MAX_TOKENS, LLMConfig, ModelSize
 from .errors import RateLimitError, RefusalError
 
@@ -184,7 +184,7 @@ class BaseOpenAIClient(LLMClient):
         last_error = None
 
         # Add multilingual extraction instructions
-        messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
+        messages[0].content += get_extraction_language_instruction()
 
         while retry_count <= self.MAX_RETRIES:
             try:
diff --git a/graphiti_core/llm_client/openai_generic_client.py b/graphiti_core/llm_client/openai_generic_client.py
index 571cf902..c4f4d212 100644
--- a/graphiti_core/llm_client/openai_generic_client.py
+++ b/graphiti_core/llm_client/openai_generic_client.py
@@ -25,7 +25,7 @@ from openai.types.chat import ChatCompletionMessageParam
 from pydantic import BaseModel
 
 from ..prompts.models import Message
-from .client import MULTILINGUAL_EXTRACTION_RESPONSES, LLMClient
+from .client import LLMClient, get_extraction_language_instruction
 from .config import DEFAULT_MAX_TOKENS, LLMConfig, ModelSize
 from .errors import RateLimitError, RefusalError
 
@@ -136,7 +136,7 @@ class OpenAIGenericClient(LLMClient):
         )
 
         # Add multilingual extraction instructions
-        messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
+        messages[0].content += get_extraction_language_instruction()
 
         while retry_count <= self.MAX_RETRIES:
             try:
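
As a sanity check on the call sites above, this is roughly what each client
now does before issuing a request. The snippet is a sketch, assuming the
`role`/`content` fields on `Message` from `graphiti_core.prompts.models`:

```python
from graphiti_core.llm_client.client import get_extraction_language_instruction
from graphiti_core.prompts.models import Message

# Append the (now configurable) language instruction to the system message,
# mirroring `messages[0].content += get_extraction_language_instruction()`.
messages = [Message(role='system', content='Extract entities from the episode.')]
messages[0].content += get_extraction_language_instruction()

print(messages[0].content)
# Extract entities from the episode.
#
# Any extracted information should be returned in the same language as it was written in.
```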