Make natural language extraction configurable (#943)

Replace the MULTILINGUAL_EXTRACTION_RESPONSES constant with a configurable
get_extraction_language_instruction() function to improve determinism
and allow customization.

Changes:
- Replace the constant with a function in client.py
- Update all LLM client implementations to use the new function
- Maintain backward compatibility; the default behavior is unchanged
- Enable users to override the function for custom language requirements

Users can now customize extraction behavior by monkey-patching:
```python
import graphiti_core.llm_client.client as client
client.get_extraction_language_instruction = lambda: "Custom instruction"
```
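
Per the new function's docstring, returning an empty string disables the appended instruction entirely; for example:
```python
import graphiti_core.llm_client.client as client

# Disable the multilingual instruction: nothing is appended to the
# system message before each LLM call.
client.get_extraction_language_instruction = lambda: ''
```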

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-authored-by: Claude <noreply@anthropic.com>
Daniel Chalef · 2025-09-30 08:09:03 -07:00 · committed by GitHub
parent f632a8ae9e · commit b2ff050e57
4 changed files with 20 additions and 10 deletions

client.py:

```diff
@@ -32,9 +32,19 @@ from .errors import RateLimitError
 
 DEFAULT_TEMPERATURE = 0
 DEFAULT_CACHE_DIR = './llm_cache'
 
-MULTILINGUAL_EXTRACTION_RESPONSES = (
-    '\n\nAny extracted information should be returned in the same language as it was written in.'
-)
+
+def get_extraction_language_instruction() -> str:
+    """Returns instruction for language extraction behavior.
+
+    Override this function to customize language extraction:
+    - Return empty string to disable multilingual instructions
+    - Return custom instructions for specific language requirements
+
+    Returns:
+        str: Language instruction to append to system messages
+    """
+    return '\n\nAny extracted information should be returned in the same language as it was written in.'
+
 
 logger = logging.getLogger(__name__)
@@ -145,7 +155,7 @@ class LLMClient(ABC):
         )
 
         # Add multilingual extraction instructions
-        messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
+        messages[0].content += get_extraction_language_instruction()
 
         if self.cache_enabled and self.cache_dir is not None:
             cache_key = self._get_cache_key(messages)
```
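
Every client appends the function's return value to the first (system) message, so an override steers the output language for all extraction calls. A minimal sketch of a custom override, with the target language chosen purely for illustration:
```python
import graphiti_core.llm_client.client as client


def pinned_language_instruction() -> str:
    # Hypothetical replacement for the default behavior: pin extraction
    # output to one language instead of mirroring the source text.
    return '\n\nAny extracted information should be returned in Spanish.'


client.get_extraction_language_instruction = pinned_language_instruction
```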

Gemini client (GeminiClient):

```diff
@@ -23,7 +23,7 @@ from typing import TYPE_CHECKING, ClassVar
 from pydantic import BaseModel
 
 from ..prompts.models import Message
-from .client import MULTILINGUAL_EXTRACTION_RESPONSES, LLMClient
+from .client import LLMClient, get_extraction_language_instruction
 from .config import LLMConfig, ModelSize
 from .errors import RateLimitError
@@ -376,7 +376,7 @@ class GeminiClient(LLMClient):
         last_output = None
 
         # Add multilingual extraction instructions
-        messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
+        messages[0].content += get_extraction_language_instruction()
 
         while retry_count < self.MAX_RETRIES:
             try:
```

OpenAI base client (BaseOpenAIClient):

```diff
@@ -25,7 +25,7 @@ from openai.types.chat import ChatCompletionMessageParam
 from pydantic import BaseModel
 
 from ..prompts.models import Message
-from .client import MULTILINGUAL_EXTRACTION_RESPONSES, LLMClient
+from .client import LLMClient, get_extraction_language_instruction
 from .config import DEFAULT_MAX_TOKENS, LLMConfig, ModelSize
 from .errors import RateLimitError, RefusalError
@@ -184,7 +184,7 @@ class BaseOpenAIClient(LLMClient):
         last_error = None
 
         # Add multilingual extraction instructions
-        messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
+        messages[0].content += get_extraction_language_instruction()
 
         while retry_count <= self.MAX_RETRIES:
             try:
```

OpenAI generic client (OpenAIGenericClient):

```diff
@@ -25,7 +25,7 @@ from openai.types.chat import ChatCompletionMessageParam
 from pydantic import BaseModel
 
 from ..prompts.models import Message
-from .client import MULTILINGUAL_EXTRACTION_RESPONSES, LLMClient
+from .client import LLMClient, get_extraction_language_instruction
 from .config import DEFAULT_MAX_TOKENS, LLMConfig, ModelSize
 from .errors import RateLimitError, RefusalError
@@ -136,7 +136,7 @@ class OpenAIGenericClient(LLMClient):
         )
 
         # Add multilingual extraction instructions
-        messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
+        messages[0].content += get_extraction_language_instruction()
 
         while retry_count <= self.MAX_RETRIES:
             try:
```
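
One caveat: the subclasses above bind the function at import time (`from .client import get_extraction_language_instruction`), so patching only the `graphiti_core.llm_client.client` module rebinds the name for the base `LLMClient` (which resolves it at call time) but not for modules that already imported it by name. A sketch of covering both, with the Gemini module path assumed from the class name above:
```python
import graphiti_core.llm_client.client as base_client
import graphiti_core.llm_client.gemini_client as gemini_client  # path assumed


def override() -> str:
    # For example, disable the language instruction everywhere.
    return ''


# Rebind in the base module and in every client module that imported
# the function by name.
base_client.get_extraction_language_instruction = override
gemini_client.get_extraction_language_instruction = override
```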