From b2ff050e57a093be78a68021aa73ac1670ae2dc6 Mon Sep 17 00:00:00 2001
From: Daniel Chalef <131175+danielchalef@users.noreply.github.com>
Date: Tue, 30 Sep 2025 08:09:03 -0700
Subject: [PATCH] Make natural language extraction configurable (#943)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the MULTILINGUAL_EXTRACTION_RESPONSES constant with a configurable
get_extraction_language_instruction() function to improve determinism and
allow customization.

Changes:
- Replace the constant with a function in client.py
- Update all LLM client implementations to use the new function
- Maintain backward compatibility: the default behavior is unchanged
- Enable users to override the function for custom language requirements

Users can now customize extraction behavior by monkey-patching:

```python
import graphiti_core.llm_client.client as client
client.get_extraction_language_instruction = lambda: "Custom instruction"
```
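For instance, a deployment that always wants English output could install an
override like the sketch below. The function name and instruction text are
illustrative, not part of the library. Note that the individual clients bind
the function by name at import time (`from .client import
get_extraction_language_instruction`), so the override should be installed
before those client modules are imported:

```python
import graphiti_core.llm_client.client as client

def english_only_instruction() -> str:
    # Illustrative policy: always extract into English.
    return '\n\nAlways return extracted information in English, regardless of the source language.'

client.get_extraction_language_instruction = english_only_instruction

# Returning an empty string disables the language instruction entirely:
# client.get_extraction_language_instruction = lambda: ''
```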
🤖 Generated with [Claude Code](https://claude.ai/code)

Co-authored-by: Claude
---
 graphiti_core/llm_client/client.py             | 18 ++++++++++++++----
 graphiti_core/llm_client/gemini_client.py      |  4 ++--
 graphiti_core/llm_client/openai_base_client.py |  4 ++--
 .../llm_client/openai_generic_client.py        |  4 ++--
 4 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/graphiti_core/llm_client/client.py b/graphiti_core/llm_client/client.py
index 9f7558c1..b06f870d 100644
--- a/graphiti_core/llm_client/client.py
+++ b/graphiti_core/llm_client/client.py
@@ -32,9 +32,19 @@ from .errors import RateLimitError
 
 DEFAULT_TEMPERATURE = 0
 DEFAULT_CACHE_DIR = './llm_cache'
-MULTILINGUAL_EXTRACTION_RESPONSES = (
-    '\n\nAny extracted information should be returned in the same language as it was written in.'
-)
+
+def get_extraction_language_instruction() -> str:
+    """Returns instruction for language extraction behavior.
+
+    Override this function to customize language extraction:
+    - Return empty string to disable multilingual instructions
+    - Return custom instructions for specific language requirements
+
+    Returns:
+        str: Language instruction to append to system messages
+    """
+    return '\n\nAny extracted information should be returned in the same language as it was written in.'
+
 
 
 logger = logging.getLogger(__name__)
@@ -145,7 +155,7 @@ class LLMClient(ABC):
         )
 
         # Add multilingual extraction instructions
-        messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
+        messages[0].content += get_extraction_language_instruction()
 
         if self.cache_enabled and self.cache_dir is not None:
             cache_key = self._get_cache_key(messages)
diff --git a/graphiti_core/llm_client/gemini_client.py b/graphiti_core/llm_client/gemini_client.py
index a8422dfd..f80b2f7e 100644
--- a/graphiti_core/llm_client/gemini_client.py
+++ b/graphiti_core/llm_client/gemini_client.py
@@ -23,7 +23,7 @@ from typing import TYPE_CHECKING, ClassVar
 from pydantic import BaseModel
 
 from ..prompts.models import Message
-from .client import MULTILINGUAL_EXTRACTION_RESPONSES, LLMClient
+from .client import LLMClient, get_extraction_language_instruction
 from .config import LLMConfig, ModelSize
 from .errors import RateLimitError
 
@@ -376,7 +376,7 @@ class GeminiClient(LLMClient):
         last_output = None
 
         # Add multilingual extraction instructions
-        messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
+        messages[0].content += get_extraction_language_instruction()
 
         while retry_count < self.MAX_RETRIES:
             try:
diff --git a/graphiti_core/llm_client/openai_base_client.py b/graphiti_core/llm_client/openai_base_client.py
index e3332bc2..cdda179c 100644
--- a/graphiti_core/llm_client/openai_base_client.py
+++ b/graphiti_core/llm_client/openai_base_client.py
@@ -25,7 +25,7 @@ from openai.types.chat import ChatCompletionMessageParam
 from pydantic import BaseModel
 
 from ..prompts.models import Message
-from .client import MULTILINGUAL_EXTRACTION_RESPONSES, LLMClient
+from .client import LLMClient, get_extraction_language_instruction
 from .config import DEFAULT_MAX_TOKENS, LLMConfig, ModelSize
 from .errors import RateLimitError, RefusalError
 
@@ -184,7 +184,7 @@ class BaseOpenAIClient(LLMClient):
         last_error = None
 
         # Add multilingual extraction instructions
-        messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
+        messages[0].content += get_extraction_language_instruction()
 
         while retry_count <= self.MAX_RETRIES:
             try:
diff --git a/graphiti_core/llm_client/openai_generic_client.py b/graphiti_core/llm_client/openai_generic_client.py
index 571cf902..c4f4d212 100644
--- a/graphiti_core/llm_client/openai_generic_client.py
+++ b/graphiti_core/llm_client/openai_generic_client.py
@@ -25,7 +25,7 @@ from openai.types.chat import ChatCompletionMessageParam
 from pydantic import BaseModel
 
 from ..prompts.models import Message
-from .client import MULTILINGUAL_EXTRACTION_RESPONSES, LLMClient
+from .client import LLMClient, get_extraction_language_instruction
 from .config import DEFAULT_MAX_TOKENS, LLMConfig, ModelSize
 from .errors import RateLimitError, RefusalError
 
@@ -136,7 +136,7 @@ class OpenAIGenericClient(LLMClient):
         )
 
         # Add multilingual extraction instructions
-        messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
+        messages[0].content += get_extraction_language_instruction()
 
         while retry_count <= self.MAX_RETRIES:
             try:
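
As a sanity check on the call sites above, this is roughly what each client
now does before issuing a request. The snippet is a sketch, assuming the
`role`/`content` fields on `Message` from `graphiti_core.prompts.models`:

```python
from graphiti_core.llm_client.client import get_extraction_language_instruction
from graphiti_core.prompts.models import Message

# Append the (now configurable) language instruction to the system message,
# mirroring `messages[0].content += get_extraction_language_instruction()`.
messages = [Message(role='system', content='Extract entities from the episode.')]
messages[0].content += get_extraction_language_instruction()

print(messages[0].content)
# Extract entities from the episode.
#
# Any extracted information should be returned in the same language as it was written in.
```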