Extract entities and facts in their language (#351)

* Extract entities and facts in their language

* linter
Authored by Preston Rasmussen on 2025-04-14 16:28:59 -04:00, committed by GitHub
parent 5dce26722e
commit ed26852531
3 changed files with 15 additions and 2 deletions

View file

@@ -32,6 +32,10 @@ from .errors import RateLimitError
 DEFAULT_TEMPERATURE = 0
 DEFAULT_CACHE_DIR = './llm_cache'

+MULTILINGUAL_EXTRACTION_RESPONSES = (
+    '\n\nAny extracted information should be returned in the same language as it was written in.'
+)
+
 logger = logging.getLogger(__name__)

@@ -133,6 +137,9 @@ class LLMClient(ABC):
             f'\n\nRespond with a JSON object in the following format:\n\n{serialized_model}'
         )

+        # Add multilingual extraction instructions
+        messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
+
         if self.cache_enabled and self.cache_dir is not None:
             cache_key = self._get_cache_key(messages)

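Taken together, the base-client change boils down to the pattern sketched below: a module-level instruction string appended once to the system message (messages[0]) before the cache key is computed, so cached responses are keyed on the augmented prompt. This is a minimal, self-contained sketch, assuming a pydantic Message model with role and content fields like the one imported from ..prompts.models; the augment_messages helper is illustrative, not part of the library.

from pydantic import BaseModel

# Same instruction string the commit adds at module level in the base client.
MULTILINGUAL_EXTRACTION_RESPONSES = (
    '\n\nAny extracted information should be returned in the same language as it was written in.'
)


class Message(BaseModel):
    # Assumed shape of the prompts Message model: a role plus free-form content.
    role: str
    content: str


def augment_messages(messages: list[Message]) -> list[Message]:
    # Mirror what LLMClient.generate_response now does: extend the system prompt
    # in place before anything downstream (caching, the provider call) sees it.
    messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
    return messages


# A Spanish extraction prompt keeps working; the appended instruction just tells
# the model to answer in whatever language the source text used.
msgs = [
    Message(role='system', content='Extrae entidades y hechos del episodio.'),
    Message(role='user', content='Ayer María visitó Barcelona con su hermano.'),
]
augment_messages(msgs)
assert msgs[0].content.endswith('same language as it was written in.')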
View file

@@ -24,7 +24,7 @@ from openai.types.chat import ChatCompletionMessageParam
 from pydantic import BaseModel

 from ..prompts.models import Message
-from .client import LLMClient
+from .client import MULTILINGUAL_EXTRACTION_RESPONSES, LLMClient
 from .config import DEFAULT_MAX_TOKENS, LLMConfig
 from .errors import RateLimitError, RefusalError

@@ -136,6 +136,9 @@ class OpenAIClient(LLMClient):
         retry_count = 0
         last_error = None

+        # Add multilingual extraction instructions
+        messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
+
         while retry_count <= self.MAX_RETRIES:
             try:
                 response = await self._generate_response(messages, response_model, max_tokens)

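OpenAIClient overrides generate_response with its own retry loop rather than going through the base-class path above, so it has to append the same suffix itself; placing the append before the loop means the instruction is added exactly once and retries reuse the already-augmented system message instead of stacking the suffix on every attempt. A rough, self-contained sketch of that ordering, with plain dicts standing in for Message objects and a fake send callable standing in for the actual OpenAI call (error handling simplified):

import asyncio

MULTILINGUAL_EXTRACTION_RESPONSES = (
    '\n\nAny extracted information should be returned in the same language as it was written in.'
)
MAX_RETRIES = 2  # illustrative; the real limit is a class attribute on the client


async def call_with_retries(messages: list[dict], send) -> str:
    # Augment once, outside the loop, so a retried request carries the
    # instruction a single time rather than once per attempt.
    messages[0]['content'] += MULTILINGUAL_EXTRACTION_RESPONSES

    last_error: Exception | None = None
    for _ in range(MAX_RETRIES + 1):
        try:
            return await send(messages)
        except Exception as exc:  # the real client only retries specific errors
            last_error = exc
    raise last_error


async def main() -> None:
    async def fake_send(messages: list[dict]) -> str:
        return f'system prompt seen by the model: {messages[0]["content"]!r}'

    msgs = [
        {'role': 'system', 'content': 'Extract entities and facts.'},
        {'role': 'user', 'content': "Hier, Marie a visité Lyon avec son frère."},
    ]
    print(await call_with_retries(msgs, fake_send))


asyncio.run(main())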
View file

@@ -25,7 +25,7 @@ from openai.types.chat import ChatCompletionMessageParam
 from pydantic import BaseModel

 from ..prompts.models import Message
-from .client import LLMClient
+from .client import MULTILINGUAL_EXTRACTION_RESPONSES, LLMClient
 from .config import DEFAULT_MAX_TOKENS, LLMConfig
 from .errors import RateLimitError, RefusalError

@@ -130,6 +130,9 @@ class OpenAIGenericClient(LLMClient):
             f'\n\nRespond with a JSON object in the following format:\n\n{serialized_model}'
         )

+        # Add multilingual extraction instructions
+        messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
+
         while retry_count <= self.MAX_RETRIES:
             try:
                 response = await self._generate_response(