Extract entities and facts in their language (#351)

* Extract entities and facts in their language

* linter
Authored by Preston Rasmussen on 2025-04-14 16:28:59 -04:00, committed by GitHub
parent 5dce26722e
commit ed26852531
3 changed files with 15 additions and 2 deletions

View file

@@ -32,6 +32,10 @@ from .errors import RateLimitError
 DEFAULT_TEMPERATURE = 0
 DEFAULT_CACHE_DIR = './llm_cache'

+MULTILINGUAL_EXTRACTION_RESPONSES = (
+    '\n\nAny extracted information should be returned in the same language as it was written in.'
+)
+
 logger = logging.getLogger(__name__)

@@ -133,6 +137,9 @@ class LLMClient(ABC):
             f'\n\nRespond with a JSON object in the following format:\n\n{serialized_model}'
         )

+        # Add multilingual extraction instructions
+        messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
+
         if self.cache_enabled and self.cache_dir is not None:
             cache_key = self._get_cache_key(messages)

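Taken together, the base-client change boils down to the pattern sketched below: a module-level instruction string appended once to the system message (messages[0]) before the cache key is computed, so cached responses are keyed on the augmented prompt. This is a minimal, self-contained sketch, assuming a pydantic Message model with role and content fields like the one imported from ..prompts.models; the augment_messages helper is illustrative, not part of the library.

from pydantic import BaseModel

# Same instruction string the commit adds at module level in the base client.
MULTILINGUAL_EXTRACTION_RESPONSES = (
    '\n\nAny extracted information should be returned in the same language as it was written in.'
)


class Message(BaseModel):
    # Assumed shape of the prompts Message model: a role plus free-form content.
    role: str
    content: str


def augment_messages(messages: list[Message]) -> list[Message]:
    # Mirror what LLMClient.generate_response now does: extend the system prompt
    # in place before anything downstream (caching, the provider call) sees it.
    messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
    return messages


# A Spanish extraction prompt keeps working; the appended instruction just tells
# the model to answer in whatever language the source text used.
msgs = [
    Message(role='system', content='Extrae entidades y hechos del episodio.'),
    Message(role='user', content='Ayer María visitó Barcelona con su hermano.'),
]
augment_messages(msgs)
assert msgs[0].content.endswith('same language as it was written in.')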
View file

@@ -24,7 +24,7 @@ from openai.types.chat import ChatCompletionMessageParam
 from pydantic import BaseModel

 from ..prompts.models import Message
-from .client import LLMClient
+from .client import MULTILINGUAL_EXTRACTION_RESPONSES, LLMClient
 from .config import DEFAULT_MAX_TOKENS, LLMConfig
 from .errors import RateLimitError, RefusalError

@@ -136,6 +136,9 @@ class OpenAIClient(LLMClient):
         retry_count = 0
         last_error = None

+        # Add multilingual extraction instructions
+        messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
+
         while retry_count <= self.MAX_RETRIES:
             try:
                 response = await self._generate_response(messages, response_model, max_tokens)

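OpenAIClient overrides generate_response with its own retry loop rather than going through the base-class path above, so it has to append the same suffix itself; placing the append before the loop means the instruction is added exactly once and retries reuse the already-augmented system message instead of stacking the suffix on every attempt. A rough, self-contained sketch of that ordering, with plain dicts standing in for Message objects and a fake send callable standing in for the actual OpenAI call (error handling simplified):

import asyncio

MULTILINGUAL_EXTRACTION_RESPONSES = (
    '\n\nAny extracted information should be returned in the same language as it was written in.'
)
MAX_RETRIES = 2  # illustrative; the real limit is a class attribute on the client


async def call_with_retries(messages: list[dict], send) -> str:
    # Augment once, outside the loop, so a retried request carries the
    # instruction a single time rather than once per attempt.
    messages[0]['content'] += MULTILINGUAL_EXTRACTION_RESPONSES

    last_error: Exception | None = None
    for _ in range(MAX_RETRIES + 1):
        try:
            return await send(messages)
        except Exception as exc:  # the real client only retries specific errors
            last_error = exc
    raise last_error


async def main() -> None:
    async def fake_send(messages: list[dict]) -> str:
        return f'system prompt seen by the model: {messages[0]["content"]!r}'

    msgs = [
        {'role': 'system', 'content': 'Extract entities and facts.'},
        {'role': 'user', 'content': "Hier, Marie a visité Lyon avec son frère."},
    ]
    print(await call_with_retries(msgs, fake_send))


asyncio.run(main())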
View file

@@ -25,7 +25,7 @@ from openai.types.chat import ChatCompletionMessageParam
 from pydantic import BaseModel

 from ..prompts.models import Message
-from .client import LLMClient
+from .client import MULTILINGUAL_EXTRACTION_RESPONSES, LLMClient
 from .config import DEFAULT_MAX_TOKENS, LLMConfig
 from .errors import RateLimitError, RefusalError

@@ -130,6 +130,9 @@ class OpenAIGenericClient(LLMClient):
             f'\n\nRespond with a JSON object in the following format:\n\n{serialized_model}'
         )

+        # Add multilingual extraction instructions
+        messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
+
         while retry_count <= self.MAX_RETRIES:
             try:
                 response = await self._generate_response(