cognee/cognee/tasks/translation/config.py
andikarachman b6aa33f343 refactor: rename OpenAI translation provider to LLM provider
- Rename OpenAITranslationProvider to LLMTranslationProvider
- Rename openai_provider.py to llm_provider.py
- Change provider type from 'openai' to 'llm' in TranslationProviderType
- Update all test files to use 'llm' provider and has_llm_api_key()
- Add AliasChoices for explicit env var mapping in TranslationConfig
- Update translate_content.py to fall back to config.target_language
- Update cognify.py docstrings to reference 'llm' provider
- Update .env.template and test README documentation

The LLM provider now uses whatever LLM is configured in cognee
(OpenAI, Azure, Ollama, Anthropic, etc.) instead of being tied to OpenAI.
2026-01-13 18:52:04 +07:00

110 lines
4 KiB
Python

from functools import lru_cache
from typing import Literal, Optional
from pydantic import AliasChoices, Field
from pydantic_settings import BaseSettings, SettingsConfigDict
# Closed set of provider identifiers accepted for the translation provider setting.
TranslationProviderType = Literal[
    "llm",
    "google",
    "azure",
]
class TranslationConfig(BaseSettings):
    """
    Configuration settings for the translation task.

    Every aliased field lists its environment variable name first and its own
    field name second, so the setting is also accepted under its field name.

    Environment variables can be used to configure these settings:
    - TRANSLATION_PROVIDER: The translation service to use ("llm", "google", "azure")
    - TARGET_LANGUAGE: Default target language (ISO 639-1 code, e.g., "en", "es", "fr")
    - CONFIDENCE_THRESHOLD: Minimum confidence for language detection (0.0 to 1.0)
    - GOOGLE_TRANSLATE_API_KEY: API key for Google Translate
    - GOOGLE_PROJECT_ID: Google Cloud project ID
    - AZURE_TRANSLATOR_KEY: API key for Azure Translator
    - AZURE_TRANSLATOR_REGION: Region for Azure Translator
    - AZURE_TRANSLATOR_ENDPOINT: Endpoint URL for Azure Translator
    - TRANSLATION_BATCH_SIZE: Number of texts to translate per batch (must be >= 1)
    - TRANSLATION_MAX_RETRIES: Maximum retry attempts on failure (must be >= 0)
    - TRANSLATION_TIMEOUT_SECONDS: Request timeout in seconds (must be > 0)
    """

    # Translation provider settings
    translation_provider: TranslationProviderType = Field(
        default="llm",
        validation_alias=AliasChoices("TRANSLATION_PROVIDER", "translation_provider"),
    )
    target_language: str = Field(
        default="en",
        validation_alias=AliasChoices("TARGET_LANGUAGE", "target_language"),
    )
    confidence_threshold: float = Field(
        default=0.8,
        ge=0.0,
        le=1.0,
        validation_alias=AliasChoices("CONFIDENCE_THRESHOLD", "confidence_threshold"),
    )

    # Google Translate settings
    google_translate_api_key: Optional[str] = Field(
        default=None,
        validation_alias=AliasChoices("GOOGLE_TRANSLATE_API_KEY", "google_translate_api_key"),
    )
    google_project_id: Optional[str] = Field(
        default=None,
        validation_alias=AliasChoices("GOOGLE_PROJECT_ID", "google_project_id"),
    )

    # Azure Translator settings
    azure_translator_key: Optional[str] = Field(
        default=None,
        validation_alias=AliasChoices("AZURE_TRANSLATOR_KEY", "azure_translator_key"),
    )
    azure_translator_region: Optional[str] = Field(
        default=None,
        validation_alias=AliasChoices("AZURE_TRANSLATOR_REGION", "azure_translator_region"),
    )
    azure_translator_endpoint: str = Field(
        default="https://api.cognitive.microsofttranslator.com",
        validation_alias=AliasChoices("AZURE_TRANSLATOR_ENDPOINT", "azure_translator_endpoint"),
    )

    # LLM provider uses the existing LLM configuration

    # Performance settings (with TRANSLATION_ prefix for env vars).
    # Bounds reject values that cannot describe a usable batch/retry/timeout,
    # so a bad environment value fails at config load instead of downstream.
    batch_size: int = Field(
        default=10,
        ge=1,  # a batch must hold at least one text
        validation_alias=AliasChoices("TRANSLATION_BATCH_SIZE", "batch_size"),
    )
    max_retries: int = Field(
        default=3,
        ge=0,  # 0 is allowed and means "do not retry"
        validation_alias=AliasChoices("TRANSLATION_MAX_RETRIES", "max_retries"),
    )
    timeout_seconds: int = Field(
        default=30,
        gt=0,  # zero or negative timeouts are rejected
        validation_alias=AliasChoices("TRANSLATION_TIMEOUT_SECONDS", "timeout_seconds"),
    )

    # Language detection settings (no explicit alias is declared for these two).
    # NOTE(review): presumably texts shorter than min_text_length_for_detection
    # skip detection when skip_detection_for_short_text is True -- confirm
    # against the detection code that reads them.
    min_text_length_for_detection: int = 10
    skip_detection_for_short_text: bool = True

    model_config = SettingsConfigDict(env_file=".env", extra="allow")

    def to_dict(self) -> dict:
        """
        Return the general and performance settings as a plain dictionary.

        Only the provider name, target language, confidence threshold and the
        three performance settings are included; the Google/Azure fields
        (including API keys) and the detection settings are left out.

        Returns:
            dict: Mapping of setting name to its current value.
        """
        return {
            "translation_provider": self.translation_provider,
            "target_language": self.target_language,
            "confidence_threshold": self.confidence_threshold,
            "batch_size": self.batch_size,
            "max_retries": self.max_retries,
            "timeout_seconds": self.timeout_seconds,
        }
@lru_cache()
def get_translation_config() -> TranslationConfig:
    """
    Return the shared translation configuration.

    The first call builds a TranslationConfig; because the function takes no
    arguments, lru_cache hands back that same instance on every later call
    until the cache is cleared.
    """
    settings = TranslationConfig()
    return settings
def clear_translation_config_cache():
    """
    Discard the memoized translation config (intended for tests).

    After this call, the next get_translation_config() constructs a fresh
    TranslationConfig instead of returning the cached one.
    """
    get_translation_config.cache_clear()