cognee/cognee/tasks/translation/providers/openai_provider.py
andikarachman 00e318b3ed test: add comprehensive translation module tests
- Add unit tests for translation configuration, language detection, providers, and translate_content task
- Add integration tests for full cognify pipeline with translation
- All 40 tests passing (32 unit + 8 integration)
- Tests use asyncio.run() pattern matching project style
- Tests named with *_test.py suffix per project convention
- Update README with test documentation

Formatting changes:
- Apply ruff format to cognify.py (bracket placement style)

Signed-off-by: andikarachman <andika.rachman.y@gmail.com>
2026-01-13 18:46:01 +07:00

105 lines
3.3 KiB
Python

import asyncio
from typing import Optional
from pydantic import BaseModel
from cognee.infrastructure.llm.LLMGateway import LLMGateway
from cognee.infrastructure.llm.prompts import read_query_prompt
from cognee.shared.logging_utils import get_logger
from .base import TranslationProvider, TranslationResult
# Module-level logger obtained from get_logger with this module's __name__.
logger = get_logger(__name__)
class TranslationOutput(BaseModel):
    """Schema for the structured reply requested from the LLM for one translation.

    Used as the ``response_model`` of the structured-output call, so the
    reply is delivered as an instance of this model.
    """

    # The input text rendered in the requested target language.
    translated_text: str
    # Source language as reported by the model.
    detected_source_language: str
    # Optional free-form remarks from the model; None when not supplied.
    translation_notes: Optional[str] = None
class OpenAITranslationProvider(TranslationProvider):
    """
    Translation provider registered under the name "openai".

    Every request is sent through Cognee's LLMGateway as a
    structured-output call, and the reply is read as a TranslationOutput.
    """

    @property
    def provider_name(self) -> str:
        """Identifier written to the ``provider`` field of each result."""
        return "openai"

    async def translate(
        self,
        text: str,
        target_language: str = "en",
        source_language: Optional[str] = None,
    ) -> TranslationResult:
        """
        Translate a single piece of text via the LLM gateway.

        Args:
            text: The text to translate
            target_language: Target language code (default: "en")
            source_language: Source language code; when omitted (or empty)
                the model is asked to detect it

        Returns:
            TranslationResult carrying the translated text, the source and
            target languages, the provider name and any model notes

        Raises:
            Exception: any error raised while loading the prompt, calling
                the gateway or building the result is logged and re-raised
                unchanged
        """
        try:
            prompt_template = read_query_prompt("translate_content.txt")

            # The leading instruction depends on whether the caller already
            # knows the source language; the tail of the message is shared.
            if source_language:
                instruction = (
                    f"Translate the following text from {source_language} to {target_language}."
                )
            else:
                instruction = (
                    f"Translate the following text to {target_language}. "
                    "First detect the source language."
                )
            user_message = f"{instruction}\n\nText to translate:\n{text}"

            llm_output = await LLMGateway.acreate_structured_output(
                text_input=user_message,
                system_prompt=prompt_template,
                response_model=TranslationOutput,
            )

            # A caller-supplied source language takes precedence over the
            # language reported by the model.
            if source_language:
                resolved_source = source_language
            else:
                resolved_source = llm_output.detected_source_language

            return TranslationResult(
                translated_text=llm_output.translated_text,
                source_language=resolved_source,
                target_language=target_language,
                # Fixed value: the score is hard-coded, not taken from the model's reply.
                confidence_score=0.95,
                provider=self.provider_name,
                raw_response={"notes": llm_output.translation_notes},
            )
        except Exception as error:
            logger.error(f"OpenAI translation failed: {error}")
            raise

    async def translate_batch(
        self,
        texts: list[str],
        target_language: str = "en",
        source_language: Optional[str] = None,
    ) -> list[TranslationResult]:
        """
        Translate several texts concurrently, one ``translate`` call per text.

        Args:
            texts: List of texts to translate
            target_language: Target language code
            source_language: Source language code (optional), applied to
                every text in the batch

        Returns:
            List of TranslationResult objects in the same order as ``texts``
        """
        # gather() preserves argument order, so results line up with `texts`.
        pending = (
            self.translate(text, target_language, source_language) for text in texts
        )
        return await asyncio.gather(*pending)