cognee/cognee/tasks/translation/providers/google_provider.py
andikarachman 69c25b43d7 refactor: address code review feedback
- Made is_available() abstract in base.py with proper implementation in providers
- Added original_error parameter to UnsupportedLanguageError and TranslationConfigError
- Added Field validation for confidence_threshold bounds (0.0-1.0)
- Changed @lru_cache to @lru_cache() for explicit style
- Added get_translation_provider to __all__ in providers/__init__.py
- Replaced deprecated asyncio.get_event_loop() with get_running_loop()
- Added debug logging to is_available() in GoogleTranslationProvider
- Added TODO comment for confidence score improvement in OpenAIProvider
- Added None check for read_query_prompt() with fallback default prompt
- Moved ClientSession outside batch loop in AzureTranslationProvider
- Fixed Optional[float] type annotation in detect_language()
- Added Note section documenting in-place mutation in translate_content()
- Added test_confidence_threshold_validation() for bounds testing
- Added descriptive assertion messages to config tests
- Converted all async tests to use @pytest.mark.asyncio decorators
- Replaced manual skip checks with @pytest.mark.skipif
- Removed manual main() blocks, tests now pytest-only
- Changed Chinese language assertion to use startswith('zh') for flexibility
2026-01-04 12:06:50 +07:00

160 lines
5.2 KiB
Python

import asyncio
from typing import Optional
from cognee.shared.logging_utils import get_logger
from .base import TranslationProvider, TranslationResult
from ..config import get_translation_config
# Module-level logger, named after this module via get_logger(__name__).
logger = get_logger(__name__)
class GoogleTranslationProvider(TranslationProvider):
    """
    Translation provider using Google Cloud Translation API.

    Requires:
    - google-cloud-translate package
    - GOOGLE_TRANSLATE_API_KEY or GOOGLE_PROJECT_ID environment variable

    NOTE(review): the client is created with ``translate.Client()`` and no
    explicit arguments, so credentials are resolved by the library itself --
    confirm the environment variables listed above are the ones it reads.
    """

    # Fixed score attached to every result; the API response is not inspected
    # for a confidence value. Google Translate is generally reliable.
    _DEFAULT_CONFIDENCE = 0.9

    def __init__(self):
        # The client is created lazily by _get_client() on first use.
        self._client = None
        self._config = get_translation_config()

    @property
    def provider_name(self) -> str:
        """Identifier recorded in the ``provider`` field of every result."""
        return "google"

    def _get_client(self):
        """
        Lazy initialization of Google Translate client.

        Returns:
            The cached ``translate_v2.Client`` instance, created on first call.

        Raises:
            ImportError: If google-cloud-translate cannot be imported.
            Exception: Any error raised while constructing the client is
                logged and re-raised unchanged.
        """
        if self._client is not None:
            return self._client

        # Keep the import in its own try block so that only a failed import
        # is reported as "package missing"; client construction errors are
        # handled separately below.
        try:
            from google.cloud import translate_v2 as translate
        except ImportError as e:
            raise ImportError(
                "google-cloud-translate is required for Google translation. "
                "Install it with: pip install google-cloud-translate"
            ) from e

        try:
            self._client = translate.Client()
        except Exception as e:
            logger.error(f"Failed to initialize Google Translate client: {e}")
            raise
        return self._client

    def is_available(self) -> bool:
        """
        Check if Google Translate is available.

        Returns:
            True if the client could be obtained, False if obtaining it
            raised any exception (the reason is logged at debug level).
        """
        try:
            self._get_client()
        except Exception as e:
            logger.debug(f"Google Translate not available: {e}")
            return False
        return True

    @staticmethod
    def _request_kwargs(target_language: str, source_language: Optional[str]) -> dict:
        """Build the keyword arguments passed to ``client.translate``."""
        kwargs = {"target_language": target_language}
        # Only send a source language when one was given; otherwise the
        # argument is omitted entirely, as in the no-source call path.
        if source_language:
            kwargs["source_language"] = source_language
        return kwargs

    def _to_result(
        self,
        result: dict,
        target_language: str,
        source_language: Optional[str],
    ) -> TranslationResult:
        """Convert one raw API response entry into a TranslationResult."""
        # Prefer the language reported in the response; fall back to the
        # caller-supplied source language, then to "unknown".
        detected_language = result.get("detectedSourceLanguage", source_language or "unknown")
        return TranslationResult(
            translated_text=result["translatedText"],
            source_language=detected_language,
            target_language=target_language,
            confidence_score=self._DEFAULT_CONFIDENCE,
            provider=self.provider_name,
            raw_response=result,
        )

    async def translate(
        self,
        text: str,
        target_language: str = "en",
        source_language: Optional[str] = None,
    ) -> TranslationResult:
        """
        Translate text using Google Translate API.

        Args:
            text: The text to translate
            target_language: Target language code (default: "en")
            source_language: Source language code (optional)

        Returns:
            TranslationResult with translated text and metadata

        Raises:
            Exception: Any failure (client setup, API call, or a response
                without "translatedText") is logged and re-raised.
        """
        try:
            client = self._get_client()
            # Run in a worker thread since google-cloud-translate is synchronous
            result = await asyncio.to_thread(
                client.translate,
                text,
                **self._request_kwargs(target_language, source_language),
            )
            return self._to_result(result, target_language, source_language)
        except Exception as e:
            logger.error(f"Google translation failed: {e}")
            raise

    async def translate_batch(
        self,
        texts: list[str],
        target_language: str = "en",
        source_language: Optional[str] = None,
    ) -> list[TranslationResult]:
        """
        Translate multiple texts using Google Translate API.

        Google Translate supports batch translation natively, so all texts
        are sent in a single client call.

        Args:
            texts: List of texts to translate
            target_language: Target language code
            source_language: Source language code (optional)

        Returns:
            List of TranslationResult objects, one per entry returned by the
            client. An empty input yields an empty list without any API call.

        Raises:
            Exception: Any failure (client setup, API call, or a response
                entry without "translatedText") is logged and re-raised.
        """
        # Nothing to translate: skip client setup and the network round trip.
        if not texts:
            return []

        try:
            client = self._get_client()
            # Run in a worker thread since google-cloud-translate is synchronous
            results = await asyncio.to_thread(
                client.translate,
                texts,
                **self._request_kwargs(target_language, source_language),
            )
            return [
                self._to_result(result, target_language, source_language)
                for result in results
            ]
        except Exception as e:
            logger.error(f"Google batch translation failed: {e}")
            raise