cognee/cognee/tests/tasks/translation/integration_test.py
andikarachman 9dd295821d refactor: address code review feedback
- Made is_available() abstract in base.py with proper implementation in providers
- Added original_error parameter to UnsupportedLanguageError and TranslationConfigError
- Added Field validation for confidence_threshold bounds (0.0-1.0)
- Changed @lru_cache to @lru_cache() for explicit style
- Added get_translation_provider to __all__ in providers/__init__.py
- Replaced deprecated asyncio.get_event_loop() with get_running_loop()
- Added debug logging to is_available() in GoogleTranslationProvider
- Added TODO comment for confidence score improvement in OpenAIProvider
- Added None check for read_query_prompt() with fallback default prompt
- Moved ClientSession outside batch loop in AzureTranslationProvider
- Fixed Optional[float] type annotation in detect_language()
- Added Note section documenting in-place mutation in translate_content()
- Added test_confidence_threshold_validation() for bounds testing
- Added descriptive assertion messages to config tests
- Converted all async tests to use @pytest.mark.asyncio decorators
- Replaced manual skip checks with @pytest.mark.skipif
- Removed manual main() blocks, tests now pytest-only
- Changed Chinese language assertion to use startswith('zh') for flexibility
2026-01-03 09:22:10 +07:00

210 lines
6.5 KiB
Python

"""
Integration tests for multilingual content translation feature.
Tests the full cognify pipeline with translation enabled.
"""
import os
import pytest
from cognee import add, cognify, prune, search, SearchType
from cognee.tasks.translation import translate_text
from cognee.tasks.translation.detect_language import detect_language_async
def has_openai_key():
"""Check if OpenAI API key is available"""
return bool(os.environ.get("LLM_API_KEY") or os.environ.get("OPENAI_API_KEY"))
@pytest.mark.asyncio
@pytest.mark.skipif(not has_openai_key(), reason="No OpenAI API key available")
async def test_quick_translation():
"""Quick smoke test for translation feature"""
await prune.prune_data()
await prune.prune_system(metadata=True)
spanish_text = "La inteligencia artificial está transformando el mundo de la tecnología."
await add(spanish_text, dataset_name="spanish_test")
result = await cognify(
datasets=["spanish_test"],
auto_translate=True,
target_language="en",
translation_provider="openai",
)
assert result is not None
@pytest.mark.asyncio
@pytest.mark.skipif(not has_openai_key(), reason="No OpenAI API key available")
async def test_translation_basic():
"""Test basic translation functionality with English text"""
await prune.prune_data()
await prune.prune_system(metadata=True)
english_text = "Hello, this is a test document about artificial intelligence."
await add(english_text, dataset_name="test_english")
result = await cognify(
datasets=["test_english"],
auto_translate=True,
target_language="en",
translation_provider="openai",
)
assert result is not None
search_results = await search(
query_text="What is this document about?",
query_type=SearchType.SUMMARIES,
)
assert search_results is not None
@pytest.mark.asyncio
@pytest.mark.skipif(not has_openai_key(), reason="No OpenAI API key available")
async def test_translation_spanish():
"""Test translation with Spanish text"""
await prune.prune_data()
await prune.prune_system(metadata=True)
spanish_text = """
La inteligencia artificial es una rama de la informática que se centra en
crear sistemas capaces de realizar tareas que normalmente requieren inteligencia humana.
Estos sistemas pueden aprender, razonar y resolver problemas complejos.
"""
await add(spanish_text, dataset_name="test_spanish")
result = await cognify(
datasets=["test_spanish"],
auto_translate=True,
target_language="en",
translation_provider="openai",
)
assert result is not None
search_results = await search(
query_text="What is artificial intelligence?",
query_type=SearchType.SUMMARIES,
)
assert search_results is not None
@pytest.mark.asyncio
@pytest.mark.skipif(not has_openai_key(), reason="No OpenAI API key available")
async def test_translation_french():
"""Test translation with French text"""
await prune.prune_data()
await prune.prune_system(metadata=True)
french_text = """
L'apprentissage automatique est une méthode d'analyse de données qui
automatise la construction de modèles analytiques. C'est une branche
de l'intelligence artificielle basée sur l'idée que les systèmes peuvent
apprendre à partir de données, identifier des modèles et prendre des décisions.
"""
await add(french_text, dataset_name="test_french")
result = await cognify(
datasets=["test_french"],
auto_translate=True,
target_language="en",
)
assert result is not None
search_results = await search(
query_text="What is machine learning?",
query_type=SearchType.SUMMARIES,
)
assert search_results is not None
@pytest.mark.asyncio
@pytest.mark.skipif(not has_openai_key(), reason="No OpenAI API key available")
async def test_translation_disabled():
"""Test that cognify works without translation"""
await prune.prune_data()
await prune.prune_system(metadata=True)
text = "This is a baseline test without translation enabled."
await add(text, dataset_name="test_baseline")
result = await cognify(
datasets=["test_baseline"],
auto_translate=False,
)
assert result is not None
@pytest.mark.asyncio
@pytest.mark.skipif(not has_openai_key(), reason="No OpenAI API key available")
async def test_translation_mixed_languages():
"""Test with multiple documents in different languages"""
await prune.prune_data()
await prune.prune_system(metadata=True)
texts = [
"Artificial intelligence is transforming the world.",
"La tecnología está cambiando nuestras vidas.",
"Les ordinateurs deviennent de plus en plus puissants.",
]
for text in texts:
await add(text, dataset_name="test_mixed")
result = await cognify(
datasets=["test_mixed"],
auto_translate=True,
target_language="en",
)
assert result is not None
search_results = await search(
query_text="What topics are discussed?",
query_type=SearchType.SUMMARIES,
)
assert search_results is not None
@pytest.mark.asyncio
@pytest.mark.skipif(not has_openai_key(), reason="No OpenAI API key available")
async def test_direct_translation_function():
"""Test the translate_text convenience function directly"""
result = await translate_text(
text="Hola, ¿cómo estás? Espero que tengas un buen día.",
target_language="en",
translation_provider="openai",
)
assert result.translated_text is not None
assert result.translated_text != ""
assert result.target_language == "en"
assert result.provider == "openai"
@pytest.mark.asyncio
async def test_language_detection():
"""Test language detection directly"""
test_texts = [
("Hello world, how are you doing today?", "en", False),
("Bonjour le monde, comment allez-vous aujourd'hui?", "en", True),
("Hola mundo, cómo estás hoy?", "en", True),
("This is already in English language", "en", False),
]
for text, target_lang, should_translate in test_texts:
result = await detect_language_async(text, target_lang)
assert result.language_code is not None
assert result.confidence > 0.0
# Only check requires_translation for high-confidence detections
if result.confidence > 0.8:
assert result.requires_translation == should_translate