diff --git a/lightrag/llm/jina.py b/lightrag/llm/jina.py index af26f1bd..5077ee25 100644 --- a/lightrag/llm/jina.py +++ b/lightrag/llm/jina.py @@ -24,12 +24,34 @@ async def fetch_data(url, headers, data): async with session.post(url, headers=headers, json=data) as response: if response.status != 200: error_text = await response.text() - logger.error(f"Jina API error {response.status}: {error_text}") + + # Check if the error response is HTML (common for 502, 503, etc.) + content_type = response.headers.get("content-type", "").lower() + is_html_error = ( + error_text.strip().startswith("") + or "text/html" in content_type + ) + + if is_html_error: + # Provide clean, user-friendly error messages for HTML error pages + if response.status == 502: + clean_error = "Bad Gateway (502) - Jina AI service temporarily unavailable. Please try again in a few minutes." + elif response.status == 503: + clean_error = "Service Unavailable (503) - Jina AI service is temporarily overloaded. Please try again later." + elif response.status == 504: + clean_error = "Gateway Timeout (504) - Jina AI service request timed out. Please try again." + else: + clean_error = f"HTTP {response.status} - Jina AI service error. Please try again later." + else: + # Use original error text if it's not HTML + clean_error = error_text + + logger.error(f"Jina API error {response.status}: {clean_error}") raise aiohttp.ClientResponseError( request_info=response.request_info, history=response.history, status=response.status, - message=f"Jina API error: {error_text}", + message=f"Jina API error: {clean_error}", ) response_json = await response.json() data_list = response_json.get("data", [])