feat: improve Jina API error handling to show clean messages instead of HTML

This commit is contained in:
yangdx 2025-08-05 11:46:02 +08:00
parent c5babf61d7
commit 6ff25210ea

View file

@ -24,12 +24,34 @@ async def fetch_data(url, headers, data):
async with session.post(url, headers=headers, json=data) as response:
if response.status != 200:
error_text = await response.text()
logger.error(f"Jina API error {response.status}: {error_text}")
# Check if the error response is HTML (common for 502, 503, etc.)
content_type = response.headers.get("content-type", "").lower()
is_html_error = (
error_text.strip().startswith("<!DOCTYPE html>")
or "text/html" in content_type
)
if is_html_error:
# Provide clean, user-friendly error messages for HTML error pages
if response.status == 502:
clean_error = "Bad Gateway (502) - Jina AI service temporarily unavailable. Please try again in a few minutes."
elif response.status == 503:
clean_error = "Service Unavailable (503) - Jina AI service is temporarily overloaded. Please try again later."
elif response.status == 504:
clean_error = "Gateway Timeout (504) - Jina AI service request timed out. Please try again."
else:
clean_error = f"HTTP {response.status} - Jina AI service error. Please try again later."
else:
# Use original error text if it's not HTML
clean_error = error_text
logger.error(f"Jina API error {response.status}: {clean_error}")
raise aiohttp.ClientResponseError(
request_info=response.request_info,
history=response.history,
status=response.status,
message=f"Jina API error: {error_text}",
message=f"Jina API error: {clean_error}",
)
response_json = await response.json()
data_list = response_json.get("data", [])