From c5babf61d72c4e3f681fda85761ef98d8f789297 Mon Sep 17 00:00:00 2001 From: yangdx Date: Tue, 5 Aug 2025 11:38:40 +0800 Subject: [PATCH 1/2] Feat: Change embedding formats from float to base64 for efficiency - Add base64 support for Jina embeddings - Add base64 support for OpenAI embeddings - Update env.example with new embedding options --- env.example | 17 ++++++++++++----- lightrag/llm/jina.py | 9 ++++++++- lightrag/llm/openai.py | 10 ++++++++-- 3 files changed, 28 insertions(+), 8 deletions(-) diff --git a/env.example b/env.example index 3967e00e..fae8122a 100644 --- a/env.example +++ b/env.example @@ -121,6 +121,16 @@ LLM_MODEL=gpt-4o LLM_BINDING_HOST=https://api.openai.com/v1 LLM_BINDING_API_KEY=your_api_key +### Optional for Azure +# AZURE_OPENAI_API_VERSION=2024-08-01-preview +# AZURE_OPENAI_DEPLOYMENT=gpt-4o + +### Openrouter example +# LLM_MODEL=google/gemini-2.5-flash +# LLM_BINDING_HOST=https://openrouter.ai/api/v1 +# LLM_BINDING_API_KEY=your_api_key +# LLM_BINDING=openai + ### Most Commont Parameters for Ollama Server ### Time out in seconds, None for infinite timeout TIMEOUT=240 @@ -132,14 +142,11 @@ OLLAMA_LLM_NUM_CTX=32768 # OLLAMA_LLM_TEMPERATURE=0.85 ### see also env.ollama-binding-options.example for fine tuning ollama -### Optional for Azure -# AZURE_OPENAI_API_VERSION=2024-08-01-preview -# AZURE_OPENAI_DEPLOYMENT=gpt-4o #################################################################################### ### Embedding Configuration (Should not be changed after the first file processed) #################################################################################### -### Embedding Binding type: openai, ollama, lollms, azure_openai, jina +### Embedding Binding type: ollama, openai, azure_openai, jina, lollms ### see also env.ollama-binding-options.example for fine tuning ollama EMBEDDING_BINDING=ollama @@ -149,7 +156,7 @@ EMBEDDING_BINDING_API_KEY=your_api_key # If the embedding service is deployed within the same Docker stack, use 
host.docker.internal instead of localhost EMBEDDING_BINDING_HOST=http://localhost:11434 -### OpenAI compatible +### OpenAI compatible (VoyageAI embedding openai compatible) # EMBEDDING_BINDING=openai # EMBEDDING_MODEL=text-embedding-3-large # EMBEDDING_DIM=3072 diff --git a/lightrag/llm/jina.py b/lightrag/llm/jina.py index 5a1b59fb..af26f1bd 100644 --- a/lightrag/llm/jina.py +++ b/lightrag/llm/jina.py @@ -8,6 +8,7 @@ if not pm.is_installed("tenacity"): pm.install("tenacity") import numpy as np +import base64 import aiohttp from tenacity import ( retry, @@ -82,6 +83,7 @@ async def jina_embed( "model": "jina-embeddings-v4", "task": "text-matching", "dimensions": dimensions, + "embedding_type": "base64", "input": texts, } @@ -108,7 +110,12 @@ async def jina_embed( f"Jina API returned {len(data_list)} embeddings for {len(texts)} texts" ) - embeddings = np.array([dp["embedding"] for dp in data_list]) + embeddings = np.array( + [ + np.frombuffer(base64.b64decode(dp["embedding"]), dtype=np.float32) + for dp in data_list + ] + ) logger.debug(f"Jina embeddings generated: shape {embeddings.shape}") return embeddings diff --git a/lightrag/llm/openai.py b/lightrag/llm/openai.py index 981669b6..5a52e767 100644 --- a/lightrag/llm/openai.py +++ b/lightrag/llm/openai.py @@ -34,6 +34,7 @@ from lightrag.types import GPTKeywordExtractionFormat from lightrag.api import __api_version__ import numpy as np +import base64 from typing import Any, Union from dotenv import load_dotenv @@ -472,6 +473,11 @@ async def openai_embed( async with openai_async_client: response = await openai_async_client.embeddings.create( - model=model, input=texts, encoding_format="float" + model=model, input=texts, encoding_format="base64" + ) + return np.array( + [ + np.frombuffer(base64.b64decode(dp.embedding), dtype=np.float32) + for dp in response.data + ] ) - return np.array([dp.embedding for dp in response.data]) From 6ff25210ea90b4f9c3309188a7fd4181b3c3eaf2 Mon Sep 17 00:00:00 2001 From: yangdx Date: Tue, 
5 Aug 2025 11:46:02 +0800 Subject: [PATCH 2/2] feat: improve Jina API error handling to show clean messages instead of HTML --- lightrag/llm/jina.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/lightrag/llm/jina.py b/lightrag/llm/jina.py index af26f1bd..5077ee25 100644 --- a/lightrag/llm/jina.py +++ b/lightrag/llm/jina.py @@ -24,12 +24,34 @@ async def fetch_data(url, headers, data): async with session.post(url, headers=headers, json=data) as response: if response.status != 200: error_text = await response.text() - logger.error(f"Jina API error {response.status}: {error_text}") + + # Check if the error response is HTML (common for 502, 503, etc.) + content_type = response.headers.get("content-type", "").lower() + is_html_error = ( + error_text.strip().startswith("<!DOCTYPE html>") + or "text/html" in content_type + ) + + if is_html_error: + # Provide clean, user-friendly error messages for HTML error pages + if response.status == 502: + clean_error = "Bad Gateway (502) - Jina AI service temporarily unavailable. Please try again in a few minutes." + elif response.status == 503: + clean_error = "Service Unavailable (503) - Jina AI service is temporarily overloaded. Please try again later." + elif response.status == 504: + clean_error = "Gateway Timeout (504) - Jina AI service request timed out. Please try again." + else: + clean_error = f"HTTP {response.status} - Jina AI service error. Please try again later." + else: + # Use original error text if it's not HTML + clean_error = error_text + + logger.error(f"Jina API error {response.status}: {clean_error}") raise aiohttp.ClientResponseError( request_info=response.request_info, history=response.history, status=response.status, - message=f"Jina API error: {error_text}", + message=f"Jina API error: {clean_error}", ) response_json = await response.json() data_list = response_json.get("data", [])