Merge pull request #1913 from danielaskdd/fix-base64-for-embedding
Feat: Change embedding formats from float to base64 for efficiency
This commit is contained in:
commit
793e82ae89
3 changed files with 52 additions and 10 deletions
17
env.example
17
env.example
|
|
@ -121,6 +121,16 @@ LLM_MODEL=gpt-4o
|
||||||
LLM_BINDING_HOST=https://api.openai.com/v1
|
LLM_BINDING_HOST=https://api.openai.com/v1
|
||||||
LLM_BINDING_API_KEY=your_api_key
|
LLM_BINDING_API_KEY=your_api_key
|
||||||
|
|
||||||
|
### Optional for Azure
|
||||||
|
# AZURE_OPENAI_API_VERSION=2024-08-01-preview
|
||||||
|
# AZURE_OPENAI_DEPLOYMENT=gpt-4o
|
||||||
|
|
||||||
|
### Openrouter example
|
||||||
|
# LLM_MODEL=google/gemini-2.5-flash
|
||||||
|
# LLM_BINDING_HOST=https://openrouter.ai/api/v1
|
||||||
|
# LLM_BINDING_API_KEY=your_api_key
|
||||||
|
# LLM_BINDING=openai
|
||||||
|
|
||||||
### Most Common Parameters for Ollama Server
|
### Most Common Parameters for Ollama Server
|
||||||
### Time out in seconds, None for infinite timeout
|
### Time out in seconds, None for infinite timeout
|
||||||
TIMEOUT=240
|
TIMEOUT=240
|
||||||
|
|
@ -132,14 +142,11 @@ OLLAMA_LLM_NUM_CTX=32768
|
||||||
# OLLAMA_LLM_TEMPERATURE=0.85
|
# OLLAMA_LLM_TEMPERATURE=0.85
|
||||||
### see also env.ollama-binding-options.example for fine tuning ollama
|
### see also env.ollama-binding-options.example for fine tuning ollama
|
||||||
|
|
||||||
### Optional for Azure
|
|
||||||
# AZURE_OPENAI_API_VERSION=2024-08-01-preview
|
|
||||||
# AZURE_OPENAI_DEPLOYMENT=gpt-4o
|
|
||||||
|
|
||||||
####################################################################################
|
####################################################################################
|
||||||
### Embedding Configuration (Should not be changed after the first file processed)
|
### Embedding Configuration (Should not be changed after the first file processed)
|
||||||
####################################################################################
|
####################################################################################
|
||||||
### Embedding Binding type: openai, ollama, lollms, azure_openai, jina
|
### Embedding Binding type: ollama, openai, azure_openai, jina, lollms
|
||||||
|
|
||||||
### see also env.ollama-binding-options.example for fine tuning ollama
|
### see also env.ollama-binding-options.example for fine tuning ollama
|
||||||
EMBEDDING_BINDING=ollama
|
EMBEDDING_BINDING=ollama
|
||||||
|
|
@ -149,7 +156,7 @@ EMBEDDING_BINDING_API_KEY=your_api_key
|
||||||
# If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
|
# If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
|
||||||
EMBEDDING_BINDING_HOST=http://localhost:11434
|
EMBEDDING_BINDING_HOST=http://localhost:11434
|
||||||
|
|
||||||
### OpenAI compatible
|
### OpenAI compatible (VoyageAI embeddings are also OpenAI-compatible)
|
||||||
# EMBEDDING_BINDING=openai
|
# EMBEDDING_BINDING=openai
|
||||||
# EMBEDDING_MODEL=text-embedding-3-large
|
# EMBEDDING_MODEL=text-embedding-3-large
|
||||||
# EMBEDDING_DIM=3072
|
# EMBEDDING_DIM=3072
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ if not pm.is_installed("tenacity"):
|
||||||
pm.install("tenacity")
|
pm.install("tenacity")
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import base64
|
||||||
import aiohttp
|
import aiohttp
|
||||||
from tenacity import (
|
from tenacity import (
|
||||||
retry,
|
retry,
|
||||||
|
|
@ -23,12 +24,34 @@ async def fetch_data(url, headers, data):
|
||||||
async with session.post(url, headers=headers, json=data) as response:
|
async with session.post(url, headers=headers, json=data) as response:
|
||||||
if response.status != 200:
|
if response.status != 200:
|
||||||
error_text = await response.text()
|
error_text = await response.text()
|
||||||
logger.error(f"Jina API error {response.status}: {error_text}")
|
|
||||||
|
# Check if the error response is HTML (common for 502, 503, etc.)
|
||||||
|
content_type = response.headers.get("content-type", "").lower()
|
||||||
|
is_html_error = (
|
||||||
|
error_text.strip().startswith("<!DOCTYPE html>")
|
||||||
|
or "text/html" in content_type
|
||||||
|
)
|
||||||
|
|
||||||
|
if is_html_error:
|
||||||
|
# Provide clean, user-friendly error messages for HTML error pages
|
||||||
|
if response.status == 502:
|
||||||
|
clean_error = "Bad Gateway (502) - Jina AI service temporarily unavailable. Please try again in a few minutes."
|
||||||
|
elif response.status == 503:
|
||||||
|
clean_error = "Service Unavailable (503) - Jina AI service is temporarily overloaded. Please try again later."
|
||||||
|
elif response.status == 504:
|
||||||
|
clean_error = "Gateway Timeout (504) - Jina AI service request timed out. Please try again."
|
||||||
|
else:
|
||||||
|
clean_error = f"HTTP {response.status} - Jina AI service error. Please try again later."
|
||||||
|
else:
|
||||||
|
# Use original error text if it's not HTML
|
||||||
|
clean_error = error_text
|
||||||
|
|
||||||
|
logger.error(f"Jina API error {response.status}: {clean_error}")
|
||||||
raise aiohttp.ClientResponseError(
|
raise aiohttp.ClientResponseError(
|
||||||
request_info=response.request_info,
|
request_info=response.request_info,
|
||||||
history=response.history,
|
history=response.history,
|
||||||
status=response.status,
|
status=response.status,
|
||||||
message=f"Jina API error: {error_text}",
|
message=f"Jina API error: {clean_error}",
|
||||||
)
|
)
|
||||||
response_json = await response.json()
|
response_json = await response.json()
|
||||||
data_list = response_json.get("data", [])
|
data_list = response_json.get("data", [])
|
||||||
|
|
@ -82,6 +105,7 @@ async def jina_embed(
|
||||||
"model": "jina-embeddings-v4",
|
"model": "jina-embeddings-v4",
|
||||||
"task": "text-matching",
|
"task": "text-matching",
|
||||||
"dimensions": dimensions,
|
"dimensions": dimensions,
|
||||||
|
"embedding_type": "base64",
|
||||||
"input": texts,
|
"input": texts,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -108,7 +132,12 @@ async def jina_embed(
|
||||||
f"Jina API returned {len(data_list)} embeddings for {len(texts)} texts"
|
f"Jina API returned {len(data_list)} embeddings for {len(texts)} texts"
|
||||||
)
|
)
|
||||||
|
|
||||||
embeddings = np.array([dp["embedding"] for dp in data_list])
|
embeddings = np.array(
|
||||||
|
[
|
||||||
|
np.frombuffer(base64.b64decode(dp["embedding"]), dtype=np.float32)
|
||||||
|
for dp in data_list
|
||||||
|
]
|
||||||
|
)
|
||||||
logger.debug(f"Jina embeddings generated: shape {embeddings.shape}")
|
logger.debug(f"Jina embeddings generated: shape {embeddings.shape}")
|
||||||
|
|
||||||
return embeddings
|
return embeddings
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,7 @@ from lightrag.types import GPTKeywordExtractionFormat
|
||||||
from lightrag.api import __api_version__
|
from lightrag.api import __api_version__
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import base64
|
||||||
from typing import Any, Union
|
from typing import Any, Union
|
||||||
|
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
@ -472,6 +473,11 @@ async def openai_embed(
|
||||||
|
|
||||||
async with openai_async_client:
|
async with openai_async_client:
|
||||||
response = await openai_async_client.embeddings.create(
|
response = await openai_async_client.embeddings.create(
|
||||||
model=model, input=texts, encoding_format="float"
|
model=model, input=texts, encoding_format="base64"
|
||||||
|
)
|
||||||
|
return np.array(
|
||||||
|
[
|
||||||
|
np.frombuffer(base64.b64decode(dp.embedding), dtype=np.float32)
|
||||||
|
for dp in response.data
|
||||||
|
]
|
||||||
)
|
)
|
||||||
return np.array([dp.embedding for dp in response.data])
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue