From c5babf61d72c4e3f681fda85761ef98d8f789297 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Tue, 5 Aug 2025 11:38:40 +0800
Subject: [PATCH] Feat: Change embedding formats from float to base64 for
 efficiency

- Add base64 support for Jina embeddings
- Add base64 support for OpenAI embeddings
- Update env.example with new embedding options
---
 env.example            | 17 ++++++++++++-----
 lightrag/llm/jina.py   |  9 ++++++++-
 lightrag/llm/openai.py | 10 ++++++++--
 3 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/env.example b/env.example
index 3967e00e..fae8122a 100644
--- a/env.example
+++ b/env.example
@@ -121,6 +121,16 @@ LLM_MODEL=gpt-4o
 LLM_BINDING_HOST=https://api.openai.com/v1
 LLM_BINDING_API_KEY=your_api_key
 
+### Optional for Azure
+# AZURE_OPENAI_API_VERSION=2024-08-01-preview
+# AZURE_OPENAI_DEPLOYMENT=gpt-4o
+
+### OpenRouter example
+# LLM_MODEL=google/gemini-2.5-flash
+# LLM_BINDING_HOST=https://openrouter.ai/api/v1
+# LLM_BINDING_API_KEY=your_api_key
+# LLM_BINDING=openai
+
 ### Most Commont Parameters for Ollama Server
 ### Time out in seconds, None for infinite timeout
 TIMEOUT=240
@@ -132,14 +142,11 @@ OLLAMA_LLM_NUM_CTX=32768
 # OLLAMA_LLM_TEMPERATURE=0.85
 ### see also env.ollama-binding-options.example for fine tuning ollama
 
-### Optional for Azure
-# AZURE_OPENAI_API_VERSION=2024-08-01-preview
-# AZURE_OPENAI_DEPLOYMENT=gpt-4o
 
 ####################################################################################
 ### Embedding Configuration (Should not be changed after the first file processed)
 ####################################################################################
-### Embedding Binding type: openai, ollama, lollms, azure_openai, jina
+### Embedding Binding type: ollama, openai, azure_openai, jina, lollms
 ### see also env.ollama-binding-options.example for fine tuning ollama
 EMBEDDING_BINDING=ollama
 
@@ -149,7 +156,7 @@ EMBEDDING_BINDING_API_KEY=your_api_key
 
 # If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
 EMBEDDING_BINDING_HOST=http://localhost:11434
-### OpenAI compatible
+### OpenAI compatible (VoyageAI embedding is OpenAI compatible)
 # EMBEDDING_BINDING=openai
 # EMBEDDING_MODEL=text-embedding-3-large
 # EMBEDDING_DIM=3072
diff --git a/lightrag/llm/jina.py b/lightrag/llm/jina.py
index 5a1b59fb..af26f1bd 100644
--- a/lightrag/llm/jina.py
+++ b/lightrag/llm/jina.py
@@ -8,6 +8,7 @@ if not pm.is_installed("tenacity"):
     pm.install("tenacity")
 
 import numpy as np
+import base64
 import aiohttp
 from tenacity import (
     retry,
@@ -82,6 +83,7 @@ async def jina_embed(
         "model": "jina-embeddings-v4",
         "task": "text-matching",
         "dimensions": dimensions,
+        "embedding_type": "base64",
         "input": texts,
     }
 
@@ -108,7 +110,12 @@ async def jina_embed(
             f"Jina API returned {len(data_list)} embeddings for {len(texts)} texts"
         )
 
-    embeddings = np.array([dp["embedding"] for dp in data_list])
+    embeddings = np.array(
+        [
+            np.frombuffer(base64.b64decode(dp["embedding"]), dtype=np.float32)
+            for dp in data_list
+        ]
+    )
 
     logger.debug(f"Jina embeddings generated: shape {embeddings.shape}")
     return embeddings
diff --git a/lightrag/llm/openai.py b/lightrag/llm/openai.py
index 981669b6..5a52e767 100644
--- a/lightrag/llm/openai.py
+++ b/lightrag/llm/openai.py
@@ -34,6 +34,7 @@ from lightrag.types import GPTKeywordExtractionFormat
 from lightrag.api import __api_version__
 
 import numpy as np
+import base64
 from typing import Any, Union
 from dotenv import load_dotenv
 
@@ -472,6 +473,11 @@ async def openai_embed(
 
     async with openai_async_client:
         response = await openai_async_client.embeddings.create(
-            model=model, input=texts, encoding_format="float"
+            model=model, input=texts, encoding_format="base64"
+        )
+        return np.array(
+            [
+                np.frombuffer(base64.b64decode(dp.embedding), dtype=np.float32)
+                for dp in response.data
+            ]
         )
-        return np.array([dp.embedding for dp in response.data])