chore/support-voyageai-embed-directly: feat: voyageai embed support
This commit is contained in:
parent
46ce6d9a13
commit
45700cdf76
4 changed files with 199 additions and 111 deletions
|
|
@ -316,8 +316,9 @@ def create_app(args):
|
|||
"aws_bedrock",
|
||||
"jina",
|
||||
"gemini",
|
||||
"voyageai",
|
||||
]:
|
||||
raise Exception("embedding binding not supported")
|
||||
raise Exception(f"embedding binding '{args.embedding_binding}' not supported")
|
||||
|
||||
# Set default hosts if not provided
|
||||
if args.llm_binding_host is None:
|
||||
|
|
@ -687,7 +688,10 @@ def create_app(args):
|
|||
from lightrag.llm.lollms import lollms_embed
|
||||
|
||||
provider_func = lollms_embed
|
||||
elif binding == "voyageai":
|
||||
from lightrag.llm.voyageai import voyageai_embed
|
||||
|
||||
provider_func = voyageai_embed
|
||||
# Extract attributes if provider is an EmbeddingFunc
|
||||
if provider_func and isinstance(provider_func, EmbeddingFunc):
|
||||
provider_max_token_size = provider_func.max_token_size
|
||||
|
|
@ -806,6 +810,20 @@ def create_app(args):
|
|||
embedding_dim=embedding_dim,
|
||||
task_type=gemini_options.get("task_type", "RETRIEVAL_DOCUMENT"),
|
||||
)
|
||||
elif binding == "voyageai":
|
||||
from lightrag.llm.voyageai import voyageai_embed
|
||||
|
||||
actual_func = (
|
||||
voyageai_embed.func
|
||||
if isinstance(voyageai_embed, EmbeddingFunc)
|
||||
else voyageai_embed
|
||||
)
|
||||
return await actual_func(
|
||||
texts,
|
||||
model=model,
|
||||
api_key=api_key,
|
||||
embedding_dim=embedding_dim,
|
||||
)
|
||||
else: # openai and compatible
|
||||
from lightrag.llm.openai import openai_embed
|
||||
|
||||
|
|
@ -817,7 +835,6 @@ def create_app(args):
|
|||
return await actual_func(
|
||||
texts,
|
||||
model=model,
|
||||
base_url=host,
|
||||
api_key=api_key,
|
||||
embedding_dim=embedding_dim,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@ from ..utils import verbose_debug, VERBOSE_DEBUG
|
|||
import sys
|
||||
import os
|
||||
import logging
|
||||
import numpy as np
|
||||
from typing import Any, Union, AsyncIterator
|
||||
import pipmaster as pm # Pipmaster for dynamic library install
|
||||
|
||||
|
|
@ -15,11 +14,6 @@ else:
|
|||
if not pm.is_installed("anthropic"):
|
||||
pm.install("anthropic")
|
||||
|
||||
# Add Voyage AI import
|
||||
if not pm.is_installed("voyageai"):
|
||||
pm.install("voyageai")
|
||||
import voyageai
|
||||
|
||||
from anthropic import (
|
||||
AsyncAnthropic,
|
||||
APIConnectionError,
|
||||
|
|
@ -230,104 +224,3 @@ async def claude_3_haiku_complete(
|
|||
**kwargs,
|
||||
)
|
||||
|
||||
|
||||
# Embedding function (placeholder, as Anthropic does not provide embeddings)
|
||||
# NOTE(review): the retry exception types below appear to come from the
# `anthropic` import at the top of this module; errors raised by the Voyage AI
# client may not match them, in which case no retry happens — confirm.
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=60),
    retry=retry_if_exception_type(
        (RateLimitError, APIConnectionError, APITimeoutError)
    ),
)
async def anthropic_embed(
    texts: list[str],
    model: str = "voyage-3",  # Default to voyage-3 as a good general-purpose model
    base_url: str = None,
    api_key: str = None,
) -> np.ndarray:
    """
    Generate embeddings using Voyage AI since Anthropic doesn't provide native embedding support.

    Args:
        texts: List of text strings to embed
        model: Voyage AI model name (e.g., "voyage-3", "voyage-3-large", "voyage-code-3")
        base_url: Optional custom base URL (not used for Voyage AI; accepted only so the
            signature stays compatible with existing callers)
        api_key: API key for Voyage AI (defaults to VOYAGE_API_KEY environment variable)

    Returns:
        numpy array of dtype float32 built from the embeddings returned by the API,
        one row per input text

    Raises:
        ValueError: If no API key is passed and VOYAGE_API_KEY is not set.
        Exception: Any error raised by the Voyage AI client is logged and re-raised.
    """
    if not api_key:
        api_key = os.environ.get("VOYAGE_API_KEY")
        if not api_key:
            logger.error("VOYAGE_API_KEY environment variable not set")
            raise ValueError(
                "VOYAGE_API_KEY environment variable is required for embeddings"
            )

    try:
        # Use the async client: the synchronous client would block the event
        # loop for the whole duration of the HTTP request inside this coroutine.
        voyage_client = voyageai.AsyncClient(api_key=api_key)

        # Get embeddings
        result = await voyage_client.embed(
            texts,
            model=model,
            input_type="document",  # Assuming document context; could be made configurable
        )

        # Convert list of embeddings to numpy array
        embeddings = np.array(result.embeddings, dtype=np.float32)

        logger.debug(f"Generated embeddings for {len(texts)} texts using {model}")
        verbose_debug(f"Embedding shape: {embeddings.shape}")

        return embeddings

    except Exception as e:
        # Log at the boundary, then let the caller (and the retry wrapper) see it.
        logger.error(f"Voyage AI embedding failed: {str(e)}")
        raise
|
||||
|
||||
|
||||
# Optional: a helper function to get available embedding models
|
||||
def get_available_embedding_models() -> dict[str, dict]:
    """
    Describe the Voyage AI embedding models known to this module.

    Returns:
        A newly built dictionary keyed by model name. Each value is a dictionary
        with the keys "context_length", "dimension" and "description".
    """
    # One row per model: (name, context_length, dimension, description).
    # Row order is the order of the keys in the returned dictionary.
    catalog = (
        ("voyage-3-large", 32000, 1024, "Best general-purpose and multilingual"),
        ("voyage-3", 32000, 1024, "General-purpose and multilingual"),
        ("voyage-3-lite", 32000, 512, "Optimized for latency and cost"),
        ("voyage-code-3", 32000, 1024, "Optimized for code"),
        ("voyage-finance-2", 32000, 1024, "Optimized for finance"),
        ("voyage-law-2", 16000, 1024, "Optimized for legal"),
        ("voyage-multimodal-3", 32000, 1024, "Multimodal text and images"),
    )
    return {
        name: {
            "context_length": context_length,
            "dimension": dimension,
            "description": description,
        }
        for name, context_length, dimension, description in catalog
    }
|
||||
|
|
|
|||
176
lightrag/llm/voyageai.py
Normal file
176
lightrag/llm/voyageai.py
Normal file
|
|
@ -0,0 +1,176 @@
|
|||
import os
|
||||
import numpy as np
|
||||
import pipmaster as pm # Pipmaster for dynamic library install
|
||||
|
||||
# Add Voyage AI import
|
||||
if not pm.is_installed("voyageai"):
|
||||
pm.install("voyageai")
|
||||
|
||||
from voyageai.error import (
|
||||
RateLimitError,
|
||||
APIConnectionError,
|
||||
)
|
||||
|
||||
from tenacity import (
|
||||
retry,
|
||||
stop_after_attempt,
|
||||
wait_exponential,
|
||||
retry_if_exception_type,
|
||||
)
|
||||
from lightrag.utils import wrap_embedding_func_with_attrs, logger
|
||||
|
||||
|
||||
# Custom exceptions for VoyageAI errors
|
||||
class VoyageAIError(Exception):
    """Generic VoyageAI API error.

    Raised by this module when a VoyageAI embedding response is empty or does
    not contain exactly one embedding per input text.
    """
|
||||
|
||||
|
||||
@wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=16000)
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=60),
    retry=retry_if_exception_type((RateLimitError, APIConnectionError)),
)
async def voyageai_embed(
    texts: list[str],
    model: str = "voyage-3",
    api_key: str | None = None,
    embedding_dim: int | None = None,
    input_type: str | None = None,
    truncation: bool | None = None,
) -> np.ndarray:
    """Generate embeddings for a list of texts using VoyageAI's API.

    Args:
        texts: List of texts to embed.
        model: The VoyageAI embedding model to use. Options include:
            - "voyage-3": General purpose (1024 dims, 32K context)
            - "voyage-3-lite": Lightweight (512 dims, 32K context)
            - "voyage-3-large": Highest accuracy (1024 dims, 32K context)
            - "voyage-code-3": Code optimized (1024 dims, 32K context)
            - "voyage-law-2": Legal documents (1024 dims, 16K context)
            - "voyage-finance-2": Finance (1024 dims, 32K context)
        api_key: Optional VoyageAI API key. If not given, the VOYAGEAI_API_KEY
            environment variable is used, then VOYAGE_API_KEY as a fallback.
        embedding_dim: Optional output dimension, forwarded to the client as
            ``output_dimension``. Omitted from the request when None.
        input_type: Optional input type hint for the model. Options:
            - "query": For search queries
            - "document": For documents to be indexed
            - None: Not sent; the client default applies (default)
        truncation: Whether to truncate texts that exceed token limit. Omitted
            from the request when None so the client default applies.

    Returns:
        A float32 numpy array of embeddings, one row per input text.

    Raises:
        ImportError: If the voyageai package cannot be imported.
        ValueError: If no API key is passed and none is found in the environment.
        VoyageAIError: If the API returns no embeddings, or a number of
            embeddings different from the number of input texts.
    """
    try:
        import voyageai
    except ImportError as exc:
        raise ImportError(
            "voyageai package is required. Install it with: pip install voyageai"
        ) from exc

    logger.debug("Starting VoyageAI embedding generation.")

    # Resolve the API key: explicit argument first, then the environment.
    # VOYAGE_API_KEY is accepted as well because the earlier Voyage AI embedding
    # helper in this project read that variable.
    if not api_key:
        api_key = os.environ.get("VOYAGEAI_API_KEY") or os.environ.get(
            "VOYAGE_API_KEY"
        )
    if not api_key:
        logger.error("VOYAGEAI_API_KEY environment variable not set")
        raise ValueError(
            "VOYAGEAI_API_KEY (or VOYAGE_API_KEY) environment variable is required "
            "or pass api_key parameter"
        )

    # Character counts are for debugging only; guard against an empty list.
    total_chars = sum(len(t) for t in texts)
    avg_chars = total_chars / len(texts) if texts else 0
    logger.debug(
        f"VoyageAI embedding request: {len(texts)} texts, "
        f"total_chars={total_chars}, avg_chars={avg_chars:.0f}, model={model}"
    )

    # Only forward optional parameters the caller actually set, so the client's
    # own defaults apply instead of an explicit None being sent.
    optional_params = {
        "output_dimension": embedding_dim,
        "truncation": truncation,
        "input_type": input_type,
    }
    embed_params = {"texts": texts, "model": model}
    embed_params.update(
        {key: value for key, value in optional_params.items() if value is not None}
    )

    try:
        # Create async client and make the API call
        client = voyageai.AsyncClient(api_key=api_key)
        result = await client.embed(**embed_params)
    except Exception as e:
        # Log and re-raise unchanged so the retry decorator can inspect the type.
        logger.error(f"VoyageAI embedding error: {e}")
        raise

    if not result.embeddings:
        err_msg = "VoyageAI API returned empty embeddings"
        logger.error(err_msg)
        raise VoyageAIError(err_msg)

    if len(result.embeddings) != len(texts):
        err_msg = f"VoyageAI API returned {len(result.embeddings)} embeddings for {len(texts)} texts"
        logger.error(err_msg)
        raise VoyageAIError(err_msg)

    # Convert to numpy array
    embeddings = np.array(result.embeddings, dtype=np.float32)
    logger.debug(f"VoyageAI embeddings generated: shape {embeddings.shape}")

    return embeddings
|
||||
|
||||
|
||||
# Optional: a helper function to get available embedding models
|
||||
def get_available_embedding_models() -> dict[str, dict]:
    """
    Return the Voyage AI embedding models listed by this module with their properties.

    Returns:
        A newly built dictionary mapping each model name to a dictionary with
        the keys "context_length", "dimension" and "description".
    """
    property_names = ("context_length", "dimension", "description")
    # Values are given in the same order as ``property_names``.
    properties_by_model = {
        "voyage-3-large": (32000, 1024, "Best general-purpose and multilingual"),
        "voyage-3": (32000, 1024, "General-purpose and multilingual"),
        "voyage-3-lite": (32000, 512, "Optimized for latency and cost"),
        "voyage-code-3": (32000, 1024, "Optimized for code"),
        "voyage-finance-2": (32000, 1024, "Optimized for finance"),
        "voyage-law-2": (16000, 1024, "Optimized for legal"),
        "voyage-multimodal-3": (32000, 1024, "Multimodal text and images"),
    }
    return {
        model_name: dict(zip(property_names, values))
        for model_name, values in properties_by_model.items()
    }
|
||||
6
uv.lock
generated
6
uv.lock
generated
|
|
@ -1,5 +1,5 @@
|
|||
version = 1
|
||||
revision = 3
|
||||
revision = 2
|
||||
requires-python = ">=3.10"
|
||||
resolution-markers = [
|
||||
"python_full_version >= '3.14' and python_full_version < '4' and platform_machine == 'x86_64' and sys_platform == 'darwin'",
|
||||
|
|
@ -2735,7 +2735,6 @@ requires-dist = [
|
|||
{ name = "json-repair", marker = "extra == 'api'" },
|
||||
{ name = "langfuse", marker = "extra == 'observability'", specifier = ">=3.8.1" },
|
||||
{ name = "lightrag-hku", extras = ["api", "offline-llm", "offline-storage"], marker = "extra == 'offline'" },
|
||||
{ name = "lightrag-hku", extras = ["pytest"], marker = "extra == 'evaluation'" },
|
||||
{ name = "llama-index", marker = "extra == 'offline-llm'", specifier = ">=0.9.0,<1.0.0" },
|
||||
{ name = "nano-vectordb" },
|
||||
{ name = "nano-vectordb", marker = "extra == 'api'" },
|
||||
|
|
@ -2753,6 +2752,7 @@ requires-dist = [
|
|||
{ name = "passlib", extras = ["bcrypt"], marker = "extra == 'api'" },
|
||||
{ name = "pipmaster" },
|
||||
{ name = "pipmaster", marker = "extra == 'api'" },
|
||||
{ name = "pre-commit", marker = "extra == 'evaluation'" },
|
||||
{ name = "pre-commit", marker = "extra == 'pytest'" },
|
||||
{ name = "psutil", marker = "extra == 'api'" },
|
||||
{ name = "pycryptodome", marker = "extra == 'api'", specifier = ">=3.0.0,<4.0.0" },
|
||||
|
|
@ -2764,7 +2764,9 @@ requires-dist = [
|
|||
{ name = "pypdf", marker = "extra == 'api'", specifier = ">=6.1.0" },
|
||||
{ name = "pypinyin" },
|
||||
{ name = "pypinyin", marker = "extra == 'api'" },
|
||||
{ name = "pytest", marker = "extra == 'evaluation'", specifier = ">=8.4.2" },
|
||||
{ name = "pytest", marker = "extra == 'pytest'", specifier = ">=8.4.2" },
|
||||
{ name = "pytest-asyncio", marker = "extra == 'evaluation'", specifier = ">=1.2.0" },
|
||||
{ name = "pytest-asyncio", marker = "extra == 'pytest'", specifier = ">=1.2.0" },
|
||||
{ name = "python-docx", marker = "extra == 'api'", specifier = ">=0.8.11,<2.0.0" },
|
||||
{ name = "python-dotenv" },
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue