Merge 2752b01f12 into 9562a974d2

2025-12-12 06:38:16 -08:00 · 2025-12-12 06:38:16 -08:00 · 9fd0bf4de5
commit 9fd0bf4de5
parent 9562a974d2 2752b01f12
4 changed files with 195 additions and 110 deletions
--- a/lightrag/api/config.py
+++ b/lightrag/api/config.py
@ -247,6 +247,7 @@ def parse_args() -> argparse.Namespace:
            "aws_bedrock",
            "jina",
            "gemini",
            "voyageai",
        ],
        help="Embedding binding type (default: from env or ollama)",
    )
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@ -319,8 +319,9 @@ def create_app(args):
        "aws_bedrock",
        "jina",
        "gemini",
        "voyageai",
    ]:
-        raise Exception("embedding binding not supported")
+        raise Exception(f"embedding binding '{args.embedding_binding}' not supported")
    # Set default hosts if not provided
    if args.llm_binding_host is None:
@ -701,7 +702,10 @@ def create_app(args):
                from lightrag.llm.lollms import lollms_embed
                provider_func = lollms_embed
            elif binding == "voyageai":
                from lightrag.llm.voyageai import voyageai_embed
                provider_func = voyageai_embed
            # Extract attributes if provider is an EmbeddingFunc
            if provider_func and isinstance(provider_func, EmbeddingFunc):
                provider_max_token_size = provider_func.max_token_size
@ -827,7 +831,6 @@ def create_app(args):
                        from lightrag.llm.binding_options import GeminiEmbeddingOptions
                        gemini_options = GeminiEmbeddingOptions.options_dict(args)
                    # Pass model only if provided, let function use its default (gemini-embedding-001)
                    kwargs = {
                        "texts": texts,
@ -841,6 +844,19 @@ def create_app(args):
                    if model:
                        kwargs["model"] = model
                    return await actual_func(**kwargs)
                elif binding == "voyageai":
                    from lightrag.llm.voyageai import voyageai_embed
                    actual_func = (
                        voyageai_embed.func
                        if isinstance(voyageai_embed, EmbeddingFunc)
                        else voyageai_embed
                    )
                    return await actual_func(
                        texts,
                        api_key=api_key,
                        embedding_dim=embedding_dim,
                    )
                else:  # openai and compatible
                    from lightrag.llm.openai import openai_embed
--- a/lightrag/llm/anthropic.py
+++ b/lightrag/llm/anthropic.py
@ -2,7 +2,6 @@ from ..utils import verbose_debug, VERBOSE_DEBUG
 import sys
 import os
 import logging
 import numpy as np
 from typing import Any, Union, AsyncIterator
 import pipmaster as pm  # Pipmaster for dynamic library install
@ -15,11 +14,6 @@ else:
 if not pm.is_installed("anthropic"):
    pm.install("anthropic")
 # Add Voyage AI import
 if not pm.is_installed("voyageai"):
    pm.install("voyageai")
 import voyageai
 from anthropic import (
    AsyncAnthropic,
    APIConnectionError,
@ -229,105 +223,3 @@ async def claude_3_haiku_complete(
        enable_cot=enable_cot,
        **kwargs,
    )
 # Embedding function (placeholder, as Anthropic does not provide embeddings)
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=60),
    retry=retry_if_exception_type(
        (RateLimitError, APIConnectionError, APITimeoutError)
    ),
 )
 async def anthropic_embed(
    texts: list[str],
    model: str = "voyage-3",  # Default to voyage-3 as a good general-purpose model
    base_url: str = None,
    api_key: str = None,
 ) -> np.ndarray:
    """
    Generate embeddings using Voyage AI since Anthropic doesn't provide native embedding support.
    Args:
        texts: List of text strings to embed
        model: Voyage AI model name (e.g., "voyage-3", "voyage-3-large", "voyage-code-3")
        base_url: Optional custom base URL (not used for Voyage AI)
        api_key: API key for Voyage AI (defaults to VOYAGE_API_KEY environment variable)
    Returns:
        numpy array of shape (len(texts), embedding_dimension) containing the embeddings
    """
    if not api_key:
        api_key = os.environ.get("VOYAGE_API_KEY")
        if not api_key:
            logger.error("VOYAGE_API_KEY environment variable not set")
            raise ValueError(
                "VOYAGE_API_KEY environment variable is required for embeddings"
            )
    try:
        # Initialize Voyage AI client
        voyage_client = voyageai.Client(api_key=api_key)
        # Get embeddings
        result = voyage_client.embed(
            texts,
            model=model,
            input_type="document",  # Assuming document context; could be made configurable
        )
        # Convert list of embeddings to numpy array
        embeddings = np.array(result.embeddings, dtype=np.float32)
        logger.debug(f"Generated embeddings for {len(texts)} texts using {model}")
        verbose_debug(f"Embedding shape: {embeddings.shape}")
        return embeddings
    except Exception as e:
        logger.error(f"Voyage AI embedding failed: {str(e)}")
        raise
 # Optional: a helper function to get available embedding models
 def get_available_embedding_models() -> dict[str, dict]:
    """
    Returns a dictionary of available Voyage AI embedding models and their properties.
    """
    return {
        "voyage-3-large": {
            "context_length": 32000,
            "dimension": 1024,
            "description": "Best general-purpose and multilingual",
        },
        "voyage-3": {
            "context_length": 32000,
            "dimension": 1024,
            "description": "General-purpose and multilingual",
        },
        "voyage-3-lite": {
            "context_length": 32000,
            "dimension": 512,
            "description": "Optimized for latency and cost",
        },
        "voyage-code-3": {
            "context_length": 32000,
            "dimension": 1024,
            "description": "Optimized for code",
        },
        "voyage-finance-2": {
            "context_length": 32000,
            "dimension": 1024,
            "description": "Optimized for finance",
        },
        "voyage-law-2": {
            "context_length": 16000,
            "dimension": 1024,
            "description": "Optimized for legal",
        },
        "voyage-multimodal-3": {
            "context_length": 32000,
            "dimension": 1024,
            "description": "Multimodal text and images",
        },
    }
--- a/lightrag/llm/voyageai.py
+++ b/lightrag/llm/voyageai.py
@ -0,0 +1,176 @@
 import os
 import numpy as np
 import pipmaster as pm  # Pipmaster for dynamic library install
 # Add Voyage AI import
 if not pm.is_installed("voyageai"):
    pm.install("voyageai")
 from voyageai.error import (
    RateLimitError,
    APIConnectionError,
 )
 from tenacity import (
    retry,
    stop_after_attempt,
    wait_exponential,
    retry_if_exception_type,
 )
 from lightrag.utils import wrap_embedding_func_with_attrs, logger
 # Custom exception for VoyageAI errors
 class VoyageAIError(Exception):
     """Error raised for a failed or malformed VoyageAI API response.

     ``voyageai_embed`` raises this when the API returns no embeddings or a
     number of embeddings that differs from the number of input texts.
     """
@wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=16000)
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=60),
    retry=retry_if_exception_type((RateLimitError, APIConnectionError)),
)
async def voyageai_embed(
    texts: list[str],
    model: str = "voyage-3",
    api_key: str | None = None,
    embedding_dim: int | None = None,
    input_type: str | None = None,
    truncation: bool | None = None,
) -> np.ndarray:
    """Generate embeddings for a list of texts using VoyageAI's API.

    The call is retried (up to 3 attempts, exponential backoff) when the
    client raises ``RateLimitError`` or ``APIConnectionError``.

    Args:
        texts: List of texts to embed.
        model: The VoyageAI embedding model to use. Options include:
            - "voyage-3": General purpose (1024 dims, 32K context)
            - "voyage-3-lite": Lightweight (512 dims, 32K context)
            - "voyage-3-large": Highest accuracy (1024 dims, 32K context)
            - "voyage-code-3": Code optimized (1024 dims, 32K context)
            - "voyage-law-2": Legal documents (1024 dims, 16K context)
            - "voyage-finance-2": Finance (1024 dims, 32K context)
        api_key: Optional VoyageAI API key. If empty, the VOYAGEAI_API_KEY
            environment variable is used, then VOYAGE_API_KEY as a fallback.
        embedding_dim: Optional output dimension, forwarded to the client as
            ``output_dimension``. Omitted from the request when None.
        input_type: Optional input type hint for the model. Options:
            - "query": For search queries
            - "document": For documents to be indexed
            - None: Let the model decide (default)
        truncation: Whether to truncate texts that exceed the token limit.
            Omitted from the request when None so the client default applies.

    Returns:
        A float32 numpy array of embeddings, one row per input text.

    Raises:
        ImportError: If the ``voyageai`` package is not installed.
        ValueError: If no API key is passed and none is set in the environment.
        VoyageAIError: If the API returns no embeddings or a number of
            embeddings different from ``len(texts)``.
    """
    try:
        import voyageai
    except ImportError as exc:
        raise ImportError(
            "voyageai package is required. Install it with: pip install voyageai"
        ) from exc

    logger.debug("Starting VoyageAI embedding generation.")

    # An explicitly passed key wins; otherwise fall back to the environment.
    # VOYAGE_API_KEY is accepted as well so setups that configured only that
    # name keep working.
    if not api_key:
        api_key = os.environ.get("VOYAGEAI_API_KEY") or os.environ.get(
            "VOYAGE_API_KEY"
        )
        if not api_key:
            logger.error(
                "VOYAGEAI_API_KEY (or VOYAGE_API_KEY) environment variable not set"
            )
            raise ValueError(
                "VOYAGEAI_API_KEY (or VOYAGE_API_KEY) environment variable is "
                "required or pass api_key parameter"
            )

    # Size summary of the request, for debugging only.
    total_chars = sum(len(t) for t in texts)
    avg_chars = total_chars / len(texts) if texts else 0
    logger.debug(
        f"VoyageAI embedding request: {len(texts)} texts, "
        f"total_chars={total_chars}, avg_chars={avg_chars:.0f}, model={model}"
    )

    # Only forward optional parameters the caller actually set, so the
    # client's own defaults apply instead of an explicit None.
    embed_params: dict = {"texts": texts, "model": model}
    optional_params = {
        "output_dimension": embedding_dim,
        "truncation": truncation,
        "input_type": input_type,
    }
    embed_params.update(
        {key: value for key, value in optional_params.items() if value is not None}
    )

    try:
        client = voyageai.AsyncClient(api_key=api_key)
        result = await client.embed(**embed_params)

        if not result.embeddings:
            err_msg = "VoyageAI API returned empty embeddings"
            logger.error(err_msg)
            raise VoyageAIError(err_msg)

        if len(result.embeddings) != len(texts):
            err_msg = f"VoyageAI API returned {len(result.embeddings)} embeddings for {len(texts)} texts"
            logger.error(err_msg)
            raise VoyageAIError(err_msg)

        embeddings = np.array(result.embeddings, dtype=np.float32)
        logger.debug(f"VoyageAI embeddings generated: shape {embeddings.shape}")
        return embeddings
    except VoyageAIError:
        # Already logged at the raise site; avoid a duplicate error entry.
        raise
    except Exception as e:
        logger.error(f"VoyageAI embedding error: {e}")
        raise
 # Optional: a helper function to get available embedding models
 def get_available_embedding_models() -> dict[str, dict]:
     """Return a catalog of Voyage AI embedding models.

     Returns:
         A new dict on every call, keyed by model name. Each value is a dict
         with the keys ``context_length``, ``dimension`` and ``description``.
     """
     # One row per model: (name, context length, embedding dimension, description)
     catalog = (
         ("voyage-3-large", 32000, 1024, "Best general-purpose and multilingual"),
         ("voyage-3", 32000, 1024, "General-purpose and multilingual"),
         ("voyage-3-lite", 32000, 512, "Optimized for latency and cost"),
         ("voyage-code-3", 32000, 1024, "Optimized for code"),
         ("voyage-finance-2", 32000, 1024, "Optimized for finance"),
         ("voyage-law-2", 16000, 1024, "Optimized for legal"),
         ("voyage-multimodal-3", 32000, 1024, "Multimodal text and images"),
     )
     return {
         name: {
             "context_length": context_length,
             "dimension": dimension,
             "description": description,
         }
         for name, context_length, dimension, description in catalog
     }