chore/support-voyageai-embed-directly: feat: voyageai embed support

2025-12-04 16:18:44 -08:00 · 2025-12-04 16:18:44 -08:00 · 45700cdf76
commit 45700cdf76
parent 46ce6d9a13
4 changed files with 199 additions and 111 deletions
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@ -316,8 +316,9 @@ def create_app(args):
        "aws_bedrock",
        "jina",
        "gemini",
+        "voyageai",
    ]:
-        raise Exception("embedding binding not supported")
+        raise Exception(f"embedding binding '{args.embedding_binding}' not supported")

    # Set default hosts if not provided
    if args.llm_binding_host is None:
@ -687,7 +688,10 @@ def create_app(args):
                from lightrag.llm.lollms import lollms_embed

                provider_func = lollms_embed
+            elif binding == "voyageai":
+                from lightrag.llm.voyageai import voyageai_embed

+                provider_func = voyageai_embed
            # Extract attributes if provider is an EmbeddingFunc
            if provider_func and isinstance(provider_func, EmbeddingFunc):
                provider_max_token_size = provider_func.max_token_size
@ -806,6 +810,20 @@ def create_app(args):
                        embedding_dim=embedding_dim,
                        task_type=gemini_options.get("task_type", "RETRIEVAL_DOCUMENT"),
                    )
+                elif binding == "voyageai":
+                    from lightrag.llm.voyageai import voyageai_embed
+
+                    actual_func = (
+                        voyageai_embed.func
+                        if isinstance(voyageai_embed, EmbeddingFunc)
+                        else voyageai_embed
+                    )
+                    return await actual_func(
+                        texts,
+                        model=model,
+                        api_key=api_key,
+                        embedding_dim=embedding_dim,
+                    )
                else:  # openai and compatible
                    from lightrag.llm.openai import openai_embed

@ -817,7 +835,6 @@ def create_app(args):
                    return await actual_func(
                        texts,
                        model=model,
-                        base_url=host,
                        api_key=api_key,
                        embedding_dim=embedding_dim,
                    )
--- a/lightrag/llm/anthropic.py
+++ b/lightrag/llm/anthropic.py
@ -2,7 +2,6 @@ from ..utils import verbose_debug, VERBOSE_DEBUG
 import sys
 import os
 import logging
-import numpy as np
 from typing import Any, Union, AsyncIterator
 import pipmaster as pm  # Pipmaster for dynamic library install

@ -15,11 +14,6 @@ else:
 if not pm.is_installed("anthropic"):
    pm.install("anthropic")

-# Add Voyage AI import
-if not pm.is_installed("voyageai"):
-    pm.install("voyageai")
-import voyageai
-
 from anthropic import (
    AsyncAnthropic,
    APIConnectionError,
@ -230,104 +224,3 @@ async def claude_3_haiku_complete(
        **kwargs,
    )

-
-# Embedding function (placeholder, as Anthropic does not provide embeddings)
-@retry(
-    stop=stop_after_attempt(3),
-    wait=wait_exponential(multiplier=1, min=4, max=60),
-    retry=retry_if_exception_type(
-        (RateLimitError, APIConnectionError, APITimeoutError)
-    ),
-)
-async def anthropic_embed(
-    texts: list[str],
-    model: str = "voyage-3",  # Default to voyage-3 as a good general-purpose model
-    base_url: str = None,
-    api_key: str = None,
-) -> np.ndarray:
-    """
-    Generate embeddings using Voyage AI since Anthropic doesn't provide native embedding support.
-
-    Args:
-        texts: List of text strings to embed
-        model: Voyage AI model name (e.g., "voyage-3", "voyage-3-large", "voyage-code-3")
-        base_url: Optional custom base URL (not used for Voyage AI)
-        api_key: API key for Voyage AI (defaults to VOYAGE_API_KEY environment variable)
-
-    Returns:
-        numpy array of shape (len(texts), embedding_dimension) containing the embeddings
-    """
-    if not api_key:
-        api_key = os.environ.get("VOYAGE_API_KEY")
-        if not api_key:
-            logger.error("VOYAGE_API_KEY environment variable not set")
-            raise ValueError(
-                "VOYAGE_API_KEY environment variable is required for embeddings"
-            )
-
-    try:
-        # Initialize Voyage AI client
-        voyage_client = voyageai.Client(api_key=api_key)
-
-        # Get embeddings
-        result = voyage_client.embed(
-            texts,
-            model=model,
-            input_type="document",  # Assuming document context; could be made configurable
-        )
-
-        # Convert list of embeddings to numpy array
-        embeddings = np.array(result.embeddings, dtype=np.float32)
-
-        logger.debug(f"Generated embeddings for {len(texts)} texts using {model}")
-        verbose_debug(f"Embedding shape: {embeddings.shape}")
-
-        return embeddings
-
-    except Exception as e:
-        logger.error(f"Voyage AI embedding failed: {str(e)}")
-        raise
-
-
-# Optional: a helper function to get available embedding models
-def get_available_embedding_models() -> dict[str, dict]:
-    """
-    Returns a dictionary of available Voyage AI embedding models and their properties.
-    """
-    return {
-        "voyage-3-large": {
-            "context_length": 32000,
-            "dimension": 1024,
-            "description": "Best general-purpose and multilingual",
-        },
-        "voyage-3": {
-            "context_length": 32000,
-            "dimension": 1024,
-            "description": "General-purpose and multilingual",
-        },
-        "voyage-3-lite": {
-            "context_length": 32000,
-            "dimension": 512,
-            "description": "Optimized for latency and cost",
-        },
-        "voyage-code-3": {
-            "context_length": 32000,
-            "dimension": 1024,
-            "description": "Optimized for code",
-        },
-        "voyage-finance-2": {
-            "context_length": 32000,
-            "dimension": 1024,
-            "description": "Optimized for finance",
-        },
-        "voyage-law-2": {
-            "context_length": 16000,
-            "dimension": 1024,
-            "description": "Optimized for legal",
-        },
-        "voyage-multimodal-3": {
-            "context_length": 32000,
-            "dimension": 1024,
-            "description": "Multimodal text and images",
-        },
-    }
--- a/lightrag/llm/voyageai.py
+++ b/lightrag/llm/voyageai.py
@ -0,0 +1,176 @@
+import os
+import numpy as np
+import pipmaster as pm  # Pipmaster for dynamic library install
+
+# Add Voyage AI import
+if not pm.is_installed("voyageai"):
+    pm.install("voyageai")
+
+from voyageai.error import (
+    RateLimitError,
+    APIConnectionError,
+)
+
+from tenacity import (
+    retry,
+    stop_after_attempt,
+    wait_exponential,
+    retry_if_exception_type,
+)
+from lightrag.utils import wrap_embedding_func_with_attrs, logger
+
+
+# Custom exceptions for VoyageAI errors
+class VoyageAIError(Exception):
+    """Raised when the VoyageAI API response is empty or does not match the request."""
+
+    pass
+
+
+@wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=16000)
+@retry(
+    stop=stop_after_attempt(3),
+    wait=wait_exponential(multiplier=1, min=4, max=60),
+    retry=retry_if_exception_type((RateLimitError, APIConnectionError)),
+)
+async def voyageai_embed(
+    texts: list[str],
+    model: str = "voyage-3",
+    api_key: str | None = None,
+    embedding_dim: int | None = None,
+    input_type: str | None = None,
+    truncation: bool | None = None,
+) -> np.ndarray:
+    """Generate embeddings for a list of texts using VoyageAI's API.
+
+    Args:
+        texts: List of texts to embed.
+        model: The VoyageAI embedding model to use. Options include:
+            - "voyage-3": General purpose (1024 dims, 32K context)
+            - "voyage-3-lite": Lightweight (512 dims, 32K context)
+            - "voyage-3-large": Highest accuracy (1024 dims, 32K context)
+            - "voyage-code-3": Code optimized (1024 dims, 32K context)
+            - "voyage-law-2": Legal documents (1024 dims, 16K context)
+            - "voyage-finance-2": Finance (1024 dims, 32K context)
+        api_key: Optional VoyageAI API key. If falsy, falls back to the
+            VOYAGEAI_API_KEY, then VOYAGE_API_KEY, environment variable.
+        embedding_dim: Optional output dimension, forwarded as ``output_dimension``.
+        input_type: Optional input type hint for the model. Options:
+            - "query": For search queries
+            - "document": For documents to be indexed
+            - None: Let the model decide (default)
+        truncation: Whether to truncate texts that exceed token limit (default: None).
+
+    Returns:
+        A numpy array of embeddings, one per input text.
+
+    Raises:
+        ImportError: If the voyageai package cannot be imported.
+        ValueError: If no API key is passed and none is set in the environment.
+        VoyageAIError: If the API returns no embeddings or a mismatched count.
+    """
+
+    try:
+        import voyageai
+    except ImportError as err:
+        raise ImportError(
+            "voyageai package is required. Install it with: pip install voyageai"
+        ) from err
+
+    logger.debug("Starting VoyageAI embedding generation.")
+    # Resolve the API key: an explicit api_key argument wins; otherwise read the
+    # environment. VOYAGE_API_KEY (the name the earlier Voyage integration used)
+    # is still accepted so existing deployments keep working.
+    if not api_key:
+        env_key = os.environ.get("VOYAGEAI_API_KEY")
+        api_key = env_key or os.environ.get("VOYAGE_API_KEY")
+        if not api_key:
+            logger.error("VOYAGEAI_API_KEY environment variable not set")
+            raise ValueError(
+                "VOYAGEAI_API_KEY environment variable is required or pass api_key parameter"
+            )
+
+    try:
+        # Create async client
+        client = voyageai.AsyncClient(api_key=api_key)
+
+        # Calculate total characters for debugging
+        total_chars = sum(len(t) for t in texts)
+        avg_chars = total_chars / len(texts) if texts else 0
+        logger.debug(
+            f"VoyageAI embedding request: {len(texts)} texts, "
+            f"total_chars={total_chars}, avg_chars={avg_chars:.0f}, model={model}"
+        )
+
+        # Optional parameters left as None make the voyageai client use its defaults
+        result = await client.embed(
+            texts=texts,
+            model=model,
+            output_dimension=embedding_dim,
+            truncation=truncation,
+            input_type=input_type,
+        )
+
+        if not result.embeddings:
+            err_msg = "VoyageAI API returned empty embeddings"
+            logger.error(err_msg)
+            raise VoyageAIError(err_msg)
+
+        if len(result.embeddings) != len(texts):
+            err_msg = f"VoyageAI API returned {len(result.embeddings)} embeddings for {len(texts)} texts"
+            logger.error(err_msg)
+            raise VoyageAIError(err_msg)
+
+        # Convert to a float32 numpy array, one row per input text
+        embeddings = np.array(result.embeddings, dtype=np.float32)
+        logger.debug(f"VoyageAI embeddings generated: shape {embeddings.shape}")
+
+        return embeddings
+
+    except Exception as e:
+        logger.error(f"VoyageAI embedding error: {e}")
+        raise
+
+
+# Optional helper: a hard-coded catalog of models (nothing is fetched from the API)
+def get_available_embedding_models() -> dict[str, dict]:
+    """Return known Voyage AI embedding models and their properties.
+
+    Each value holds "context_length", "dimension" and "description" keys."""
+    return {
+        "voyage-3-large": {
+            "context_length": 32000,
+            "dimension": 1024,
+            "description": "Best general-purpose and multilingual",
+        },
+        "voyage-3": {
+            "context_length": 32000,
+            "dimension": 1024,
+            "description": "General-purpose and multilingual",
+        },
+        "voyage-3-lite": {
+            "context_length": 32000,
+            "dimension": 512,
+            "description": "Optimized for latency and cost",
+        },
+        "voyage-code-3": {
+            "context_length": 32000,
+            "dimension": 1024,
+            "description": "Optimized for code",
+        },
+        "voyage-finance-2": {
+            "context_length": 32000,
+            "dimension": 1024,
+            "description": "Optimized for finance",
+        },
+        "voyage-law-2": {
+            "context_length": 16000,
+            "dimension": 1024,
+            "description": "Optimized for legal",
+        },
+        "voyage-multimodal-3": {
+            "context_length": 32000,
+            "dimension": 1024,
+            "description": "Multimodal text and images",
+        },
+    }
--- a/uv.lock
+++ b/uv.lock
@ -1,5 +1,5 @@
 version = 1
-revision = 3
+revision = 2
 requires-python = ">=3.10"
 resolution-markers = [
    "python_full_version >= '3.14' and python_full_version < '4' and platform_machine == 'x86_64' and sys_platform == 'darwin'",
@ -2735,7 +2735,6 @@ requires-dist = [
    { name = "json-repair", marker = "extra == 'api'" },
    { name = "langfuse", marker = "extra == 'observability'", specifier = ">=3.8.1" },
    { name = "lightrag-hku", extras = ["api", "offline-llm", "offline-storage"], marker = "extra == 'offline'" },
-    { name = "lightrag-hku", extras = ["pytest"], marker = "extra == 'evaluation'" },
    { name = "llama-index", marker = "extra == 'offline-llm'", specifier = ">=0.9.0,<1.0.0" },
    { name = "nano-vectordb" },
    { name = "nano-vectordb", marker = "extra == 'api'" },
@ -2753,6 +2752,7 @@ requires-dist = [
    { name = "passlib", extras = ["bcrypt"], marker = "extra == 'api'" },
    { name = "pipmaster" },
    { name = "pipmaster", marker = "extra == 'api'" },
+    { name = "pre-commit", marker = "extra == 'evaluation'" },
    { name = "pre-commit", marker = "extra == 'pytest'" },
    { name = "psutil", marker = "extra == 'api'" },
    { name = "pycryptodome", marker = "extra == 'api'", specifier = ">=3.0.0,<4.0.0" },
@ -2764,7 +2764,9 @@ requires-dist = [
    { name = "pypdf", marker = "extra == 'api'", specifier = ">=6.1.0" },
    { name = "pypinyin" },
    { name = "pypinyin", marker = "extra == 'api'" },
+    { name = "pytest", marker = "extra == 'evaluation'", specifier = ">=8.4.2" },
    { name = "pytest", marker = "extra == 'pytest'", specifier = ">=8.4.2" },
+    { name = "pytest-asyncio", marker = "extra == 'evaluation'", specifier = ">=1.2.0" },
    { name = "pytest-asyncio", marker = "extra == 'pytest'", specifier = ">=1.2.0" },
    { name = "python-docx", marker = "extra == 'api'", specifier = ">=0.8.11,<2.0.0" },
    { name = "python-dotenv" },