cherry-pick 3d9de5ed

2025-12-04 19:14:30 +08:00 · 2025-12-04 19:14:30 +08:00 · 3aa214b005
commit 3aa214b005
parent 96f23d59af
3 changed files with 181 additions and 3 deletions
--- a/lightrag/llm/gemini.py
+++ b/lightrag/llm/gemini.py
@ -16,22 +16,44 @@ from collections.abc import AsyncIterator
 from functools import lru_cache
 from typing import Any
-from lightrag.utils import logger, remove_think_tags, safe_unicode_decode
+import numpy as np
 from tenacity import (
    retry,
    stop_after_attempt,
    wait_exponential,
    retry_if_exception_type,
 )
 from lightrag.utils import (
    logger,
    remove_think_tags,
    safe_unicode_decode,
    wrap_embedding_func_with_attrs,
 )
 import pipmaster as pm
-# Install the Google Gemini client on demand
+# Install the Google Gemini client and its dependencies on demand
 if not pm.is_installed("google-genai"):
    pm.install("google-genai")
 if not pm.is_installed("google-api-core"):
    pm.install("google-api-core")
 from google import genai  # type: ignore
 from google.genai import types  # type: ignore
 from google.api_core import exceptions as google_api_exceptions  # type: ignore
 DEFAULT_GEMINI_ENDPOINT = "https://generativelanguage.googleapis.com"
 LOG = logging.getLogger(__name__)
 class InvalidResponseError(Exception):
    """Custom exception class for triggering retry mechanism when Gemini returns empty responses"""
    pass
@lru_cache(maxsize=8)
 def _get_gemini_client(
    api_key: str, base_url: str | None, timeout: int | None = None
@ -163,6 +185,21 @@ def _extract_response_text(
    return ("\n".join(regular_parts), "\n".join(thought_parts))
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=60),
    retry=(
        retry_if_exception_type(google_api_exceptions.InternalServerError)
        | retry_if_exception_type(google_api_exceptions.ServiceUnavailable)
        | retry_if_exception_type(google_api_exceptions.ResourceExhausted)
        | retry_if_exception_type(google_api_exceptions.GatewayTimeout)
        | retry_if_exception_type(google_api_exceptions.BadGateway)
        | retry_if_exception_type(google_api_exceptions.DeadlineExceeded)
        | retry_if_exception_type(google_api_exceptions.Aborted)
        | retry_if_exception_type(google_api_exceptions.Unknown)
        | retry_if_exception_type(InvalidResponseError)
    ),
 )
 async def gemini_complete_if_cache(
    model: str,
    prompt: str,
@ -369,7 +406,7 @@ async def gemini_complete_if_cache(
        final_text = regular_text or ""
    if not final_text:
-        raise RuntimeError("Gemini response did not contain any text content.")
+        raise InvalidResponseError("Gemini response did not contain any text content.")
    if "\\u" in final_text:
        final_text = safe_unicode_decode(final_text.encode("utf-8"))
@ -416,7 +453,143 @@ async def gemini_model_complete(
    )
@wrap_embedding_func_with_attrs(embedding_dim=1536)
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=60),
    retry=(
        retry_if_exception_type(google_api_exceptions.InternalServerError)
        | retry_if_exception_type(google_api_exceptions.ServiceUnavailable)
        | retry_if_exception_type(google_api_exceptions.ResourceExhausted)
        | retry_if_exception_type(google_api_exceptions.GatewayTimeout)
        | retry_if_exception_type(google_api_exceptions.BadGateway)
        | retry_if_exception_type(google_api_exceptions.DeadlineExceeded)
        | retry_if_exception_type(google_api_exceptions.Aborted)
        | retry_if_exception_type(google_api_exceptions.Unknown)
    ),
 )
 async def gemini_embed(
    texts: list[str],
    model: str = "gemini-embedding-001",
    base_url: str | None = None,
    api_key: str | None = None,
    embedding_dim: int | None = None,
    task_type: str = "RETRIEVAL_DOCUMENT",
    timeout: int | None = None,
    token_tracker: Any | None = None,
 ) -> np.ndarray:
    """Generate embeddings for a list of texts using Gemini's API.
    This function uses Google's Gemini embedding model to generate text embeddings.
    It supports dynamic dimension control and automatic normalization for dimensions
    less than 3072.
    Args:
        texts: List of texts to embed.
        model: The Gemini embedding model to use. Default is "gemini-embedding-001".
        base_url: Optional custom API endpoint.
        api_key: Optional Gemini API key. If None, uses environment variables.
        embedding_dim: Optional embedding dimension for dynamic dimension reduction.
            **IMPORTANT**: This parameter is automatically injected by the EmbeddingFunc wrapper.
            Do NOT manually pass this parameter when calling the function directly.
            The dimension is controlled by the @wrap_embedding_func_with_attrs decorator
            or the EMBEDDING_DIM environment variable.
            Supported range: 128-3072. Recommended values: 768, 1536, 3072.
        task_type: Task type for embedding optimization. Default is "RETRIEVAL_DOCUMENT".
            Supported types: SEMANTIC_SIMILARITY, CLASSIFICATION, CLUSTERING,
            RETRIEVAL_DOCUMENT, RETRIEVAL_QUERY, CODE_RETRIEVAL_QUERY,
            QUESTION_ANSWERING, FACT_VERIFICATION.
        timeout: Request timeout in seconds (will be converted to milliseconds for Gemini API).
        token_tracker: Optional token usage tracker for monitoring API usage.
    Returns:
        A numpy array of embeddings, one per input text. For dimensions < 3072,
        the embeddings are L2-normalized to ensure optimal semantic similarity performance.
    Raises:
        ValueError: If API key is not provided or configured.
        RuntimeError: If the response from Gemini is invalid or empty.
    Note:
        - For dimension 3072: Embeddings are already normalized by the API
        - For dimensions < 3072: Embeddings are L2-normalized after retrieval
        - Normalization ensures accurate semantic similarity via cosine distance
    """
    loop = asyncio.get_running_loop()
    key = _ensure_api_key(api_key)
    # Convert timeout from seconds to milliseconds for Gemini API
    timeout_ms = timeout * 1000 if timeout else None
    client = _get_gemini_client(key, base_url, timeout_ms)
    # Prepare embedding configuration
    config_kwargs: dict[str, Any] = {}
    # Add task_type to config
    if task_type:
        config_kwargs["task_type"] = task_type
    # Add output_dimensionality if embedding_dim is provided
    if embedding_dim is not None:
        config_kwargs["output_dimensionality"] = embedding_dim
    # Create config object if we have parameters
    config_obj = types.EmbedContentConfig(**config_kwargs) if config_kwargs else None
    def _call_embed() -> Any:
        """Call Gemini embedding API in executor thread."""
        request_kwargs: dict[str, Any] = {
            "model": model,
            "contents": texts,
        }
        if config_obj is not None:
            request_kwargs["config"] = config_obj
        return client.models.embed_content(**request_kwargs)
    # Execute API call in thread pool
    response = await loop.run_in_executor(None, _call_embed)
    # Extract embeddings from response
    if not hasattr(response, "embeddings") or not response.embeddings:
        raise RuntimeError("Gemini response did not contain embeddings.")
    # Convert embeddings to numpy array
    embeddings = np.array(
        [np.array(e.values, dtype=np.float32) for e in response.embeddings]
    )
    # Apply L2 normalization for dimensions < 3072
    # The 3072 dimension embedding is already normalized by Gemini API
    if embedding_dim and embedding_dim < 3072:
        # Normalize each embedding vector to unit length
        norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
        # Avoid division by zero
        norms = np.where(norms == 0, 1, norms)
        embeddings = embeddings / norms
        logger.debug(
            f"Applied L2 normalization to {len(embeddings)} embeddings of dimension {embedding_dim}"
        )
    # Track token usage if tracker is provided
    # Note: Gemini embedding API may not provide usage metadata
    if token_tracker and hasattr(response, "usage_metadata"):
        usage = response.usage_metadata
        token_counts = {
            "prompt_tokens": getattr(usage, "prompt_token_count", 0),
            "total_tokens": getattr(usage, "total_token_count", 0),
        }
        token_tracker.add_usage(token_counts)
    logger.debug(
        f"Generated {len(embeddings)} Gemini embeddings with dimension {embeddings.shape[1]}"
    )
    return embeddings
 __all__ = [
    "gemini_complete_if_cache",
    "gemini_model_complete",
    "gemini_embed",
 ]
--- a/pyproject.toml
+++ b/pyproject.toml
@ -24,6 +24,7 @@ dependencies = [
    "aiohttp",
    "configparser",
    "future",
    "google-api-core>=2.0.0,<3.0.0",
    "google-genai>=1.0.0,<2.0.0",
    "json_repair",
    "nano-vectordb",
@ -60,6 +61,7 @@ api = [
    "tenacity",
    "tiktoken",
    "xlsxwriter>=3.1.0",
    "google-api-core>=2.0.0,<3.0.0",
    "google-genai>=1.0.0,<2.0.0",
    # API-specific dependencies
    "aiofiles",
@ -107,6 +109,7 @@ offline-llm = [
    "aioboto3>=12.0.0,<16.0.0",
    "voyageai>=0.2.0,<1.0.0",
    "llama-index>=0.9.0,<1.0.0",
    "google-api-core>=2.0.0,<3.0.0",
    "google-genai>=1.0.0,<2.0.0",
 ]
--- a/requirements-offline-llm.txt
+++ b/requirements-offline-llm.txt
@ -10,6 +10,8 @@
 # LLM provider dependencies (with version constraints matching pyproject.toml)
 aioboto3>=12.0.0,<16.0.0
 anthropic>=0.18.0,<1.0.0
 google-api-core>=2.0.0,<3.0.0
 google-genai>=1.0.0,<2.0.0
 llama-index>=0.9.0,<1.0.0
 ollama>=0.1.0,<1.0.0
 openai>=1.0.0,<3.0.0