diff --git a/lightrag/llm/jina.py b/lightrag/llm/jina.py
index 70de5995..f3c89228 100644
--- a/lightrag/llm/jina.py
+++ b/lightrag/llm/jina.py
@@ -1,4 +1,6 @@
 import os
+from typing import Final
+
 import pipmaster as pm  # Pipmaster for dynamic library install
 
 # install specific modules
@@ -19,6 +21,9 @@ from tenacity import (
 from lightrag.utils import wrap_embedding_func_with_attrs, logger
 
 
+DEFAULT_JINA_EMBED_DIM: Final[int] = 2048
+
+
 async def fetch_data(url, headers, data):
     async with aiohttp.ClientSession() as session:
         async with session.post(url, headers=headers, json=data) as response:
@@ -58,7 +63,7 @@ async def fetch_data(url, headers, data):
     return data_list
 
 
-@wrap_embedding_func_with_attrs(embedding_dim=2048)
+@wrap_embedding_func_with_attrs(embedding_dim=DEFAULT_JINA_EMBED_DIM)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=60),
@@ -69,7 +74,7 @@ async def fetch_data(url, headers, data):
 )
 async def jina_embed(
     texts: list[str],
-    embedding_dim: int = 2048,
+    embedding_dim: int | None = DEFAULT_JINA_EMBED_DIM,
     late_chunking: bool = False,
     base_url: str = None,
     api_key: str = None,
@@ -95,6 +100,10 @@ async def jina_embed(
         aiohttp.ClientError: If there is a connection error with the Jina API.
         aiohttp.ClientResponseError: If the Jina API returns an error response.
     """
+    resolved_embedding_dim = (
+        embedding_dim if embedding_dim is not None else DEFAULT_JINA_EMBED_DIM
+    )
+
     if api_key:
         os.environ["JINA_API_KEY"] = api_key
 
@@ -109,7 +118,7 @@ async def jina_embed(
     data = {
         "model": "jina-embeddings-v4",
         "task": "text-matching",
-        "dimensions": embedding_dim,
+        "dimensions": resolved_embedding_dim,
         "embedding_type": "base64",
         "input": texts,
     }
@@ -119,7 +128,7 @@ async def jina_embed(
         data["late_chunking"] = late_chunking
 
     logger.debug(
-        f"Jina embedding request: {len(texts)} texts, dimensions: {embedding_dim}"
+        f"Jina embedding request: {len(texts)} texts, dimensions: {resolved_embedding_dim}"
     )
 
     try: