Add max_token_size parameter to embedding function decorators
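- Add max_token_size to all embed funcs (8192 everywhere except the Gemini embedding function, which uses 2048)
- Move siliconcloud to deprecated folder
- Import wrap_embedding_func_with_attrs
- Update the EmbeddingFunc max_token_size comment (now "Token limit for the embedding model" instead of "deprecated")
- Fix langfuse import type checking by adding `# type: ignore[import-untyped]`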
This commit is contained in:
parent
399a23c3a6
commit
05852e1ab2
11 changed files with 21 additions and 9 deletions
|
|
@ -16,6 +16,7 @@ from tenacity import (
|
||||||
)
|
)
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
|
from lightrag.utils import wrap_embedding_func_with_attrs
|
||||||
|
|
||||||
if sys.version_info < (3, 9):
|
if sys.version_info < (3, 9):
|
||||||
from typing import AsyncIterator
|
from typing import AsyncIterator
|
||||||
|
|
@ -253,7 +254,7 @@ async def bedrock_complete(
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
# @wrap_embedding_func_with_attrs(embedding_dim=1024)
|
@wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192)
|
||||||
# @retry(
|
# @retry(
|
||||||
# stop=stop_after_attempt(3),
|
# stop=stop_after_attempt(3),
|
||||||
# wait=wait_exponential(multiplier=1, min=4, max=10),
|
# wait=wait_exponential(multiplier=1, min=4, max=10),
|
||||||
|
|
|
||||||
|
|
@ -453,7 +453,7 @@ async def gemini_model_complete(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@wrap_embedding_func_with_attrs(embedding_dim=1536)
|
@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=2048)
|
||||||
@retry(
|
@retry(
|
||||||
stop=stop_after_attempt(3),
|
stop=stop_after_attempt(3),
|
||||||
wait=wait_exponential(multiplier=1, min=4, max=60),
|
wait=wait_exponential(multiplier=1, min=4, max=60),
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@ from lightrag.exceptions import (
|
||||||
)
|
)
|
||||||
import torch
|
import torch
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from lightrag.utils import wrap_embedding_func_with_attrs
|
||||||
|
|
||||||
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
||||||
|
|
||||||
|
|
@ -141,6 +142,7 @@ async def hf_model_complete(
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
@wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192)
|
||||||
async def hf_embed(texts: list[str], tokenizer, embed_model) -> np.ndarray:
|
async def hf_embed(texts: list[str], tokenizer, embed_model) -> np.ndarray:
|
||||||
# Detect the appropriate device
|
# Detect the appropriate device
|
||||||
if torch.cuda.is_available():
|
if torch.cuda.is_available():
|
||||||
|
|
|
||||||
|
|
@ -58,7 +58,7 @@ async def fetch_data(url, headers, data):
|
||||||
return data_list
|
return data_list
|
||||||
|
|
||||||
|
|
||||||
@wrap_embedding_func_with_attrs(embedding_dim=2048)
|
@wrap_embedding_func_with_attrs(embedding_dim=2048, max_token_size=8192)
|
||||||
@retry(
|
@retry(
|
||||||
stop=stop_after_attempt(3),
|
stop=stop_after_attempt(3),
|
||||||
wait=wait_exponential(multiplier=1, min=4, max=60),
|
wait=wait_exponential(multiplier=1, min=4, max=60),
|
||||||
|
|
|
||||||
|
|
@ -174,7 +174,7 @@ async def llama_index_complete(
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
@wrap_embedding_func_with_attrs(embedding_dim=1536)
|
@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
|
||||||
@retry(
|
@retry(
|
||||||
stop=stop_after_attempt(3),
|
stop=stop_after_attempt(3),
|
||||||
wait=wait_exponential(multiplier=1, min=4, max=60),
|
wait=wait_exponential(multiplier=1, min=4, max=60),
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,10 @@ from lightrag.exceptions import (
|
||||||
from typing import Union, List
|
from typing import Union, List
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
from lightrag.utils import (
|
||||||
|
wrap_embedding_func_with_attrs,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@retry(
|
@retry(
|
||||||
stop=stop_after_attempt(3),
|
stop=stop_after_attempt(3),
|
||||||
|
|
@ -134,6 +138,7 @@ async def lollms_model_complete(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192)
|
||||||
async def lollms_embed(
|
async def lollms_embed(
|
||||||
texts: List[str], embed_model=None, base_url="http://localhost:9600", **kwargs
|
texts: List[str], embed_model=None, base_url="http://localhost:9600", **kwargs
|
||||||
) -> np.ndarray:
|
) -> np.ndarray:
|
||||||
|
|
|
||||||
|
|
@ -33,7 +33,7 @@ from lightrag.utils import (
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
@wrap_embedding_func_with_attrs(embedding_dim=2048)
|
@wrap_embedding_func_with_attrs(embedding_dim=2048, max_token_size=8192)
|
||||||
@retry(
|
@retry(
|
||||||
stop=stop_after_attempt(3),
|
stop=stop_after_attempt(3),
|
||||||
wait=wait_exponential(multiplier=1, min=4, max=60),
|
wait=wait_exponential(multiplier=1, min=4, max=60),
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,10 @@ from lightrag.api import __api_version__
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from typing import Optional, Union
|
from typing import Optional, Union
|
||||||
from lightrag.utils import logger
|
from lightrag.utils import (
|
||||||
|
wrap_embedding_func_with_attrs,
|
||||||
|
logger,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
_OLLAMA_CLOUD_HOST = "https://ollama.com"
|
_OLLAMA_CLOUD_HOST = "https://ollama.com"
|
||||||
|
|
@ -169,6 +172,7 @@ async def ollama_model_complete(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192)
|
||||||
async def ollama_embed(texts: list[str], embed_model, **kwargs) -> np.ndarray:
|
async def ollama_embed(texts: list[str], embed_model, **kwargs) -> np.ndarray:
|
||||||
api_key = kwargs.pop("api_key", None)
|
api_key = kwargs.pop("api_key", None)
|
||||||
if not api_key:
|
if not api_key:
|
||||||
|
|
|
||||||
|
|
@ -47,7 +47,7 @@ try:
|
||||||
|
|
||||||
# Only enable Langfuse if both keys are configured
|
# Only enable Langfuse if both keys are configured
|
||||||
if langfuse_public_key and langfuse_secret_key:
|
if langfuse_public_key and langfuse_secret_key:
|
||||||
from langfuse.openai import AsyncOpenAI
|
from langfuse.openai import AsyncOpenAI # type: ignore[import-untyped]
|
||||||
|
|
||||||
LANGFUSE_ENABLED = True
|
LANGFUSE_ENABLED = True
|
||||||
logger.info("Langfuse observability enabled for OpenAI client")
|
logger.info("Langfuse observability enabled for OpenAI client")
|
||||||
|
|
@ -604,7 +604,7 @@ async def nvidia_openai_complete(
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
@wrap_embedding_func_with_attrs(embedding_dim=1536)
|
@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
|
||||||
@retry(
|
@retry(
|
||||||
stop=stop_after_attempt(3),
|
stop=stop_after_attempt(3),
|
||||||
wait=wait_exponential(multiplier=1, min=4, max=60),
|
wait=wait_exponential(multiplier=1, min=4, max=60),
|
||||||
|
|
|
||||||
|
|
@ -355,7 +355,7 @@ class TaskState:
|
||||||
class EmbeddingFunc:
|
class EmbeddingFunc:
|
||||||
embedding_dim: int
|
embedding_dim: int
|
||||||
func: callable
|
func: callable
|
||||||
max_token_size: int | None = None # deprecated keep it for compatible only
|
max_token_size: int | None = None # Token limit for the embedding model
|
||||||
send_dimensions: bool = (
|
send_dimensions: bool = (
|
||||||
False # Control whether to send embedding_dim to the function
|
False # Control whether to send embedding_dim to the function
|
||||||
)
|
)
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue