diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index 9215c9369..3032bd4e8 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -269,13 +269,13 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's graph_model=graph_model, config=config, custom_prompt=custom_prompt, - task_config={"batch_size": 20}, + task_config={"batch_size": 100}, ), # Generate knowledge graphs from the document chunks. Task( summarize_text, - task_config={"batch_size": 20}, + task_config={"batch_size": 100}, ), - Task(add_data_points, task_config={"batch_size": 20}), + Task(add_data_points, task_config={"batch_size": 100}), ] return default_tasks diff --git a/cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py b/cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py index e34ab5d9d..c2acd516e 100644 --- a/cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +++ b/cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py @@ -1,8 +1,17 @@ -from cognee.shared.logging_utils import get_logger +import os +import logging from typing import List, Optional from fastembed import TextEmbedding import litellm -import os +from tenacity import ( + retry, + stop_after_delay, + wait_exponential_jitter, + retry_if_not_exception_type, + before_sleep_log, +) + +from cognee.shared.logging_utils import get_logger from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine from cognee.infrastructure.databases.exceptions import EmbeddingException from cognee.infrastructure.llm.tokenizer.TikToken import ( @@ -57,6 +66,13 @@ class FastembedEmbeddingEngine(EmbeddingEngine): enable_mocking = str(enable_mocking).lower() self.mock = enable_mocking in ("true", "1", "yes") + @retry( + stop=stop_after_delay(128), + wait=wait_exponential_jitter(2, 128), + retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError), + before_sleep=before_sleep_log(logger, logging.DEBUG), + reraise=True, + ) async def embed_text(self, text: List[str]) -> List[List[float]]: """ Embed the given text into numerical vectors. diff --git a/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py b/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py index 302950f66..03ce86bee 100644 --- a/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +++ b/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py @@ -16,9 +16,6 @@ import litellm import os from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine from cognee.infrastructure.databases.exceptions import EmbeddingException -from cognee.infrastructure.llm.tokenizer.Gemini import ( - GeminiTokenizer, -) from cognee.infrastructure.llm.tokenizer.HuggingFace import ( HuggingFaceTokenizer, ) @@ -28,10 +25,6 @@ from cognee.infrastructure.llm.tokenizer.Mistral import ( from cognee.infrastructure.llm.tokenizer.TikToken import ( TikTokenTokenizer, ) -from cognee.infrastructure.databases.vector.embeddings.embedding_rate_limiter import ( - embedding_rate_limit_async, - embedding_sleep_and_retry_async, -) litellm.set_verbose = False logger = get_logger("LiteLLMEmbeddingEngine") @@ -86,8 +79,8 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine): self.mock = enable_mocking in ("true", "1", "yes") @retry( - stop=stop_after_delay(180), - wait=wait_exponential_jitter(1, 180), + stop=stop_after_delay(128), + wait=wait_exponential_jitter(2, 128), retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError), before_sleep=before_sleep_log(logger, logging.DEBUG), reraise=True, diff --git a/cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py b/cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py index b8ee9c7df..2882b679a 100644 --- a/cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +++ b/cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py @@ -100,8 +100,8 @@ class OllamaEmbeddingEngine(EmbeddingEngine): return embeddings @retry( - stop=stop_after_delay(180), - wait=wait_exponential_jitter(1, 180), + stop=stop_after_delay(128), + wait=wait_exponential_jitter(2, 128), retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError), before_sleep=before_sleep_log(logger, logging.DEBUG), reraise=True, diff --git a/cognee/infrastructure/databases/vector/embeddings/config.py b/cognee/infrastructure/databases/vector/embeddings/config.py index 314adbd99..56cd79678 100644 --- a/cognee/infrastructure/databases/vector/embeddings/config.py +++ b/cognee/infrastructure/databases/vector/embeddings/config.py @@ -25,9 +25,9 @@ class EmbeddingConfig(BaseSettings): def model_post_init(self, __context) -> None: if not self.embedding_batch_size and self.embedding_provider.lower() == "openai": - self.embedding_batch_size = 1024 + self.embedding_batch_size = 36 elif not self.embedding_batch_size: - self.embedding_batch_size = 100 + self.embedding_batch_size = 36 def to_dict(self) -> dict: """