refactor: Optimize Cognee speed

This commit is contained in:
Igor Ilic 2025-10-15 13:32:17 +02:00
parent 417015d9a9
commit 1b28f13743
5 changed files with 27 additions and 18 deletions

View file

@@ -269,13 +269,13 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
graph_model=graph_model,
config=config,
custom_prompt=custom_prompt,
task_config={"batch_size": 20},
task_config={"batch_size": 100},
), # Generate knowledge graphs from the document chunks.
Task(
summarize_text,
task_config={"batch_size": 20},
task_config={"batch_size": 100},
),
Task(add_data_points, task_config={"batch_size": 20}),
Task(add_data_points, task_config={"batch_size": 100}),
]
return default_tasks

View file

@@ -1,8 +1,17 @@
from cognee.shared.logging_utils import get_logger
import os
import logging
from typing import List, Optional
from fastembed import TextEmbedding
import litellm
import os
from tenacity import (
retry,
stop_after_delay,
wait_exponential_jitter,
retry_if_not_exception_type,
before_sleep_log,
)
from cognee.shared.logging_utils import get_logger
from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
from cognee.infrastructure.databases.exceptions import EmbeddingException
from cognee.infrastructure.llm.tokenizer.TikToken import (
@@ -57,6 +66,13 @@ class FastembedEmbeddingEngine(EmbeddingEngine):
enable_mocking = str(enable_mocking).lower()
self.mock = enable_mocking in ("true", "1", "yes")
@retry(
stop=stop_after_delay(128),
wait=wait_exponential_jitter(2, 128),
retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
before_sleep=before_sleep_log(logger, logging.DEBUG),
reraise=True,
)
async def embed_text(self, text: List[str]) -> List[List[float]]:
"""
Embed the given text into numerical vectors.

View file

@@ -16,9 +16,6 @@ import litellm
import os
from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
from cognee.infrastructure.databases.exceptions import EmbeddingException
from cognee.infrastructure.llm.tokenizer.Gemini import (
GeminiTokenizer,
)
from cognee.infrastructure.llm.tokenizer.HuggingFace import (
HuggingFaceTokenizer,
)
@@ -28,10 +25,6 @@ from cognee.infrastructure.llm.tokenizer.Mistral import (
from cognee.infrastructure.llm.tokenizer.TikToken import (
TikTokenTokenizer,
)
from cognee.infrastructure.databases.vector.embeddings.embedding_rate_limiter import (
embedding_rate_limit_async,
embedding_sleep_and_retry_async,
)
litellm.set_verbose = False
logger = get_logger("LiteLLMEmbeddingEngine")
@@ -86,8 +79,8 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
self.mock = enable_mocking in ("true", "1", "yes")
@retry(
stop=stop_after_delay(180),
wait=wait_exponential_jitter(1, 180),
stop=stop_after_delay(128),
wait=wait_exponential_jitter(2, 128),
retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
before_sleep=before_sleep_log(logger, logging.DEBUG),
reraise=True,

View file

@@ -100,8 +100,8 @@ class OllamaEmbeddingEngine(EmbeddingEngine):
return embeddings
@retry(
stop=stop_after_delay(180),
wait=wait_exponential_jitter(1, 180),
stop=stop_after_delay(128),
wait=wait_exponential_jitter(2, 128),
retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
before_sleep=before_sleep_log(logger, logging.DEBUG),
reraise=True,

View file

@@ -25,9 +25,9 @@ class EmbeddingConfig(BaseSettings):
def model_post_init(self, __context) -> None:
if not self.embedding_batch_size and self.embedding_provider.lower() == "openai":
self.embedding_batch_size = 1024
self.embedding_batch_size = 36
elif not self.embedding_batch_size:
self.embedding_batch_size = 100
self.embedding_batch_size = 36
def to_dict(self) -> dict:
"""