diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py
index 30afb269a..898c35518 100644
--- a/cognee/api/v1/cognify/cognify.py
+++ b/cognee/api/v1/cognify/cognify.py
@@ -269,13 +269,13 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
             graph_model=graph_model,
             config=config,
             custom_prompt=custom_prompt,
-            task_config={"batch_size": 100},
+            task_config={"batch_size": 20},
         ),
         # Generate knowledge graphs from the document chunks.
         Task(
             summarize_text,
-            task_config={"batch_size": 100},
+            task_config={"batch_size": 20},
         ),
-        Task(add_data_points, task_config={"batch_size": 100}),
+        Task(add_data_points, task_config={"batch_size": 20}),
     ]
     return default_tasks
diff --git a/cognee/infrastructure/databases/vector/embeddings/config.py b/cognee/infrastructure/databases/vector/embeddings/config.py
index dcb55f4a4..314adbd99 100644
--- a/cognee/infrastructure/databases/vector/embeddings/config.py
+++ b/cognee/infrastructure/databases/vector/embeddings/config.py
@@ -24,11 +24,11 @@ class EmbeddingConfig(BaseSettings):
     model_config = SettingsConfigDict(env_file=".env", extra="allow")
 
     def model_post_init(self, __context) -> None:
-        # If embedding batch size is not defined use 2048 as default for OpenAI and 100 for all other embedding models
+        # If embedding batch size is not defined use 1024 as default for OpenAI and 100 for all other embedding models
         if not self.embedding_batch_size and self.embedding_provider.lower() == "openai":
-            self.embedding_batch_size = 30
+            self.embedding_batch_size = 1024
         elif not self.embedding_batch_size:
-            self.embedding_batch_size = 10
+            self.embedding_batch_size = 100
 
     def to_dict(self) -> dict:
         """