refactor: set more reasonable default batch sizes

2025-10-14 13:57:41 +02:00 · 2025-10-14 13:57:41 +02:00 · eb631a23ad
commit eb631a23ad
parent 13d1133680
2 changed files with 5 additions and 6 deletions
--- a/cognee/api/v1/cognify/cognify.py
+++ b/cognee/api/v1/cognify/cognify.py
@@ -269,13 +269,13 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
            graph_model=graph_model,
            config=config,
            custom_prompt=custom_prompt,
-            task_config={"batch_size": 100},
+            task_config={"batch_size": 20},
        ),  # Generate knowledge graphs from the document chunks.
        Task(
            summarize_text,
-            task_config={"batch_size": 100},
+            task_config={"batch_size": 20},
        ),
-        Task(add_data_points, task_config={"batch_size": 100}),
+        Task(add_data_points, task_config={"batch_size": 20}),
    ]

    return default_tasks
--- a/cognee/infrastructure/databases/vector/embeddings/config.py
+++ b/cognee/infrastructure/databases/vector/embeddings/config.py
@@ -24,11 +24,10 @@ class EmbeddingConfig(BaseSettings):
    model_config = SettingsConfigDict(env_file=".env", extra="allow")

    def model_post_init(self, __context) -> None:
-        # If embedding batch size is not defined use 2048 as default for OpenAI and 100 for all other embedding models
        if not self.embedding_batch_size and self.embedding_provider.lower() == "openai":
-            self.embedding_batch_size = 30
+            self.embedding_batch_size = 1024
        elif not self.embedding_batch_size:
-            self.embedding_batch_size = 10
+            self.embedding_batch_size = 100

    def to_dict(self) -> dict:
        """