refactor: set default numbers that are more reasonable

This commit is contained in:
Igor Ilic 2025-10-14 13:57:41 +02:00
parent 13d1133680
commit eb631a23ad
2 changed files with 5 additions and 6 deletions

View file

@@ -269,13 +269,13 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
graph_model=graph_model, graph_model=graph_model,
config=config, config=config,
custom_prompt=custom_prompt, custom_prompt=custom_prompt,
task_config={"batch_size": 100}, task_config={"batch_size": 20},
), # Generate knowledge graphs from the document chunks. ), # Generate knowledge graphs from the document chunks.
Task( Task(
summarize_text, summarize_text,
task_config={"batch_size": 100}, task_config={"batch_size": 20},
), ),
Task(add_data_points, task_config={"batch_size": 100}), Task(add_data_points, task_config={"batch_size": 20}),
] ]
return default_tasks return default_tasks

View file

@@ -24,11 +24,10 @@ class EmbeddingConfig(BaseSettings):
model_config = SettingsConfigDict(env_file=".env", extra="allow") model_config = SettingsConfigDict(env_file=".env", extra="allow")
def model_post_init(self, __context) -> None: def model_post_init(self, __context) -> None:
# If embedding batch size is not defined use 2048 as default for OpenAI and 100 for all other embedding models
if not self.embedding_batch_size and self.embedding_provider.lower() == "openai": if not self.embedding_batch_size and self.embedding_provider.lower() == "openai":
self.embedding_batch_size = 30 self.embedding_batch_size = 1024
elif not self.embedding_batch_size: elif not self.embedding_batch_size:
self.embedding_batch_size = 10 self.embedding_batch_size = 100
def to_dict(self) -> dict: def to_dict(self) -> dict:
""" """