diff --git a/cognee/api/v1/cognify/cognify_v2.py b/cognee/api/v1/cognify/cognify_v2.py index d7c40ae1a..d638f9c10 100644 --- a/cognee/api/v1/cognify/cognify_v2.py +++ b/cognee/api/v1/cognify/cognify_v2.py @@ -118,6 +118,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's user: User = None, graph_model: BaseModel = KnowledgeGraph, chunker=TextChunker, + chunk_size: int = None, ontology_file_path: Optional[str] = None, ) -> list[Task]: if user is None: @@ -131,7 +132,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's Task(check_permissions_on_documents, user=user, permissions=["write"]), Task( extract_chunks_from_documents, - max_chunk_size=get_max_chunk_tokens(), + max_chunk_size=chunk_size or get_max_chunk_tokens(), chunker=chunker, ), # Extract text chunks based on the document type. Task( diff --git a/cognee/eval_framework/corpus_builder/corpus_builder_executor.py b/cognee/eval_framework/corpus_builder/corpus_builder_executor.py index e1c30a1f7..2a6ff63ce 100644 --- a/cognee/eval_framework/corpus_builder/corpus_builder_executor.py +++ b/cognee/eval_framework/corpus_builder/corpus_builder_executor.py @@ -55,5 +55,5 @@ class CorpusBuilderExecutor: await cognee.add(self.raw_corpus) - tasks = await self.task_getter(chunker=TextChunker) + tasks = await self.task_getter(chunk_size=chunk_size, chunker=chunker) await cognee.cognify(tasks=tasks) diff --git a/cognee/eval_framework/corpus_builder/run_corpus_builder.py b/cognee/eval_framework/corpus_builder/run_corpus_builder.py index b7bd2c4f7..f443cfcac 100644 --- a/cognee/eval_framework/corpus_builder/run_corpus_builder.py +++ b/cognee/eval_framework/corpus_builder/run_corpus_builder.py @@ -49,6 +49,7 @@ async def run_corpus_builder(params: dict, chunk_size=1024, chunker=TextChunker) questions = await corpus_builder.build_corpus( limit=params.get("number_of_samples_in_corpus"), chunker=chunker, + chunk_size=chunk_size, load_golden_context=params.get("evaluating_contexts"), ) with open(params["questions_path"], "w", encoding="utf-8") as f: