cognee/cognee/tasks/documents/extract_chunks_from_documents.py
Igor Ilic 3db7f85c9c feat: Add max_chunk_tokens value to chunkers
Add formula and forwarding of max_chunk_tokens value through Cognee
2025-01-28 14:32:00 +01:00

23 lines
842 B
Python

from typing import Optional, AsyncGenerator
from cognee.modules.data.processing.document_types.Document import Document
async def extract_chunks_from_documents(
    documents: list[Document],
    max_chunk_tokens: int,
    chunk_size: int = 1024,
    chunker="text_chunker",
) -> AsyncGenerator:
    """
    Stream chunks from every document in `documents`, one document at a time.

    Each document's `read` method is called with `chunk_size`, `chunker` and
    `max_chunk_tokens` as keyword arguments, and every item it produces is
    yielded in the order received before moving on to the next document.

    Notes:
        - Each document must provide a `read` method that accepts these keyword
          arguments and returns a regular (synchronous) iterable of chunks.
        - `chunker` is forwarded to `read` unchanged; choosing a chunker that
          suits the document type is up to the caller.
    """
    # The same options apply to every document, so assemble them once.
    read_options = {
        "chunk_size": chunk_size,
        "chunker": chunker,
        "max_chunk_tokens": max_chunk_tokens,
    }

    for doc in documents:
        chunk_stream = doc.read(**read_options)
        for chunk in chunk_stream:
            yield chunk