feat: add custom errors to document classification and top-level chunking
This commit is contained in:
parent
c99b453d96
commit
df3a3df117
4 changed files with 30 additions and 4 deletions
|
|
@ -117,7 +117,6 @@ async def classify_documents(data_documents: list[Data]) -> list[Document]:
|
|||
|
||||
documents = []
|
||||
for data_item in data_documents:
|
||||
|
||||
document = EXTENSION_TO_DOCUMENT_CLASS[data_item.extension](
|
||||
id=data_item.id,
|
||||
title=f"{data_item.name}.{data_item.extension}",
|
||||
|
|
|
|||
|
|
@ -6,4 +6,6 @@ This module defines a set of exceptions for handling various data errors
|
|||
|
||||
from .exceptions import (
|
||||
WrongDataDocumentInputError,
|
||||
InvalidChunkSizeError,
|
||||
InvalidChunkerError,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ from fastapi import status
|
|||
|
||||
class WrongDataDocumentInputError(CogneeValidationError):
|
||||
"""Raised when a wrong data document is provided."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
field: str,
|
||||
|
|
@ -14,4 +15,22 @@ class WrongDataDocumentInputError(CogneeValidationError):
|
|||
status_code: int = status.HTTP_422_UNPROCESSABLE_ENTITY,
|
||||
):
|
||||
message = f"Missing of invalid parameter: '{field}'."
|
||||
super().__init__(message, name, status_code)
|
||||
super().__init__(message, name, status_code)
|
||||
|
||||
|
||||
class InvalidChunkSizeError(CogneeValidationError):
    """Validation error raised when ``max_chunk_size`` is not a positive integer.

    Maps to HTTP 400 so API callers receive a client-error response.
    """

    def __init__(self, value):
        # Embed the offending value in the message so the caller can see
        # exactly what was rejected.
        detail = f"max_chunk_size must be a positive integer (got {value})."
        super().__init__(
            message=detail,
            name="InvalidChunkSizeError",
            status_code=status.HTTP_400_BAD_REQUEST,
        )
|
||||
|
||||
|
||||
class InvalidChunkerError(CogneeValidationError):
    """Validation error raised when the supplied chunker is not a valid Chunker class.

    Maps to HTTP 400 so API callers receive a client-error response.
    """

    def __init__(self):
        super().__init__(
            # Fix: the message has no placeholders, so the f-string prefix
            # was extraneous (ruff F541); use a plain string literal.
            message="chunker must be a valid Chunker class.",
            name="InvalidChunkerError",
            status_code=status.HTTP_400_BAD_REQUEST,
        )
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ from cognee.modules.data.models import Data
|
|||
from cognee.infrastructure.databases.relational import get_relational_engine
|
||||
from cognee.modules.chunking.TextChunker import TextChunker
|
||||
from cognee.modules.chunking.Chunker import Chunker
|
||||
from cognee.tasks.documents.exceptions import InvalidChunkSizeError, InvalidChunkerError
|
||||
|
||||
|
||||
async def update_document_token_count(document_id: UUID, token_count: int) -> None:
|
||||
|
|
@ -37,6 +38,13 @@ async def extract_chunks_from_documents(
|
|||
- The `read` method of the `Document` class must be implemented to support the chunking operation.
|
||||
- The `chunker` parameter determines the chunking logic and should align with the document type.
|
||||
"""
|
||||
if not isinstance(max_chunk_size, int) or max_chunk_size <= 0:
|
||||
raise InvalidChunkSizeError(max_chunk_size)
|
||||
if not isinstance(chunker, type):
|
||||
raise InvalidChunkerError()
|
||||
if not hasattr(chunker, "read"):
|
||||
raise InvalidChunkerError()
|
||||
|
||||
for document in documents:
|
||||
document_token_count = 0
|
||||
|
||||
|
|
@ -48,5 +56,3 @@ async def extract_chunks_from_documents(
|
|||
yield document_chunk
|
||||
|
||||
await update_document_token_count(document.id, document_token_count)
|
||||
|
||||
# TODO(rita): describe the remaining work for this chunking path
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue