class UnstructuredLibraryImportError(CogneeApiError):
    """Signal that the optional ``unstructured`` package could not be imported.

    The three constructor arguments are handed, positionally and unchanged,
    to ``CogneeApiError.__init__``.

    Args:
        message: Human-readable description of the failure.
        name: Error name passed on to the base class.
        status_code: HTTP status passed on to the base class; defaults to
            422 Unprocessable Entity.
    """

    def __init__(
        self,
        message: str = "Import error. Unstructured library is not installed.",
        name: str = "UnstructuredModuleImportError",
        status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
    ):
        # Forward positionally: the base-class parameter names are not
        # visible from this module, so keyword forwarding is avoided.
        base_args = (message, name, status_code)
        super().__init__(*base_args)
content_type=self.mime_type) in_memory_file = StringIO("\n\n".join([str(el) for el in elements])) in_memory_file.seek(0)