From d6727a1b4aee789d92d2e0589d96f15f4102ef96 Mon Sep 17 00:00:00 2001 From: Boris Date: Thu, 24 Jul 2025 13:23:27 +0200 Subject: [PATCH] fix: UnstructuredDocument read method (#1141) ## Description ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. --- cognee-frontend/Dockerfile | 2 +- .../data/processing/document_types/UnstructuredDocument.py | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/cognee-frontend/Dockerfile b/cognee-frontend/Dockerfile index 7c7b56bc5..2d474821b 100644 --- a/cognee-frontend/Dockerfile +++ b/cognee-frontend/Dockerfile @@ -9,7 +9,7 @@ COPY package.json package-lock.json ./ # Install any needed packages specified in package.json RUN npm ci -# RUN npm rebuild lightningcss +RUN npm rebuild lightningcss # Copy the rest of the application code to the working directory COPY src ./src diff --git a/cognee/modules/data/processing/document_types/UnstructuredDocument.py b/cognee/modules/data/processing/document_types/UnstructuredDocument.py index 204da1dc1..c0c83defd 100644 --- a/cognee/modules/data/processing/document_types/UnstructuredDocument.py +++ b/cognee/modules/data/processing/document_types/UnstructuredDocument.py @@ -18,11 +18,8 @@ class UnstructuredDocument(Document): except ModuleNotFoundError: raise UnstructuredLibraryImportError - if self.raw_data_location.startswith("s3://"): - async with open_data_file(self.raw_data_location, mode="rb") as f: - elements = partition(file=f, content_type=self.mime_type) - else: - elements = partition(self.raw_data_location, content_type=self.mime_type) + async with open_data_file(self.raw_data_location, mode="rb") as f: + elements = partition(file=f, content_type=self.mime_type) in_memory_file = StringIO("\n\n".join([str(el) for el in elements])) in_memory_file.seek(0)