fix: UnstructuredDocument read method (#1141)

<!-- .github/pull_request_template.md -->

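## Description
<!-- Provide a clear description of the changes in this PR -->
`UnstructuredDocument`'s read method now always opens `raw_data_location` through `open_data_file` in binary mode and passes the file object to `partition`, instead of doing so only for `s3://` locations and passing the raw path otherwise. The change also touches the `npm rebuild lightningcss` step in the frontend Dockerfile.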

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
Boris 2025-07-24 13:23:27 +02:00 committed by GitHub
parent 8e943beb15
commit d6727a1b4a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 3 additions and 6 deletions

View file

@@ -9,7 +9,7 @@ COPY package.json package-lock.json ./
# Install any needed packages specified in package.json
RUN npm ci
# RUN npm rebuild lightningcss
RUN npm rebuild lightningcss
# Copy the rest of the application code to the working directory
COPY src ./src

View file

@@ -18,11 +18,8 @@ class UnstructuredDocument(Document):
except ModuleNotFoundError:
raise UnstructuredLibraryImportError
if self.raw_data_location.startswith("s3://"):
async with open_data_file(self.raw_data_location, mode="rb") as f:
elements = partition(file=f, content_type=self.mime_type)
else:
elements = partition(self.raw_data_location, content_type=self.mime_type)
async with open_data_file(self.raw_data_location, mode="rb") as f:
elements = partition(file=f, content_type=self.mime_type)
in_memory_file = StringIO("\n\n".join([str(el) for el in elements]))
in_memory_file.seek(0)