fix: UnstructuredDocument read method (#1141)
<!-- .github/pull_request_template.md -->

## Description
<!-- Provide a clear description of the changes in this PR -->

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
parent
8e943beb15
commit
d6727a1b4a
2 changed files with 3 additions and 6 deletions
|
|
@@ -9,7 +9,7 @@ COPY package.json package-lock.json ./
|
|||
|
||||
# Install any needed packages specified in package.json
|
||||
RUN npm ci
|
||||
# RUN npm rebuild lightningcss
|
||||
RUN npm rebuild lightningcss
|
||||
|
||||
# Copy the rest of the application code to the working directory
|
||||
COPY src ./src
|
||||
|
|
|
|||
|
|
@@ -18,11 +18,8 @@ class UnstructuredDocument(Document):
|
|||
except ModuleNotFoundError:
|
||||
raise UnstructuredLibraryImportError
|
||||
|
||||
if self.raw_data_location.startswith("s3://"):
|
||||
async with open_data_file(self.raw_data_location, mode="rb") as f:
|
||||
elements = partition(file=f, content_type=self.mime_type)
|
||||
else:
|
||||
elements = partition(self.raw_data_location, content_type=self.mime_type)
|
||||
async with open_data_file(self.raw_data_location, mode="rb") as f:
|
||||
elements = partition(file=f, content_type=self.mime_type)
|
||||
|
||||
in_memory_file = StringIO("\n\n".join([str(el) for el in elements]))
|
||||
in_memory_file.seek(0)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue