fix: UnstructuredDocument read method (#1141)
<!-- .github/pull_request_template.md -->

## Description
<!-- Provide a clear description of the changes in this PR -->

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
parent
8e943beb15
commit
d6727a1b4a
2 changed files with 3 additions and 6 deletions
|
|
@@ -9,7 +9,7 @@ COPY package.json package-lock.json ./
|
||||||
|
|
||||||
# Install any needed packages specified in package.json
|
# Install any needed packages specified in package.json
|
||||||
RUN npm ci
|
RUN npm ci
|
||||||
# RUN npm rebuild lightningcss
|
RUN npm rebuild lightningcss
|
||||||
|
|
||||||
# Copy the rest of the application code to the working directory
|
# Copy the rest of the application code to the working directory
|
||||||
COPY src ./src
|
COPY src ./src
|
||||||
|
|
|
||||||
|
|
@@ -18,11 +18,8 @@ class UnstructuredDocument(Document):
|
||||||
except ModuleNotFoundError:
|
except ModuleNotFoundError:
|
||||||
raise UnstructuredLibraryImportError
|
raise UnstructuredLibraryImportError
|
||||||
|
|
||||||
if self.raw_data_location.startswith("s3://"):
|
async with open_data_file(self.raw_data_location, mode="rb") as f:
|
||||||
async with open_data_file(self.raw_data_location, mode="rb") as f:
|
elements = partition(file=f, content_type=self.mime_type)
|
||||||
elements = partition(file=f, content_type=self.mime_type)
|
|
||||||
else:
|
|
||||||
elements = partition(self.raw_data_location, content_type=self.mime_type)
|
|
||||||
|
|
||||||
in_memory_file = StringIO("\n\n".join([str(el) for el in elements]))
|
in_memory_file = StringIO("\n\n".join([str(el) for el in elements]))
|
||||||
in_memory_file.seek(0)
|
in_memory_file.seek(0)
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue