Fix: docx get image exception. (#7636)
### What problem does this PR solve?

Close #7631

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
parent
e7a6a9e47e
commit
bfe97d896d
2 changed files with 4 additions and 0 deletions
|
|
@ -60,6 +60,9 @@ class Docx(DocxParser):
|
||||||
except InvalidImageStreamError:
|
except InvalidImageStreamError:
|
||||||
logging.info("The recognized image stream appears to be corrupted. Skipping image.")
|
logging.info("The recognized image stream appears to be corrupted. Skipping image.")
|
||||||
return None
|
return None
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
logging.info("The recognized image stream appears to be corrupted. Skipping image.")
|
||||||
|
return None
|
||||||
try:
|
try:
|
||||||
image = Image.open(BytesIO(image_blob)).convert('RGB')
|
image = Image.open(BytesIO(image_blob)).convert('RGB')
|
||||||
return image
|
return image
|
||||||
|
|
|
||||||
|
|
@ -44,6 +44,7 @@ def chunks_format(reference):
|
||||||
"similarity": chunk.get("similarity"),
|
"similarity": chunk.get("similarity"),
|
||||||
"vector_similarity": chunk.get("vector_similarity"),
|
"vector_similarity": chunk.get("vector_similarity"),
|
||||||
"term_similarity": chunk.get("term_similarity"),
|
"term_similarity": chunk.get("term_similarity"),
|
||||||
|
"doc_type": chunk.get("doc_type_kwd"),
|
||||||
}
|
}
|
||||||
for chunk in reference.get("chunks", [])
|
for chunk in reference.get("chunks", [])
|
||||||
]
|
]
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue