fix: Broaden PDF exception handling to catch all exceptions (not only PdfReadError) and log the error

This commit is contained in:
hajdul88 2025-07-03 07:54:19 +02:00
parent 278ede9dbe
commit 6a7f5be561

View file

@ -16,8 +16,10 @@ class PdfDocument(Document):
logger.info(f"Reading PDF:{self.raw_data_location}")
try:
file = PdfReader(stream, strict=False)
except PdfReadError:
logger.warning(f"PyPDF couldnt open PDF—skipping: {self.raw_data_location}")
except Exception as e:
logger.warning(
f"PyPDF couldnt open PDF—skipping: {self.raw_data_location} with error: {e}"
)
return
def get_text():
@ -25,8 +27,10 @@ class PdfDocument(Document):
for page in file.pages:
page_text = page.extract_text()
yield page_text
except PdfReadError:
logger.warning(f"PyPDF couldnt open PDF—skipping: {self.raw_data_location}")
except Exception as e:
logger.warning(
f"PyPDF couldnt open PDF—skipping: {self.raw_data_location} with error: {e}"
)
return
chunker = chunker_cls(self, get_text=get_text, max_chunk_size=max_chunk_size)