refactor: Add error handling to hash util

Added error handling around file reads in the hash util

Refactor COG-505
Igor Ilic 2024-12-05 20:54:55 +01:00
parent e80377b729
commit 1e098ae70d
3 changed files with 36 additions and 11 deletions

cognee/shared/exceptions/__init__.py

@@ -0,0 +1,9 @@
+"""
+Custom exceptions for the Cognee API.
+
+This module defines a set of exceptions for handling various shared utility errors.
+"""
+
+from .exceptions import (
+    IngestionError,
+)
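
Re-exporting from the package `__init__.py` keeps the module layout private and lets callers use the shorter package-level import, which is exactly what the utils change further down relies on:

from cognee.shared.exceptions import IngestionError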

cognee/shared/exceptions/exceptions.py

@@ -0,0 +1,11 @@
+from cognee.exceptions import CogneeApiError
+from fastapi import status
+
+class IngestionError(CogneeApiError):
+    def __init__(
+        self,
+        message: str = "Failed to load data.",
+        name: str = "IngestionError",
+        status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+    ):
+        super().__init__(message, name, status_code)
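
A minimal sketch of raising and catching the new exception; the call site below is hypothetical, and only the (message, name, status_code) constructor comes from this commit:

from cognee.shared.exceptions import IngestionError

try:
    raise IngestionError(message="Failed to load data from example.txt")
except IngestionError as error:
    # The constructor forwards message, name, and status_code (422) to CogneeApiError.
    print(error)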

cognee/shared/utils.py

@@ -19,6 +19,8 @@ from cognee.infrastructure.databases.graph import get_graph_engine
 from uuid import uuid4
 import pathlib
 
+from cognee.shared.exceptions import IngestionError
+
 # Analytics Proxy Url, currently hosted by Vercel
 proxy_url = "https://test.prometh.ai"

@@ -76,23 +78,26 @@ def num_tokens_from_string(string: str, encoding_name: str) -> int:
 def get_file_content_hash(file_obj: Union[str, BinaryIO]) -> str:
     h = hashlib.md5()
 
-    if isinstance(file_obj, str):
-        with open(file_obj, 'rb') as file:
+    try:
+        if isinstance(file_obj, str):
+            with open(file_obj, 'rb') as file:
+                while True:
+                    # Reading is buffered, so we can read smaller chunks.
+                    chunk = file.read(h.block_size)
+                    if not chunk:
+                        break
+                    h.update(chunk)
+        else:
             while True:
                 # Reading is buffered, so we can read smaller chunks.
-                chunk = file.read(h.block_size)
+                chunk = file_obj.read(h.block_size)
                 if not chunk:
                     break
                 h.update(chunk)
-    else:
-        while True:
-            # Reading is buffered, so we can read smaller chunks.
-            chunk = file_obj.read(h.block_size)
-            if not chunk:
-                break
-            h.update(chunk)
 
-    return h.hexdigest()
+        return h.hexdigest()
+    except IOError as e:
+        raise IngestionError(message=f"Failed to load data from {file_obj}: {e}")
 
 def trim_text_to_max_tokens(text: str, max_tokens: int, encoding_name: str) -> str:
     """