refactor: Add error handling to hash util
Added error handling to the file-reading logic in the hash utility (Refactor COG-505).
This commit is contained in:
parent
e80377b729
commit
1e098ae70d
3 changed files with 36 additions and 11 deletions
9
cognee/shared/exceptions/__init__.py
Normal file
9
cognee/shared/exceptions/__init__.py
Normal file
|
|
@ -0,0 +1,9 @@
|
||||||
|
"""
|
||||||
|
Custom exceptions for the Cognee API.
|
||||||
|
|
||||||
|
This module defines a set of exceptions for handling various shared utility errors
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .exceptions import (
|
||||||
|
IngestionError,
|
||||||
|
)
|
||||||
11
cognee/shared/exceptions/exceptions.py
Normal file
11
cognee/shared/exceptions/exceptions.py
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
from cognee.exceptions import CogneeApiError
|
||||||
|
from fastapi import status
|
||||||
|
|
||||||
|
class IngestionError(CogneeApiError):
    """Signals that input data could not be loaded or read during ingestion.

    Defaults to HTTP 422 (Unprocessable Entity) so API callers see the
    failure as a problem with the submitted data.
    """

    def __init__(self,
                 message: str = "Failed to load data.",
                 name: str = "IngestionError",
                 status_code=status.HTTP_422_UNPROCESSABLE_ENTITY):
        # Delegate storage of message/name/status to the shared API error base.
        super().__init__(message, name, status_code)
|
||||||
|
|
@ -19,6 +19,8 @@ from cognee.infrastructure.databases.graph import get_graph_engine
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
import pathlib
|
import pathlib
|
||||||
|
|
||||||
|
from cognee.shared.exceptions import IngestionError
|
||||||
|
|
||||||
# Analytics Proxy Url, currently hosted by Vercel
|
# Analytics Proxy Url, currently hosted by Vercel
|
||||||
proxy_url = "https://test.prometh.ai"
|
proxy_url = "https://test.prometh.ai"
|
||||||
|
|
||||||
|
|
@ -76,23 +78,26 @@ def num_tokens_from_string(string: str, encoding_name: str) -> int:
|
||||||
def get_file_content_hash(file_obj: Union[str, BinaryIO]) -> str:
    """Compute the MD5 hex digest of a file's content.

    Args:
        file_obj: Either a filesystem path (str) or an already-open binary
            file-like object. A stream is consumed from its current position.

    Returns:
        The MD5 digest of the content as a lowercase hex string.

    Raises:
        IngestionError: If the file cannot be opened or read (wraps IOError).
    """
    h = hashlib.md5()

    try:
        if isinstance(file_obj, str):
            # Path given: open it ourselves and read in hash-block-sized
            # chunks so large files are never loaded into memory at once.
            with open(file_obj, 'rb') as file:
                while True:
                    chunk = file.read(h.block_size)
                    if not chunk:
                        break
                    h.update(chunk)
        else:
            # Caller supplied an open binary stream; same chunked read,
            # but the caller retains ownership (we do not close it).
            while True:
                chunk = file_obj.read(h.block_size)
                if not chunk:
                    break
                h.update(chunk)

        return h.hexdigest()
    except IOError as e:
        # Bug fix: the original interpolated `file`, which is unbound when
        # the stream branch raises (NameError instead of IngestionError),
        # and is a closed file object rather than the path otherwise.
        # `file_obj` is always defined. Chain with `from e` to keep the
        # original traceback for debugging.
        raise IngestionError(message=f"Failed to load data from {file_obj}: {e}") from e
|
||||||
|
|
||||||
def trim_text_to_max_tokens(text: str, max_tokens: int, encoding_name: str) -> str:
|
def trim_text_to_max_tokens(text: str, max_tokens: int, encoding_name: str) -> str:
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue