diff --git a/cognee/tasks/ingestion/save_data_item_to_storage.py b/cognee/tasks/ingestion/save_data_item_to_storage.py index 814e908b1..4eacf4eff 100644 --- a/cognee/tasks/ingestion/save_data_item_to_storage.py +++ b/cognee/tasks/ingestion/save_data_item_to_storage.py @@ -1,11 +1,15 @@ import os +from pathlib import Path from urllib.parse import urlparse from typing import Union, BinaryIO, Any from cognee.modules.ingestion.exceptions import IngestionError from cognee.modules.ingestion import save_data_to_file +from cognee.shared.logging_utils import get_logger from pydantic_settings import BaseSettings, SettingsConfigDict +logger = get_logger() + class SaveDataSettings(BaseSettings): accept_local_file_path: bool = True @@ -30,6 +34,16 @@ async def save_data_item_to_storage(data_item: Union[BinaryIO, str, Any]) -> str if isinstance(data_item, str): parsed_url = urlparse(data_item) + try: + # In case data item is a string with a relative path transform data item to absolute path and check + # if the file exists + abs_path = (Path.cwd() / Path(data_item)).resolve() + abs_path.is_file() + except (OSError, ValueError): + # In case file path is too long it's most likely not a relative path + logger.debug(f"Data item was too long to be a possible file path: {abs_path}") + abs_path = Path("") + # data is s3 file path if parsed_url.scheme == "s3": return data_item @@ -56,6 +70,15 @@ async def save_data_item_to_storage(data_item: Union[BinaryIO, str, Any]) -> str return file_path else: raise IngestionError(message="Local files are not accepted.") + # Data is a relative file path + elif abs_path.is_file(): + if settings.accept_local_file_path: + # Normalize path separators before creating file URL + normalized_path = os.path.normpath(abs_path) + # Use forward slashes in file URLs for consistency + url_path = normalized_path.replace(os.sep, "/") + file_path = "file://" + url_path + return file_path # data is text, save it to data storage and return the file path return await save_data_to_file(data_item)