From fc8a6d704ef78484d170673928f301b6fde0bb6f Mon Sep 17 00:00:00 2001 From: vasilije Date: Sat, 6 Sep 2025 04:08:45 -0700 Subject: [PATCH 1/6] added issue with path fixes --- cognee/tasks/ingestion/data_item_to_text_file.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/cognee/tasks/ingestion/data_item_to_text_file.py b/cognee/tasks/ingestion/data_item_to_text_file.py index 9fcafca57..573fcd9e9 100644 --- a/cognee/tasks/ingestion/data_item_to_text_file.py +++ b/cognee/tasks/ingestion/data_item_to_text_file.py @@ -75,5 +75,16 @@ async def data_item_to_text_file( else: raise IngestionError(message="Local files are not accepted.") + # data is a relative file path (e.g., "file.json", "data/file.txt") + else: + # This is a relative file path - check if it exists and can be loaded + if settings.accept_local_file_path: + loader = get_loader_engine() + return await loader.load_file(data_item_path, preferred_loaders), loader.get_loader( + data_item_path, preferred_loaders + ) + else: + raise IngestionError(message="Local files are not accepted.") + # data is not a supported type raise IngestionError(message=f"Data type not supported: {type(data_item_path)}") From 5486fe7eb42244f96ab7ccb068f723178a04aefc Mon Sep 17 00:00:00 2001 From: vasilije Date: Sun, 7 Sep 2025 15:21:08 -0700 Subject: [PATCH 2/6] added fix --- cognee/tasks/ingestion/data_item_to_text_file.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cognee/tasks/ingestion/data_item_to_text_file.py b/cognee/tasks/ingestion/data_item_to_text_file.py index 573fcd9e9..d56509a28 100644 --- a/cognee/tasks/ingestion/data_item_to_text_file.py +++ b/cognee/tasks/ingestion/data_item_to_text_file.py @@ -76,7 +76,7 @@ async def data_item_to_text_file( raise IngestionError(message="Local files are not accepted.") # data is a relative file path (e.g., "file.json", "data/file.txt") - else: + elif parsed_url.scheme == "" and not Path(data_item_path).is_absolute(): # This is a relative file path - check if it exists and can be loaded if settings.accept_local_file_path: loader = get_loader_engine() @@ -85,6 +85,10 @@ async def data_item_to_text_file( ) else: raise IngestionError(message="Local files are not accepted.") + + # data has unsupported URL scheme or format + else: + raise IngestionError(message=f"Unsupported data source format: {data_item_path}") # data is not a supported type raise IngestionError(message=f"Data type not supported: {type(data_item_path)}") From f46fc1f071338646ffc7a2b9f652757daa96313e Mon Sep 17 00:00:00 2001 From: vasilije Date: Sun, 7 Sep 2025 15:30:29 -0700 Subject: [PATCH 3/6] formatting --- cognee/tasks/ingestion/data_item_to_text_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/tasks/ingestion/data_item_to_text_file.py b/cognee/tasks/ingestion/data_item_to_text_file.py index d56509a28..c231cdb9e 100644 --- a/cognee/tasks/ingestion/data_item_to_text_file.py +++ b/cognee/tasks/ingestion/data_item_to_text_file.py @@ -85,7 +85,7 @@ async def data_item_to_text_file( ) else: raise IngestionError(message="Local files are not accepted.") - + # data has unsupported URL scheme or format else: raise IngestionError(message=f"Unsupported data source format: {data_item_path}") From 15bedfc1a772d1b6835007f4ec33a96a240a93bd Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 10 Sep 2025 13:01:00 +0200 Subject: [PATCH 4/6] fix: Resolve issue with relative path on cognee add --- cognee/tasks/ingestion/data_item_to_text_file.py | 11 ----------- cognee/tasks/ingestion/save_data_item_to_storage.py | 13 +++++++++++++ 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/cognee/tasks/ingestion/data_item_to_text_file.py b/cognee/tasks/ingestion/data_item_to_text_file.py index c231cdb9e..5dbcfc93f 100644 --- a/cognee/tasks/ingestion/data_item_to_text_file.py +++ b/cognee/tasks/ingestion/data_item_to_text_file.py @@ -75,17 +75,6 @@ async def data_item_to_text_file( else: raise IngestionError(message="Local files are not accepted.") - # data is a relative file path (e.g., "file.json", "data/file.txt") - elif parsed_url.scheme == "" and not Path(data_item_path).is_absolute(): - # This is a relative file path - check if it exists and can be loaded - if settings.accept_local_file_path: - loader = get_loader_engine() - return await loader.load_file(data_item_path, preferred_loaders), loader.get_loader( - data_item_path, preferred_loaders - ) - else: - raise IngestionError(message="Local files are not accepted.") - # data has unsupported URL scheme or format else: raise IngestionError(message=f"Unsupported data source format: {data_item_path}") diff --git a/cognee/tasks/ingestion/save_data_item_to_storage.py b/cognee/tasks/ingestion/save_data_item_to_storage.py index 814e908b1..58ef1c83a 100644 --- a/cognee/tasks/ingestion/save_data_item_to_storage.py +++ b/cognee/tasks/ingestion/save_data_item_to_storage.py @@ -1,4 +1,5 @@ import os +from pathlib import Path from urllib.parse import urlparse from typing import Union, BinaryIO, Any @@ -29,6 +30,9 @@ async def save_data_item_to_storage(data_item: Union[BinaryIO, str, Any]) -> str if isinstance(data_item, str): parsed_url = urlparse(data_item) + # In case data item is a string with a relative path transform data item to absolute path and check + # if the file exists + abs_path = (Path.cwd() / Path(data_item)).resolve() # data is s3 file path if parsed_url.scheme == "s3": @@ -56,6 +60,15 @@ async def save_data_item_to_storage(data_item: Union[BinaryIO, str, Any]) -> str return file_path else: raise IngestionError(message="Local files are not accepted.") + # Data is a relative file path + elif abs_path.is_file(): + if settings.accept_local_file_path: + # Normalize path separators before creating file URL + normalized_path = os.path.normpath(abs_path) + # Use forward slashes in file URLs for consistency + url_path = normalized_path.replace(os.sep, "/") + file_path = "file://" + url_path + return file_path # data is text, save it to data storage and return the file path return await save_data_to_file(data_item) From e975cde3e726941d5419183d5daa837ed7c620c7 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 10 Sep 2025 13:34:16 +0200 Subject: [PATCH 5/6] fix: Resolve issue with path being too long when trying to determine if input is a relative path --- .../tasks/ingestion/save_data_item_to_storage.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/cognee/tasks/ingestion/save_data_item_to_storage.py b/cognee/tasks/ingestion/save_data_item_to_storage.py index 58ef1c83a..4eacf4eff 100644 --- a/cognee/tasks/ingestion/save_data_item_to_storage.py +++ b/cognee/tasks/ingestion/save_data_item_to_storage.py @@ -5,8 +5,11 @@ from typing import Union, BinaryIO, Any from cognee.modules.ingestion.exceptions import IngestionError from cognee.modules.ingestion import save_data_to_file +from cognee.shared.logging_utils import get_logger from pydantic_settings import BaseSettings, SettingsConfigDict +logger = get_logger() + class SaveDataSettings(BaseSettings): accept_local_file_path: bool = True @@ -30,9 +33,16 @@ async def save_data_item_to_storage(data_item: Union[BinaryIO, str, Any]) -> str if isinstance(data_item, str): parsed_url = urlparse(data_item) - # In case data item is a string with a relative path transform data item to absolute path and check - # if the file exists - abs_path = (Path.cwd() / Path(data_item)).resolve() + + try: + # In case data item is a string with a relative path transform data item to absolute path and check + # if the file exists + abs_path = (Path.cwd() / Path(data_item)).resolve() + abs_path.is_file() + except (OSError, ValueError): + # In case file path is too long it's most likely not a relative path + logger.debug(f"Data item was too long to be a possible file path: {abs_path}") + abs_path = Path("") # data is s3 file path if parsed_url.scheme == "s3": From fd6ddbf90a24dbb4459dba5f1867cd53186337cd Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 10 Sep 2025 13:38:23 +0200 Subject: [PATCH 6/6] refactor: Remove unused else statement --- cognee/tasks/ingestion/data_item_to_text_file.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/cognee/tasks/ingestion/data_item_to_text_file.py b/cognee/tasks/ingestion/data_item_to_text_file.py index 5dbcfc93f..9fcafca57 100644 --- a/cognee/tasks/ingestion/data_item_to_text_file.py +++ b/cognee/tasks/ingestion/data_item_to_text_file.py @@ -75,9 +75,5 @@ async def data_item_to_text_file( else: raise IngestionError(message="Local files are not accepted.") - # data has unsupported URL scheme or format - else: - raise IngestionError(message=f"Unsupported data source format: {data_item_path}") - # data is not a supported type raise IngestionError(message=f"Data type not supported: {type(data_item_path)}")