fix: Fix issue with path (#1337)

<!-- .github/pull_request_template.md -->

## Description
<!-- Provide a clear description of the changes in this PR -->

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
Vasilije 2025-09-11 05:43:50 -07:00 committed by GitHub
commit 70084cf620
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -1,11 +1,15 @@
import os import os
from pathlib import Path
from urllib.parse import urlparse from urllib.parse import urlparse
from typing import Union, BinaryIO, Any from typing import Union, BinaryIO, Any
from cognee.modules.ingestion.exceptions import IngestionError from cognee.modules.ingestion.exceptions import IngestionError
from cognee.modules.ingestion import save_data_to_file from cognee.modules.ingestion import save_data_to_file
from cognee.shared.logging_utils import get_logger
from pydantic_settings import BaseSettings, SettingsConfigDict from pydantic_settings import BaseSettings, SettingsConfigDict
logger = get_logger()
class SaveDataSettings(BaseSettings): class SaveDataSettings(BaseSettings):
accept_local_file_path: bool = True accept_local_file_path: bool = True
@ -30,6 +34,16 @@ async def save_data_item_to_storage(data_item: Union[BinaryIO, str, Any]) -> str
if isinstance(data_item, str): if isinstance(data_item, str):
parsed_url = urlparse(data_item) parsed_url = urlparse(data_item)
try:
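# If the data item is a string containing a relative path, resolve it against the current working
# directory and probe it with is_file(). The result of that probe is discarded here; the call is made
# so that an unusable path raises inside this try block (the actual file check happens further down).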
abs_path = (Path.cwd() / Path(data_item)).resolve()
abs_path.is_file()
except (OSError, ValueError):
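# If the string cannot be handled as a path (e.g. it is too long), it is most likely not a relative path
# NOTE(review): if the exception is raised before abs_path is assigned, the debug message below would
# itself fail on the unbound name — confirm, and consider logging data_item instead.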
logger.debug(f"Data item was too long to be a possible file path: {abs_path}")
abs_path = Path("")
# data is s3 file path # data is s3 file path
if parsed_url.scheme == "s3": if parsed_url.scheme == "s3":
return data_item return data_item
@ -56,6 +70,15 @@ async def save_data_item_to_storage(data_item: Union[BinaryIO, str, Any]) -> str
return file_path return file_path
else: else:
raise IngestionError(message="Local files are not accepted.") raise IngestionError(message="Local files are not accepted.")
# Data is a relative file path
elif abs_path.is_file():
if settings.accept_local_file_path:
# Normalize path separators before creating file URL
normalized_path = os.path.normpath(abs_path)
# Use forward slashes in file URLs for consistency
url_path = normalized_path.replace(os.sep, "/")
file_path = "file://" + url_path
return file_path
# data is text, save it to data storage and return the file path # data is text, save it to data storage and return the file path
return await save_data_to_file(data_item) return await save_data_to_file(data_item)