fix: Fix issue with path (#1337)
<!-- .github/pull_request_template.md --> ## Description <!-- Provide a clear description of the changes in this PR --> ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
commit
70084cf620
1 changed file with 23 additions and 0 deletions
|
|
@ -1,11 +1,15 @@
|
|||
import os
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
from typing import Union, BinaryIO, Any
|
||||
|
||||
from cognee.modules.ingestion.exceptions import IngestionError
|
||||
from cognee.modules.ingestion import save_data_to_file
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
class SaveDataSettings(BaseSettings):
|
||||
accept_local_file_path: bool = True
|
||||
|
|
@ -30,6 +34,16 @@ async def save_data_item_to_storage(data_item: Union[BinaryIO, str, Any]) -> str
|
|||
if isinstance(data_item, str):
|
||||
parsed_url = urlparse(data_item)
|
||||
|
||||
try:
|
||||
# In case the data item is a string holding a relative path, transform it to an absolute path and check
|
||||
# if the file exists
|
||||
abs_path = (Path.cwd() / Path(data_item)).resolve()
|
||||
abs_path.is_file()
|
||||
except (OSError, ValueError):
|
||||
# In case file path is too long it's most likely not a relative path
|
||||
logger.debug(f"Data item was too long to be a possible file path: {abs_path}")
|
||||
abs_path = Path("")
|
||||
|
||||
# data is s3 file path
|
||||
if parsed_url.scheme == "s3":
|
||||
return data_item
|
||||
|
|
@ -56,6 +70,15 @@ async def save_data_item_to_storage(data_item: Union[BinaryIO, str, Any]) -> str
|
|||
return file_path
|
||||
else:
|
||||
raise IngestionError(message="Local files are not accepted.")
|
||||
# Data is a relative file path
|
||||
elif abs_path.is_file():
|
||||
if settings.accept_local_file_path:
|
||||
# Normalize path separators before creating file URL
|
||||
normalized_path = os.path.normpath(abs_path)
|
||||
# Use forward slashes in file URLs for consistency
|
||||
url_path = normalized_path.replace(os.sep, "/")
|
||||
file_path = "file://" + url_path
|
||||
return file_path
|
||||
|
||||
# data is text, save it to data storage and return the file path
|
||||
return await save_data_to_file(data_item)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue