fix: Resolve issue with adding specific files from S3

This commit is contained in:
Igor Ilic 2025-07-10 23:03:48 +02:00
parent 67b61ff964
commit dc38ff3838
2 changed files with 6 additions and 1 deletions

View file

@ -93,13 +93,15 @@ async def run_tasks(
if not isinstance(data, list):
data = [data]
if incremental_loading:
data = await resolve_data_directories(data)
# TODO: Convert to an async gather task instead of a for loop (first make sure it works; there were some issues when async gathering datasets)
for data_item in data:
# If incremental_loading is set to True, don't process documents already processed by the pipeline
if incremental_loading:
# If data is being added to Cognee for the first time calculate the id of the data
if not isinstance(data_item, Data):
data = await resolve_data_directories(data)
file_path = await save_data_item_to_storage(data_item, dataset.name)
# Ingest data and add metadata
with open_data_file(file_path, s3fs=fs) as file:

View file

@ -40,6 +40,9 @@ async def resolve_data_directories(
if include_subdirectories:
base_path = item if item.endswith("/") else item + "/"
s3_keys = fs.glob(base_path + "**")
# If the path is not a directory, attempt to add the item directly
if not s3_keys:
s3_keys = fs.ls(item)
else:
s3_keys = fs.ls(item)
# Filter out keys that represent directories using fs.isdir