diff --git a/cognee/api/v1/datasets/routers/get_datasets_router.py b/cognee/api/v1/datasets/routers/get_datasets_router.py index 8e4eb3b4e..e2450c095 100644 --- a/cognee/api/v1/datasets/routers/get_datasets_router.py +++ b/cognee/api/v1/datasets/routers/get_datasets_router.py @@ -7,7 +7,7 @@ from fastapi import status from fastapi import APIRouter from fastapi.encoders import jsonable_encoder from fastapi import HTTPException, Query, Depends -from fastapi.responses import JSONResponse, FileResponse +from fastapi.responses import JSONResponse, FileResponse, StreamingResponse from urllib.parse import urlparse from pathlib import Path @@ -482,12 +482,32 @@ def get_datasets_router() -> APIRouter: if raw_location.startswith("file:"): raw_location = urlparse(raw_location).path + if raw_location.startswith("s3://"): + from cognee.infrastructure.files.utils.open_data_file import open_data_file + from cognee.infrastructure.utils.run_async import run_async + + parsed = urlparse(raw_location) + download_name = Path(parsed.path).name or data.name + media_type = data.mime_type or "application/octet-stream" + + async def file_iterator(chunk_size: int = 1024 * 1024): + async with open_data_file(raw_location, mode="rb") as file: + while True: + chunk = await run_async(file.read, chunk_size) + if not chunk: + break + yield chunk + + return StreamingResponse( + file_iterator(), + media_type=media_type, + headers={"Content-Disposition": f'attachment; filename="{download_name}"'}, + ) + path = Path(raw_location) if not path.is_file(): - raise DataNotFoundError( - message=f"Raw file not found on disk for data ({data_id})." - ) + raise DataNotFoundError(message=f"Raw file not found on disk for data ({data_id}).") return FileResponse(path=path)