fix: fix S3 access in the get_raw_data endpoint

This commit is contained in:
hajdul88 2025-12-17 14:47:10 +01:00
parent 8340c69f85
commit b975161d68

View file

@ -7,7 +7,7 @@ from fastapi import status
from fastapi import APIRouter
from fastapi.encoders import jsonable_encoder
from fastapi import HTTPException, Query, Depends
from fastapi.responses import JSONResponse, FileResponse
from fastapi.responses import JSONResponse, FileResponse, StreamingResponse
from urllib.parse import urlparse
from pathlib import Path
@ -482,12 +482,32 @@ def get_datasets_router() -> APIRouter:
if raw_location.startswith("file:"):
raw_location = urlparse(raw_location).path
if raw_location.startswith("s3://"):
from cognee.infrastructure.files.utils.open_data_file import open_data_file
from cognee.infrastructure.utils.run_async import run_async
parsed = urlparse(raw_location)
download_name = Path(parsed.path).name or data.name
media_type = data.mime_type or "application/octet-stream"
async def file_iterator(chunk_size: int = 1024 * 1024):
    """Asynchronously yield the contents of ``raw_location`` piece by piece.

    ``raw_location`` is not a parameter; it is taken from the enclosing
    scope. The location is opened in binary read mode through
    ``open_data_file`` and stays open for as long as the generator is
    being consumed.

    Args:
        chunk_size: Size argument passed to each ``read`` call.
            Defaults to 1024 * 1024 (1 MiB).

    Yields:
        Each non-empty result of ``read``, in order. Iteration ends at the
        first falsy (empty) read result.
    """
    async with open_data_file(raw_location, mode="rb") as stream:
        # NOTE(review): the read goes through ``run_async`` rather than being
        # called directly; presumably this keeps a blocking read off the
        # event loop -- confirm against the helper's implementation.
        while block := await run_async(stream.read, chunk_size):
            yield block
return StreamingResponse(
file_iterator(),
media_type=media_type,
headers={"Content-Disposition": f'attachment; filename="{download_name}"'},
)
path = Path(raw_location)
if not path.is_file():
raise DataNotFoundError(
message=f"Raw file not found on disk for data ({data_id})."
)
raise DataNotFoundError(message=f"Raw file not found on disk for data ({data_id}).")
return FileResponse(path=path)