fix: fixes s3 access in get_raw_data endpoint
This commit is contained in:
parent
8340c69f85
commit
b975161d68
1 changed files with 24 additions and 4 deletions
|
|
@ -7,7 +7,7 @@ from fastapi import status
|
|||
from fastapi import APIRouter
|
||||
from fastapi.encoders import jsonable_encoder
|
||||
from fastapi import HTTPException, Query, Depends
|
||||
from fastapi.responses import JSONResponse, FileResponse
|
||||
from fastapi.responses import JSONResponse, FileResponse, StreamingResponse
|
||||
from urllib.parse import urlparse
|
||||
from pathlib import Path
|
||||
|
||||
|
|
@ -482,12 +482,32 @@ def get_datasets_router() -> APIRouter:
|
|||
if raw_location.startswith("file:"):
|
||||
raw_location = urlparse(raw_location).path
|
||||
|
||||
if raw_location.startswith("s3://"):
|
||||
from cognee.infrastructure.files.utils.open_data_file import open_data_file
|
||||
from cognee.infrastructure.utils.run_async import run_async
|
||||
|
||||
parsed = urlparse(raw_location)
|
||||
download_name = Path(parsed.path).name or data.name
|
||||
media_type = data.mime_type or "application/octet-stream"
|
||||
|
||||
async def file_iterator(chunk_size: int = 1024 * 1024):
|
||||
async with open_data_file(raw_location, mode="rb") as file:
|
||||
while True:
|
||||
chunk = await run_async(file.read, chunk_size)
|
||||
if not chunk:
|
||||
break
|
||||
yield chunk
|
||||
|
||||
return StreamingResponse(
|
||||
file_iterator(),
|
||||
media_type=media_type,
|
||||
headers={"Content-Disposition": f'attachment; filename="{download_name}"'},
|
||||
)
|
||||
|
||||
path = Path(raw_location)
|
||||
|
||||
if not path.is_file():
|
||||
raise DataNotFoundError(
|
||||
message=f"Raw file not found on disk for data ({data_id})."
|
||||
)
|
||||
raise DataNotFoundError(message=f"Raw file not found on disk for data ({data_id}).")
|
||||
|
||||
return FileResponse(path=path)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue