fix: fix S3 access in the get_raw_data endpoint
This commit is contained in:
parent
8340c69f85
commit
b975161d68
1 changed file with 24 additions and 4 deletions
|
|
@@ -7,7 +7,7 @@ from fastapi import status
|
||||||
from fastapi import APIRouter
|
from fastapi import APIRouter
|
||||||
from fastapi.encoders import jsonable_encoder
|
from fastapi.encoders import jsonable_encoder
|
||||||
from fastapi import HTTPException, Query, Depends
|
from fastapi import HTTPException, Query, Depends
|
||||||
from fastapi.responses import JSONResponse, FileResponse
|
from fastapi.responses import JSONResponse, FileResponse, StreamingResponse
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
@@ -482,12 +482,32 @@ def get_datasets_router() -> APIRouter:
|
||||||
if raw_location.startswith("file:"):
|
if raw_location.startswith("file:"):
|
||||||
raw_location = urlparse(raw_location).path
|
raw_location = urlparse(raw_location).path
|
||||||
|
|
||||||
|
if raw_location.startswith("s3://"):
|
||||||
|
from cognee.infrastructure.files.utils.open_data_file import open_data_file
|
||||||
|
from cognee.infrastructure.utils.run_async import run_async
|
||||||
|
|
||||||
|
parsed = urlparse(raw_location)
|
||||||
|
download_name = Path(parsed.path).name or data.name
|
||||||
|
media_type = data.mime_type or "application/octet-stream"
|
||||||
|
|
||||||
|
async def file_iterator(chunk_size: int = 1024 * 1024):
|
||||||
|
async with open_data_file(raw_location, mode="rb") as file:
|
||||||
|
while True:
|
||||||
|
chunk = await run_async(file.read, chunk_size)
|
||||||
|
if not chunk:
|
||||||
|
break
|
||||||
|
yield chunk
|
||||||
|
|
||||||
|
return StreamingResponse(
|
||||||
|
file_iterator(),
|
||||||
|
media_type=media_type,
|
||||||
|
headers={"Content-Disposition": f'attachment; filename="{download_name}"'},
|
||||||
|
)
|
||||||
|
|
||||||
path = Path(raw_location)
|
path = Path(raw_location)
|
||||||
|
|
||||||
if not path.is_file():
|
if not path.is_file():
|
||||||
raise DataNotFoundError(
|
raise DataNotFoundError(message=f"Raw file not found on disk for data ({data_id}).")
|
||||||
message=f"Raw file not found on disk for data ({data_id})."
|
|
||||||
)
|
|
||||||
|
|
||||||
return FileResponse(path=path)
|
return FileResponse(path=path)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue