Merge branch 'dev' into feature/cog-3532-empower-test_search-db-retrievers-tests-reorg-4
This commit is contained in:
commit
ef51dcfb7a
4 changed files with 195 additions and 23 deletions
|
|
@ -7,7 +7,9 @@ from fastapi import status
|
|||
from fastapi import APIRouter
|
||||
from fastapi.encoders import jsonable_encoder
|
||||
from fastapi import HTTPException, Query, Depends
|
||||
from fastapi.responses import JSONResponse, FileResponse
|
||||
from fastapi.responses import JSONResponse, FileResponse, StreamingResponse
|
||||
from urllib.parse import urlparse
|
||||
from pathlib import Path
|
||||
|
||||
from cognee.api.DTO import InDTO, OutDTO
|
||||
from cognee.infrastructure.databases.relational import get_relational_engine
|
||||
|
|
@ -476,6 +478,40 @@ def get_datasets_router() -> APIRouter:
|
|||
message=f"Data ({data_id}) not found in dataset ({dataset_id})."
|
||||
)
|
||||
|
||||
return data.raw_data_location
|
||||
raw_location = data.raw_data_location
|
||||
|
||||
if raw_location.startswith("file://"):
|
||||
from cognee.infrastructure.files.utils.get_data_file_path import get_data_file_path
|
||||
|
||||
raw_location = get_data_file_path(raw_location)
|
||||
|
||||
if raw_location.startswith("s3://"):
|
||||
from cognee.infrastructure.files.utils.open_data_file import open_data_file
|
||||
from cognee.infrastructure.utils.run_async import run_async
|
||||
|
||||
parsed = urlparse(raw_location)
|
||||
download_name = Path(parsed.path).name or data.name
|
||||
media_type = data.mime_type or "application/octet-stream"
|
||||
|
||||
async def file_iterator(chunk_size: int = 1024 * 1024):
|
||||
async with open_data_file(raw_location, mode="rb") as file:
|
||||
while True:
|
||||
chunk = await run_async(file.read, chunk_size)
|
||||
if not chunk:
|
||||
break
|
||||
yield chunk
|
||||
|
||||
return StreamingResponse(
|
||||
file_iterator(),
|
||||
media_type=media_type,
|
||||
headers={"Content-Disposition": f'attachment; filename="{download_name}"'},
|
||||
)
|
||||
|
||||
path = Path(raw_location)
|
||||
|
||||
if not path.is_file():
|
||||
raise DataNotFoundError(message=f"Raw file not found on disk for data ({data_id}).")
|
||||
|
||||
return FileResponse(path=path)
|
||||
|
||||
return router
|
||||
|
|
|
|||
136
cognee/tests/unit/api/test_get_raw_data_endpoint.py
Normal file
136
cognee/tests/unit/api/test_get_raw_data_endpoint.py
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
import io
|
||||
import uuid
|
||||
from contextlib import asynccontextmanager
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock
|
||||
|
||||
import pytest
|
||||
from fastapi import FastAPI
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from cognee.modules.users.methods import get_authenticated_user
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def test_client():
    """Yield one TestClient per session for an app that mounts only the datasets router."""
    # Imported lazily so the router module is only loaded when the fixture is requested.
    from cognee.api.v1.datasets.routers.get_datasets_router import get_datasets_router

    application = FastAPI()
    application.include_router(get_datasets_router(), prefix="/api/v1/datasets")

    with TestClient(application) as http_client:
        yield http_client
|
||||
|
||||
|
||||
@pytest.fixture
def client(test_client):
    """
    Yield the shared test client with authentication overridden and telemetry silenced.

    The ``get_authenticated_user`` dependency is replaced with a stub returning a throwaway
    user, and ``send_telemetry`` on the datasets router module is replaced with a no-op.
    Both changes are undone on teardown so they do not leak into tests that use the
    session-scoped ``test_client`` (or the router module) directly.
    """
    import importlib

    async def override_get_authenticated_user():
        return SimpleNamespace(
            id=str(uuid.uuid4()),
            email="default@example.com",
            is_active=True,
            tenant_id=str(uuid.uuid4()),
        )

    datasets_router_module = importlib.import_module(
        "cognee.api.v1.datasets.routers.get_datasets_router"
    )

    # Remember the current attribute (if any) so teardown can put it back.
    missing = object()
    original_send_telemetry = getattr(datasets_router_module, "send_telemetry", missing)
    datasets_router_module.send_telemetry = lambda *args, **kwargs: None

    test_client.app.dependency_overrides[get_authenticated_user] = override_get_authenticated_user
    yield test_client
    test_client.app.dependency_overrides.pop(get_authenticated_user, None)

    # Restore the module to the state it had before this fixture ran.
    if original_send_telemetry is missing:
        del datasets_router_module.send_telemetry
    else:
        datasets_router_module.send_telemetry = original_send_telemetry
|
||||
|
||||
|
||||
def _patch_raw_download_dependencies(
    monkeypatch, *, dataset_id, data_id, raw_data_location, name, mime_type
):
    """
    Stub the dataset/data lookups behind GET /datasets/{dataset_id}/data/{data_id}/raw.

    Each lookup is replaced with an AsyncMock returning lightweight SimpleNamespace
    records, so tests can concentrate on the response type
    (FileResponse vs StreamingResponse).
    """
    import importlib

    def stub(target, attribute, result):
        # Swap the named coroutine for an AsyncMock that resolves to ``result``.
        monkeypatch.setattr(target, attribute, AsyncMock(return_value=result))

    router_module = importlib.import_module(
        "cognee.api.v1.datasets.routers.get_datasets_router"
    )
    stub(router_module, "get_authorized_existing_datasets", [SimpleNamespace(id=dataset_id)])

    import cognee.modules.data.methods as data_methods

    stub(data_methods, "get_dataset_data", [SimpleNamespace(id=data_id)])

    data_record = SimpleNamespace(
        id=data_id,
        raw_data_location=raw_data_location,
        name=name,
        mime_type=mime_type,
    )
    stub(data_methods, "get_data", data_record)
|
||||
|
||||
|
||||
def test_get_raw_data_local_file_downloads_bytes(client, monkeypatch, tmp_path):
    """Downloads bytes from a file:// raw_data_location."""
    dataset_id, data_id = uuid.uuid4(), uuid.uuid4()

    # Write a real file so the endpoint has something on disk to serve.
    expected_bytes = b"hello from disk"
    local_file = tmp_path / "example.txt"
    local_file.write_bytes(expected_bytes)

    _patch_raw_download_dependencies(
        monkeypatch,
        dataset_id=dataset_id,
        data_id=data_id,
        raw_data_location=local_file.as_uri(),
        name="example.txt",
        mime_type="text/plain",
    )

    url = f"/api/v1/datasets/{dataset_id}/data/{data_id}/raw"
    response = client.get(url)

    assert response.status_code == 200
    assert response.content == expected_bytes
|
||||
|
||||
|
||||
def test_get_raw_data_s3_streams_bytes_without_s3_dependency(client, monkeypatch):
    """Streams bytes from an s3:// raw_data_location (mocked)."""
    import importlib

    dataset_id = uuid.uuid4()
    data_id = uuid.uuid4()

    _patch_raw_download_dependencies(
        monkeypatch,
        dataset_id=dataset_id,
        data_id=data_id,
        raw_data_location="s3://bucket/path/to/file.txt",
        name="file.txt",
        mime_type="text/plain",
    )

    # The module and the function it defines share the name ``open_data_file``.
    # ``import pkg.open_data_file as alias`` resolves the last component by attribute
    # lookup on the parent package, so a same-named re-export there would bind the
    # function rather than the module. importlib.import_module always returns the
    # module itself, which is what the stub has to be installed on.
    # NOTE(review): whether the package re-exports the function is not visible here.
    open_data_file_module = importlib.import_module(
        "cognee.infrastructure.files.utils.open_data_file"
    )

    @asynccontextmanager
    async def fake_open_data_file(_file_path: str, mode: str = "rb", **_kwargs):
        # Stand-in for the real opener: hands back an in-memory binary stream.
        assert mode == "rb"
        yield io.BytesIO(b"hello from s3")

    monkeypatch.setattr(open_data_file_module, "open_data_file", fake_open_data_file)

    response = client.get(f"/api/v1/datasets/{dataset_id}/data/{data_id}/raw")
    assert response.status_code == 200
    assert response.content == b"hello from s3"
    assert response.headers.get("content-disposition") == 'attachment; filename="file.txt"'
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
[project]
|
||||
name = "cognee"
|
||||
|
||||
version = "0.5.0"
|
||||
version = "0.5.1.dev0"
|
||||
description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning."
|
||||
authors = [
|
||||
{ name = "Vasilije Markovic" },
|
||||
|
|
|
|||
40
uv.lock
generated
40
uv.lock
generated
|
|
@ -1,5 +1,5 @@
|
|||
version = 1
|
||||
revision = 2
|
||||
revision = 3
|
||||
requires-python = ">=3.10, <3.14"
|
||||
resolution-markers = [
|
||||
"python_full_version >= '3.13' and sys_platform == 'darwin'",
|
||||
|
|
@ -927,7 +927,7 @@ wheels = [
|
|||
|
||||
[[package]]
|
||||
name = "cognee"
|
||||
version = "0.5.0"
|
||||
version = "0.5.1.dev0"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "aiofiles" },
|
||||
|
|
@ -5216,7 +5216,7 @@ name = "nvidia-cudnn-cu12"
|
|||
version = "9.10.2.21"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "nvidia-cublas-cu12" },
|
||||
{ name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
|
||||
]
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" },
|
||||
|
|
@ -5227,7 +5227,7 @@ name = "nvidia-cufft-cu12"
|
|||
version = "11.3.3.83"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "nvidia-nvjitlink-cu12" },
|
||||
{ name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
|
||||
]
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" },
|
||||
|
|
@ -5254,9 +5254,9 @@ name = "nvidia-cusolver-cu12"
|
|||
version = "11.7.3.90"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "nvidia-cublas-cu12" },
|
||||
{ name = "nvidia-cusparse-cu12" },
|
||||
{ name = "nvidia-nvjitlink-cu12" },
|
||||
{ name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
|
||||
{ name = "nvidia-cusparse-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
|
||||
{ name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
|
||||
]
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" },
|
||||
|
|
@ -5267,7 +5267,7 @@ name = "nvidia-cusparse-cu12"
|
|||
version = "12.5.8.93"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "nvidia-nvjitlink-cu12" },
|
||||
{ name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
|
||||
]
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" },
|
||||
|
|
@ -5327,9 +5327,9 @@ name = "ocrmac"
|
|||
version = "1.0.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "click" },
|
||||
{ name = "pillow" },
|
||||
{ name = "pyobjc-framework-vision" },
|
||||
{ name = "click", marker = "sys_platform == 'darwin'" },
|
||||
{ name = "pillow", marker = "sys_platform == 'darwin'" },
|
||||
{ name = "pyobjc-framework-vision", marker = "sys_platform == 'darwin'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/dd/dc/de3e9635774b97d9766f6815bbb3f5ec9bce347115f10d9abbf2733a9316/ocrmac-1.0.0.tar.gz", hash = "sha256:5b299e9030c973d1f60f82db000d6c2e5ff271601878c7db0885e850597d1d2e", size = 1463997, upload-time = "2024-11-07T12:00:00.197Z" }
|
||||
wheels = [
|
||||
|
|
@ -6937,7 +6937,7 @@ name = "pyobjc-framework-cocoa"
|
|||
version = "12.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "pyobjc-core" },
|
||||
{ name = "pyobjc-core", marker = "sys_platform == 'darwin'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/02/a3/16ca9a15e77c061a9250afbae2eae26f2e1579eb8ca9462ae2d2c71e1169/pyobjc_framework_cocoa-12.1.tar.gz", hash = "sha256:5556c87db95711b985d5efdaaf01c917ddd41d148b1e52a0c66b1a2e2c5c1640", size = 2772191, upload-time = "2025-11-14T10:13:02.069Z" }
|
||||
wheels = [
|
||||
|
|
@ -6953,8 +6953,8 @@ name = "pyobjc-framework-coreml"
|
|||
version = "12.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "pyobjc-core" },
|
||||
{ name = "pyobjc-framework-cocoa" },
|
||||
{ name = "pyobjc-core", marker = "sys_platform == 'darwin'" },
|
||||
{ name = "pyobjc-framework-cocoa", marker = "sys_platform == 'darwin'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/30/2d/baa9ea02cbb1c200683cb7273b69b4bee5070e86f2060b77e6a27c2a9d7e/pyobjc_framework_coreml-12.1.tar.gz", hash = "sha256:0d1a4216891a18775c9e0170d908714c18e4f53f9dc79fb0f5263b2aa81609ba", size = 40465, upload-time = "2025-11-14T10:14:02.265Z" }
|
||||
wheels = [
|
||||
|
|
@ -6970,8 +6970,8 @@ name = "pyobjc-framework-quartz"
|
|||
version = "12.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "pyobjc-core" },
|
||||
{ name = "pyobjc-framework-cocoa" },
|
||||
{ name = "pyobjc-core", marker = "sys_platform == 'darwin'" },
|
||||
{ name = "pyobjc-framework-cocoa", marker = "sys_platform == 'darwin'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/94/18/cc59f3d4355c9456fc945eae7fe8797003c4da99212dd531ad1b0de8a0c6/pyobjc_framework_quartz-12.1.tar.gz", hash = "sha256:27f782f3513ac88ec9b6c82d9767eef95a5cf4175ce88a1e5a65875fee799608", size = 3159099, upload-time = "2025-11-14T10:21:24.31Z" }
|
||||
wheels = [
|
||||
|
|
@ -6987,10 +6987,10 @@ name = "pyobjc-framework-vision"
|
|||
version = "12.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "pyobjc-core" },
|
||||
{ name = "pyobjc-framework-cocoa" },
|
||||
{ name = "pyobjc-framework-coreml" },
|
||||
{ name = "pyobjc-framework-quartz" },
|
||||
{ name = "pyobjc-core", marker = "sys_platform == 'darwin'" },
|
||||
{ name = "pyobjc-framework-cocoa", marker = "sys_platform == 'darwin'" },
|
||||
{ name = "pyobjc-framework-coreml", marker = "sys_platform == 'darwin'" },
|
||||
{ name = "pyobjc-framework-quartz", marker = "sys_platform == 'darwin'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/c2/5a/08bb3e278f870443d226c141af14205ff41c0274da1e053b72b11dfc9fb2/pyobjc_framework_vision-12.1.tar.gz", hash = "sha256:a30959100e85dcede3a786c544e621ad6eb65ff6abf85721f805822b8c5fe9b0", size = 59538, upload-time = "2025-11-14T10:23:21.979Z" }
|
||||
wheels = [
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue