chore: introduces 1 file upload in ontology endpoint (#1899)

<!-- .github/pull_request_template.md -->

## Description
This PR fixes the ontology upload endpoint by forcing 1 file upload at
the time. Tests are adjusted in both server start and ontology endpoint
unit test. API was tested.

Do not merge it together with
https://github.com/topoteretes/cognee/pull/1898 its either that or this
one.



## Type of Change
<!-- Please check the relevant option -->
- [x] Bug fix (non-breaking change that fixes an issue)
- [ ] New feature (non-breaking change that adds functionality)
- [ ] Breaking change (fix or feature that would cause existing
functionality to change)
- [ ] Documentation update
- [ ] Code refactoring
- [ ] Performance improvement
- [ ] Other (please specify):

## Screenshots/Videos (if applicable)
<!-- Add screenshots or videos to help explain your changes -->

## Pre-submission Checklist
<!-- Please check all boxes that apply before submitting your PR -->
- [x] **I have tested my changes thoroughly before submitting this PR**
- [x] **This PR contains minimal changes necessary to address the
issue/feature**
- [x] My code follows the project's coding standards and style
guidelines
- [x] I have added tests that prove my fix is effective or that my
feature works
- [x] I have added necessary documentation (if applicable)
- [x] All new and existing tests pass
- [x] I have searched existing PRs to ensure this change hasn't been
submitted already
- [x] I have linked any relevant issues in the description
- [x] My commits have clear and descriptive messages

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

* **API Changes**
* Ontology upload now accepts exactly one file per request; field
renamed from "descriptions" to "description" and validated as a plain
string.
* Stricter form validation and tighter 400/500 error handling for
malformed submissions.

* **Tests**
* Tests converted to real HTTP-style interactions using a shared test
client and dependency overrides.
* Payloads now use plain string fields; added coverage for single-file
constraints and specific error responses.

* **Style**
  * Minor formatting cleanups with no functional impact.

<sub>✏️ Tip: You can customize this high-level summary in your review
settings.</sub>
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
hajdul88 2025-12-15 18:30:35 +01:00 committed by GitHub
parent 14d9540d1b
commit 622f8fa79e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 100 additions and 118 deletions

View file

@ -1,4 +1,4 @@
from fastapi import APIRouter, File, Form, UploadFile, Depends, HTTPException from fastapi import APIRouter, File, Form, UploadFile, Depends, Request
from fastapi.responses import JSONResponse from fastapi.responses import JSONResponse
from typing import Optional, List from typing import Optional, List
@ -15,28 +15,25 @@ def get_ontology_router() -> APIRouter:
@router.post("", response_model=dict) @router.post("", response_model=dict)
async def upload_ontology( async def upload_ontology(
request: Request,
ontology_key: str = Form(...), ontology_key: str = Form(...),
ontology_file: List[UploadFile] = File(...), ontology_file: UploadFile = File(...),
descriptions: Optional[str] = Form(None), description: Optional[str] = Form(None),
user: User = Depends(get_authenticated_user), user: User = Depends(get_authenticated_user),
): ):
""" """
Upload ontology files with their respective keys for later use in cognify operations. Upload a single ontology file for later use in cognify operations.
Supports both single and multiple file uploads:
- Single file: ontology_key=["key"], ontology_file=[file]
- Multiple files: ontology_key=["key1", "key2"], ontology_file=[file1, file2]
## Request Parameters ## Request Parameters
- **ontology_key** (str): JSON array string of user-defined identifiers for the ontologies - **ontology_key** (str): User-defined identifier for the ontology.
- **ontology_file** (List[UploadFile]): OWL format ontology files - **ontology_file** (UploadFile): Single OWL format ontology file
- **descriptions** (Optional[str]): JSON array string of optional descriptions - **description** (Optional[str]): Optional description for the ontology.
## Response ## Response
Returns metadata about uploaded ontologies including keys, filenames, sizes, and upload timestamps. Returns metadata about the uploaded ontology including key, filename, size, and upload timestamp.
## Error Codes ## Error Codes
- **400 Bad Request**: Invalid file format, duplicate keys, array length mismatches, file size exceeded - **400 Bad Request**: Invalid file format, duplicate key, multiple files uploaded
- **500 Internal Server Error**: File system or processing errors - **500 Internal Server Error**: File system or processing errors
""" """
send_telemetry( send_telemetry(
@ -49,16 +46,22 @@ def get_ontology_router() -> APIRouter:
) )
try: try:
import json # Enforce: exactly one uploaded file for "ontology_file"
form = await request.form()
uploaded_files = form.getlist("ontology_file")
if len(uploaded_files) != 1:
raise ValueError("Only one ontology_file is allowed")
ontology_keys = json.loads(ontology_key) if ontology_key.strip().startswith(("[", "{")):
description_list = json.loads(descriptions) if descriptions else None raise ValueError("ontology_key must be a string")
if description is not None and description.strip().startswith(("[", "{")):
raise ValueError("description must be a string")
if not isinstance(ontology_keys, list): result = await ontology_service.upload_ontology(
raise ValueError("ontology_key must be a JSON array") ontology_key=ontology_key,
file=ontology_file,
results = await ontology_service.upload_ontologies( user=user,
ontology_keys, ontology_file, user, description_list description=description,
) )
return { return {
@ -70,10 +73,9 @@ def get_ontology_router() -> APIRouter:
"uploaded_at": result.uploaded_at, "uploaded_at": result.uploaded_at,
"description": result.description, "description": result.description,
} }
for result in results
] ]
} }
except (json.JSONDecodeError, ValueError) as e: except ValueError as e:
return JSONResponse(status_code=400, content={"error": str(e)}) return JSONResponse(status_code=400, content={"error": str(e)})
except Exception as e: except Exception as e:
return JSONResponse(status_code=500, content={"error": str(e)}) return JSONResponse(status_code=500, content={"error": str(e)})

View file

@ -148,8 +148,8 @@ class TestCogneeServerStart(unittest.TestCase):
headers=headers, headers=headers,
files=[("ontology_file", ("test.owl", ontology_content, "application/xml"))], files=[("ontology_file", ("test.owl", ontology_content, "application/xml"))],
data={ data={
"ontology_key": json.dumps([ontology_key]), "ontology_key": ontology_key,
"description": json.dumps(["Test ontology"]), "description": "Test ontology",
}, },
) )
self.assertEqual(ontology_response.status_code, 200) self.assertEqual(ontology_response.status_code, 200)

View file

@ -1,17 +1,28 @@
import pytest import pytest
import uuid import uuid
from fastapi.testclient import TestClient from fastapi.testclient import TestClient
from unittest.mock import patch, Mock, AsyncMock from unittest.mock import Mock
from types import SimpleNamespace from types import SimpleNamespace
import importlib
from cognee.api.client import app from cognee.api.client import app
from cognee.modules.users.methods import get_authenticated_user
gau_mod = importlib.import_module("cognee.modules.users.methods.get_authenticated_user")
@pytest.fixture(scope="session")
def test_client():
# Keep a single TestClient (and event loop) for the whole module.
# Re-creating TestClient repeatedly can break async DB connections (asyncpg loop mismatch).
with TestClient(app) as c:
yield c
@pytest.fixture @pytest.fixture
def client(): def client(test_client, mock_default_user):
return TestClient(app) async def override_get_authenticated_user():
return mock_default_user
app.dependency_overrides[get_authenticated_user] = override_get_authenticated_user
yield test_client
app.dependency_overrides.pop(get_authenticated_user, None)
@pytest.fixture @pytest.fixture
@ -32,12 +43,8 @@ def mock_default_user():
) )
@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) def test_upload_ontology_success(client):
def test_upload_ontology_success(mock_get_default_user, client, mock_default_user):
"""Test successful ontology upload""" """Test successful ontology upload"""
import json
mock_get_default_user.return_value = mock_default_user
ontology_content = ( ontology_content = (
b"<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'></rdf:RDF>" b"<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'></rdf:RDF>"
) )
@ -46,7 +53,7 @@ def test_upload_ontology_success(mock_get_default_user, client, mock_default_use
response = client.post( response = client.post(
"/api/v1/ontologies", "/api/v1/ontologies",
files=[("ontology_file", ("test.owl", ontology_content, "application/xml"))], files=[("ontology_file", ("test.owl", ontology_content, "application/xml"))],
data={"ontology_key": json.dumps([unique_key]), "description": json.dumps(["Test"])}, data={"ontology_key": unique_key, "description": "Test"},
) )
assert response.status_code == 200 assert response.status_code == 200
@ -55,10 +62,8 @@ def test_upload_ontology_success(mock_get_default_user, client, mock_default_use
assert "uploaded_at" in data["uploaded_ontologies"][0] assert "uploaded_at" in data["uploaded_ontologies"][0]
@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) def test_upload_ontology_invalid_file(client):
def test_upload_ontology_invalid_file(mock_get_default_user, client, mock_default_user):
"""Test 400 response for non-.owl files""" """Test 400 response for non-.owl files"""
mock_get_default_user.return_value = mock_default_user
unique_key = f"test_ontology_{uuid.uuid4().hex[:8]}" unique_key = f"test_ontology_{uuid.uuid4().hex[:8]}"
response = client.post( response = client.post(
"/api/v1/ontologies", "/api/v1/ontologies",
@ -68,14 +73,10 @@ def test_upload_ontology_invalid_file(mock_get_default_user, client, mock_defaul
assert response.status_code == 400 assert response.status_code == 400
@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) def test_upload_ontology_missing_data(client):
def test_upload_ontology_missing_data(mock_get_default_user, client, mock_default_user):
"""Test 400 response for missing file or key""" """Test 400 response for missing file or key"""
import json
mock_get_default_user.return_value = mock_default_user
# Missing file # Missing file
response = client.post("/api/v1/ontologies", data={"ontology_key": json.dumps(["test"])}) response = client.post("/api/v1/ontologies", data={"ontology_key": "test"})
assert response.status_code == 400 assert response.status_code == 400
# Missing key # Missing key
@ -85,34 +86,25 @@ def test_upload_ontology_missing_data(mock_get_default_user, client, mock_defaul
assert response.status_code == 400 assert response.status_code == 400
@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) def test_upload_ontology_without_auth_header(client):
def test_upload_ontology_unauthorized(mock_get_default_user, client, mock_default_user): """Test behavior when no explicit authentication header is provided."""
"""Test behavior when default user is provided (no explicit authentication)"""
import json
unique_key = f"test_ontology_{uuid.uuid4().hex[:8]}" unique_key = f"test_ontology_{uuid.uuid4().hex[:8]}"
mock_get_default_user.return_value = mock_default_user
response = client.post( response = client.post(
"/api/v1/ontologies", "/api/v1/ontologies",
files=[("ontology_file", ("test.owl", b"<rdf></rdf>", "application/xml"))], files=[("ontology_file", ("test.owl", b"<rdf></rdf>", "application/xml"))],
data={"ontology_key": json.dumps([unique_key])}, data={"ontology_key": unique_key},
) )
# The current system provides a default user when no explicit authentication is given
# This test verifies the system works with conditional authentication
assert response.status_code == 200 assert response.status_code == 200
data = response.json() data = response.json()
assert data["uploaded_ontologies"][0]["ontology_key"] == unique_key assert data["uploaded_ontologies"][0]["ontology_key"] == unique_key
assert "uploaded_at" in data["uploaded_ontologies"][0] assert "uploaded_at" in data["uploaded_ontologies"][0]
@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) def test_upload_multiple_ontologies_in_single_request_is_rejected(client):
def test_upload_multiple_ontologies(mock_get_default_user, client, mock_default_user): """Uploading multiple ontology files in a single request should fail."""
"""Test uploading multiple ontology files in single request"""
import io import io
mock_get_default_user.return_value = mock_default_user
# Create mock files
file1_content = b"<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'></rdf:RDF>" file1_content = b"<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'></rdf:RDF>"
file2_content = b"<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'></rdf:RDF>" file2_content = b"<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'></rdf:RDF>"
@ -120,45 +112,34 @@ def test_upload_multiple_ontologies(mock_get_default_user, client, mock_default_
("ontology_file", ("vehicles.owl", io.BytesIO(file1_content), "application/xml")), ("ontology_file", ("vehicles.owl", io.BytesIO(file1_content), "application/xml")),
("ontology_file", ("manufacturers.owl", io.BytesIO(file2_content), "application/xml")), ("ontology_file", ("manufacturers.owl", io.BytesIO(file2_content), "application/xml")),
] ]
data = { data = {"ontology_key": "vehicles", "description": "Base vehicles"}
"ontology_key": '["vehicles", "manufacturers"]',
"descriptions": '["Base vehicles", "Car manufacturers"]',
}
response = client.post("/api/v1/ontologies", files=files, data=data) response = client.post("/api/v1/ontologies", files=files, data=data)
assert response.status_code == 200 assert response.status_code == 400
result = response.json() assert "Only one ontology_file is allowed" in response.json()["error"]
assert "uploaded_ontologies" in result
assert len(result["uploaded_ontologies"]) == 2
assert result["uploaded_ontologies"][0]["ontology_key"] == "vehicles"
assert result["uploaded_ontologies"][1]["ontology_key"] == "manufacturers"
@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) def test_upload_endpoint_rejects_array_style_fields(client):
def test_upload_endpoint_accepts_arrays(mock_get_default_user, client, mock_default_user): """Array-style form values should be rejected (no backwards compatibility)."""
"""Test that upload endpoint accepts array parameters"""
import io import io
import json import json
mock_get_default_user.return_value = mock_default_user
file_content = b"<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'></rdf:RDF>" file_content = b"<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'></rdf:RDF>"
files = [("ontology_file", ("single.owl", io.BytesIO(file_content), "application/xml"))] files = [("ontology_file", ("single.owl", io.BytesIO(file_content), "application/xml"))]
data = { data = {
"ontology_key": json.dumps(["single_key"]), "ontology_key": json.dumps(["single_key"]),
"descriptions": json.dumps(["Single ontology"]), "description": json.dumps(["Single ontology"]),
} }
response = client.post("/api/v1/ontologies", files=files, data=data) response = client.post("/api/v1/ontologies", files=files, data=data)
assert response.status_code == 200 assert response.status_code == 400
result = response.json() assert "ontology_key must be a string" in response.json()["error"]
assert result["uploaded_ontologies"][0]["ontology_key"] == "single_key"
@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) def test_cognify_with_multiple_ontologies(client):
def test_cognify_with_multiple_ontologies(mock_get_default_user, client, mock_default_user):
"""Test cognify endpoint accepts multiple ontology keys""" """Test cognify endpoint accepts multiple ontology keys"""
payload = { payload = {
"datasets": ["test_dataset"], "datasets": ["test_dataset"],
@ -172,14 +153,11 @@ def test_cognify_with_multiple_ontologies(mock_get_default_user, client, mock_de
assert response.status_code in [200, 400, 409] # May fail for other reasons, not type assert response.status_code in [200, 400, 409] # May fail for other reasons, not type
@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) def test_complete_multifile_workflow(client):
def test_complete_multifile_workflow(mock_get_default_user, client, mock_default_user): """Test workflow: upload ontologies one-by-one → cognify with multiple keys"""
"""Test complete workflow: upload multiple ontologies → cognify with multiple keys"""
import io import io
import json
mock_get_default_user.return_value = mock_default_user # Step 1: Upload two ontologies (one-by-one)
# Step 1: Upload multiple ontologies
file1_content = b"""<?xml version="1.0"?> file1_content = b"""<?xml version="1.0"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:owl="http://www.w3.org/2002/07/owl#"> xmlns:owl="http://www.w3.org/2002/07/owl#">
@ -192,17 +170,21 @@ def test_complete_multifile_workflow(mock_get_default_user, client, mock_default
<owl:Class rdf:ID="Manufacturer"/> <owl:Class rdf:ID="Manufacturer"/>
</rdf:RDF>""" </rdf:RDF>"""
files = [ upload_response_1 = client.post(
("ontology_file", ("vehicles.owl", io.BytesIO(file1_content), "application/xml")), "/api/v1/ontologies",
("ontology_file", ("manufacturers.owl", io.BytesIO(file2_content), "application/xml")), files=[("ontology_file", ("vehicles.owl", io.BytesIO(file1_content), "application/xml"))],
] data={"ontology_key": "vehicles", "description": "Vehicle ontology"},
data = { )
"ontology_key": json.dumps(["vehicles", "manufacturers"]), assert upload_response_1.status_code == 200
"descriptions": json.dumps(["Vehicle ontology", "Manufacturer ontology"]),
}
upload_response = client.post("/api/v1/ontologies", files=files, data=data) upload_response_2 = client.post(
assert upload_response.status_code == 200 "/api/v1/ontologies",
files=[
("ontology_file", ("manufacturers.owl", io.BytesIO(file2_content), "application/xml"))
],
data={"ontology_key": "manufacturers", "description": "Manufacturer ontology"},
)
assert upload_response_2.status_code == 200
# Step 2: Verify ontologies are listed # Step 2: Verify ontologies are listed
list_response = client.get("/api/v1/ontologies") list_response = client.get("/api/v1/ontologies")
@ -223,44 +205,42 @@ def test_complete_multifile_workflow(mock_get_default_user, client, mock_default
assert cognify_response.status_code != 400 # Not a validation error assert cognify_response.status_code != 400 # Not a validation error
@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) def test_upload_error_handling(client):
def test_multifile_error_handling(mock_get_default_user, client, mock_default_user): """Test error handling for invalid uploads (single-file endpoint)."""
"""Test error handling for invalid multifile uploads"""
import io import io
import json import json
# Test mismatched array lengths # Array-style key should be rejected
file_content = b"<rdf:RDF></rdf:RDF>" file_content = b"<rdf:RDF></rdf:RDF>"
files = [("ontology_file", ("test.owl", io.BytesIO(file_content), "application/xml"))] files = [("ontology_file", ("test.owl", io.BytesIO(file_content), "application/xml"))]
data = { data = {
"ontology_key": json.dumps(["key1", "key2"]), # 2 keys, 1 file "ontology_key": json.dumps(["key1", "key2"]),
"descriptions": json.dumps(["desc1"]), "description": "desc1",
} }
response = client.post("/api/v1/ontologies", files=files, data=data) response = client.post("/api/v1/ontologies", files=files, data=data)
assert response.status_code == 400 assert response.status_code == 400
assert "Number of keys must match number of files" in response.json()["error"] assert "ontology_key must be a string" in response.json()["error"]
# Test duplicate keys # Duplicate key should be rejected
files = [ response_1 = client.post(
("ontology_file", ("test1.owl", io.BytesIO(file_content), "application/xml")), "/api/v1/ontologies",
("ontology_file", ("test2.owl", io.BytesIO(file_content), "application/xml")), files=[("ontology_file", ("test1.owl", io.BytesIO(file_content), "application/xml"))],
] data={"ontology_key": "duplicate", "description": "desc1"},
data = { )
"ontology_key": json.dumps(["duplicate", "duplicate"]), assert response_1.status_code == 200
"descriptions": json.dumps(["desc1", "desc2"]),
}
response = client.post("/api/v1/ontologies", files=files, data=data) response_2 = client.post(
assert response.status_code == 400 "/api/v1/ontologies",
assert "Duplicate ontology keys not allowed" in response.json()["error"] files=[("ontology_file", ("test2.owl", io.BytesIO(file_content), "application/xml"))],
data={"ontology_key": "duplicate", "description": "desc2"},
)
assert response_2.status_code == 400
assert "already exists" in response_2.json()["error"]
@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) def test_cognify_missing_ontology_key(client):
def test_cognify_missing_ontology_key(mock_get_default_user, client, mock_default_user):
"""Test cognify with non-existent ontology key""" """Test cognify with non-existent ontology key"""
mock_get_default_user.return_value = mock_default_user
payload = { payload = {
"datasets": ["test_dataset"], "datasets": ["test_dataset"],
"ontology_key": ["nonexistent_key"], "ontology_key": ["nonexistent_key"],