feat: enhance ontology handling to support multiple uploads and retrievals

Fahad Shoaib 2025-11-14 22:13:00 +05:00
parent 79bd2b2576
commit 844b8d635a
4 changed files with 202 additions and 91 deletions


@@ -41,8 +41,8 @@ class CognifyPayloadDTO(InDTO):
custom_prompt: Optional[str] = Field(
default="", description="Custom prompt for entity extraction and graph generation"
)
ontology_key: Optional[str] = Field(
default=None, description="Reference to previously uploaded ontology"
ontology_key: Optional[List[str]] = Field(
default=None, description="Reference to one or more previously uploaded ontologies"
)
@@ -71,7 +71,7 @@ def get_cognify_router() -> APIRouter:
- **dataset_ids** (Optional[List[UUID]]): List of existing dataset UUIDs to process. UUIDs allow processing of datasets not owned by the user (if permitted).
- **run_in_background** (Optional[bool]): Whether to execute processing asynchronously. Defaults to False (blocking).
- **custom_prompt** (Optional[str]): Custom prompt for entity extraction and graph generation. If provided, this prompt will be used instead of the default prompts for knowledge graph extraction.
- **ontology_key** (Optional[str]): Reference to a previously uploaded ontology file to use for knowledge graph construction.
- **ontology_key** (Optional[List[str]]): Reference to one or more previously uploaded ontology files to use for knowledge graph construction.
## Response
- **Blocking execution**: Complete pipeline run information with entity counts, processing duration, and success/failure status
@@ -87,7 +87,7 @@ def get_cognify_router() -> APIRouter:
"datasets": ["research_papers", "documentation"],
"run_in_background": false,
"custom_prompt": "Extract entities focusing on technical concepts and their relationships. Identify key technologies, methodologies, and their interconnections.",
"ontology_key": "medical_ontology_v1"
"ontology_key": ["medical_ontology_v1"]
}
```
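With `ontology_key` now typed as a list, a single request can reference several uploaded ontologies at once. A minimal sketch of such a payload; the key names are hypothetical, not part of the API:

```python
# Hypothetical payload referencing two previously uploaded ontologies.
payload = {
    "datasets": ["research_papers"],
    "run_in_background": False,
    "ontology_key": ["medical_ontology_v1", "drug_interactions_v2"],
}
```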
@@ -121,29 +121,22 @@ def get_cognify_router() -> APIRouter:
if payload.ontology_key:
ontology_service = OntologyService()
try:
ontology_content = ontology_service.get_ontology_content(
payload.ontology_key, user
)
ontology_contents = ontology_service.get_ontology_contents(
payload.ontology_key, user
)
from cognee.modules.ontology.ontology_config import Config
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import (
RDFLibOntologyResolver,
)
from io import StringIO
ontology_stream = StringIO(ontology_content)
config_to_use: Config = {
    "ontology_config": {
        "ontology_resolver": RDFLibOntologyResolver(
            ontology_file=ontology_stream
        )
    }
}
ontology_streams = [StringIO(content) for content in ontology_contents]
config_to_use: Config = {
    "ontology_config": {
        "ontology_resolver": RDFLibOntologyResolver(ontology_file=ontology_streams)
    }
}
except ValueError as e:
    return JSONResponse(
        status_code=400, content={"error": f"Ontology error: {str(e)}"}
    )
cognify_run = await cognee_cognify(
datasets,
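The router can hand all retrieved ontology strings to one resolver because rdflib accumulates triples from every `parse()` call into the same `Graph`. A standalone sketch of that merge, using two inline stand-in documents:

```python
from io import StringIO
from rdflib import Graph

# Stand-in OWL/RDF-XML documents; real content would come from get_ontology_contents.
owl_a = """<?xml version="1.0"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
         xmlns:owl="http://www.w3.org/2002/07/owl#">
  <owl:Class rdf:about="http://example.org/Disease"/>
</rdf:RDF>"""
owl_b = owl_a.replace("Disease", "Drug")

graph = Graph()
for stream in [StringIO(owl_a), StringIO(owl_b)]:
    # Each parse() adds triples to the same graph rather than replacing it.
    graph.parse(data=stream.read(), format="xml")
print(len(graph))  # 2: one owl:Class triple per document
```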


@@ -3,7 +3,7 @@ import json
import tempfile
from pathlib import Path
from datetime import datetime, timezone
from typing import Optional
from typing import Optional, List
from dataclasses import dataclass
@@ -47,28 +47,23 @@ class OntologyService:
async def upload_ontology(
self, ontology_key: str, file, user, description: Optional[str] = None
) -> OntologyMetadata:
# Validate file format
if not file.filename.lower().endswith(".owl"):
raise ValueError("File must be in .owl format")
user_dir = self._get_user_dir(str(user.id))
metadata = self._load_metadata(user_dir)
# Check for duplicate key
if ontology_key in metadata:
raise ValueError(f"Ontology key '{ontology_key}' already exists")
# Read file content
content = await file.read()
if len(content) > 10 * 1024 * 1024: # 10MB limit
if len(content) > 10 * 1024 * 1024:
raise ValueError("File size exceeds 10MB limit")
# Save file
file_path = user_dir / f"{ontology_key}.owl"
with open(file_path, "wb") as f:
f.write(content)
# Update metadata
ontology_metadata = {
"filename": file.filename,
"size_bytes": len(content),
@@ -86,19 +81,102 @@ class OntologyService:
description=description,
)
def get_ontology_content(self, ontology_key: str, user) -> str:
    user_dir = self._get_user_dir(str(user.id))
    metadata = self._load_metadata(user_dir)
    if ontology_key not in metadata:
        raise ValueError(f"Ontology key '{ontology_key}' not found")
    file_path = user_dir / f"{ontology_key}.owl"
    if not file_path.exists():
        raise ValueError(f"Ontology file for key '{ontology_key}' not found")
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()
async def upload_ontologies(
self, ontology_key: List[str], files: List, user, descriptions: Optional[List[str]] = None
) -> List[OntologyMetadata]:
"""
Upload ontology files with their respective keys.
Args:
ontology_key: List of unique keys for each ontology
files: List of UploadFile objects (same length as keys)
user: Authenticated user
descriptions: Optional list of descriptions for each file
Returns:
List of OntologyMetadata objects for uploaded files
Raises:
ValueError: If keys are duplicated, a file is not in .owl format, or list lengths don't match
"""
if len(ontology_key) != len(files):
raise ValueError("Number of keys must match number of files")
if len(set(ontology_key)) != len(ontology_key):
raise ValueError("Duplicate ontology keys not allowed")
if descriptions and len(descriptions) != len(files):
raise ValueError("Number of descriptions must match number of files")
results = []
user_dir = self._get_user_dir(str(user.id))
metadata = self._load_metadata(user_dir)
for i, (key, file) in enumerate(zip(ontology_key, files)):
if key in metadata:
raise ValueError(f"Ontology key '{key}' already exists")
if not file.filename.lower().endswith(".owl"):
raise ValueError(f"File '{file.filename}' must be in .owl format")
content = await file.read()
if len(content) > 10 * 1024 * 1024:
raise ValueError(f"File '{file.filename}' exceeds 10MB limit")
file_path = user_dir / f"{key}.owl"
with open(file_path, "wb") as f:
f.write(content)
ontology_metadata = {
"filename": file.filename,
"size_bytes": len(content),
"uploaded_at": datetime.now(timezone.utc).isoformat(),
"description": descriptions[i] if descriptions else None,
}
metadata[key] = ontology_metadata
results.append(
OntologyMetadata(
ontology_key=key,
filename=file.filename,
size_bytes=len(content),
uploaded_at=ontology_metadata["uploaded_at"],
description=descriptions[i] if descriptions else None,
)
)
self._save_metadata(user_dir, metadata)
return results
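A usage sketch of the batch upload from an async context; `medical_file` and `drug_file` stand in for `fastapi.UploadFile` objects and `user` for the authenticated user (all names hypothetical):

```python
# Inside an async endpoint or task; variable names here are illustrative.
service = OntologyService()
metas = await service.upload_ontologies(
    ["medical_ontology_v1", "drug_interactions_v2"],
    [medical_file, drug_file],
    user,
    descriptions=["SNOMED subset", "interaction rules"],
)
for meta in metas:
    print(meta.ontology_key, meta.size_bytes)
```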
def get_ontology_contents(self, ontology_key: List[str], user) -> List[str]:
"""
Retrieve ontology content for one or more keys.
Args:
ontology_key: List of ontology keys to retrieve (may contain a single item)
user: Authenticated user
Returns:
List of ontology content strings
Raises:
ValueError: If any ontology key not found
"""
user_dir = self._get_user_dir(str(user.id))
metadata = self._load_metadata(user_dir)
contents = []
for key in ontology_key:
if key not in metadata:
raise ValueError(f"Ontology key '{key}' not found")
file_path = user_dir / f"{key}.owl"
if not file_path.exists():
raise ValueError(f"Ontology file for key '{key}' not found")
with open(file_path, "r", encoding="utf-8") as f:
contents.append(f.read())
return contents
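And retrieving the same ontologies back, continuing the hypothetical names from the sketch above; a `ValueError` surfaces if any key is unknown:

```python
from io import StringIO

contents = service.get_ontology_contents(
    ["medical_ontology_v1", "drug_interactions_v2"], user
)
# The cognify router wraps each string in a StringIO before handing the
# list to RDFLibOntologyResolver.
streams = [StringIO(text) for text in contents]
```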
def list_ontologies(self, user) -> dict:
user_dir = self._get_user_dir(str(user.id))


@@ -1,6 +1,6 @@
from fastapi import APIRouter, File, Form, UploadFile, Depends, HTTPException
from fastapi.responses import JSONResponse
from typing import Optional
from typing import Optional, List
from cognee.modules.users.models import User
from cognee.modules.users.methods import get_authenticated_user
@@ -16,23 +16,27 @@ def get_ontology_router() -> APIRouter:
@router.post("", response_model=dict)
async def upload_ontology(
ontology_key: str = Form(...),
ontology_file: UploadFile = File(...),
description: Optional[str] = Form(None),
ontology_file: List[UploadFile] = File(...),
descriptions: Optional[str] = Form(None),
user: User = Depends(get_authenticated_user),
):
"""
Upload an ontology file with a named key for later use in cognify operations.
Upload ontology files with their respective keys for later use in cognify operations.
Supports both single and multiple file uploads:
- Single file: ontology_key=["key"], ontology_file=[file]
- Multiple files: ontology_key=["key1", "key2"], ontology_file=[file1, file2]
## Request Parameters
- **ontology_key** (str): User-defined identifier for the ontology
- **ontology_file** (UploadFile): OWL format ontology file
- **description** (Optional[str]): Optional description of the ontology
- **ontology_key** (str): JSON array string of user-defined identifiers for the ontologies
- **ontology_file** (List[UploadFile]): OWL format ontology files
- **descriptions** (Optional[str]): JSON array string of optional descriptions
## Response
Returns metadata about the uploaded ontology including key, filename, size, and upload timestamp.
Returns metadata about uploaded ontologies including keys, filenames, sizes, and upload timestamps.
## Error Codes
- **400 Bad Request**: Invalid file format, duplicate key, file size exceeded
- **400 Bad Request**: Invalid file format, duplicate keys, array length mismatches, file size exceeded
- **500 Internal Server Error**: File system or processing errors
"""
send_telemetry(
@@ -45,16 +49,31 @@ def get_ontology_router() -> APIRouter:
)
try:
result = await ontology_service.upload_ontology(
    ontology_key, ontology_file, user, description
)
import json
ontology_keys = json.loads(ontology_key)
description_list = json.loads(descriptions) if descriptions else None
if not isinstance(ontology_keys, list):
raise ValueError("ontology_key must be a JSON array")
results = await ontology_service.upload_ontologies(
ontology_keys, ontology_file, user, description_list
)
return {
    "ontology_key": result.ontology_key,
    "filename": result.filename,
    "size_bytes": result.size_bytes,
    "uploaded_at": result.uploaded_at,
}
return {
    "uploaded_ontologies": [
        {
            "ontology_key": result.ontology_key,
            "filename": result.filename,
            "size_bytes": result.size_bytes,
            "uploaded_at": result.uploaded_at,
            "description": result.description,
        }
        for result in results
    ]
}
except ValueError as e:
except (json.JSONDecodeError, ValueError) as e:
return JSONResponse(status_code=400, content={"error": str(e)})
except Exception as e:
return JSONResponse(status_code=500, content={"error": str(e)})
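From the client side, the keys and descriptions travel as JSON-array strings in ordinary form fields, while the files repeat under the same multipart field name. A sketch with `requests`; the URL and file names are assumptions:

```python
import json
import requests

response = requests.post(
    "http://localhost:8000/api/v1/ontologies",  # assumed mount point for this router
    data={
        "ontology_key": json.dumps(["medical_ontology_v1", "drug_interactions_v2"]),
        "descriptions": json.dumps(["SNOMED subset", "interaction rules"]),
    },
    files=[  # repeated field name maps to List[UploadFile] on the FastAPI side
        ("ontology_file", ("medical.owl", open("medical.owl", "rb"), "application/rdf+xml")),
        ("ontology_file", ("drugs.owl", open("drugs.owl", "rb"), "application/rdf+xml")),
    ],
)
print(response.status_code, response.json())
```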


@@ -26,7 +26,7 @@ class RDFLibOntologyResolver(BaseOntologyResolver):
def __init__(
self,
ontology_file: Optional[Union[str, List[str], IO]] = None,
ontology_file: Optional[Union[str, List[str], IO, List[IO]]] = None,
matching_strategy: Optional[MatchingStrategy] = None,
) -> None:
super().__init__(matching_strategy)
@@ -34,47 +34,68 @@
try:
    self.graph = None
    if ontology_file is not None:
        if hasattr(ontology_file, "read"):
            self.graph = Graph()
            content = ontology_file.read()
            self.graph.parse(data=content, format="xml")
            logger.info("Ontology loaded successfully from file object")
        else:
            files_to_load = []
            if isinstance(ontology_file, str):
                files_to_load = [ontology_file]
            elif isinstance(ontology_file, list):
                files_to_load = ontology_file
            else:
                raise ValueError(
                    f"ontology_file must be a string, list of strings, file-like object, or None. Got: {type(ontology_file)}"
                )
            if files_to_load:
                self.graph = Graph()
                loaded_files = []
                for file_path in files_to_load:
                    if os.path.exists(file_path):
                        self.graph.parse(file_path)
                        loaded_files.append(file_path)
                        logger.info("Ontology loaded successfully from file: %s", file_path)
                    else:
                        logger.warning(
                            "Ontology file '%s' not found. Skipping this file.",
                            file_path,
                        )
                if not loaded_files:
                    logger.info(
                        "No valid ontology files found. No owl ontology will be attached to the graph."
                    )
                    self.graph = None
                else:
                    logger.info("Total ontology files loaded: %d", len(loaded_files))
        files_to_load = []
        file_objects = []
        if hasattr(ontology_file, "read"):
            file_objects = [ontology_file]
        elif isinstance(ontology_file, str):
            files_to_load = [ontology_file]
        elif isinstance(ontology_file, list):
            if all(hasattr(item, "read") for item in ontology_file):
                file_objects = ontology_file
            else:
                files_to_load = ontology_file
        else:
            raise ValueError(
                f"ontology_file must be a string, list of strings, file-like object, list of file-like objects, or None. Got: {type(ontology_file)}"
            )
        if file_objects:
            self.graph = Graph()
            loaded_objects = []
            for file_obj in file_objects:
                try:
                    content = file_obj.read()
                    self.graph.parse(data=content, format="xml")
                    loaded_objects.append(file_obj)
                    logger.info("Ontology loaded successfully from file object")
                except Exception as e:
                    logger.warning("Failed to parse ontology file object: %s", str(e))
            if not loaded_objects:
                logger.info(
                    "No valid ontology file objects found. No owl ontology will be attached to the graph."
                )
                self.graph = None
            else:
                logger.info("Total ontology file objects loaded: %d", len(loaded_objects))
        elif files_to_load:
            self.graph = Graph()
            loaded_files = []
            for file_path in files_to_load:
                if os.path.exists(file_path):
                    self.graph.parse(file_path)
                    loaded_files.append(file_path)
                    logger.info("Ontology loaded successfully from file: %s", file_path)
                else:
                    logger.warning(
                        "Ontology file '%s' not found. Skipping this file.",
                        file_path,
                    )
            if not loaded_files:
                logger.info(
                    "No valid ontology files found. No owl ontology will be attached to the graph."
                )
                self.graph = None
            else:
                logger.info("Total ontology files loaded: %d", len(loaded_files))
        else:
            logger.info(
                "No ontology file provided. No owl ontology will be attached to the graph."
            )
    else:
        logger.info(
            "No ontology file provided. No owl ontology will be attached to the graph."
        )