feat: enhance ontology handling to support multiple uploads and retrievals

Fahad Shoaib 2025-11-14 22:13:00 +05:00
parent 79bd2b2576
commit 844b8d635a
4 changed files with 202 additions and 91 deletions


@@ -41,8 +41,8 @@ class CognifyPayloadDTO(InDTO):
custom_prompt: Optional[str] = Field(
default="", description="Custom prompt for entity extraction and graph generation"
)
ontology_key: Optional[str] = Field(
default=None, description="Reference to previously uploaded ontology"
ontology_key: Optional[List[str]] = Field(
default=None, description="Reference to one or more previously uploaded ontologies"
)
@@ -71,7 +71,7 @@ def get_cognify_router() -> APIRouter:
- **dataset_ids** (Optional[List[UUID]]): List of existing dataset UUIDs to process. UUIDs allow processing of datasets not owned by the user (if permitted).
- **run_in_background** (Optional[bool]): Whether to execute processing asynchronously. Defaults to False (blocking).
- **custom_prompt** (Optional[str]): Custom prompt for entity extraction and graph generation. If provided, this prompt will be used instead of the default prompts for knowledge graph extraction.
- **ontology_key** (Optional[str]): Reference to a previously uploaded ontology file to use for knowledge graph construction.
- **ontology_key** (Optional[List[str]]): Reference to one or more previously uploaded ontology files to use for knowledge graph construction.
## Response
- **Blocking execution**: Complete pipeline run information with entity counts, processing duration, and success/failure status
@@ -87,7 +87,7 @@ def get_cognify_router() -> APIRouter:
"datasets": ["research_papers", "documentation"],
"run_in_background": false,
"custom_prompt": "Extract entities focusing on technical concepts and their relationships. Identify key technologies, methodologies, and their interconnections.",
"ontology_key": "medical_ontology_v1"
"ontology_key": ["medical_ontology_v1"]
}
```
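With `ontology_key` now typed as a list, a single request can reference several uploaded ontologies at once. A minimal sketch of such a payload; the key names are hypothetical, not part of the API:

```python
# Hypothetical payload referencing two previously uploaded ontologies.
payload = {
    "datasets": ["research_papers"],
    "run_in_background": False,
    "ontology_key": ["medical_ontology_v1", "drug_interactions_v2"],
}
```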
@@ -121,29 +121,22 @@ def get_cognify_router() -> APIRouter:
if payload.ontology_key:
ontology_service = OntologyService()
try:
ontology_content = ontology_service.get_ontology_content(
payload.ontology_key, user
)
ontology_contents = ontology_service.get_ontology_contents(
payload.ontology_key, user
)
from cognee.modules.ontology.ontology_config import Config
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import (
RDFLibOntologyResolver,
)
from io import StringIO
ontology_stream = StringIO(ontology_content)
config_to_use: Config = {
    "ontology_config": {
        "ontology_resolver": RDFLibOntologyResolver(
            ontology_file=ontology_stream
        )
    }
}
ontology_streams = [StringIO(content) for content in ontology_contents]
config_to_use: Config = {
    "ontology_config": {
        "ontology_resolver": RDFLibOntologyResolver(ontology_file=ontology_streams)
    }
}
except ValueError as e:
    return JSONResponse(
        status_code=400, content={"error": f"Ontology error: {str(e)}"}
    )
cognify_run = await cognee_cognify(
datasets,
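The router can hand all retrieved ontology strings to one resolver because rdflib accumulates triples from every `parse()` call into the same `Graph`. A standalone sketch of that merge, using two inline stand-in documents:

```python
from io import StringIO
from rdflib import Graph

# Stand-in OWL/RDF-XML documents; real content would come from get_ontology_contents.
owl_a = """<?xml version="1.0"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
         xmlns:owl="http://www.w3.org/2002/07/owl#">
  <owl:Class rdf:about="http://example.org/Disease"/>
</rdf:RDF>"""
owl_b = owl_a.replace("Disease", "Drug")

graph = Graph()
for stream in [StringIO(owl_a), StringIO(owl_b)]:
    # Each parse() adds triples to the same graph rather than replacing it.
    graph.parse(data=stream.read(), format="xml")
print(len(graph))  # 2: one owl:Class triple per document
```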


@@ -3,7 +3,7 @@ import json
import tempfile
from pathlib import Path
from datetime import datetime, timezone
from typing import Optional
from typing import Optional, List
from dataclasses import dataclass
@@ -47,28 +47,23 @@ class OntologyService:
async def upload_ontology(
self, ontology_key: str, file, user, description: Optional[str] = None
) -> OntologyMetadata:
# Validate file format
if not file.filename.lower().endswith(".owl"):
raise ValueError("File must be in .owl format")
user_dir = self._get_user_dir(str(user.id))
metadata = self._load_metadata(user_dir)
# Check for duplicate key
if ontology_key in metadata:
raise ValueError(f"Ontology key '{ontology_key}' already exists")
# Read file content
content = await file.read()
if len(content) > 10 * 1024 * 1024: # 10MB limit
if len(content) > 10 * 1024 * 1024:
raise ValueError("File size exceeds 10MB limit")
# Save file
file_path = user_dir / f"{ontology_key}.owl"
with open(file_path, "wb") as f:
f.write(content)
# Update metadata
ontology_metadata = {
"filename": file.filename,
"size_bytes": len(content),
@@ -86,19 +81,102 @@ class OntologyService:
description=description,
)
def get_ontology_content(self, ontology_key: str, user) -> str:
    user_dir = self._get_user_dir(str(user.id))
    metadata = self._load_metadata(user_dir)
    if ontology_key not in metadata:
        raise ValueError(f"Ontology key '{ontology_key}' not found")
    file_path = user_dir / f"{ontology_key}.owl"
    if not file_path.exists():
        raise ValueError(f"Ontology file for key '{ontology_key}' not found")
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()
async def upload_ontologies(
self, ontology_key: List[str], files: List, user, descriptions: Optional[List[str]] = None
) -> List[OntologyMetadata]:
"""
Upload ontology files with their respective keys.
Args:
ontology_key: List of unique keys for each ontology
files: List of UploadFile objects (same length as keys)
user: Authenticated user
descriptions: Optional list of descriptions for each file
Returns:
List of OntologyMetadata objects for uploaded files
Raises:
ValueError: If keys are duplicated, a file is not in .owl format, or list lengths don't match
"""
if len(ontology_key) != len(files):
raise ValueError("Number of keys must match number of files")
if len(set(ontology_key)) != len(ontology_key):
raise ValueError("Duplicate ontology keys not allowed")
if descriptions and len(descriptions) != len(files):
raise ValueError("Number of descriptions must match number of files")
results = []
user_dir = self._get_user_dir(str(user.id))
metadata = self._load_metadata(user_dir)
for i, (key, file) in enumerate(zip(ontology_key, files)):
if key in metadata:
raise ValueError(f"Ontology key '{key}' already exists")
if not file.filename.lower().endswith(".owl"):
raise ValueError(f"File '{file.filename}' must be in .owl format")
content = await file.read()
if len(content) > 10 * 1024 * 1024:
raise ValueError(f"File '{file.filename}' exceeds 10MB limit")
file_path = user_dir / f"{key}.owl"
with open(file_path, "wb") as f:
f.write(content)
ontology_metadata = {
"filename": file.filename,
"size_bytes": len(content),
"uploaded_at": datetime.now(timezone.utc).isoformat(),
"description": descriptions[i] if descriptions else None,
}
metadata[key] = ontology_metadata
results.append(
OntologyMetadata(
ontology_key=key,
filename=file.filename,
size_bytes=len(content),
uploaded_at=ontology_metadata["uploaded_at"],
description=descriptions[i] if descriptions else None,
)
)
self._save_metadata(user_dir, metadata)
return results
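A usage sketch of the batch upload from an async context; `medical_file` and `drug_file` stand in for `fastapi.UploadFile` objects and `user` for the authenticated user (all names hypothetical):

```python
# Inside an async endpoint or task; variable names here are illustrative.
service = OntologyService()
metas = await service.upload_ontologies(
    ["medical_ontology_v1", "drug_interactions_v2"],
    [medical_file, drug_file],
    user,
    descriptions=["SNOMED subset", "interaction rules"],
)
for meta in metas:
    print(meta.ontology_key, meta.size_bytes)
```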
def get_ontology_contents(self, ontology_key: List[str], user) -> List[str]:
"""
Retrieve ontology content for one or more keys.
Args:
ontology_key: List of ontology keys to retrieve (may contain a single item)
user: Authenticated user
Returns:
List of ontology content strings
Raises:
ValueError: If any ontology key not found
"""
user_dir = self._get_user_dir(str(user.id))
metadata = self._load_metadata(user_dir)
contents = []
for key in ontology_key:
if key not in metadata:
raise ValueError(f"Ontology key '{key}' not found")
file_path = user_dir / f"{key}.owl"
if not file_path.exists():
raise ValueError(f"Ontology file for key '{key}' not found")
with open(file_path, "r", encoding="utf-8") as f:
contents.append(f.read())
return contents
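And retrieving the same ontologies back, continuing the hypothetical names from the sketch above; a `ValueError` surfaces if any key is unknown:

```python
from io import StringIO

contents = service.get_ontology_contents(
    ["medical_ontology_v1", "drug_interactions_v2"], user
)
# The cognify router wraps each string in a StringIO before handing the
# list to RDFLibOntologyResolver.
streams = [StringIO(text) for text in contents]
```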
def list_ontologies(self, user) -> dict:
user_dir = self._get_user_dir(str(user.id))


@@ -1,6 +1,6 @@
from fastapi import APIRouter, File, Form, UploadFile, Depends, HTTPException
from fastapi.responses import JSONResponse
from typing import Optional
from typing import Optional, List
from cognee.modules.users.models import User
from cognee.modules.users.methods import get_authenticated_user
@@ -16,23 +16,27 @@ def get_ontology_router() -> APIRouter:
@router.post("", response_model=dict)
async def upload_ontology(
ontology_key: str = Form(...),
ontology_file: UploadFile = File(...),
description: Optional[str] = Form(None),
ontology_file: List[UploadFile] = File(...),
descriptions: Optional[str] = Form(None),
user: User = Depends(get_authenticated_user),
):
"""
Upload an ontology file with a named key for later use in cognify operations.
Upload ontology files with their respective keys for later use in cognify operations.
Supports both single and multiple file uploads:
- Single file: ontology_key=["key"], ontology_file=[file]
- Multiple files: ontology_key=["key1", "key2"], ontology_file=[file1, file2]
## Request Parameters
- **ontology_key** (str): User-defined identifier for the ontology
- **ontology_file** (UploadFile): OWL format ontology file
- **description** (Optional[str]): Optional description of the ontology
- **ontology_key** (str): JSON array string of user-defined identifiers for the ontologies
- **ontology_file** (List[UploadFile]): OWL format ontology files
- **descriptions** (Optional[str]): JSON array string of optional descriptions
## Response
Returns metadata about the uploaded ontology including key, filename, size, and upload timestamp.
Returns metadata about uploaded ontologies including keys, filenames, sizes, and upload timestamps.
## Error Codes
- **400 Bad Request**: Invalid file format, duplicate key, file size exceeded
- **400 Bad Request**: Invalid file format, duplicate keys, array length mismatches, file size exceeded
- **500 Internal Server Error**: File system or processing errors
"""
send_telemetry(
@@ -45,16 +49,31 @@ def get_ontology_router() -> APIRouter:
)
try:
result = await ontology_service.upload_ontology(
    ontology_key, ontology_file, user, description
)
import json
ontology_keys = json.loads(ontology_key)
description_list = json.loads(descriptions) if descriptions else None
if not isinstance(ontology_keys, list):
raise ValueError("ontology_key must be a JSON array")
results = await ontology_service.upload_ontologies(
ontology_keys, ontology_file, user, description_list
)
return {
    "ontology_key": result.ontology_key,
    "filename": result.filename,
    "size_bytes": result.size_bytes,
    "uploaded_at": result.uploaded_at,
}
return {
    "uploaded_ontologies": [
        {
            "ontology_key": result.ontology_key,
            "filename": result.filename,
            "size_bytes": result.size_bytes,
            "uploaded_at": result.uploaded_at,
            "description": result.description,
        }
        for result in results
    ]
}
except ValueError as e:
except (json.JSONDecodeError, ValueError) as e:
return JSONResponse(status_code=400, content={"error": str(e)})
except Exception as e:
return JSONResponse(status_code=500, content={"error": str(e)})
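From the client side, the keys and descriptions travel as JSON-array strings in ordinary form fields, while the files repeat under the same multipart field name. A sketch with `requests`; the URL and file names are assumptions:

```python
import json
import requests

response = requests.post(
    "http://localhost:8000/api/v1/ontologies",  # assumed mount point for this router
    data={
        "ontology_key": json.dumps(["medical_ontology_v1", "drug_interactions_v2"]),
        "descriptions": json.dumps(["SNOMED subset", "interaction rules"]),
    },
    files=[  # repeated field name maps to List[UploadFile] on the FastAPI side
        ("ontology_file", ("medical.owl", open("medical.owl", "rb"), "application/rdf+xml")),
        ("ontology_file", ("drugs.owl", open("drugs.owl", "rb"), "application/rdf+xml")),
    ],
)
print(response.status_code, response.json())
```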


@@ -26,7 +26,7 @@ class RDFLibOntologyResolver(BaseOntologyResolver):
def __init__(
self,
ontology_file: Optional[Union[str, List[str], IO]] = None,
ontology_file: Optional[Union[str, List[str], IO, List[IO]]] = None,
matching_strategy: Optional[MatchingStrategy] = None,
) -> None:
super().__init__(matching_strategy)
@@ -34,47 +34,68 @@
try:
    self.graph = None
    if ontology_file is not None:
        if hasattr(ontology_file, "read"):
            self.graph = Graph()
            content = ontology_file.read()
            self.graph.parse(data=content, format="xml")
            logger.info("Ontology loaded successfully from file object")
        else:
            files_to_load = []
            if isinstance(ontology_file, str):
                files_to_load = [ontology_file]
            elif isinstance(ontology_file, list):
                files_to_load = ontology_file
            else:
                raise ValueError(
                    f"ontology_file must be a string, list of strings, file-like object, or None. Got: {type(ontology_file)}"
                )
            if files_to_load:
                self.graph = Graph()
                loaded_files = []
                for file_path in files_to_load:
                    if os.path.exists(file_path):
                        self.graph.parse(file_path)
                        loaded_files.append(file_path)
                        logger.info("Ontology loaded successfully from file: %s", file_path)
                    else:
                        logger.warning(
                            "Ontology file '%s' not found. Skipping this file.",
                            file_path,
                        )
                if not loaded_files:
                    logger.info(
                        "No valid ontology files found. No owl ontology will be attached to the graph."
                    )
                    self.graph = None
                else:
                    logger.info("Total ontology files loaded: %d", len(loaded_files))
        files_to_load = []
        file_objects = []
        if hasattr(ontology_file, "read"):
            file_objects = [ontology_file]
        elif isinstance(ontology_file, str):
            files_to_load = [ontology_file]
        elif isinstance(ontology_file, list):
            if all(hasattr(item, "read") for item in ontology_file):
                file_objects = ontology_file
            else:
                files_to_load = ontology_file
        else:
            raise ValueError(
                f"ontology_file must be a string, list of strings, file-like object, list of file-like objects, or None. Got: {type(ontology_file)}"
            )
        if file_objects:
            self.graph = Graph()
            loaded_objects = []
            for file_obj in file_objects:
                try:
                    content = file_obj.read()
                    self.graph.parse(data=content, format="xml")
                    loaded_objects.append(file_obj)
                    logger.info("Ontology loaded successfully from file object")
                except Exception as e:
                    logger.warning("Failed to parse ontology file object: %s", str(e))
            if not loaded_objects:
                logger.info(
                    "No valid ontology file objects found. No owl ontology will be attached to the graph."
                )
                self.graph = None
            else:
                logger.info("Total ontology file objects loaded: %d", len(loaded_objects))
        elif files_to_load:
            self.graph = Graph()
            loaded_files = []
            for file_path in files_to_load:
                if os.path.exists(file_path):
                    self.graph.parse(file_path)
                    loaded_files.append(file_path)
                    logger.info("Ontology loaded successfully from file: %s", file_path)
                else:
                    logger.warning(
                        "Ontology file '%s' not found. Skipping this file.",
                        file_path,
                    )
            if not loaded_files:
                logger.info(
                    "No valid ontology files found. No owl ontology will be attached to the graph."
                )
                self.graph = None
            else:
                logger.info("Total ontology files loaded: %d", len(loaded_files))
        else:
            logger.info(
                "No ontology file provided. No owl ontology will be attached to the graph."
            )
    else:
        logger.info(
            "No ontology file provided. No owl ontology will be attached to the graph."
        )