Merge branch 'dev' into feature/bedrock-llm-provider

2025-11-24 16:48:45 +01:00 · 2025-11-24 16:48:45 +01:00 · d97acba78e
commit d97acba78e
parent f732fbf55f 58fa95605a
40 changed files with 1234 additions and 90 deletions
--- a/.env.template
+++ b/.env.template
@ -21,6 +21,10 @@ LLM_PROVIDER="openai"
 LLM_ENDPOINT=""
 LLM_API_VERSION=""
 LLM_MAX_TOKENS="16384"
+# Instructor modes determine how structured output is requested from, and extracted from, LLM responses.
+# Set this variable to override the mode; leave it empty to use the provider's default.
+# Each provider has its own default, e.g. OpenAI gpt-5 models use "json_schema_mode".
+LLM_INSTRUCTOR_MODE=""

 EMBEDDING_PROVIDER="openai"
 EMBEDDING_MODEL="openai/text-embedding-3-large"
--- a/cognee/api/client.py
+++ b/cognee/api/client.py
@ -23,6 +23,7 @@ from cognee.api.v1.settings.routers import get_settings_router
 from cognee.api.v1.datasets.routers import get_datasets_router
 from cognee.api.v1.cognify.routers import get_code_pipeline_router, get_cognify_router
 from cognee.api.v1.search.routers import get_search_router
+from cognee.api.v1.ontologies.routers.get_ontology_router import get_ontology_router
 from cognee.api.v1.memify.routers import get_memify_router
 from cognee.api.v1.add.routers import get_add_router
 from cognee.api.v1.delete.routers import get_delete_router
@ -263,6 +264,8 @@ app.include_router(

 app.include_router(get_datasets_router(), prefix="/api/v1/datasets", tags=["datasets"])

+app.include_router(get_ontology_router(), prefix="/api/v1/ontologies", tags=["ontologies"])
+
 app.include_router(get_settings_router(), prefix="/api/v1/settings", tags=["settings"])

 app.include_router(get_visualize_router(), prefix="/api/v1/visualize", tags=["visualize"])
--- a/cognee/api/v1/cognify/routers/get_cognify_router.py
+++ b/cognee/api/v1/cognify/routers/get_cognify_router.py
@ -41,6 +41,9 @@ class CognifyPayloadDTO(InDTO):
    custom_prompt: Optional[str] = Field(
        default="", description="Custom prompt for entity extraction and graph generation"
    )
+    ontology_key: Optional[List[str]] = Field(
+        default=None, description="Reference to one or more previously uploaded ontologies"
+    )


 def get_cognify_router() -> APIRouter:
@ -68,6 +71,7 @@ def get_cognify_router() -> APIRouter:
        - **dataset_ids** (Optional[List[UUID]]): List of existing dataset UUIDs to process. UUIDs allow processing of datasets not owned by the user (if permitted).
        - **run_in_background** (Optional[bool]): Whether to execute processing asynchronously. Defaults to False (blocking).
        - **custom_prompt** (Optional[str]): Custom prompt for entity extraction and graph generation. If provided, this prompt will be used instead of the default prompts for knowledge graph extraction.
+        - **ontology_key** (Optional[List[str]]): Reference to one or more previously uploaded ontology files to use for knowledge graph construction.

        ## Response
        - **Blocking execution**: Complete pipeline run information with entity counts, processing duration, and success/failure status
@ -82,7 +86,8 @@ def get_cognify_router() -> APIRouter:
        {
            "datasets": ["research_papers", "documentation"],
            "run_in_background": false,
-            "custom_prompt": "Extract entities focusing on technical concepts and their relationships. Identify key technologies, methodologies, and their interconnections."
+            "custom_prompt": "Extract entities focusing on technical concepts and their relationships. Identify key technologies, methodologies, and their interconnections.",
+            "ontology_key": ["medical_ontology_v1"]
        }
        ```

@ -108,13 +113,35 @@ def get_cognify_router() -> APIRouter:
            )

        from cognee.api.v1.cognify import cognify as cognee_cognify
+        from cognee.api.v1.ontologies.ontologies import OntologyService

        try:
            datasets = payload.dataset_ids if payload.dataset_ids else payload.datasets
+            config_to_use = None
+
+            if payload.ontology_key:
+                ontology_service = OntologyService()
+                ontology_contents = ontology_service.get_ontology_contents(
+                    payload.ontology_key, user
+                )
+
+                from cognee.modules.ontology.ontology_config import Config
+                from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import (
+                    RDFLibOntologyResolver,
+                )
+                from io import StringIO
+
+                ontology_streams = [StringIO(content) for content in ontology_contents]
+                config_to_use: Config = {
+                    "ontology_config": {
+                        "ontology_resolver": RDFLibOntologyResolver(ontology_file=ontology_streams)
+                    }
+                }

            cognify_run = await cognee_cognify(
                datasets,
                user,
+                config=config_to_use,
                run_in_background=payload.run_in_background,
                custom_prompt=payload.custom_prompt,
            )
--- a/cognee/api/v1/ontologies/init.py
+++ b/cognee/api/v1/ontologies/init.py
@ -0,0 +1,4 @@
+from .ontologies import OntologyService
+from .routers.get_ontology_router import get_ontology_router
+
+__all__ = ["OntologyService", "get_ontology_router"]
--- a/cognee/api/v1/ontologies/ontologies.py
+++ b/cognee/api/v1/ontologies/ontologies.py
@ -0,0 +1,183 @@
+import os
+import json
+import tempfile
+from pathlib import Path
+from datetime import datetime, timezone
+from typing import Optional, List
+from dataclasses import dataclass
+
+
+@dataclass
+class OntologyMetadata:
+    """Metadata describing a single uploaded ontology file."""
+
+    # User-defined identifier the ontology is stored and looked up under.
+    ontology_key: str
+    # File name as reported by the uploaded file object.
+    filename: str
+    # Size of the uploaded content in bytes.
+    size_bytes: int
+    # Upload time as an ISO-8601 string (generated in UTC by the service).
+    uploaded_at: str
+    # Optional free-text description supplied at upload time.
+    description: Optional[str] = None
+
+
+class OntologyService:
+    """File-backed store for per-user ontology (.owl) uploads.
+
+    Every user gets a directory ``<system temp dir>/ontologies/<user id>`` that
+    holds one ``<ontology_key>.owl`` file per upload plus a ``metadata.json``
+    index mapping each key to its filename, size, upload time and description.
+    """
+
+    # Largest accepted upload in bytes (10MB).
+    MAX_FILE_SIZE_BYTES = 10 * 1024 * 1024
+
+    @property
+    def base_dir(self) -> Path:
+        """Root directory under which all user ontology directories live."""
+        return Path(tempfile.gettempdir()) / "ontologies"
+
+    def _get_user_dir(self, user_id: str) -> Path:
+        """Return the directory for ``user_id``, creating it if it is missing."""
+        user_dir = self.base_dir / str(user_id)
+        user_dir.mkdir(parents=True, exist_ok=True)
+        return user_dir
+
+    def _get_metadata_path(self, user_dir: Path) -> Path:
+        """Return the path of the metadata index inside ``user_dir``."""
+        return user_dir / "metadata.json"
+
+    def _load_metadata(self, user_dir: Path) -> dict:
+        """Load the metadata index, or return an empty dict if none exists yet."""
+        metadata_path = self._get_metadata_path(user_dir)
+        if metadata_path.exists():
+            with open(metadata_path, "r", encoding="utf-8") as f:
+                return json.load(f)
+        return {}
+
+    def _save_metadata(self, user_dir: Path, metadata: dict):
+        """Overwrite the metadata index of ``user_dir`` with ``metadata``."""
+        metadata_path = self._get_metadata_path(user_dir)
+        with open(metadata_path, "w", encoding="utf-8") as f:
+            json.dump(metadata, f, indent=2)
+
+    @staticmethod
+    def _validate_key(key) -> None:
+        """Ensure ``key`` is safe to embed in a file name.
+
+        The key becomes ``<key>.owl`` inside the user's directory, so it must
+        not contain path separators (or be absolute); otherwise a caller could
+        read or write files outside that directory.
+
+        Raises:
+            ValueError: If the key is not a non-empty string or contains path
+                components.
+        """
+        if not isinstance(key, str) or not key.strip():
+            raise ValueError("Ontology key must be a non-empty string")
+
+        has_path_parts = (
+            "/" in key or "\\" in key or "\x00" in key or os.path.basename(key) != key
+        )
+        if has_path_parts:
+            raise ValueError(f"Invalid ontology key '{key}'")
+
+    def _write_ontology(
+        self, user_dir: Path, key: str, filename: str, content: bytes, description: Optional[str]
+    ) -> dict:
+        """Write ``content`` to ``<key>.owl`` and return its metadata record."""
+        with open(user_dir / f"{key}.owl", "wb") as f:
+            f.write(content)
+
+        return {
+            "filename": filename,
+            "size_bytes": len(content),
+            "uploaded_at": datetime.now(timezone.utc).isoformat(),
+            "description": description,
+        }
+
+    async def upload_ontology(
+        self, ontology_key: str, file, user, description: Optional[str] = None
+    ) -> OntologyMetadata:
+        """
+        Upload a single ontology file under ``ontology_key``.
+
+        Args:
+            ontology_key: Unique key for the ontology
+            file: Upload object exposing ``filename`` and an async ``read()``
+            user: Authenticated user
+            description: Optional description of the ontology
+
+        Returns:
+            OntologyMetadata for the stored file
+
+        Raises:
+            ValueError: If the key is invalid or already exists, the file is not
+                ``.owl``, or the file exceeds the size limit
+        """
+        self._validate_key(ontology_key)
+
+        if not file.filename.lower().endswith(".owl"):
+            raise ValueError("File must be in .owl format")
+
+        user_dir = self._get_user_dir(str(user.id))
+        metadata = self._load_metadata(user_dir)
+
+        if ontology_key in metadata:
+            raise ValueError(f"Ontology key '{ontology_key}' already exists")
+
+        content = await file.read()
+        if len(content) > self.MAX_FILE_SIZE_BYTES:
+            raise ValueError("File size exceeds 10MB limit")
+
+        record = self._write_ontology(user_dir, ontology_key, file.filename, content, description)
+        metadata[ontology_key] = record
+        self._save_metadata(user_dir, metadata)
+
+        return OntologyMetadata(ontology_key=ontology_key, **record)
+
+    async def upload_ontologies(
+        self, ontology_key: List[str], files: List, user, descriptions: Optional[List[str]] = None
+    ) -> List[OntologyMetadata]:
+        """
+        Upload ontology files with their respective keys.
+
+        All files are validated before any of them is written, so a rejected
+        file does not leave earlier files of the same batch on disk.
+
+        Args:
+            ontology_key: List of unique keys for each ontology
+            files: List of UploadFile objects (same length as keys)
+            user: Authenticated user
+            descriptions: Optional list of descriptions for each file
+
+        Returns:
+            List of OntologyMetadata objects for uploaded files
+
+        Raises:
+            ValueError: If keys are invalid or duplicated, file format invalid,
+                file too large, or array lengths don't match
+        """
+        if len(ontology_key) != len(files):
+            raise ValueError("Number of keys must match number of files")
+
+        # Validate before the set() below so unhashable/non-string keys are
+        # reported as a ValueError instead of a TypeError.
+        for key in ontology_key:
+            self._validate_key(key)
+
+        if len(set(ontology_key)) != len(ontology_key):
+            raise ValueError("Duplicate ontology keys not allowed")
+
+        if descriptions and len(descriptions) != len(files):
+            raise ValueError("Number of descriptions must match number of files")
+
+        user_dir = self._get_user_dir(str(user.id))
+        metadata = self._load_metadata(user_dir)
+
+        # Pass 1: read and validate every upload without touching the disk.
+        pending = []
+        for index, (key, file) in enumerate(zip(ontology_key, files)):
+            if key in metadata:
+                raise ValueError(f"Ontology key '{key}' already exists")
+
+            if not file.filename.lower().endswith(".owl"):
+                raise ValueError(f"File '{file.filename}' must be in .owl format")
+
+            content = await file.read()
+            if len(content) > self.MAX_FILE_SIZE_BYTES:
+                raise ValueError(f"File '{file.filename}' exceeds 10MB limit")
+
+            description = descriptions[index] if descriptions else None
+            pending.append((key, file.filename, content, description))
+
+        # Pass 2: everything is valid, persist files and collect their metadata.
+        results = []
+        for key, filename, content, description in pending:
+            record = self._write_ontology(user_dir, key, filename, content, description)
+            metadata[key] = record
+            results.append(OntologyMetadata(ontology_key=key, **record))
+
+        self._save_metadata(user_dir, metadata)
+        return results
+
+    def get_ontology_contents(self, ontology_key: List[str], user) -> List[str]:
+        """
+        Retrieve ontology content for one or more keys.
+
+        Args:
+            ontology_key: List of ontology keys to retrieve (can contain single item)
+            user: Authenticated user
+
+        Returns:
+            List of ontology content strings
+
+        Raises:
+            ValueError: If any ontology key is invalid or not found
+        """
+        user_dir = self._get_user_dir(str(user.id))
+        metadata = self._load_metadata(user_dir)
+
+        contents = []
+        for key in ontology_key:
+            # Never build a path from a key that could escape the user's directory.
+            self._validate_key(key)
+
+            if key not in metadata:
+                raise ValueError(f"Ontology key '{key}' not found")
+
+            file_path = user_dir / f"{key}.owl"
+            if not file_path.exists():
+                raise ValueError(f"Ontology file for key '{key}' not found")
+
+            with open(file_path, "r", encoding="utf-8") as f:
+                contents.append(f.read())
+        return contents
+
+    def list_ontologies(self, user) -> dict:
+        """Return the metadata index (key -> metadata dict) for ``user``."""
+        user_dir = self._get_user_dir(str(user.id))
+        return self._load_metadata(user_dir)
--- a/cognee/api/v1/ontologies/routers/init.py
+++ b/cognee/api/v1/ontologies/routers/init.py
--- a/cognee/api/v1/ontologies/routers/get_ontology_router.py
+++ b/cognee/api/v1/ontologies/routers/get_ontology_router.py
@ -0,0 +1,107 @@
+from fastapi import APIRouter, File, Form, UploadFile, Depends, HTTPException
+from fastapi.responses import JSONResponse
+from typing import Optional, List
+
+from cognee.modules.users.models import User
+from cognee.modules.users.methods import get_authenticated_user
+from cognee.shared.utils import send_telemetry
+from cognee import __version__ as cognee_version
+from ..ontologies import OntologyService
+
+
+def get_ontology_router() -> APIRouter:
+    """Build the router exposing the ontology upload (POST) and list (GET) endpoints."""
+    router = APIRouter()
+    ontology_service = OntologyService()
+
+    @router.post("", response_model=dict)
+    async def upload_ontology(
+        ontology_key: str = Form(...),
+        ontology_file: List[UploadFile] = File(...),
+        descriptions: Optional[str] = Form(None),
+        user: User = Depends(get_authenticated_user),
+    ):
+        """
+        Upload ontology files with their respective keys for later use in cognify operations.
+
+        Supports both single and multiple file uploads:
+        - Single file: ontology_key=["key"], ontology_file=[file]
+        - Multiple files: ontology_key=["key1", "key2"], ontology_file=[file1, file2]
+
+        ## Request Parameters
+        - **ontology_key** (str): JSON array string of user-defined identifiers for the ontologies
+        - **ontology_file** (List[UploadFile]): OWL format ontology files
+        - **descriptions** (Optional[str]): JSON array string of optional descriptions
+
+        ## Response
+        Returns metadata about uploaded ontologies including keys, filenames, sizes, and upload timestamps.
+
+        ## Error Codes
+        - **400 Bad Request**: Invalid file format, duplicate keys, array length mismatches, file size exceeded
+        - **500 Internal Server Error**: File system or processing errors
+        """
+        import json
+
+        send_telemetry(
+            "Ontology Upload API Endpoint Invoked",
+            user.id,
+            additional_properties={
+                "endpoint": "POST /api/v1/ontologies",
+                "cognee_version": cognee_version,
+            },
+        )
+
+        try:
+            ontology_keys = json.loads(ontology_key)
+            description_list = json.loads(descriptions) if descriptions else None
+
+            if not isinstance(ontology_keys, list):
+                raise ValueError("ontology_key must be a JSON array")
+
+            # Keys end up in file names and as metadata keys, so they must be strings.
+            if not all(isinstance(key, str) for key in ontology_keys):
+                raise ValueError("ontology_key must be a JSON array of strings")
+
+            # A JSON string/object would otherwise be indexed as if it were a list.
+            if description_list is not None and not isinstance(description_list, list):
+                raise ValueError("descriptions must be a JSON array")
+
+            results = await ontology_service.upload_ontologies(
+                ontology_keys, ontology_file, user, description_list
+            )
+
+            return {
+                "uploaded_ontologies": [
+                    {
+                        "ontology_key": result.ontology_key,
+                        "filename": result.filename,
+                        "size_bytes": result.size_bytes,
+                        "uploaded_at": result.uploaded_at,
+                        "description": result.description,
+                    }
+                    for result in results
+                ]
+            }
+        except ValueError as e:
+            # json.JSONDecodeError is a ValueError subclass, so malformed JSON
+            # and validation failures are both reported as 400.
+            return JSONResponse(status_code=400, content={"error": str(e)})
+        except Exception as e:
+            return JSONResponse(status_code=500, content={"error": str(e)})
+
+    @router.get("", response_model=dict)
+    async def list_ontologies(user: User = Depends(get_authenticated_user)):
+        """
+        List all uploaded ontologies for the authenticated user.
+
+        ## Response
+        Returns a dictionary mapping ontology keys to their metadata including filename, size, and upload timestamp.
+
+        ## Error Codes
+        - **500 Internal Server Error**: File system or processing errors
+        """
+        send_telemetry(
+            "Ontology List API Endpoint Invoked",
+            user.id,
+            additional_properties={
+                "endpoint": "GET /api/v1/ontologies",
+                "cognee_version": cognee_version,
+            },
+        )
+
+        try:
+            metadata = ontology_service.list_ontologies(user)
+            return metadata
+        except Exception as e:
+            return JSONResponse(status_code=500, content={"error": str(e)})
+
+    return router
--- a/cognee/cli/commands/cognify_command.py
+++ b/cognee/cli/commands/cognify_command.py
@ -22,7 +22,7 @@ relationships, and creates semantic connections for enhanced search and reasonin

 Processing Pipeline:
 1. **Document Classification**: Identifies document types and structures
-2. **Permission Validation**: Ensures user has processing rights  
+2. **Permission Validation**: Ensures user has processing rights
 3. **Text Chunking**: Breaks content into semantically meaningful segments
 4. **Entity Extraction**: Identifies key concepts, people, places, organizations
 5. **Relationship Detection**: Discovers connections between entities
@ -97,6 +97,13 @@ After successful cognify processing, use `cognee search` to query the knowledge
                            chunker_class = LangchainChunker
                        except ImportError:
                            fmt.warning("LangchainChunker not available, using TextChunker")
+                    elif args.chunker == "CsvChunker":
+                        try:
+                            from cognee.modules.chunking.CsvChunker import CsvChunker
+
+                            chunker_class = CsvChunker
+                        except ImportError:
+                            fmt.warning("CsvChunker not available, using TextChunker")

                    result = await cognee.cognify(
                        datasets=datasets,
--- a/cognee/cli/config.py
+++ b/cognee/cli/config.py
@ -26,7 +26,7 @@ SEARCH_TYPE_CHOICES = [
 ]

 # Chunker choices
-CHUNKER_CHOICES = ["TextChunker", "LangchainChunker"]
+CHUNKER_CHOICES = ["TextChunker", "LangchainChunker", "CsvChunker"]

 # Output format choices
 OUTPUT_FORMAT_CHOICES = ["json", "pretty", "simple"]
--- a/cognee/infrastructure/files/utils/guess_file_type.py
+++ b/cognee/infrastructure/files/utils/guess_file_type.py
@ -55,6 +55,10 @@ def guess_file_type(file: BinaryIO, name: Optional[str] = None) -> filetype.Type
        file_type = Type("text/plain", "txt")
        return file_type

+    if ext in [".csv"]:
+        file_type = Type("text/csv", "csv")
+        return file_type
+
    file_type = filetype.guess(file)

    # If file type could not be determined consider it a plain text file as they don't have magic number encoding
--- a/cognee/infrastructure/llm/config.py
+++ b/cognee/infrastructure/llm/config.py
@ -38,6 +38,7 @@ class LLMConfig(BaseSettings):
    """

    structured_output_framework: str = "instructor"
+    llm_instructor_mode: str = ""
    llm_provider: str = "openai"
    llm_model: str = "openai/gpt-5-mini"
    llm_endpoint: str = ""
@ -181,6 +182,7 @@ class LLMConfig(BaseSettings):
              instance.
        """
        return {
+            "llm_instructor_mode": self.llm_instructor_mode.lower(),
            "provider": self.llm_provider,
            "model": self.llm_model,
            "endpoint": self.llm_endpoint,
--- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py
+++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py
@ -28,13 +28,16 @@ class AnthropicAdapter(LLMInterface):

    name = "Anthropic"
    model: str
+    default_instructor_mode = "anthropic_tools"

-    def __init__(self, max_completion_tokens: int, model: str = None):
+    def __init__(self, max_completion_tokens: int, model: str = None, instructor_mode: str = None):
        import anthropic

+        self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
+
        self.aclient = instructor.patch(
            create=anthropic.AsyncAnthropic(api_key=get_llm_config().llm_api_key).messages.create,
-            mode=instructor.Mode.ANTHROPIC_TOOLS,
+            mode=instructor.Mode(self.instructor_mode),
        )

        self.model = model
--- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py
+++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py
@ -41,6 +41,7 @@ class GeminiAdapter(LLMInterface):
    name: str
    model: str
    api_key: str
+    default_instructor_mode = "json_mode"

    def __init__(
        self,
@ -49,6 +50,7 @@ class GeminiAdapter(LLMInterface):
        model: str,
        api_version: str,
        max_completion_tokens: int,
+        instructor_mode: str = None,
        fallback_model: str = None,
        fallback_api_key: str = None,
        fallback_endpoint: str = None,
@ -63,7 +65,11 @@ class GeminiAdapter(LLMInterface):
        self.fallback_api_key = fallback_api_key
        self.fallback_endpoint = fallback_endpoint

-        self.aclient = instructor.from_litellm(litellm.acompletion, mode=instructor.Mode.JSON)
+        self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
+
+        self.aclient = instructor.from_litellm(
+            litellm.acompletion, mode=instructor.Mode(self.instructor_mode)
+        )

    @retry(
        stop=stop_after_delay(128),
--- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py
+++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py
@ -41,6 +41,7 @@ class GenericAPIAdapter(LLMInterface):
    name: str
    model: str
    api_key: str
+    default_instructor_mode = "json_mode"

    def __init__(
        self,
@ -49,6 +50,7 @@ class GenericAPIAdapter(LLMInterface):
        model: str,
        name: str,
        max_completion_tokens: int,
+        instructor_mode: str = None,
        fallback_model: str = None,
        fallback_api_key: str = None,
        fallback_endpoint: str = None,
@ -63,7 +65,11 @@ class GenericAPIAdapter(LLMInterface):
        self.fallback_api_key = fallback_api_key
        self.fallback_endpoint = fallback_endpoint

-        self.aclient = instructor.from_litellm(litellm.acompletion, mode=instructor.Mode.JSON)
+        self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
+
+        self.aclient = instructor.from_litellm(
+            litellm.acompletion, mode=instructor.Mode(self.instructor_mode)
+        )

    @retry(
        stop=stop_after_delay(128),
--- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py
+++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py
@ -83,6 +83,7 @@ def get_llm_client(raise_api_key_error: bool = True):
            model=llm_config.llm_model,
            transcription_model=llm_config.transcription_model,
            max_completion_tokens=max_completion_tokens,
+            instructor_mode=llm_config.llm_instructor_mode.lower(),
            streaming=llm_config.llm_streaming,
            fallback_api_key=llm_config.fallback_api_key,
            fallback_endpoint=llm_config.fallback_endpoint,
@ -103,6 +104,7 @@ def get_llm_client(raise_api_key_error: bool = True):
            llm_config.llm_model,
            "Ollama",
            max_completion_tokens=max_completion_tokens,
+            instructor_mode=llm_config.llm_instructor_mode.lower(),
        )

    elif provider == LLMProvider.ANTHROPIC:
@ -111,7 +113,9 @@ def get_llm_client(raise_api_key_error: bool = True):
        )

        return AnthropicAdapter(
-            max_completion_tokens=max_completion_tokens, model=llm_config.llm_model
+            max_completion_tokens=max_completion_tokens,
+            model=llm_config.llm_model,
+            instructor_mode=llm_config.llm_instructor_mode.lower(),
        )

    elif provider == LLMProvider.CUSTOM:
@ -128,6 +132,7 @@ def get_llm_client(raise_api_key_error: bool = True):
            llm_config.llm_model,
            "Custom",
            max_completion_tokens=max_completion_tokens,
+            instructor_mode=llm_config.llm_instructor_mode.lower(),
            fallback_api_key=llm_config.fallback_api_key,
            fallback_endpoint=llm_config.fallback_endpoint,
            fallback_model=llm_config.fallback_model,
@ -147,6 +152,7 @@ def get_llm_client(raise_api_key_error: bool = True):
            max_completion_tokens=max_completion_tokens,
            endpoint=llm_config.llm_endpoint,
            api_version=llm_config.llm_api_version,
+            instructor_mode=llm_config.llm_instructor_mode.lower(),
        )

    elif provider == LLMProvider.MISTRAL:
@ -162,6 +168,7 @@ def get_llm_client(raise_api_key_error: bool = True):
            model=llm_config.llm_model,
            max_completion_tokens=max_completion_tokens,
            endpoint=llm_config.llm_endpoint,
+            instructor_mode=llm_config.llm_instructor_mode.lower(),
        )

    elif provider == LLMProvider.BEDROCK:
--- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py
+++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py
@ -37,16 +37,26 @@ class MistralAdapter(LLMInterface):
    model: str
    api_key: str
    max_completion_tokens: int
+    default_instructor_mode = "mistral_tools"

-    def __init__(self, api_key: str, model: str, max_completion_tokens: int, endpoint: str = None):
+    def __init__(
+        self,
+        api_key: str,
+        model: str,
+        max_completion_tokens: int,
+        endpoint: str = None,
+        instructor_mode: str = None,
+    ):
        from mistralai import Mistral

        self.model = model
        self.max_completion_tokens = max_completion_tokens

+        self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
+
        self.aclient = instructor.from_litellm(
            litellm.acompletion,
-            mode=instructor.Mode.MISTRAL_TOOLS,
+            mode=instructor.Mode(self.instructor_mode),
            api_key=get_llm_config().llm_api_key,
        )

--- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py
+++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py
@ -42,8 +42,16 @@ class OllamaAPIAdapter(LLMInterface):
    - aclient
    """

+    default_instructor_mode = "json_mode"
+
    def __init__(
-        self, endpoint: str, api_key: str, model: str, name: str, max_completion_tokens: int
+        self,
+        endpoint: str,
+        api_key: str,
+        model: str,
+        name: str,
+        max_completion_tokens: int,
+        instructor_mode: str = None,
    ):
        self.name = name
        self.model = model
@ -51,8 +59,11 @@ class OllamaAPIAdapter(LLMInterface):
        self.endpoint = endpoint
        self.max_completion_tokens = max_completion_tokens

+        self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
+
        self.aclient = instructor.from_openai(
-            OpenAI(base_url=self.endpoint, api_key=self.api_key), mode=instructor.Mode.JSON
+            OpenAI(base_url=self.endpoint, api_key=self.api_key),
+            mode=instructor.Mode(self.instructor_mode),
        )

    @retry(
--- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py
+++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py
@ -56,6 +56,7 @@ class OpenAIAdapter(LLMInterface):
    model: str
    api_key: str
    api_version: str
+    default_instructor_mode = "json_schema_mode"

    MAX_RETRIES = 5

@ -69,19 +70,21 @@ class OpenAIAdapter(LLMInterface):
        model: str,
        transcription_model: str,
        max_completion_tokens: int,
+        instructor_mode: str = None,
        streaming: bool = False,
        fallback_model: str = None,
        fallback_api_key: str = None,
        fallback_endpoint: str = None,
    ):
+        self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
        # TODO: With gpt5 series models OpenAI expects JSON_SCHEMA as a mode for structured outputs.
        #       Make sure all new gpt models will work with this mode as well.
        if "gpt-5" in model:
            self.aclient = instructor.from_litellm(
-                litellm.acompletion, mode=instructor.Mode.JSON_SCHEMA
+                litellm.acompletion, mode=instructor.Mode(self.instructor_mode)
            )
            self.client = instructor.from_litellm(
-                litellm.completion, mode=instructor.Mode.JSON_SCHEMA
+                litellm.completion, mode=instructor.Mode(self.instructor_mode)
            )
        else:
            self.aclient = instructor.from_litellm(litellm.acompletion)
--- a/cognee/infrastructure/loaders/LoaderEngine.py
+++ b/cognee/infrastructure/loaders/LoaderEngine.py
@ -31,6 +31,7 @@ class LoaderEngine:
            "pypdf_loader",
            "image_loader",
            "audio_loader",
+            "csv_loader",
            "unstructured_loader",
            "advanced_pdf_loader",
        ]
--- a/cognee/infrastructure/loaders/core/init.py
+++ b/cognee/infrastructure/loaders/core/init.py
@ -3,5 +3,6 @@
 from .text_loader import TextLoader
 from .audio_loader import AudioLoader
 from .image_loader import ImageLoader
+from .csv_loader import CsvLoader

-__all__ = ["TextLoader", "AudioLoader", "ImageLoader"]
+__all__ = ["TextLoader", "AudioLoader", "ImageLoader", "CsvLoader"]
--- a/cognee/infrastructure/loaders/core/csv_loader.py
+++ b/cognee/infrastructure/loaders/core/csv_loader.py
@ -0,0 +1,93 @@
+import os
+from typing import List
+import csv
+from cognee.infrastructure.loaders.LoaderInterface import LoaderInterface
+from cognee.infrastructure.files.storage import get_file_storage, get_storage_config
+from cognee.infrastructure.files.utils.get_file_metadata import get_file_metadata
+
+
+class CsvLoader(LoaderInterface):
+    """
+    Core CSV file loader.
+
+    Converts every CSV data row into a "column: value" text line and stores the
+    resulting text through the configured file storage.
+    """
+
+    @property
+    def supported_extensions(self) -> List[str]:
+        """Supported CSV file extensions."""
+        return [
+            "csv",
+        ]
+
+    @property
+    def supported_mime_types(self) -> List[str]:
+        """Supported MIME types for CSV content."""
+        return [
+            "text/csv",
+        ]
+
+    @property
+    def loader_name(self) -> str:
+        """Unique identifier for this loader."""
+        return "csv_loader"
+
+    def can_handle(self, extension: str, mime_type: str) -> bool:
+        """
+        Check if this loader can handle the given file.
+
+        Args:
+            extension: File extension
+            mime_type: MIME type of the file
+
+        Returns:
+            True only if both the extension and the MIME type are supported
+        """
+        return extension in self.supported_extensions and mime_type in self.supported_mime_types
+
+    async def load(self, file_path: str, encoding: str = "utf-8", **kwargs):
+        """
+        Load a CSV file, render its rows as text and store the result.
+
+        The first CSV row is used as the header. Every following row becomes a
+        block of the form "Row <n>:" followed by comma-separated
+        "<column>: <value>" pairs; blocks are joined with blank lines.
+
+        Args:
+            file_path: Path to the file to load
+            encoding: Text encoding to use (default: utf-8)
+            **kwargs: Additional configuration (unused)
+
+        Returns:
+            The value returned by the storage backend's ``store`` call for the
+            generated text file (used here as the full path of the stored file)
+
+        Raises:
+            FileNotFoundError: If file doesn't exist
+            UnicodeDecodeError: If file cannot be decoded with specified encoding
+            OSError: If file cannot be read
+        """
+        if not os.path.exists(file_path):
+            raise FileNotFoundError(f"File not found: {file_path}")
+
+        with open(file_path, "rb") as f:
+            file_metadata = await get_file_metadata(f)
+        # Name ingested file of current loader based on original file content hash
+        storage_file_name = "text_" + file_metadata["content_hash"] + ".txt"
+
+        row_texts = []
+
+        # newline="" lets the csv module handle line endings inside quoted fields.
+        with open(file_path, "r", encoding=encoding, newline="") as file:
+            for row_index, row in enumerate(csv.DictReader(file), start=1):
+                row_text = ", ".join(f"{key}: {value}" for key, value in row.items())
+                row_texts.append(f"Row {row_index}:\n{row_text}\n")
+
+        content = "\n".join(row_texts)
+
+        storage_config = get_storage_config()
+        data_root_directory = storage_config["data_root_directory"]
+        storage = get_file_storage(data_root_directory)
+
+        full_file_path = await storage.store(storage_file_name, content)
+
+        return full_file_path
--- a/cognee/infrastructure/loaders/core/text_loader.py
+++ b/cognee/infrastructure/loaders/core/text_loader.py
@ -16,7 +16,7 @@ class TextLoader(LoaderInterface):
    @property
    def supported_extensions(self) -> List[str]:
        """Supported text file extensions."""
-        return ["txt", "md", "csv", "json", "xml", "yaml", "yml", "log"]
+        return ["txt", "md", "json", "xml", "yaml", "yml", "log"]

    @property
    def supported_mime_types(self) -> List[str]:
@ -24,7 +24,6 @@ class TextLoader(LoaderInterface):
        return [
            "text/plain",
            "text/markdown",
-            "text/csv",
            "application/json",
            "text/xml",
            "application/xml",
--- a/cognee/infrastructure/loaders/external/advanced_pdf_loader.py
+++ b/cognee/infrastructure/loaders/external/advanced_pdf_loader.py
@ -227,12 +227,3 @@ class AdvancedPdfLoader(LoaderInterface):
        if value is None:
            return ""
        return str(value).replace("\xa0", " ").strip()
-
-
-if __name__ == "__main__":
-    loader = AdvancedPdfLoader()
-    asyncio.run(
-        loader.load(
-            "/Users/xiaotao/work/cognee/cognee/infrastructure/loaders/external/attention_is_all_you_need.pdf"
-        )
-    )
--- a/cognee/infrastructure/loaders/supported_loaders.py
+++ b/cognee/infrastructure/loaders/supported_loaders.py
@ -1,5 +1,5 @@
 from cognee.infrastructure.loaders.external import PyPdfLoader
-from cognee.infrastructure.loaders.core import TextLoader, AudioLoader, ImageLoader
+from cognee.infrastructure.loaders.core import TextLoader, AudioLoader, ImageLoader, CsvLoader

 # Registry for loader implementations
 supported_loaders = {
@ -7,6 +7,7 @@ supported_loaders = {
    TextLoader.loader_name: TextLoader,
    ImageLoader.loader_name: ImageLoader,
    AudioLoader.loader_name: AudioLoader,
+    CsvLoader.loader_name: CsvLoader,
 }

 # Try adding optional loaders
--- a/cognee/modules/chunking/CsvChunker.py
+++ b/cognee/modules/chunking/CsvChunker.py
@ -0,0 +1,35 @@
+from cognee.shared.logging_utils import get_logger
+
+
+from cognee.tasks.chunks import chunk_by_row
+from cognee.modules.chunking.Chunker import Chunker
+from .models.DocumentChunk import DocumentChunk
+
+logger = get_logger()
+
+
+class CsvChunker(Chunker):
+    """Chunker that turns row text into DocumentChunk objects via chunk_by_row."""
+
+    async def read(self):
+        """
+        Yield one DocumentChunk for every piece chunk_by_row produces.
+
+        Texts that come back as None from get_text() are skipped. The chunk
+        index stored on each DocumentChunk is this chunker's own running
+        counter, not the index reported by chunk_by_row.
+
+        Raises:
+            ValueError: If a produced piece is larger than max_chunk_size.
+        """
+        async for content_text in self.get_text():
+            if content_text is None:
+                continue
+
+            for piece in chunk_by_row(content_text, self.max_chunk_size):
+                # Guard first: an oversized piece aborts the whole read.
+                if piece["chunk_size"] > self.max_chunk_size:
+                    raise ValueError(
+                        f"Chunk size is larger than the maximum chunk size {self.max_chunk_size}"
+                    )
+
+                yield DocumentChunk(
+                    id=piece["chunk_id"],
+                    text=piece["text"],
+                    chunk_size=piece["chunk_size"],
+                    is_part_of=self.document,
+                    chunk_index=self.chunk_index,
+                    cut_type=piece["cut_type"],
+                    contains=[],
+                    metadata={
+                        "index_fields": ["text"],
+                    },
+                )
+                self.chunk_index += 1
--- a/cognee/modules/data/processing/document_types/CsvDocument.py
+++ b/cognee/modules/data/processing/document_types/CsvDocument.py
@ -0,0 +1,33 @@
+import io
+import csv
+from typing import Type
+
+from cognee.modules.chunking.Chunker import Chunker
+from cognee.infrastructure.files.utils.open_data_file import open_data_file
+from .Document import Document
+
+
+class CsvDocument(Document):
+    # Document type for csv files: each data row is handed to the chunker as
+    # one line of "column: value" pairs joined with ", ".
+    type: str = "csv"
+    mime_type: str = "text/csv"
+
+    async def read(self, chunker_cls: Type[Chunker], max_chunk_size: int):
+        """
+        Read the csv at raw_data_location and yield the chunks built from it.
+
+        Args:
+            chunker_cls: Chunker class instantiated with this document, the
+                size limit and the row-text generator.
+            max_chunk_size: Size limit forwarded to the chunker.
+
+        Yields:
+            Whatever chunker.read() yields.
+        """
+
+        async def get_text():
+            # The file is read in one go and parsed from memory; the first
+            # csv line supplies the column names for DictReader.
+            async with open_data_file(
+                self.raw_data_location, mode="r", encoding="utf-8", newline=""
+            ) as file:
+                rows = csv.DictReader(io.StringIO(file.read()))
+
+                for row in rows:
+                    row_text = ", ".join(f"{k!s}: {v!s}" for k, v in row.items())
+                    # A row that renders to nothing ends the iteration.
+                    if not row_text.strip():
+                        break
+                    yield row_text
+
+        chunker = chunker_cls(self, max_chunk_size=max_chunk_size, get_text=get_text)
+
+        async for chunk in chunker.read():
+            yield chunk
--- a/cognee/modules/data/processing/document_types/init.py
+++ b/cognee/modules/data/processing/document_types/init.py
@ -4,3 +4,4 @@ from .TextDocument import TextDocument
 from .ImageDocument import ImageDocument
 from .AudioDocument import AudioDocument
 from .UnstructuredDocument import UnstructuredDocument
+from .CsvDocument import CsvDocument
--- a/cognee/modules/notebooks/operations/run_in_local_sandbox.py
+++ b/cognee/modules/notebooks/operations/run_in_local_sandbox.py
@ -2,6 +2,8 @@ import io
 import sys
 import traceback

+import cognee
+

 def wrap_in_async_handler(user_code: str) -> str:
    return (
@ -34,6 +36,7 @@ def run_in_local_sandbox(code, environment=None, loop=None):

    environment["print"] = customPrintFunction
    environment["running_loop"] = loop
+    environment["cognee"] = cognee

    try:
        exec(code, environment)
--- a/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py
+++ b/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py
@ -2,7 +2,7 @@ import os
 import difflib
 from cognee.shared.logging_utils import get_logger
 from collections import deque
-from typing import List, Tuple, Dict, Optional, Any, Union
+from typing import List, Tuple, Dict, Optional, Any, Union, IO
 from rdflib import Graph, URIRef, RDF, RDFS, OWL

 from cognee.modules.ontology.exceptions import (
@ -26,44 +26,76 @@ class RDFLibOntologyResolver(BaseOntologyResolver):

    def __init__(
        self,
-        ontology_file: Optional[Union[str, List[str]]] = None,
+        ontology_file: Optional[Union[str, List[str], IO, List[IO]]] = None,
        matching_strategy: Optional[MatchingStrategy] = None,
    ) -> None:
        super().__init__(matching_strategy)
        self.ontology_file = ontology_file
        try:
-            files_to_load = []
+            self.graph = None
            if ontology_file is not None:
-                if isinstance(ontology_file, str):
+                files_to_load = []
+                file_objects = []
+
+                if hasattr(ontology_file, "read"):
+                    file_objects = [ontology_file]
+                elif isinstance(ontology_file, str):
                    files_to_load = [ontology_file]
                elif isinstance(ontology_file, list):
-                    files_to_load = ontology_file
+                    if all(hasattr(item, "read") for item in ontology_file):
+                        file_objects = ontology_file
+                    else:
+                        files_to_load = ontology_file
                else:
                    raise ValueError(
-                        f"ontology_file must be a string, list of strings, or None. Got: {type(ontology_file)}"
+                        f"ontology_file must be a string, list of strings, file-like object, list of file-like objects, or None. Got: {type(ontology_file)}"
                    )

-            if files_to_load:
-                self.graph = Graph()
-                loaded_files = []
-                for file_path in files_to_load:
-                    if os.path.exists(file_path):
-                        self.graph.parse(file_path)
-                        loaded_files.append(file_path)
-                        logger.info("Ontology loaded successfully from file: %s", file_path)
-                    else:
-                        logger.warning(
-                            "Ontology file '%s' not found. Skipping this file.",
-                            file_path,
+                if file_objects:
+                    self.graph = Graph()
+                    loaded_objects = []
+                    for file_obj in file_objects:
+                        try:
+                            content = file_obj.read()
+                            self.graph.parse(data=content, format="xml")
+                            loaded_objects.append(file_obj)
+                            logger.info("Ontology loaded successfully from file object")
+                        except Exception as e:
+                            logger.warning("Failed to parse ontology file object: %s", str(e))
+
+                    if not loaded_objects:
+                        logger.info(
+                            "No valid ontology file objects found. No owl ontology will be attached to the graph."
                        )
+                        self.graph = None
+                    else:
+                        logger.info("Total ontology file objects loaded: %d", len(loaded_objects))

-                if not loaded_files:
-                    logger.info(
-                        "No valid ontology files found. No owl ontology will be attached to the graph."
-                    )
-                    self.graph = None
+                elif files_to_load:
+                    self.graph = Graph()
+                    loaded_files = []
+                    for file_path in files_to_load:
+                        if os.path.exists(file_path):
+                            self.graph.parse(file_path)
+                            loaded_files.append(file_path)
+                            logger.info("Ontology loaded successfully from file: %s", file_path)
+                        else:
+                            logger.warning(
+                                "Ontology file '%s' not found. Skipping this file.",
+                                file_path,
+                            )
+
+                    if not loaded_files:
+                        logger.info(
+                            "No valid ontology files found. No owl ontology will be attached to the graph."
+                        )
+                        self.graph = None
+                    else:
+                        logger.info("Total ontology files loaded: %d", len(loaded_files))
                else:
-                    logger.info("Total ontology files loaded: %d", len(loaded_files))
+                    logger.info(
+                        "No ontology file provided. No owl ontology will be attached to the graph."
+                    )
            else:
                logger.info(
                    "No ontology file provided. No owl ontology will be attached to the graph."
--- a/cognee/tasks/chunks/init.py
+++ b/cognee/tasks/chunks/init.py
@ -1,4 +1,5 @@
 from .chunk_by_word import chunk_by_word
 from .chunk_by_sentence import chunk_by_sentence
 from .chunk_by_paragraph import chunk_by_paragraph
+from .chunk_by_row import chunk_by_row
 from .remove_disconnected_chunks import remove_disconnected_chunks
--- a/cognee/tasks/chunks/chunk_by_row.py
+++ b/cognee/tasks/chunks/chunk_by_row.py
@ -0,0 +1,94 @@
+from typing import Any, Dict, Iterator
+from uuid import NAMESPACE_OID, uuid5
+
+from cognee.infrastructure.databases.vector.embeddings import get_embedding_engine
+
+
+def _get_pair_size(pair_text: str) -> int:
+    """
+    Return the size of one "key: value" pair in tokens.
+
+    The count comes from the embedding engine's tokenizer when the engine has one;
+    otherwise a fixed estimate of 3 is returned.
+
+    Parameters:
+    -----------
+
+        - pair_text (str): The key:value pair text to measure.
+
+    Returns:
+    --------
+
+        - int: The tokenizer's count for pair_text, or 3 when no tokenizer is set.
+    """
+    tokenizer = get_embedding_engine().tokenizer
+    if not tokenizer:
+        return 3
+    return tokenizer.count_tokens(pair_text)
+
+
+def chunk_by_row(
+    data: str,
+    max_chunk_size,
+) -> Iterator[Dict[str, Any]]:
+    """
+    Split row text into chunks of "key: value" pairs that fit max_chunk_size.
+
+    Rows in data are separated by a blank line and the pairs inside a row by ", ".
+    Pairs are accumulated until adding the next one would exceed max_chunk_size; the
+    accumulated pairs are then emitted with cut_type "row_cut". Whatever is left when
+    a row ends is emitted with cut_type "row_end". A chunk never spans two rows, and
+    chunk_index increases by one for every emitted chunk across the whole input.
+
+    A single pair that is larger than max_chunk_size on its own is still emitted, as
+    an oversized chunk; callers that need a hard limit must check chunk_size.
+
+    Parameters:
+    -----------
+
+        - data (str): The input text to be chunked.
+        - max_chunk_size: The maximum allowed size for each chunk, in the units
+          returned by _get_pair_size.
+
+    Returns:
+    --------
+
+        - Iterator[Dict[str, Any]]: Dicts with the keys "text", "chunk_size",
+          "chunk_id", "chunk_index" and "cut_type".
+    """
+    chunk_index = 0
+
+    for line in data.split("\n\n"):
+        # Accumulator state is per row. Resetting it here keeps pairs (and their
+        # sizes) of an already emitted row from leaking into the next row's chunks.
+        current_chunk_list = []
+        current_chunk_size = 0
+
+        for pair_text in line.split(", "):
+            pair_size = _get_pair_size(pair_text)
+            if current_chunk_size > 0 and (current_chunk_size + pair_size > max_chunk_size):
+                # Yield current cut chunk
+                current_chunk = ", ".join(current_chunk_list)
+                yield {
+                    "text": current_chunk,
+                    "chunk_size": current_chunk_size,
+                    "chunk_id": uuid5(NAMESPACE_OID, current_chunk),
+                    "chunk_index": chunk_index,
+                    "cut_type": "row_cut",
+                }
+
+                # Start new chunk with current pair text
+                current_chunk_list = []
+                current_chunk_size = 0
+                chunk_index += 1
+
+            current_chunk_list.append(pair_text)
+            current_chunk_size += pair_size
+
+        # Yield row chunk
+        current_chunk = ", ".join(current_chunk_list)
+        if current_chunk:
+            yield {
+                "text": current_chunk,
+                "chunk_size": current_chunk_size,
+                "chunk_id": uuid5(NAMESPACE_OID, current_chunk),
+                "chunk_index": chunk_index,
+                "cut_type": "row_end",
+            }
+            # The next row's first chunk must not reuse this index.
+            chunk_index += 1
--- a/cognee/tasks/documents/classify_documents.py
+++ b/cognee/tasks/documents/classify_documents.py
@ -7,6 +7,7 @@ from cognee.modules.data.processing.document_types import (
    ImageDocument,
    TextDocument,
    UnstructuredDocument,
+    CsvDocument,
 )
 from cognee.modules.engine.models.node_set import NodeSet
 from cognee.modules.engine.utils.generate_node_id import generate_node_id
@ -15,6 +16,7 @@ from cognee.tasks.documents.exceptions import WrongDataDocumentInputError
 EXTENSION_TO_DOCUMENT_CLASS = {
    "pdf": PdfDocument,  # Text documents
    "txt": TextDocument,
+    "csv": CsvDocument,
    "docx": UnstructuredDocument,
    "doc": UnstructuredDocument,
    "odt": UnstructuredDocument,
--- a/cognee/tests/integration/documents/CsvDocument_test.py
+++ b/cognee/tests/integration/documents/CsvDocument_test.py
@ -0,0 +1,70 @@
+import os
+import sys
+import uuid
+import pytest
+import pathlib
+from unittest.mock import patch
+
+from cognee.modules.chunking.CsvChunker import CsvChunker
+from cognee.modules.data.processing.document_types.CsvDocument import CsvDocument
+from cognee.tests.integration.documents.AudioDocument_test import mock_get_embedding_engine
+from cognee.tests.integration.documents.async_gen_zip import async_gen_zip
+
+chunk_by_row_module = sys.modules.get("cognee.tasks.chunks.chunk_by_row")
+
+
+GROUND_TRUTH = {
+    "chunk_size_10": [
+        {"token_count": 9, "len_text": 26, "cut_type": "row_cut", "chunk_index": 0},
+        {"token_count": 6, "len_text": 29, "cut_type": "row_end", "chunk_index": 1},
+        {"token_count": 9, "len_text": 25, "cut_type": "row_cut", "chunk_index": 2},
+        {"token_count": 6, "len_text": 30, "cut_type": "row_end", "chunk_index": 3},
+    ],
+    "chunk_size_128": [
+        {"token_count": 15, "len_text": 57, "cut_type": "row_end", "chunk_index": 0},
+        {"token_count": 15, "len_text": 57, "cut_type": "row_end", "chunk_index": 1},
+    ],
+}
+
+
+@pytest.mark.parametrize(
+    "input_file,chunk_size",
+    [("example_with_header.csv", 10), ("example_with_header.csv", 128)],
+)
+@patch.object(chunk_by_row_module, "get_embedding_engine", side_effect=mock_get_embedding_engine)
+@pytest.mark.asyncio
+async def test_CsvDocument(mock_engine, input_file, chunk_size):
+    """Compare the chunks CsvDocument produces with GROUND_TRUTH for each chunk size.
+
+    get_embedding_engine inside the chunk_by_row module is patched with
+    mock_get_embedding_engine, so the token counts come from the mock.
+    """
+    # Define file paths of test data
+    csv_file_path = os.path.join(
+        pathlib.Path(__file__).parent.parent.parent,
+        "test_data",
+        input_file,
+    )
+
+    # Define test documents
+    csv_document = CsvDocument(
+        id=uuid.uuid4(),
+        name="example_with_header.csv",
+        raw_data_location=csv_file_path,
+        external_metadata="",
+        mime_type="text/csv",
+    )
+
+    # TEST CSV
+    # GROUND_TRUTH is keyed by "chunk_size_<n>"; expected entries and produced
+    # chunks are walked pairwise.
+    ground_truth_key = f"chunk_size_{chunk_size}"
+    async for ground_truth, row_data in async_gen_zip(
+        GROUND_TRUTH[ground_truth_key],
+        csv_document.read(chunker_cls=CsvChunker, max_chunk_size=chunk_size),
+    ):
+        assert ground_truth["token_count"] == row_data.chunk_size, (
+            f'{ground_truth["token_count"] = } != {row_data.chunk_size = }'
+        )
+        assert ground_truth["len_text"] == len(row_data.text), (
+            f'{ground_truth["len_text"] = } != {len(row_data.text) = }'
+        )
+        assert ground_truth["cut_type"] == row_data.cut_type, (
+            f'{ground_truth["cut_type"] = } != {row_data.cut_type = }'
+        )
+        assert ground_truth["chunk_index"] == row_data.chunk_index, (
+            f'{ground_truth["chunk_index"] = } != {row_data.chunk_index = }'
+        )
--- a/cognee/tests/test_cognee_server_start.py
+++ b/cognee/tests/test_cognee_server_start.py
@ -7,6 +7,7 @@ import requests
 from pathlib import Path
 import sys
 import uuid
+import json


 class TestCogneeServerStart(unittest.TestCase):
@ -90,12 +91,71 @@ class TestCogneeServerStart(unittest.TestCase):
            )
        }

-        payload = {"datasets": [dataset_name]}
+        ontology_key = f"test_ontology_{uuid.uuid4().hex[:8]}"
+        payload = {"datasets": [dataset_name], "ontology_key": [ontology_key]}

        add_response = requests.post(url, headers=headers, data=form_data, files=file, timeout=50)
        if add_response.status_code not in [200, 201]:
            add_response.raise_for_status()

+        ontology_content = b"""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+         xmlns:owl="http://www.w3.org/2002/07/owl#"
+         xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
+         xmlns="http://example.org/ontology#"
+         xml:base="http://example.org/ontology">
+
+                <owl:Ontology rdf:about="http://example.org/ontology"/>
+
+                <!-- Classes -->
+                <owl:Class rdf:ID="Problem"/>
+                <owl:Class rdf:ID="HardwareProblem"/>
+                <owl:Class rdf:ID="SoftwareProblem"/>
+                <owl:Class rdf:ID="Concept"/>
+                <owl:Class rdf:ID="Object"/>
+                <owl:Class rdf:ID="Joke"/>
+                <owl:Class rdf:ID="Image"/>
+                <owl:Class rdf:ID="Person"/>
+
+                <rdf:Description rdf:about="#HardwareProblem">
+                    <rdfs:subClassOf rdf:resource="#Problem"/>
+                    <rdfs:comment>A failure caused by physical components.</rdfs:comment>
+                </rdf:Description>
+
+                <rdf:Description rdf:about="#SoftwareProblem">
+                    <rdfs:subClassOf rdf:resource="#Problem"/>
+                    <rdfs:comment>An error caused by software logic or configuration.</rdfs:comment>
+                </rdf:Description>
+
+                <rdf:Description rdf:about="#Person">
+                    <rdfs:comment>A human being or individual.</rdfs:comment>
+                </rdf:Description>
+
+                <!-- Individuals -->
+                <Person rdf:ID="programmers">
+                    <rdfs:label>Programmers</rdfs:label>
+                </Person>
+
+                <Object rdf:ID="light_bulb">
+                    <rdfs:label>Light Bulb</rdfs:label>
+                </Object>
+
+                <HardwareProblem rdf:ID="hardware_problem">
+                    <rdfs:label>Hardware Problem</rdfs:label>
+                </HardwareProblem>
+
+            </rdf:RDF>"""
+
+        ontology_response = requests.post(
+            "http://127.0.0.1:8000/api/v1/ontologies",
+            headers=headers,
+            files=[("ontology_file", ("test.owl", ontology_content, "application/xml"))],
+            data={
+                "ontology_key": json.dumps([ontology_key]),
+                "description": json.dumps(["Test ontology"]),
+            },
+        )
+        self.assertEqual(ontology_response.status_code, 200)
+
        # Cognify request
        url = "http://127.0.0.1:8000/api/v1/cognify"
        headers = {
@ -107,6 +167,29 @@ class TestCogneeServerStart(unittest.TestCase):
        if cognify_response.status_code not in [200, 201]:
            cognify_response.raise_for_status()

+        datasets_response = requests.get("http://127.0.0.1:8000/api/v1/datasets", headers=headers)
+
+        datasets = datasets_response.json()
+        dataset_id = None
+        for dataset in datasets:
+            if dataset["name"] == dataset_name:
+                dataset_id = dataset["id"]
+                break
+
+        graph_response = requests.get(
+            f"http://127.0.0.1:8000/api/v1/datasets/{dataset_id}/graph", headers=headers
+        )
+        self.assertEqual(graph_response.status_code, 200)
+
+        graph_data = graph_response.json()
+        ontology_nodes = [
+            node for node in graph_data.get("nodes") if node.get("properties").get("ontology_valid")
+        ]
+
+        self.assertGreater(
+            len(ontology_nodes), 0, "No ontology nodes found - ontology was not integrated"
+        )
+
        # TODO: Add test to verify cognify pipeline is complete before testing search

        # Search request
--- a/cognee/tests/test_data/example_with_header.csv
+++ b/cognee/tests/test_data/example_with_header.csv
@ -0,0 +1,3 @@
+id,name,age,city,country
+1,Eric,30,Beijing,China
+2,Joe,35,Berlin,Germany
--- a/cognee/tests/unit/api/test_ontology_endpoint.py
+++ b/cognee/tests/unit/api/test_ontology_endpoint.py
@ -0,0 +1,264 @@
+import pytest
+import uuid
+from fastapi.testclient import TestClient
+from unittest.mock import patch, Mock, AsyncMock
+from types import SimpleNamespace
+import importlib
+from cognee.api.client import app
+
+gau_mod = importlib.import_module("cognee.modules.users.methods.get_authenticated_user")
+
+
+@pytest.fixture
+def client():
+    """Return a FastAPI TestClient bound to the cognee app."""
+    return TestClient(app)
+
+
+@pytest.fixture
+def mock_user():
+    """Return a Mock user whose id is the fixed string "test-user-123"."""
+    # NOTE(review): no test visible in this file requests this fixture.
+    user = Mock()
+    user.id = "test-user-123"
+    return user
+
+
+@pytest.fixture
+def mock_default_user():
+    """Return a stand-in default user with random id and tenant_id, a fixed email and is_active=True."""
+    return SimpleNamespace(
+        id=uuid.uuid4(), email="default@example.com", is_active=True, tenant_id=uuid.uuid4()
+    )
+
+
+@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock)
+def test_upload_ontology_success(mock_get_default_user, client, mock_default_user):
+    """Test successful ontology upload"""
+    import json
+
+    # The patched default-user lookup resolves to the fixture user.
+    mock_get_default_user.return_value = mock_default_user
+    ontology_content = (
+        b"<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'></rdf:RDF>"
+    )
+    # Random suffix so repeated runs do not reuse the same ontology key.
+    unique_key = f"test_ontology_{uuid.uuid4().hex[:8]}"
+
+    # Keys and descriptions are sent as JSON-encoded lists in form fields.
+    response = client.post(
+        "/api/v1/ontologies",
+        files=[("ontology_file", ("test.owl", ontology_content, "application/xml"))],
+        data={"ontology_key": json.dumps([unique_key]), "description": json.dumps(["Test"])},
+    )
+
+    assert response.status_code == 200
+    data = response.json()
+    assert data["uploaded_ontologies"][0]["ontology_key"] == unique_key
+    assert "uploaded_at" in data["uploaded_ontologies"][0]
+
+
+@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock)
+def test_upload_ontology_invalid_file(mock_get_default_user, client, mock_default_user):
+    """Test 400 response for non-.owl files"""
+    mock_get_default_user.return_value = mock_default_user
+    unique_key = f"test_ontology_{uuid.uuid4().hex[:8]}"
+    # NOTE(review): ontology_key is sent as a plain string here, not as a JSON
+    # list like in the sibling tests; confirm the 400 is caused by the file
+    # extension and not by the key format.
+    response = client.post(
+        "/api/v1/ontologies",
+        files={"ontology_file": ("test.txt", b"not xml")},
+        data={"ontology_key": unique_key},
+    )
+    assert response.status_code == 400
+
+
+@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock)
+def test_upload_ontology_missing_data(mock_get_default_user, client, mock_default_user):
+    """Test 400 response for missing file or key"""
+    import json
+
+    mock_get_default_user.return_value = mock_default_user
+    # Missing file
+    # Only the key form field is sent.
+    response = client.post("/api/v1/ontologies", data={"ontology_key": json.dumps(["test"])})
+    assert response.status_code == 400
+
+    # Missing key
+    # Only the file part is sent.
+    response = client.post(
+        "/api/v1/ontologies", files=[("ontology_file", ("test.owl", b"xml", "application/xml"))]
+    )
+    assert response.status_code == 400
+
+
+@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock)
+def test_upload_ontology_unauthorized(mock_get_default_user, client, mock_default_user):
+    """Test behavior when default user is provided (no explicit authentication)"""
+    # Despite the name, this test expects a successful upload (200): the
+    # request carries no credentials and the patched default user is used.
+    import json
+
+    unique_key = f"test_ontology_{uuid.uuid4().hex[:8]}"
+    mock_get_default_user.return_value = mock_default_user
+    response = client.post(
+        "/api/v1/ontologies",
+        files=[("ontology_file", ("test.owl", b"<rdf></rdf>", "application/xml"))],
+        data={"ontology_key": json.dumps([unique_key])},
+    )
+
+    # The current system provides a default user when no explicit authentication is given
+    # This test verifies the system works with conditional authentication
+    assert response.status_code == 200
+    data = response.json()
+    assert data["uploaded_ontologies"][0]["ontology_key"] == unique_key
+    assert "uploaded_at" in data["uploaded_ontologies"][0]
+
+
+@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock)
+def test_upload_multiple_ontologies(mock_get_default_user, client, mock_default_user):
+    """Test uploading multiple ontology files in single request"""
+    import io
+
+    # Resolve the patched default-user lookup to the fixture user, as the
+    # single-file upload tests do; without this the awaited result is a bare
+    # mock object instead of a user.
+    mock_get_default_user.return_value = mock_default_user
+
+    # Create mock files
+    file1_content = b"<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'></rdf:RDF>"
+    file2_content = b"<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'></rdf:RDF>"
+
+    # Repeating the "ontology_file" field name sends both files in one request.
+    files = [
+        ("ontology_file", ("vehicles.owl", io.BytesIO(file1_content), "application/xml")),
+        ("ontology_file", ("manufacturers.owl", io.BytesIO(file2_content), "application/xml")),
+    ]
+    # NOTE(review): this test sends "descriptions" while the single-file tests
+    # send "description"; confirm which field name the endpoint reads.
+    data = {
+        "ontology_key": '["vehicles", "manufacturers"]',
+        "descriptions": '["Base vehicles", "Car manufacturers"]',
+    }
+
+    response = client.post("/api/v1/ontologies", files=files, data=data)
+
+    # Uploaded ontologies are expected back in the order the keys were given.
+    assert response.status_code == 200
+    result = response.json()
+    assert "uploaded_ontologies" in result
+    assert len(result["uploaded_ontologies"]) == 2
+    assert result["uploaded_ontologies"][0]["ontology_key"] == "vehicles"
+    assert result["uploaded_ontologies"][1]["ontology_key"] == "manufacturers"
+
+
+@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock)
+def test_upload_endpoint_accepts_arrays(mock_get_default_user, client, mock_default_user):
+    """Test that upload endpoint accepts array parameters"""
+    import io
+    import json
+
+    # Resolve the patched default-user lookup to the fixture user, as the
+    # single-file upload tests do; without this the awaited result is a bare
+    # mock object instead of a user.
+    mock_get_default_user.return_value = mock_default_user
+
+    file_content = b"<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'></rdf:RDF>"
+
+    # One file with one-element JSON arrays for key and description.
+    files = [("ontology_file", ("single.owl", io.BytesIO(file_content), "application/xml"))]
+    data = {
+        "ontology_key": json.dumps(["single_key"]),
+        "descriptions": json.dumps(["Single ontology"]),
+    }
+
+    response = client.post("/api/v1/ontologies", files=files, data=data)
+
+    assert response.status_code == 200
+    result = response.json()
+    assert result["uploaded_ontologies"][0]["ontology_key"] == "single_key"
+
+
+@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock)
+def test_cognify_with_multiple_ontologies(mock_get_default_user, client, mock_default_user):
+    """Test cognify endpoint accepts multiple ontology keys"""
+    # Resolve the patched default-user lookup to the fixture user, as the
+    # upload tests do; without this the awaited result is a bare mock object
+    # instead of a user.
+    mock_get_default_user.return_value = mock_default_user
+
+    payload = {
+        "datasets": ["test_dataset"],
+        "ontology_key": ["ontology1", "ontology2"],  # Array instead of string
+        "run_in_background": False,
+    }
+
+    response = client.post("/api/v1/cognify", json=payload)
+
+    # Should not fail due to ontology_key type
+    assert response.status_code in [200, 400, 409]  # May fail for other reasons, not type
+
+
+@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock)
+def test_complete_multifile_workflow(mock_get_default_user, client, mock_default_user):
+    """Test complete workflow: upload multiple ontologies → cognify with multiple keys"""
+    import io
+    import json
+
+    # Resolve the patched default-user lookup to the fixture user for all three
+    # requests below, as the single-file upload tests do; without this the
+    # awaited result is a bare mock object instead of a user.
+    mock_get_default_user.return_value = mock_default_user
+
+    # Step 1: Upload multiple ontologies
+    file1_content = b"""<?xml version="1.0"?>
+    <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+             xmlns:owl="http://www.w3.org/2002/07/owl#">
+        <owl:Class rdf:ID="Vehicle"/>
+    </rdf:RDF>"""
+
+    file2_content = b"""<?xml version="1.0"?>
+    <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+             xmlns:owl="http://www.w3.org/2002/07/owl#">
+        <owl:Class rdf:ID="Manufacturer"/>
+    </rdf:RDF>"""
+
+    files = [
+        ("ontology_file", ("vehicles.owl", io.BytesIO(file1_content), "application/xml")),
+        ("ontology_file", ("manufacturers.owl", io.BytesIO(file2_content), "application/xml")),
+    ]
+    data = {
+        "ontology_key": json.dumps(["vehicles", "manufacturers"]),
+        "descriptions": json.dumps(["Vehicle ontology", "Manufacturer ontology"]),
+    }
+
+    upload_response = client.post("/api/v1/ontologies", files=files, data=data)
+    assert upload_response.status_code == 200
+
+    # Step 2: Verify ontologies are listed
+    list_response = client.get("/api/v1/ontologies")
+    assert list_response.status_code == 200
+    ontologies = list_response.json()
+    assert "vehicles" in ontologies
+    assert "manufacturers" in ontologies
+
+    # Step 3: Test cognify with multiple ontologies
+    cognify_payload = {
+        "datasets": ["test_dataset"],
+        "ontology_key": ["vehicles", "manufacturers"],
+        "run_in_background": False,
+    }
+
+    cognify_response = client.post("/api/v1/cognify", json=cognify_payload)
+    # Should not fail due to ontology handling (may fail for dataset reasons)
+    assert cognify_response.status_code != 400  # Not a validation error
+
+
+@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock)
+def test_multifile_error_handling(mock_get_default_user, client, mock_default_user):
+    """Test error handling for invalid multifile uploads"""
+    import io
+    import json
+
+    # Resolve the patched default-user lookup to the fixture user, as the
+    # single-file upload tests do, so the 400 responses asserted below are
+    # produced with a regular user rather than a bare mock object.
+    mock_get_default_user.return_value = mock_default_user
+
+    # Test mismatched array lengths
+    file_content = b"<rdf:RDF></rdf:RDF>"
+    files = [("ontology_file", ("test.owl", io.BytesIO(file_content), "application/xml"))]
+    data = {
+        "ontology_key": json.dumps(["key1", "key2"]),  # 2 keys, 1 file
+        "descriptions": json.dumps(["desc1"]),
+    }
+
+    response = client.post("/api/v1/ontologies", files=files, data=data)
+    assert response.status_code == 400
+    assert "Number of keys must match number of files" in response.json()["error"]
+
+    # Test duplicate keys
+    # Fresh BytesIO objects: the ones above were consumed by the first request.
+    files = [
+        ("ontology_file", ("test1.owl", io.BytesIO(file_content), "application/xml")),
+        ("ontology_file", ("test2.owl", io.BytesIO(file_content), "application/xml")),
+    ]
+    data = {
+        "ontology_key": json.dumps(["duplicate", "duplicate"]),
+        "descriptions": json.dumps(["desc1", "desc2"]),
+    }
+
+    response = client.post("/api/v1/ontologies", files=files, data=data)
+    assert response.status_code == 400
+    assert "Duplicate ontology keys not allowed" in response.json()["error"]
+
+
+@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock)
+def test_cognify_missing_ontology_key(mock_get_default_user, client, mock_default_user):
+    """Test cognify with non-existent ontology key"""
+    # Resolve the patched default-user lookup to the fixture user, as the
+    # upload tests do, so the ontology lookup runs for a regular user rather
+    # than a bare mock object.
+    mock_get_default_user.return_value = mock_default_user
+
+    payload = {
+        "datasets": ["test_dataset"],
+        "ontology_key": ["nonexistent_key"],
+        "run_in_background": False,
+    }
+
+    response = client.post("/api/v1/cognify", json=payload)
+    assert response.status_code == 409
+    assert "Ontology key 'nonexistent_key' not found" in response.json()["error"]
--- a/cognee/tests/unit/processing/chunks/chunk_by_row_test.py
+++ b/cognee/tests/unit/processing/chunks/chunk_by_row_test.py
@ -0,0 +1,52 @@
+from itertools import product
+
+import numpy as np
+import pytest
+
+from cognee.infrastructure.databases.vector.embeddings import get_embedding_engine
+from cognee.tasks.chunks import chunk_by_row
+
+INPUT_TEXTS = "name: John, age: 30, city: New York, country: USA"  # sample input for every test
+max_chunk_size_vals = [8, 32]  # max_chunk_size values each test is parametrized over
+
+
+@pytest.mark.parametrize(
+    "input_text,max_chunk_size",
+    list(product([INPUT_TEXTS], max_chunk_size_vals)),
+)
+def test_chunk_by_row_isomorphism(input_text, max_chunk_size):
+    """Joining the chunk texts with ", " must give back the input unchanged."""
+    pieces = []
+    for part in chunk_by_row(input_text, max_chunk_size):
+        pieces.append(part["text"])
+
+    reconstructed_text = ", ".join(pieces)
+    assert reconstructed_text == input_text, (
+        f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }"
+    )
+
+
+@pytest.mark.parametrize(
+    "input_text,max_chunk_size",
+    list(product([INPUT_TEXTS], max_chunk_size_vals)),
+)
+def test_row_chunk_length(input_text, max_chunk_size):
+    """No chunk may exceed ``max_chunk_size`` when counted by the tokenizer."""
+    chunks = list(chunk_by_row(data=input_text, max_chunk_size=max_chunk_size))
+    engine = get_embedding_engine()
+
+    # Token count of every chunk, measured with the embedding engine's tokenizer.
+    token_counts = []
+    for piece in chunks:
+        token_counts.append(engine.tokenizer.count_tokens(piece["text"]))
+    chunk_lengths = np.array(token_counts)
+
+    # Collected only so that a failure message can show the offending sizes.
+    over_limit = chunk_lengths > max_chunk_size
+    larger_chunks = chunk_lengths[over_limit]
+    assert np.all(chunk_lengths <= max_chunk_size), (
+        f"{max_chunk_size = }: {larger_chunks} are too large"
+    )
+
+
+@pytest.mark.parametrize(
+    "input_text,max_chunk_size",
+    list(product([INPUT_TEXTS], max_chunk_size_vals)),
+)
+def test_chunk_by_row_chunk_numbering(input_text, max_chunk_size):
+    """Chunk indices must run 0, 1, 2, ... in the order chunks are produced."""
+    produced = chunk_by_row(data=input_text, max_chunk_size=max_chunk_size)
+    chunk_indices = np.array([item["chunk_index"] for item in produced])
+
+    # The expected numbering is simply the position of each chunk.
+    expected = np.arange(len(chunk_indices))
+    assert np.all(chunk_indices == expected), (
+        f"{chunk_indices = } are not monotonically increasing"
+    )
--- a/poetry.lock
+++ b/poetry.lock
@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand.

 [[package]]
 name = "accelerate"
@ -1231,12 +1231,12 @@ version = "0.4.6"
 description = "Cross-platform colored terminal text."
 optional = false
 python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
-groups = ["main", "dev"]
+groups = ["main"]
+markers = "(platform_system == \"Windows\" or extra == \"llama-index\" or extra == \"dev\" or extra == \"chromadb\" or sys_platform == \"win32\") and (platform_system == \"Windows\" or os_name == \"nt\" or extra == \"llama-index\" or extra == \"dev\" or sys_platform == \"win32\")"
 files = [
    {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
    {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
 ]
-markers = {main = "(platform_system == \"Windows\" or extra == \"llama-index\" or extra == \"dev\" or extra == \"chromadb\" or sys_platform == \"win32\") and (platform_system == \"Windows\" or os_name == \"nt\" or extra == \"llama-index\" or extra == \"dev\" or sys_platform == \"win32\")", dev = "sys_platform == \"win32\""}

 [[package]]
 name = "coloredlogs"
@ -2347,7 +2347,7 @@ version = "1.3.0"
 description = "Backport of PEP 654 (exception groups)"
 optional = false
 python-versions = ">=3.7"
-groups = ["main", "dev"]
+groups = ["main"]
 markers = "python_version == \"3.10\""
 files = [
    {file = "exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10"},
@ -3732,14 +3732,14 @@ type = ["pytest-mypy"]
 name = "iniconfig"
 version = "2.1.0"
 description = "brain-dead simple config-ini parsing"
-optional = false
+optional = true
 python-versions = ">=3.8"
-groups = ["main", "dev"]
+groups = ["main"]
+markers = "extra == \"deepeval\" or extra == \"dev\""
 files = [
    {file = "iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760"},
    {file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"},
 ]
-markers = {main = "extra == \"deepeval\" or extra == \"dev\""}

 [[package]]
 name = "instructor"
@ -4196,8 +4196,6 @@ groups = ["main"]
 markers = "extra == \"dlt\""
 files = [
    {file = "jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c"},
-    {file = "jsonpath_ng-1.7.0-py2-none-any.whl", hash = "sha256:898c93fc173f0c336784a3fa63d7434297544b7198124a68f9a3ef9597b0ae6e"},
-    {file = "jsonpath_ng-1.7.0-py3-none-any.whl", hash = "sha256:f3d7f9e848cba1b6da28c55b1c26ff915dc9e0b1ba7e752a53d6da8d5cbd00b6"},
 ]

 [package.dependencies]
@ -7634,7 +7632,7 @@ version = "24.2"
 description = "Core utilities for Python packages"
 optional = false
 python-versions = ">=3.8"
-groups = ["main", "dev"]
+groups = ["main"]
 files = [
    {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"},
    {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"},
@ -8289,14 +8287,14 @@ kaleido = ["kaleido (>=1.0.0)"]
 name = "pluggy"
 version = "1.6.0"
 description = "plugin and hook calling mechanisms for python"
-optional = false
+optional = true
 python-versions = ">=3.9"
-groups = ["main", "dev"]
+groups = ["main"]
+markers = "extra == \"deepeval\" or extra == \"dev\" or extra == \"dlt\" or extra == \"docling\""
 files = [
    {file = "pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746"},
    {file = "pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3"},
 ]
-markers = {main = "extra == \"deepeval\" or extra == \"dev\" or extra == \"dlt\" or extra == \"docling\""}

 [package.extras]
 dev = ["pre-commit", "tox"]
@ -8656,7 +8654,6 @@ files = [
    {file = "psycopg2-2.9.10-cp311-cp311-win_amd64.whl", hash = "sha256:0435034157049f6846e95103bd8f5a668788dd913a7c30162ca9503fdf542cb4"},
    {file = "psycopg2-2.9.10-cp312-cp312-win32.whl", hash = "sha256:65a63d7ab0e067e2cdb3cf266de39663203d38d6a8ed97f5ca0cb315c73fe067"},
    {file = "psycopg2-2.9.10-cp312-cp312-win_amd64.whl", hash = "sha256:4a579d6243da40a7b3182e0430493dbd55950c493d8c68f4eec0b302f6bbf20e"},
-    {file = "psycopg2-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:91fd603a2155da8d0cfcdbf8ab24a2d54bca72795b90d2a3ed2b6da8d979dee2"},
    {file = "psycopg2-2.9.10-cp39-cp39-win32.whl", hash = "sha256:9d5b3b94b79a844a986d029eee38998232451119ad653aea42bb9220a8c5066b"},
    {file = "psycopg2-2.9.10-cp39-cp39-win_amd64.whl", hash = "sha256:88138c8dedcbfa96408023ea2b0c369eda40fe5d75002c0964c78f46f11fa442"},
    {file = "psycopg2-2.9.10.tar.gz", hash = "sha256:12ec0b40b0273f95296233e8750441339298e6a572f7039da5b260e3c8b60e11"},
@ -8718,7 +8715,6 @@ files = [
    {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909"},
    {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1"},
    {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567"},
-    {file = "psycopg2_binary-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:27422aa5f11fbcd9b18da48373eb67081243662f9b46e6fd07c3eb46e4535142"},
    {file = "psycopg2_binary-2.9.10-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:eb09aa7f9cecb45027683bb55aebaaf45a0df8bf6de68801a6afdc7947bb09d4"},
    {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b73d6d7f0ccdad7bc43e6d34273f70d587ef62f824d7261c4ae9b8b1b6af90e8"},
    {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce5ab4bf46a211a8e924d307c1b1fcda82368586a19d0a24f8ae166f5c784864"},
@ -9698,14 +9694,14 @@ files = [
 name = "pytest"
 version = "7.4.4"
 description = "pytest: simple powerful testing with Python"
-optional = false
+optional = true
 python-versions = ">=3.7"
-groups = ["main", "dev"]
+groups = ["main"]
+markers = "extra == \"deepeval\" or extra == \"dev\""
 files = [
    {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"},
    {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"},
 ]
-markers = {main = "extra == \"deepeval\" or extra == \"dev\""}

 [package.dependencies]
 colorama = {version = "*", markers = "sys_platform == \"win32\""}
@ -9792,21 +9788,6 @@ files = [
 packaging = ">=17.1"
 pytest = ">=6.2"

-[[package]]
-name = "pytest-timeout"
-version = "2.4.0"
-description = "pytest plugin to abort hanging tests"
-optional = false
-python-versions = ">=3.7"
-groups = ["dev"]
-files = [
-    {file = "pytest_timeout-2.4.0-py3-none-any.whl", hash = "sha256:c42667e5cdadb151aeb5b26d114aff6bdf5a907f176a007a30b940d3d865b5c2"},
-    {file = "pytest_timeout-2.4.0.tar.gz", hash = "sha256:7e68e90b01f9eff71332b25001f85c75495fc4e3a836701876183c4bcfd0540a"},
-]
-
-[package.dependencies]
-pytest = ">=7.0.0"
-
 [[package]]
 name = "pytest-xdist"
 version = "3.8.0"
@ -11656,7 +11637,9 @@ groups = ["main"]
 files = [
    {file = "SQLAlchemy-2.0.43-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:21ba7a08a4253c5825d1db389d4299f64a100ef9800e4624c8bf70d8f136e6ed"},
    {file = "SQLAlchemy-2.0.43-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:11b9503fa6f8721bef9b8567730f664c5a5153d25e247aadc69247c4bc605227"},
+    {file = "SQLAlchemy-2.0.43-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07097c0a1886c150ef2adba2ff7437e84d40c0f7dcb44a2c2b9c905ccfc6361c"},
    {file = "SQLAlchemy-2.0.43-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:cdeff998cb294896a34e5b2f00e383e7c5c4ef3b4bfa375d9104723f15186443"},
+    {file = "SQLAlchemy-2.0.43-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:bcf0724a62a5670e5718957e05c56ec2d6850267ea859f8ad2481838f889b42c"},
    {file = "SQLAlchemy-2.0.43-cp37-cp37m-win32.whl", hash = "sha256:c697575d0e2b0a5f0433f679bda22f63873821d991e95a90e9e52aae517b2e32"},
    {file = "SQLAlchemy-2.0.43-cp37-cp37m-win_amd64.whl", hash = "sha256:d34c0f6dbefd2e816e8f341d0df7d4763d382e3f452423e752ffd1e213da2512"},
    {file = "sqlalchemy-2.0.43-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:70322986c0c699dca241418fcf18e637a4369e0ec50540a2b907b184c8bca069"},
@ -11691,12 +11674,20 @@ files = [
    {file = "sqlalchemy-2.0.43-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9df7126fd9db49e3a5a3999442cc67e9ee8971f3cb9644250107d7296cb2a164"},
    {file = "sqlalchemy-2.0.43-cp313-cp313-win32.whl", hash = "sha256:7f1ac7828857fcedb0361b48b9ac4821469f7694089d15550bbcf9ab22564a1d"},
    {file = "sqlalchemy-2.0.43-cp313-cp313-win_amd64.whl", hash = "sha256:971ba928fcde01869361f504fcff3b7143b47d30de188b11c6357c0505824197"},
+    {file = "sqlalchemy-2.0.43-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4e6aeb2e0932f32950cf56a8b4813cb15ff792fc0c9b3752eaf067cfe298496a"},
+    {file = "sqlalchemy-2.0.43-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:61f964a05356f4bca4112e6334ed7c208174511bd56e6b8fc86dad4d024d4185"},
    {file = "sqlalchemy-2.0.43-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46293c39252f93ea0910aababa8752ad628bcce3a10d3f260648dd472256983f"},
+    {file = "sqlalchemy-2.0.43-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:136063a68644eca9339d02e6693932116f6a8591ac013b0014479a1de664e40a"},
    {file = "sqlalchemy-2.0.43-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:6e2bf13d9256398d037fef09fd8bf9b0bf77876e22647d10761d35593b9ac547"},
+    {file = "sqlalchemy-2.0.43-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:44337823462291f17f994d64282a71c51d738fc9ef561bf265f1d0fd9116a782"},
    {file = "sqlalchemy-2.0.43-cp38-cp38-win32.whl", hash = "sha256:13194276e69bb2af56198fef7909d48fd34820de01d9c92711a5fa45497cc7ed"},
    {file = "sqlalchemy-2.0.43-cp38-cp38-win_amd64.whl", hash = "sha256:334f41fa28de9f9be4b78445e68530da3c5fa054c907176460c81494f4ae1f5e"},
+    {file = "sqlalchemy-2.0.43-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ceb5c832cc30663aeaf5e39657712f4c4241ad1f638d487ef7216258f6d41fe7"},
+    {file = "sqlalchemy-2.0.43-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:11f43c39b4b2ec755573952bbcc58d976779d482f6f832d7f33a8d869ae891bf"},
    {file = "sqlalchemy-2.0.43-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:413391b2239db55be14fa4223034d7e13325a1812c8396ecd4f2c08696d5ccad"},
+    {file = "sqlalchemy-2.0.43-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c379e37b08c6c527181a397212346be39319fb64323741d23e46abd97a400d34"},
    {file = "sqlalchemy-2.0.43-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:03d73ab2a37d9e40dec4984d1813d7878e01dbdc742448d44a7341b7a9f408c7"},
+    {file = "sqlalchemy-2.0.43-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:8cee08f15d9e238ede42e9bbc1d6e7158d0ca4f176e4eab21f88ac819ae3bd7b"},
    {file = "sqlalchemy-2.0.43-cp39-cp39-win32.whl", hash = "sha256:b3edaec7e8b6dc5cd94523c6df4f294014df67097c8217a89929c99975811414"},
    {file = "sqlalchemy-2.0.43-cp39-cp39-win_amd64.whl", hash = "sha256:227119ce0a89e762ecd882dc661e0aa677a690c914e358f0dd8932a2e8b2765b"},
    {file = "sqlalchemy-2.0.43-py3-none-any.whl", hash = "sha256:1681c21dd2ccee222c2fe0bef671d1aef7c504087c9c4e800371cfcc8ac966fc"},
@ -12065,7 +12056,7 @@ version = "2.2.1"
 description = "A lil' TOML parser"
 optional = false
 python-versions = ">=3.8"
-groups = ["main", "dev"]
+groups = ["main"]
 markers = "python_version == \"3.10\""
 files = [
    {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"},
@ -12537,12 +12528,11 @@ version = "4.15.0"
 description = "Backported and Experimental Type Hints for Python 3.9+"
 optional = false
 python-versions = ">=3.9"
-groups = ["main", "dev"]
+groups = ["main"]
 files = [
    {file = "typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548"},
    {file = "typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466"},
 ]
-markers = {dev = "python_version == \"3.10\""}

 [[package]]
 name = "typing-inspect"
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,7 +1,7 @@
 [project]
 name = "cognee"

-version = "0.3.9"
+version = "0.5.0.dev0"
 description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning."
 authors = [
    { name = "Vasilije Markovic" },
--- a/uv.lock
+++ b/uv.lock
@ -929,7 +929,7 @@ wheels = [

 [[package]]
 name = "cognee"
-version = "0.3.9"
+version = "0.5.0.dev0"
 source = { editable = "." }
 dependencies = [
    { name = "aiofiles" },
@ -2560,6 +2560,8 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/7f/91/ae2eb6b7979e2f9b035a9f612cf70f1bf54aad4e1d125129bef1eae96f19/greenlet-3.2.4-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c2ca18a03a8cfb5b25bc1cbe20f3d9a4c80d8c3b13ba3df49ac3961af0b1018d", size = 584358, upload-time = "2025-08-07T13:18:23.708Z" },
    { url = "https://files.pythonhosted.org/packages/f7/85/433de0c9c0252b22b16d413c9407e6cb3b41df7389afc366ca204dbc1393/greenlet-3.2.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9fe0a28a7b952a21e2c062cd5756d34354117796c6d9215a87f55e38d15402c5", size = 1113550, upload-time = "2025-08-07T13:42:37.467Z" },
    { url = "https://files.pythonhosted.org/packages/a1/8d/88f3ebd2bc96bf7747093696f4335a0a8a4c5acfcf1b757717c0d2474ba3/greenlet-3.2.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8854167e06950ca75b898b104b63cc646573aa5fef1353d4508ecdd1ee76254f", size = 1137126, upload-time = "2025-08-07T13:18:20.239Z" },
+    { url = "https://files.pythonhosted.org/packages/f1/29/74242b7d72385e29bcc5563fba67dad94943d7cd03552bac320d597f29b2/greenlet-3.2.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f47617f698838ba98f4ff4189aef02e7343952df3a615f847bb575c3feb177a7", size = 1544904, upload-time = "2025-11-04T12:42:04.763Z" },
+    { url = "https://files.pythonhosted.org/packages/c8/e2/1572b8eeab0f77df5f6729d6ab6b141e4a84ee8eb9bc8c1e7918f94eda6d/greenlet-3.2.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:af41be48a4f60429d5cad9d22175217805098a9ef7c40bfef44f7669fb9d74d8", size = 1611228, upload-time = "2025-11-04T12:42:08.423Z" },
    { url = "https://files.pythonhosted.org/packages/d6/6f/b60b0291d9623c496638c582297ead61f43c4b72eef5e9c926ef4565ec13/greenlet-3.2.4-cp310-cp310-win_amd64.whl", hash = "sha256:73f49b5368b5359d04e18d15828eecc1806033db5233397748f4ca813ff1056c", size = 298654, upload-time = "2025-08-07T13:50:00.469Z" },
    { url = "https://files.pythonhosted.org/packages/a4/de/f28ced0a67749cac23fecb02b694f6473f47686dff6afaa211d186e2ef9c/greenlet-3.2.4-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:96378df1de302bc38e99c3a9aa311967b7dc80ced1dcc6f171e99842987882a2", size = 272305, upload-time = "2025-08-07T13:15:41.288Z" },
    { url = "https://files.pythonhosted.org/packages/09/16/2c3792cba130000bf2a31c5272999113f4764fd9d874fb257ff588ac779a/greenlet-3.2.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1ee8fae0519a337f2329cb78bd7a8e128ec0f881073d43f023c7b8d4831d5246", size = 632472, upload-time = "2025-08-07T13:42:55.044Z" },
@ -2569,6 +2571,8 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/1f/8e/abdd3f14d735b2929290a018ecf133c901be4874b858dd1c604b9319f064/greenlet-3.2.4-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2523e5246274f54fdadbce8494458a2ebdcdbc7b802318466ac5606d3cded1f8", size = 587684, upload-time = "2025-08-07T13:18:25.164Z" },
    { url = "https://files.pythonhosted.org/packages/5d/65/deb2a69c3e5996439b0176f6651e0052542bb6c8f8ec2e3fba97c9768805/greenlet-3.2.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1987de92fec508535687fb807a5cea1560f6196285a4cde35c100b8cd632cc52", size = 1116647, upload-time = "2025-08-07T13:42:38.655Z" },
    { url = "https://files.pythonhosted.org/packages/3f/cc/b07000438a29ac5cfb2194bfc128151d52f333cee74dd7dfe3fb733fc16c/greenlet-3.2.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:55e9c5affaa6775e2c6b67659f3a71684de4c549b3dd9afca3bc773533d284fa", size = 1142073, upload-time = "2025-08-07T13:18:21.737Z" },
+    { url = "https://files.pythonhosted.org/packages/67/24/28a5b2fa42d12b3d7e5614145f0bd89714c34c08be6aabe39c14dd52db34/greenlet-3.2.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c9c6de1940a7d828635fbd254d69db79e54619f165ee7ce32fda763a9cb6a58c", size = 1548385, upload-time = "2025-11-04T12:42:11.067Z" },
+    { url = "https://files.pythonhosted.org/packages/6a/05/03f2f0bdd0b0ff9a4f7b99333d57b53a7709c27723ec8123056b084e69cd/greenlet-3.2.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:03c5136e7be905045160b1b9fdca93dd6727b180feeafda6818e6496434ed8c5", size = 1613329, upload-time = "2025-11-04T12:42:12.928Z" },
    { url = "https://files.pythonhosted.org/packages/d8/0f/30aef242fcab550b0b3520b8e3561156857c94288f0332a79928c31a52cf/greenlet-3.2.4-cp311-cp311-win_amd64.whl", hash = "sha256:9c40adce87eaa9ddb593ccb0fa6a07caf34015a29bf8d344811665b573138db9", size = 299100, upload-time = "2025-08-07T13:44:12.287Z" },
    { url = "https://files.pythonhosted.org/packages/44/69/9b804adb5fd0671f367781560eb5eb586c4d495277c93bde4307b9e28068/greenlet-3.2.4-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:3b67ca49f54cede0186854a008109d6ee71f66bd57bb36abd6d0a0267b540cdd", size = 274079, upload-time = "2025-08-07T13:15:45.033Z" },
    { url = "https://files.pythonhosted.org/packages/46/e9/d2a80c99f19a153eff70bc451ab78615583b8dac0754cfb942223d2c1a0d/greenlet-3.2.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ddf9164e7a5b08e9d22511526865780a576f19ddd00d62f8a665949327fde8bb", size = 640997, upload-time = "2025-08-07T13:42:56.234Z" },
@ -2578,6 +2582,8 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/19/0d/6660d55f7373b2ff8152401a83e02084956da23ae58cddbfb0b330978fe9/greenlet-3.2.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b3812d8d0c9579967815af437d96623f45c0f2ae5f04e366de62a12d83a8fb0", size = 607586, upload-time = "2025-08-07T13:18:28.544Z" },
    { url = "https://files.pythonhosted.org/packages/8e/1a/c953fdedd22d81ee4629afbb38d2f9d71e37d23caace44775a3a969147d4/greenlet-3.2.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:abbf57b5a870d30c4675928c37278493044d7c14378350b3aa5d484fa65575f0", size = 1123281, upload-time = "2025-08-07T13:42:39.858Z" },
    { url = "https://files.pythonhosted.org/packages/3f/c7/12381b18e21aef2c6bd3a636da1088b888b97b7a0362fac2e4de92405f97/greenlet-3.2.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:20fb936b4652b6e307b8f347665e2c615540d4b42b3b4c8a321d8286da7e520f", size = 1151142, upload-time = "2025-08-07T13:18:22.981Z" },
+    { url = "https://files.pythonhosted.org/packages/27/45/80935968b53cfd3f33cf99ea5f08227f2646e044568c9b1555b58ffd61c2/greenlet-3.2.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ee7a6ec486883397d70eec05059353b8e83eca9168b9f3f9a361971e77e0bcd0", size = 1564846, upload-time = "2025-11-04T12:42:15.191Z" },
+    { url = "https://files.pythonhosted.org/packages/69/02/b7c30e5e04752cb4db6202a3858b149c0710e5453b71a3b2aec5d78a1aab/greenlet-3.2.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:326d234cbf337c9c3def0676412eb7040a35a768efc92504b947b3e9cfc7543d", size = 1633814, upload-time = "2025-11-04T12:42:17.175Z" },
    { url = "https://files.pythonhosted.org/packages/e9/08/b0814846b79399e585f974bbeebf5580fbe59e258ea7be64d9dfb253c84f/greenlet-3.2.4-cp312-cp312-win_amd64.whl", hash = "sha256:a7d4e128405eea3814a12cc2605e0e6aedb4035bf32697f72deca74de4105e02", size = 299899, upload-time = "2025-08-07T13:38:53.448Z" },
    { url = "https://files.pythonhosted.org/packages/49/e8/58c7f85958bda41dafea50497cbd59738c5c43dbbea5ee83d651234398f4/greenlet-3.2.4-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:1a921e542453fe531144e91e1feedf12e07351b1cf6c9e8a3325ea600a715a31", size = 272814, upload-time = "2025-08-07T13:15:50.011Z" },
    { url = "https://files.pythonhosted.org/packages/62/dd/b9f59862e9e257a16e4e610480cfffd29e3fae018a68c2332090b53aac3d/greenlet-3.2.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd3c8e693bff0fff6ba55f140bf390fa92c994083f838fece0f63be121334945", size = 641073, upload-time = "2025-08-07T13:42:57.23Z" },
@ -2587,6 +2593,8 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/ee/43/3cecdc0349359e1a527cbf2e3e28e5f8f06d3343aaf82ca13437a9aa290f/greenlet-3.2.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:23768528f2911bcd7e475210822ffb5254ed10d71f4028387e5a99b4c6699671", size = 610497, upload-time = "2025-08-07T13:18:31.636Z" },
    { url = "https://files.pythonhosted.org/packages/b8/19/06b6cf5d604e2c382a6f31cafafd6f33d5dea706f4db7bdab184bad2b21d/greenlet-3.2.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:00fadb3fedccc447f517ee0d3fd8fe49eae949e1cd0f6a611818f4f6fb7dc83b", size = 1121662, upload-time = "2025-08-07T13:42:41.117Z" },
    { url = "https://files.pythonhosted.org/packages/a2/15/0d5e4e1a66fab130d98168fe984c509249c833c1a3c16806b90f253ce7b9/greenlet-3.2.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:d25c5091190f2dc0eaa3f950252122edbbadbb682aa7b1ef2f8af0f8c0afefae", size = 1149210, upload-time = "2025-08-07T13:18:24.072Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/53/f9c440463b3057485b8594d7a638bed53ba531165ef0ca0e6c364b5cc807/greenlet-3.2.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e343822feb58ac4d0a1211bd9399de2b3a04963ddeec21530fc426cc121f19b", size = 1564759, upload-time = "2025-11-04T12:42:19.395Z" },
+    { url = "https://files.pythonhosted.org/packages/47/e4/3bb4240abdd0a8d23f4f88adec746a3099f0d86bfedb623f063b2e3b4df0/greenlet-3.2.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ca7f6f1f2649b89ce02f6f229d7c19f680a6238af656f61e0115b24857917929", size = 1634288, upload-time = "2025-11-04T12:42:21.174Z" },
    { url = "https://files.pythonhosted.org/packages/0b/55/2321e43595e6801e105fcfdee02b34c0f996eb71e6ddffca6b10b7e1d771/greenlet-3.2.4-cp313-cp313-win_amd64.whl", hash = "sha256:554b03b6e73aaabec3745364d6239e9e012d64c68ccd0b8430c64ccc14939a8b", size = 299685, upload-time = "2025-08-07T13:24:38.824Z" },
 ]