From 0a4b1068a253df8fb4e39a93ee18a73c911ee49e Mon Sep 17 00:00:00 2001
From: Andrej Milicevic <milicevi@Andrejs-MacBook-Pro.local>
Date: Mon, 17 Nov 2025 17:42:22 +0100
Subject: [PATCH 1/9] feat: add kwargs to openai adapter functions

---
 .../litellm_instructor/llm/openai/adapter.py        | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py
index 305b426b8..152f43e33 100644
--- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py
+++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py
@@ -108,7 +108,7 @@ class OpenAIAdapter(LLMInterface):
         reraise=True,
     )
     async def acreate_structured_output(
-        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
+        self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
     ) -> BaseModel:
         """
         Generate a response from a user query.
@@ -149,6 +149,7 @@ class OpenAIAdapter(LLMInterface):
                 api_version=self.api_version,
                 response_model=response_model,
                 max_retries=self.MAX_RETRIES,
+                **kwargs,
             )
         except (
             ContentFilterFinishReasonError,
@@ -174,6 +175,7 @@ class OpenAIAdapter(LLMInterface):
                     # api_base=self.fallback_endpoint,
                     response_model=response_model,
                     max_retries=self.MAX_RETRIES,
+                    **kwargs,
                 )
             except (
                 ContentFilterFinishReasonError,
@@ -199,7 +201,7 @@ class OpenAIAdapter(LLMInterface):
         reraise=True,
     )
     def create_structured_output(
-        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
+        self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
     ) -> BaseModel:
         """
         Generate a response from a user query.
@@ -239,6 +241,7 @@ class OpenAIAdapter(LLMInterface):
             api_version=self.api_version,
             response_model=response_model,
             max_retries=self.MAX_RETRIES,
+            **kwargs,
         )
 
     @retry(
@@ -248,7 +251,7 @@ class OpenAIAdapter(LLMInterface):
         before_sleep=before_sleep_log(logger, logging.DEBUG),
         reraise=True,
     )
-    async def create_transcript(self, input):
+    async def create_transcript(self, input, **kwargs):
         """
         Generate an audio transcript from a user query.
 
@@ -275,6 +278,7 @@ class OpenAIAdapter(LLMInterface):
                 api_base=self.endpoint,
                 api_version=self.api_version,
                 max_retries=self.MAX_RETRIES,
+                **kwargs,
             )
 
         return transcription
@@ -286,7 +290,7 @@ class OpenAIAdapter(LLMInterface):
         before_sleep=before_sleep_log(logger, logging.DEBUG),
         reraise=True,
     )
-    async def transcribe_image(self, input) -> BaseModel:
+    async def transcribe_image(self, input, **kwargs) -> BaseModel:
         """
         Generate a transcription of an image from a user query.
 
@@ -331,4 +335,5 @@ class OpenAIAdapter(LLMInterface):
             api_version=self.api_version,
             max_completion_tokens=300,
             max_retries=self.MAX_RETRIES,
+            **kwargs,
         )

From aa8afefe8a7ae4233e82edc71ee9441f0b68d325 Mon Sep 17 00:00:00 2001
From: Andrej Milicevic <milicevi@Andrejs-MacBook-Pro.local>
Date: Thu, 27 Nov 2025 17:05:37 +0100
Subject: [PATCH 2/9] feat: add kwargs to cognify and related tasks

---
 cognee/api/v1/cognify/cognify.py                              | 4 ++++
 cognee/infrastructure/llm/LLMGateway.py                       | 4 ++--
 .../llm/extraction/knowledge_graph/extract_content_graph.py   | 4 ++--
 cognee/tasks/graph/extract_graph_from_data.py                 | 3 ++-
 4 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py
index 0fa345176..bb2ebe86e 100644
--- a/cognee/api/v1/cognify/cognify.py
+++ b/cognee/api/v1/cognify/cognify.py
@@ -53,6 +53,7 @@ async def cognify(
     custom_prompt: Optional[str] = None,
     temporal_cognify: bool = False,
     data_per_batch: int = 20,
+    **kwargs
 ):
     """
     Transform ingested data into a structured knowledge graph.
@@ -224,6 +225,7 @@ async def cognify(
             config=config,
             custom_prompt=custom_prompt,
             chunks_per_batch=chunks_per_batch,
+            **kwargs,
         )
 
     # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
@@ -251,6 +253,7 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
     config: Config = None,
     custom_prompt: Optional[str] = None,
     chunks_per_batch: int = 100,
+    **kwargs,
 ) -> list[Task]:
     if config is None:
         ontology_config = get_ontology_env_config()
@@ -286,6 +289,7 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
             config=config,
             custom_prompt=custom_prompt,
             task_config={"batch_size": chunks_per_batch},
+            **kwargs,
         ),  # Generate knowledge graphs from the document chunks.
         Task(
             summarize_text,
diff --git a/cognee/infrastructure/llm/LLMGateway.py b/cognee/infrastructure/llm/LLMGateway.py
index ab5bb35d7..fd42eb55e 100644
--- a/cognee/infrastructure/llm/LLMGateway.py
+++ b/cognee/infrastructure/llm/LLMGateway.py
@@ -11,7 +11,7 @@ class LLMGateway:
 
     @staticmethod
     def acreate_structured_output(
-        text_input: str, system_prompt: str, response_model: Type[BaseModel]
+        text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
     ) -> Coroutine:
         llm_config = get_llm_config()
         if llm_config.structured_output_framework.upper() == "BAML":
@@ -31,7 +31,7 @@ class LLMGateway:
 
             llm_client = get_llm_client()
             return llm_client.acreate_structured_output(
-                text_input=text_input, system_prompt=system_prompt, response_model=response_model
+                text_input=text_input, system_prompt=system_prompt, response_model=response_model, **kwargs
             )
 
     @staticmethod
diff --git a/cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py b/cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py
index 59e6f563a..4a40979f4 100644
--- a/cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py
+++ b/cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py
@@ -10,7 +10,7 @@ from cognee.infrastructure.llm.config import (
 
 
 async def extract_content_graph(
-    content: str, response_model: Type[BaseModel], custom_prompt: Optional[str] = None
+    content: str, response_model: Type[BaseModel], custom_prompt: Optional[str] = None, **kwargs
 ):
     if custom_prompt:
         system_prompt = custom_prompt
@@ -30,7 +30,7 @@ async def extract_content_graph(
         system_prompt = render_prompt(prompt_path, {}, base_directory=base_directory)
 
     content_graph = await LLMGateway.acreate_structured_output(
-        content, system_prompt, response_model
+        content, system_prompt, response_model, **kwargs
     )
 
     return content_graph
diff --git a/cognee/tasks/graph/extract_graph_from_data.py b/cognee/tasks/graph/extract_graph_from_data.py
index 49b51af2d..965214677 100644
--- a/cognee/tasks/graph/extract_graph_from_data.py
+++ b/cognee/tasks/graph/extract_graph_from_data.py
@@ -99,6 +99,7 @@ async def extract_graph_from_data(
     graph_model: Type[BaseModel],
     config: Config = None,
     custom_prompt: Optional[str] = None,
+    **kwargs,
 ) -> List[DocumentChunk]:
     """
     Extracts and integrates a knowledge graph from the text content of document chunks using a specified graph model.
@@ -113,7 +114,7 @@ async def extract_graph_from_data(
 
     chunk_graphs = await asyncio.gather(
         *[
-            extract_content_graph(chunk.text, graph_model, custom_prompt=custom_prompt)
+            extract_content_graph(chunk.text, graph_model, custom_prompt=custom_prompt, **kwargs)
             for chunk in data_chunks
         ]
     )

From af8c5bedcc48e18c3723a2fbfa8afba3de242cbb Mon Sep 17 00:00:00 2001
From: Andrej Milicevic <milicevi@Andrejs-MacBook-Pro.local>
Date: Thu, 11 Dec 2025 17:47:23 +0100
Subject: [PATCH 3/9] feat: add kwargs to other adapters

---
 .../litellm_instructor/llm/anthropic/adapter.py             | 2 +-
 .../litellm_instructor/llm/gemini/adapter.py                | 2 +-
 .../litellm_instructor/llm/generic_llm_api/adapter.py       | 2 +-
 .../litellm_instructor/llm/mistral/adapter.py               | 2 +-
 .../litellm_instructor/llm/ollama/adapter.py                | 6 +++---
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py
index dbf0dfbea..46e2b2736 100644
--- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py
+++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py
@@ -51,7 +51,7 @@ class AnthropicAdapter(LLMInterface):
         reraise=True,
     )
     async def acreate_structured_output(
-        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
+        self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
     ) -> BaseModel:
         """
         Generate a response from a user query.
diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py
index 226f291d7..66d53b842 100644
--- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py
+++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py
@@ -79,7 +79,7 @@ class GeminiAdapter(LLMInterface):
         reraise=True,
     )
     async def acreate_structured_output(
-        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
+        self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
     ) -> BaseModel:
         """
         Generate a response from a user query.
diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py
index 9d7f25fc5..3049b3c4f 100644
--- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py
+++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py
@@ -79,7 +79,7 @@ class GenericAPIAdapter(LLMInterface):
         reraise=True,
     )
     async def acreate_structured_output(
-        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
+        self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
     ) -> BaseModel:
         """
         Generate a response from a user query.
diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py
index 355cdae0b..146d0a07a 100644
--- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py
+++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py
@@ -68,7 +68,7 @@ class MistralAdapter(LLMInterface):
         reraise=True,
     )
     async def acreate_structured_output(
-        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
+        self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
     ) -> BaseModel:
         """
         Generate a response from the user query.
diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py
index aabd19867..5ae09a4ac 100644
--- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py
+++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py
@@ -74,7 +74,7 @@ class OllamaAPIAdapter(LLMInterface):
         reraise=True,
     )
     async def acreate_structured_output(
-        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
+        self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
     ) -> BaseModel:
         """
         Generate a structured output from the LLM using the provided text and system prompt.
@@ -121,7 +121,7 @@ class OllamaAPIAdapter(LLMInterface):
         before_sleep=before_sleep_log(logger, logging.DEBUG),
         reraise=True,
     )
-    async def create_transcript(self, input_file: str) -> str:
+    async def create_transcript(self, input_file: str, **kwargs) -> str:
         """
         Generate an audio transcript from a user query.
 
@@ -160,7 +160,7 @@ class OllamaAPIAdapter(LLMInterface):
         before_sleep=before_sleep_log(logger, logging.DEBUG),
         reraise=True,
     )
-    async def transcribe_image(self, input_file: str) -> str:
+    async def transcribe_image(self, input_file: str, **kwargs) -> str:
         """
         Transcribe content from an image using base64 encoding.
 

From 14ff94f269599140df6e830761ef3b6f2c99eb28 Mon Sep 17 00:00:00 2001
From: Pavel Zorin <pazonec@yandex.ru>
Date: Thu, 11 Dec 2025 12:38:19 +0100
Subject: [PATCH 4/9] Initial release pipeline

---
 .github/workflows/release.yml | 154 ++++++++++++++++++++++++++++++++++
 1 file changed, 154 insertions(+)
 create mode 100644 .github/workflows/release.yml

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 000000000..a19635628
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,154 @@
+name: release.yml
+on:
+  workflow_dispatch:
+    inputs:
+      flavour:
+        required: true
+        default: dev
+        type: choice
+        options:
+          - dev
+          - main
+        description: Dev or Main release
+      test_mode:
+        required: true
+        type: boolean
+        description: Aka Dry Run. If true, it won't affect public indices or repositories
+
+jobs:
+  release-github:
+    name: Create GitHub Release from ${{ inputs.flavour }}
+    outputs:
+      tag: ${{ steps.create_tag.outputs.tag }}
+      version: ${{ steps.create_tag.outputs.version }}
+    permissions:
+      contents: write
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Check out ${{ inputs.flavour }}
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ inputs.flavour }}
+      - name: Install uv
+        uses: astral-sh/setup-uv@v7
+
+      - name: Create and push git tag
+        id: create_tag
+        env: 
+          TEST_MODE: ${{ inputs.test_mode }}
+        run: |
+          VERSION="$(uv version --short)"
+          TAG="v${VERSION}"
+
+          echo "Tag to create: ${TAG}"
+
+          git config user.name "github-actions[bot]"
+          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+
+          echo "tag=${TAG}" >> "$GITHUB_OUTPUT"
+          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
+
+          if [ "$TEST_MODE" = "false" ]; then
+            git tag "${TAG}"
+            git push origin "${TAG}"
+          else 
+            echo "Test mode is enabled. Skipping tag creation and push."
+          fi
+
+      - name: Create GitHub Release
+        if: ${{ !inputs.test_mode }}  # dry run must not publish a public release
+        uses: softprops/action-gh-release@v2
+        with:
+          tag_name: ${{ steps.create_tag.outputs.tag }}
+          generate_release_notes: true
+          token: ${{ secrets.GITHUB_TOKEN }}
+  
+  release-pypi-package:    
+    needs: release-github
+    name: Release PyPI Package from ${{ inputs.flavour }}
+    permissions:
+      contents: read
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Check out ${{ inputs.flavour }}
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ inputs.flavour }}
+      
+      - name: Install uv
+        uses: astral-sh/setup-uv@v7
+      
+      - name: Install Python
+        run: uv python install
+      
+      - name: Install dependencies
+        run: uv sync --locked --all-extras
+      
+      - name: Build distributions
+        run: uv build
+      
+      - name: Publish ${{ inputs.flavour }} release to TestPyPI
+        if: ${{ !inputs.test_mode }}
+        env:
+          UV_PUBLISH_TOKEN: ${{ secrets.TEST_PYPI_TOKEN }}
+        run: uv publish --publish-url https://test.pypi.org/legacy/
+      
+      - name: Publish ${{ inputs.flavour }} release to PyPI
+        if: ${{ !inputs.test_mode }}
+        env:
+          UV_PUBLISH_TOKEN: ${{ secrets.PYPI_TOKEN }}
+        run: uv publish
+  
+  release-docker-image:    
+    needs: release-github
+    name: Release Docker Image from ${{ inputs.flavour }}
+    permissions:
+      contents: read
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Check out ${{ inputs.flavour }}
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ inputs.flavour }}
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_PASSWORD }}      
+
+      - name: Build and push Dev Docker Image
+        if: ${{ inputs.flavour == 'dev' }}        
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          platforms: linux/amd64,linux/arm64
+          push: ${{ !inputs.test_mode }}
+          tags: cognee/cognee:${{ needs.release-github.outputs.version }}
+          labels: |
+            version=${{ needs.release-github.outputs.version }}
+            flavour=${{ inputs.flavour }}
+          cache-from: type=registry,ref=cognee/cognee:buildcache
+          cache-to: type=registry,ref=cognee/cognee:buildcache,mode=max
+
+      - name: Build and push Main Docker Image
+        if: ${{ inputs.flavour == 'main' }}
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          platforms: linux/amd64,linux/arm64
+          push: ${{ !inputs.test_mode }}
+          tags: | 
+            cognee/cognee:${{ needs.release-github.outputs.version }}
+            cognee/cognee:latest
+          labels: |
+            version=${{ needs.release-github.outputs.version }}
+            flavour=${{ inputs.flavour }}
+          cache-from: type=registry,ref=cognee/cognee:buildcache
+          cache-to: type=registry,ref=cognee/cognee:buildcache,mode=max

From a6bc27afaaeb901e5e771a84ca5e9ba2af473aba Mon Sep 17 00:00:00 2001
From: Pavel Zorin <pazonec@yandex.ru>
Date: Fri, 12 Dec 2025 17:31:54 +0100
Subject: [PATCH 5/9] Cleanup

---
 .github/workflows/release.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index a19635628..ff2f809f3 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -90,7 +90,7 @@ jobs:
         run: uv build
       
       - name: Publish ${{ inputs.flavour }} release to TestPyPI
-        if: ${{ !inputs.test_mode }}
+        if: ${{ inputs.test_mode }}
         env:
           UV_PUBLISH_TOKEN: ${{ secrets.TEST_PYPI_TOKEN }}
         run: uv publish --publish-url https://test.pypi.org/legacy/

From 14d9540d1b9d1aa3504baad0a026d7f92556c2e4 Mon Sep 17 00:00:00 2001
From: Igor Ilic <30923996+dexters1@users.noreply.github.com>
Date: Mon, 15 Dec 2025 18:15:48 +0100
Subject: [PATCH 6/9] feat: Add database deletion on dataset delete (#1893)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

<!-- .github/pull_request_template.md -->

## Description
- Add support for database deletion when a dataset is deleted
- Simplify dataset handler usage in Cognee

## Type of Change
<!-- Please check the relevant option -->
- [x] Bug fix (non-breaking change that fixes an issue)
- [ ] New feature (non-breaking change that adds functionality)
- [ ] Breaking change (fix or feature that would cause existing
functionality to change)
- [ ] Documentation update
- [ ] Code refactoring
- [ ] Performance improvement
- [ ] Other (please specify):

## Screenshots/Videos (if applicable)
<!-- Add screenshots or videos to help explain your changes -->

## Pre-submission Checklist
<!-- Please check all boxes that apply before submitting your PR -->
- [ ] **I have tested my changes thoroughly before submitting this PR**
- [ ] **This PR contains minimal changes necessary to address the
issue/feature**
- [ ] My code follows the project's coding standards and style
guidelines
- [ ] I have added tests that prove my fix is effective or that my
feature works
- [ ] I have added necessary documentation (if applicable)
- [ ] All new and existing tests pass
- [ ] I have searched existing PRs to ensure this change hasn't been
submitted already
- [ ] I have linked any relevant issues in the description
- [ ] My commits have clear and descriptive messages

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

* **Bug Fixes**
* Improved dataset deletion: stronger authorization checks and reliable
removal of associated graph and vector storage.

* **Tests**
* Added end-to-end test to verify complete dataset deletion and cleanup
of all related storage components.

<sub>✏️ Tip: You can customize this high-level summary in your review
settings.</sub>
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
---
 .github/workflows/e2e_tests.yml               | 25 ++++++
 cognee/api/v1/cognify/cognify.py              |  2 +-
 .../datasets/routers/get_datasets_router.py   |  6 +-
 .../databases/utils/__init__.py               |  2 +
 .../get_graph_dataset_database_handler.py     | 10 +++
 .../get_vector_dataset_database_handler.py    | 10 +++
 ...esolve_dataset_database_connection_info.py | 34 ++++-----
 cognee/infrastructure/llm/LLMGateway.py       |  5 +-
 cognee/modules/data/deletion/prune_system.py  | 38 +++-------
 cognee/modules/data/methods/delete_dataset.py | 26 +++++++
 cognee/tests/test_dataset_delete.py           | 76 +++++++++++++++++++
 11 files changed, 183 insertions(+), 51 deletions(-)
 create mode 100644 cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py
 create mode 100644 cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py
 create mode 100644 cognee/tests/test_dataset_delete.py

diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml
index cb69e9ef6..8cd62910c 100644
--- a/.github/workflows/e2e_tests.yml
+++ b/.github/workflows/e2e_tests.yml
@@ -237,6 +237,31 @@ jobs:
           EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
         run: uv run python ./cognee/tests/test_dataset_database_handler.py
 
+  test-dataset-database-deletion:
+    name: Test dataset database deletion in Cognee
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      - name: Cognee Setup
+        uses: ./.github/actions/cognee_setup
+        with:
+          python-version: '3.11.x'
+
+      - name: Run dataset databases deletion test
+        env:
+          ENV: 'dev'
+          LLM_MODEL: ${{ secrets.LLM_MODEL }}
+          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
+        run: uv run python ./cognee/tests/test_dataset_delete.py
+
   test-permissions:
     name: Test permissions with different situations in Cognee
     runs-on: ubuntu-22.04
diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py
index 9371f7ffd..ffc903d68 100644
--- a/cognee/api/v1/cognify/cognify.py
+++ b/cognee/api/v1/cognify/cognify.py
@@ -53,7 +53,7 @@ async def cognify(
     custom_prompt: Optional[str] = None,
     temporal_cognify: bool = False,
     data_per_batch: int = 20,
-    **kwargs
+    **kwargs,
 ):
     """
     Transform ingested data into a structured knowledge graph.
diff --git a/cognee/api/v1/datasets/routers/get_datasets_router.py b/cognee/api/v1/datasets/routers/get_datasets_router.py
index eff87b3af..ca738dfbe 100644
--- a/cognee/api/v1/datasets/routers/get_datasets_router.py
+++ b/cognee/api/v1/datasets/routers/get_datasets_router.py
@@ -208,14 +208,15 @@ def get_datasets_router() -> APIRouter:
             },
         )
 
-        from cognee.modules.data.methods import get_dataset, delete_dataset
+        from cognee.modules.data.methods import delete_dataset
 
-        dataset = await get_dataset(user.id, dataset_id)
+        dataset = await get_authorized_existing_datasets([dataset_id], "delete", user)
 
-        if dataset is None:
+        # The lookup result is indexed below, so reject an empty result as well as None.
+        if not dataset:
             raise DatasetNotFoundError(message=f"Dataset ({str(dataset_id)}) not found.")
 
-        await delete_dataset(dataset)
+        await delete_dataset(dataset[0])
 
     @router.delete(
         "/{dataset_id}/data/{data_id}",
diff --git a/cognee/infrastructure/databases/utils/__init__.py b/cognee/infrastructure/databases/utils/__init__.py
index f31d1e0dc..3907b4325 100644
--- a/cognee/infrastructure/databases/utils/__init__.py
+++ b/cognee/infrastructure/databases/utils/__init__.py
@@ -1,2 +1,4 @@
 from .get_or_create_dataset_database import get_or_create_dataset_database
 from .resolve_dataset_database_connection_info import resolve_dataset_database_connection_info
+from .get_graph_dataset_database_handler import get_graph_dataset_database_handler
+from .get_vector_dataset_database_handler import get_vector_dataset_database_handler
diff --git a/cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py b/cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py
new file mode 100644
index 000000000..d88685b48
--- /dev/null
+++ b/cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py
@@ -0,0 +1,10 @@
+from cognee.modules.users.models.DatasetDatabase import DatasetDatabase
+
+
+def get_graph_dataset_database_handler(dataset_database: DatasetDatabase) -> dict:
+    from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import (
+        supported_dataset_database_handlers,
+    )
+
+    handler = supported_dataset_database_handlers[dataset_database.graph_dataset_database_handler]
+    return handler
diff --git a/cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py b/cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py
new file mode 100644
index 000000000..5d1152c04
--- /dev/null
+++ b/cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py
@@ -0,0 +1,10 @@
+from cognee.modules.users.models.DatasetDatabase import DatasetDatabase
+
+
+def get_vector_dataset_database_handler(dataset_database: DatasetDatabase) -> dict:
+    from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import (
+        supported_dataset_database_handlers,
+    )
+
+    handler = supported_dataset_database_handlers[dataset_database.vector_dataset_database_handler]
+    return handler
diff --git a/cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py b/cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py
index d33169642..561268eaf 100644
--- a/cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py
+++ b/cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py
@@ -1,24 +1,12 @@
+from cognee.infrastructure.databases.utils.get_graph_dataset_database_handler import (
+    get_graph_dataset_database_handler,
+)
+from cognee.infrastructure.databases.utils.get_vector_dataset_database_handler import (
+    get_vector_dataset_database_handler,
+)
 from cognee.modules.users.models.DatasetDatabase import DatasetDatabase
 
 
-async def _get_vector_db_connection_info(dataset_database: DatasetDatabase) -> DatasetDatabase:
-    from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import (
-        supported_dataset_database_handlers,
-    )
-
-    handler = supported_dataset_database_handlers[dataset_database.vector_dataset_database_handler]
-    return await handler["handler_instance"].resolve_dataset_connection_info(dataset_database)
-
-
-async def _get_graph_db_connection_info(dataset_database: DatasetDatabase) -> DatasetDatabase:
-    from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import (
-        supported_dataset_database_handlers,
-    )
-
-    handler = supported_dataset_database_handlers[dataset_database.graph_dataset_database_handler]
-    return await handler["handler_instance"].resolve_dataset_connection_info(dataset_database)
-
-
 async def resolve_dataset_database_connection_info(
     dataset_database: DatasetDatabase,
 ) -> DatasetDatabase:
@@ -31,6 +19,12 @@ async def resolve_dataset_database_connection_info(
     Returns:
         DatasetDatabase instance with resolved connection info
     """
-    dataset_database = await _get_vector_db_connection_info(dataset_database)
-    dataset_database = await _get_graph_db_connection_info(dataset_database)
+    vector_dataset_database_handler = get_vector_dataset_database_handler(dataset_database)
+    graph_dataset_database_handler = get_graph_dataset_database_handler(dataset_database)
+    dataset_database = await vector_dataset_database_handler[
+        "handler_instance"
+    ].resolve_dataset_connection_info(dataset_database)
+    dataset_database = await graph_dataset_database_handler[
+        "handler_instance"
+    ].resolve_dataset_connection_info(dataset_database)
     return dataset_database
diff --git a/cognee/infrastructure/llm/LLMGateway.py b/cognee/infrastructure/llm/LLMGateway.py
index fd42eb55e..7bec9ca01 100644
--- a/cognee/infrastructure/llm/LLMGateway.py
+++ b/cognee/infrastructure/llm/LLMGateway.py
@@ -31,7 +31,10 @@ class LLMGateway:
 
             llm_client = get_llm_client()
             return llm_client.acreate_structured_output(
-                text_input=text_input, system_prompt=system_prompt, response_model=response_model, **kwargs
+                text_input=text_input,
+                system_prompt=system_prompt,
+                response_model=response_model,
+                **kwargs,
             )
 
     @staticmethod
diff --git a/cognee/modules/data/deletion/prune_system.py b/cognee/modules/data/deletion/prune_system.py
index 645e1a223..22a0fde5f 100644
--- a/cognee/modules/data/deletion/prune_system.py
+++ b/cognee/modules/data/deletion/prune_system.py
@@ -5,6 +5,10 @@ from cognee.context_global_variables import backend_access_control_enabled
 from cognee.infrastructure.databases.vector import get_vector_engine
 from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_engine
 from cognee.infrastructure.databases.relational import get_relational_engine
+from cognee.infrastructure.databases.utils import (
+    get_graph_dataset_database_handler,
+    get_vector_dataset_database_handler,
+)
 from cognee.shared.cache import delete_cache
 from cognee.modules.users.models import DatasetDatabase
 from cognee.shared.logging_utils import get_logger
@@ -13,22 +17,13 @@ logger = get_logger()
 
 
 async def prune_graph_databases():
-    async def _prune_graph_db(dataset_database: DatasetDatabase) -> dict:
-        from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import (
-            supported_dataset_database_handlers,
-        )
-
-        handler = supported_dataset_database_handlers[
-            dataset_database.graph_dataset_database_handler
-        ]
-        return await handler["handler_instance"].delete_dataset(dataset_database)
-
     db_engine = get_relational_engine()
     try:
-        data = await db_engine.get_all_data_from_table("dataset_database")
+        dataset_databases = await db_engine.get_all_data_from_table("dataset_database")
         # Go through each dataset database and delete the graph database
-        for data_item in data:
-            await _prune_graph_db(data_item)
+        for dataset_database in dataset_databases:
+            handler = get_graph_dataset_database_handler(dataset_database)
+            await handler["handler_instance"].delete_dataset(dataset_database)
     except (OperationalError, EntityNotFoundError) as e:
         logger.debug(
             "Skipping pruning of graph DB. Error when accessing dataset_database table: %s",
@@ -38,22 +33,13 @@ async def prune_graph_databases():
 
 
 async def prune_vector_databases():
-    async def _prune_vector_db(dataset_database: DatasetDatabase) -> dict:
-        from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import (
-            supported_dataset_database_handlers,
-        )
-
-        handler = supported_dataset_database_handlers[
-            dataset_database.vector_dataset_database_handler
-        ]
-        return await handler["handler_instance"].delete_dataset(dataset_database)
-
     db_engine = get_relational_engine()
     try:
-        data = await db_engine.get_all_data_from_table("dataset_database")
+        dataset_databases = await db_engine.get_all_data_from_table("dataset_database")
         # Go through each dataset database and delete the vector database
-        for data_item in data:
-            await _prune_vector_db(data_item)
+        for dataset_database in dataset_databases:
+            handler = get_vector_dataset_database_handler(dataset_database)
+            await handler["handler_instance"].delete_dataset(dataset_database)
     except (OperationalError, EntityNotFoundError) as e:
         logger.debug(
             "Skipping pruning of vector DB. Error when accessing dataset_database table: %s",
diff --git a/cognee/modules/data/methods/delete_dataset.py b/cognee/modules/data/methods/delete_dataset.py
index ff20ff9e7..dea10e741 100644
--- a/cognee/modules/data/methods/delete_dataset.py
+++ b/cognee/modules/data/methods/delete_dataset.py
@@ -1,8 +1,34 @@
+from cognee.modules.users.models import DatasetDatabase
+from sqlalchemy import select
+
 from cognee.modules.data.models import Dataset
+from cognee.infrastructure.databases.utils.get_vector_dataset_database_handler import (
+    get_vector_dataset_database_handler,
+)
+from cognee.infrastructure.databases.utils.get_graph_dataset_database_handler import (
+    get_graph_dataset_database_handler,
+)
 from cognee.infrastructure.databases.relational import get_relational_engine
 
 
 async def delete_dataset(dataset: Dataset):
     db_engine = get_relational_engine()
 
+    async with db_engine.get_async_session() as session:
+        stmt = select(DatasetDatabase).where(
+            DatasetDatabase.dataset_id == dataset.id,
+        )
+        dataset_database: DatasetDatabase = await session.scalar(stmt)
+        if dataset_database:
+            graph_dataset_database_handler = get_graph_dataset_database_handler(dataset_database)
+            vector_dataset_database_handler = get_vector_dataset_database_handler(dataset_database)
+            await graph_dataset_database_handler["handler_instance"].delete_dataset(
+                dataset_database
+            )
+            await vector_dataset_database_handler["handler_instance"].delete_dataset(
+                dataset_database
+            )
+    # TODO: Remove dataset from pipeline_run_status in Data objects related to dataset as well
+    #       This blocks recreation of the dataset with the same name and data after deletion as
+    #       it's marked as completed and will be just skipped even though it's empty.
     return await db_engine.delete_entity_by_id(dataset.__tablename__, dataset.id)
diff --git a/cognee/tests/test_dataset_delete.py b/cognee/tests/test_dataset_delete.py
new file mode 100644
index 000000000..372945bdb
--- /dev/null
+++ b/cognee/tests/test_dataset_delete.py
@@ -0,0 +1,76 @@
+import os
+import asyncio
+import pathlib
+from uuid import UUID
+
+import cognee
+from cognee.shared.logging_utils import setup_logging, ERROR
+from cognee.modules.data.methods.delete_dataset import delete_dataset
+from cognee.modules.data.methods.get_dataset import get_dataset
+from cognee.modules.users.methods import get_default_user
+
+
+async def main():
+    # Set data and system directory paths
+    data_directory_path = str(
+        pathlib.Path(
+            os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_dataset_delete")
+        ).resolve()
+    )
+    cognee.config.data_root_directory(data_directory_path)
+    cognee_directory_path = str(
+        pathlib.Path(
+            os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_dataset_delete")
+        ).resolve()
+    )
+    cognee.config.system_root_directory(cognee_directory_path)
+
+    # Create a clean slate for cognee -- reset data and system state
+    print("Resetting cognee data...")
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+    print("Data reset complete.\n")
+
+    # cognee knowledge graph will be created based on this text
+    text = """
+    Natural language processing (NLP) is an interdisciplinary
+    subfield of computer science and information retrieval.
+    """
+
+    # Add the text, and make it available for cognify
+    await cognee.add(text, "nlp_dataset")
+    await cognee.add("Quantum computing is the study of quantum computers.", "quantum_dataset")
+
+    # Use LLMs and cognee to create knowledge graph
+    ret_val = await cognee.cognify()
+    user = await get_default_user()
+
+    for val in ret_val:
+        dataset_id = str(val)
+        vector_db_path = os.path.join(
+            cognee_directory_path, "databases", str(user.id), dataset_id + ".lance.db"
+        )
+        graph_db_path = os.path.join(
+            cognee_directory_path, "databases", str(user.id), dataset_id + ".pkl"
+        )
+
+        # Check if databases are properly created and exist before deletion
+        assert os.path.exists(graph_db_path), "Graph database file not found."
+        assert os.path.exists(vector_db_path), "Vector database file not found."
+
+        dataset = await get_dataset(user_id=user.id, dataset_id=UUID(dataset_id))
+        await delete_dataset(dataset)
+
+        # Confirm databases have been deleted
+        assert not os.path.exists(graph_db_path), "Graph database file found."
+        assert not os.path.exists(vector_db_path), "Vector database file found."
+
+
+if __name__ == "__main__":
+    logger = setup_logging(log_level=ERROR)
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+    try:
+        loop.run_until_complete(main())
+    finally:
+        loop.run_until_complete(loop.shutdown_asyncgens())

From 622f8fa79e459d4cec8000de0cbf704957405b05 Mon Sep 17 00:00:00 2001
From: hajdul88 <52442977+hajdul88@users.noreply.github.com>
Date: Mon, 15 Dec 2025 18:30:35 +0100
Subject: [PATCH 7/9] chore: introduces 1 file upload in ontology endpoint
 (#1899)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

<!-- .github/pull_request_template.md -->

## Description
This PR fixes the ontology upload endpoint by enforcing one file upload at
a time. Tests are adjusted in both the server start test and the ontology
endpoint unit tests. The API was tested.

Do not merge it together with
https://github.com/topoteretes/cognee/pull/1898. It's either that or this
one.


## Type of Change
<!-- Please check the relevant option -->
- [x] Bug fix (non-breaking change that fixes an issue)
- [ ] New feature (non-breaking change that adds functionality)
- [ ] Breaking change (fix or feature that would cause existing
functionality to change)
- [ ] Documentation update
- [ ] Code refactoring
- [ ] Performance improvement
- [ ] Other (please specify):

## Screenshots/Videos (if applicable)
<!-- Add screenshots or videos to help explain your changes -->

## Pre-submission Checklist
<!-- Please check all boxes that apply before submitting your PR -->
- [x] **I have tested my changes thoroughly before submitting this PR**
- [x] **This PR contains minimal changes necessary to address the
issue/feature**
- [x] My code follows the project's coding standards and style
guidelines
- [x] I have added tests that prove my fix is effective or that my
feature works
- [x] I have added necessary documentation (if applicable)
- [x] All new and existing tests pass
- [x] I have searched existing PRs to ensure this change hasn't been
submitted already
- [x] I have linked any relevant issues in the description
- [x] My commits have clear and descriptive messages

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

* **API Changes**
* Ontology upload now accepts exactly one file per request; field
renamed from "descriptions" to "description" and validated as a plain
string.
* Stricter form validation and tighter 400/500 error handling for
malformed submissions.

* **Tests**
* Tests converted to real HTTP-style interactions using a shared test
client and dependency overrides.
* Payloads now use plain string fields; added coverage for single-file
constraints and specific error responses.

* **Style**
  * Minor formatting cleanups with no functional impact.

<sub>✏️ Tip: You can customize this high-level summary in your review
settings.</sub>
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
---
 .../ontologies/routers/get_ontology_router.py |  48 ++---
 cognee/tests/test_cognee_server_start.py      |   4 +-
 .../tests/unit/api/test_ontology_endpoint.py  | 166 ++++++++----------
 3 files changed, 100 insertions(+), 118 deletions(-)

diff --git a/cognee/api/v1/ontologies/routers/get_ontology_router.py b/cognee/api/v1/ontologies/routers/get_ontology_router.py
index ee31c683f..77667d88d 100644
--- a/cognee/api/v1/ontologies/routers/get_ontology_router.py
+++ b/cognee/api/v1/ontologies/routers/get_ontology_router.py
@@ -1,4 +1,4 @@
-from fastapi import APIRouter, File, Form, UploadFile, Depends, HTTPException
+from fastapi import APIRouter, File, Form, UploadFile, Depends, Request
 from fastapi.responses import JSONResponse
 from typing import Optional, List
 
@@ -15,28 +15,25 @@ def get_ontology_router() -> APIRouter:
 
     @router.post("", response_model=dict)
     async def upload_ontology(
+        request: Request,
         ontology_key: str = Form(...),
-        ontology_file: List[UploadFile] = File(...),
-        descriptions: Optional[str] = Form(None),
+        ontology_file: UploadFile = File(...),
+        description: Optional[str] = Form(None),
         user: User = Depends(get_authenticated_user),
     ):
         """
-        Upload ontology files with their respective keys for later use in cognify operations.
-
-        Supports both single and multiple file uploads:
-        - Single file: ontology_key=["key"], ontology_file=[file]
-        - Multiple files: ontology_key=["key1", "key2"], ontology_file=[file1, file2]
+        Upload a single ontology file for later use in cognify operations.
 
         ## Request Parameters
-        - **ontology_key** (str): JSON array string of user-defined identifiers for the ontologies
-        - **ontology_file** (List[UploadFile]): OWL format ontology files
-        - **descriptions** (Optional[str]): JSON array string of optional descriptions
+        - **ontology_key** (str): User-defined identifier for the ontology.
+        - **ontology_file** (UploadFile): Single OWL format ontology file
+        - **description** (Optional[str]): Optional description for the ontology.
 
         ## Response
-        Returns metadata about uploaded ontologies including keys, filenames, sizes, and upload timestamps.
+        Returns metadata about the uploaded ontology including key, filename, size, and upload timestamp.
 
         ## Error Codes
-        - **400 Bad Request**: Invalid file format, duplicate keys, array length mismatches, file size exceeded
+        - **400 Bad Request**: Invalid file format, duplicate key, multiple files uploaded
         - **500 Internal Server Error**: File system or processing errors
         """
         send_telemetry(
@@ -49,16 +46,22 @@ def get_ontology_router() -> APIRouter:
         )
 
         try:
-            import json
+            # Enforce: exactly one uploaded file for "ontology_file"
+            form = await request.form()
+            uploaded_files = form.getlist("ontology_file")
+            if len(uploaded_files) != 1:
+                raise ValueError("Only one ontology_file is allowed")
 
-            ontology_keys = json.loads(ontology_key)
-            description_list = json.loads(descriptions) if descriptions else None
+            if ontology_key.strip().startswith(("[", "{")):
+                raise ValueError("ontology_key must be a string")
+            if description is not None and description.strip().startswith(("[", "{")):
+                raise ValueError("description must be a string")
 
-            if not isinstance(ontology_keys, list):
-                raise ValueError("ontology_key must be a JSON array")
-
-            results = await ontology_service.upload_ontologies(
-                ontology_keys, ontology_file, user, description_list
+            result = await ontology_service.upload_ontology(
+                ontology_key=ontology_key,
+                file=ontology_file,
+                user=user,
+                description=description,
             )
 
             return {
@@ -70,10 +73,9 @@ def get_ontology_router() -> APIRouter:
                         "uploaded_at": result.uploaded_at,
                         "description": result.description,
                     }
-                    for result in results
                 ]
             }
-        except (json.JSONDecodeError, ValueError) as e:
+        except ValueError as e:
             return JSONResponse(status_code=400, content={"error": str(e)})
         except Exception as e:
             return JSONResponse(status_code=500, content={"error": str(e)})
diff --git a/cognee/tests/test_cognee_server_start.py b/cognee/tests/test_cognee_server_start.py
index fece88240..a626088a3 100644
--- a/cognee/tests/test_cognee_server_start.py
+++ b/cognee/tests/test_cognee_server_start.py
@@ -148,8 +148,8 @@ class TestCogneeServerStart(unittest.TestCase):
             headers=headers,
             files=[("ontology_file", ("test.owl", ontology_content, "application/xml"))],
             data={
-                "ontology_key": json.dumps([ontology_key]),
-                "description": json.dumps(["Test ontology"]),
+                "ontology_key": ontology_key,
+                "description": "Test ontology",
             },
         )
         self.assertEqual(ontology_response.status_code, 200)
diff --git a/cognee/tests/unit/api/test_ontology_endpoint.py b/cognee/tests/unit/api/test_ontology_endpoint.py
index af3a4d90e..e072ceda8 100644
--- a/cognee/tests/unit/api/test_ontology_endpoint.py
+++ b/cognee/tests/unit/api/test_ontology_endpoint.py
@@ -1,17 +1,28 @@
 import pytest
 import uuid
 from fastapi.testclient import TestClient
-from unittest.mock import patch, Mock, AsyncMock
+from unittest.mock import Mock
 from types import SimpleNamespace
-import importlib
 from cognee.api.client import app
+from cognee.modules.users.methods import get_authenticated_user
 
-gau_mod = importlib.import_module("cognee.modules.users.methods.get_authenticated_user")
+
+@pytest.fixture(scope="session")
+def test_client():
+    # Keep a single TestClient (and event loop) for the whole test session.
+    # Re-creating TestClient repeatedly can break async DB connections (asyncpg loop mismatch).
+    with TestClient(app) as c:
+        yield c
 
 
 @pytest.fixture
-def client():
-    return TestClient(app)
+def client(test_client, mock_default_user):
+    async def override_get_authenticated_user():
+        return mock_default_user
+
+    app.dependency_overrides[get_authenticated_user] = override_get_authenticated_user
+    yield test_client
+    app.dependency_overrides.pop(get_authenticated_user, None)
 
 
 @pytest.fixture
@@ -32,12 +43,8 @@ def mock_default_user():
     )
 
 
-@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock)
-def test_upload_ontology_success(mock_get_default_user, client, mock_default_user):
+def test_upload_ontology_success(client):
     """Test successful ontology upload"""
-    import json
-
-    mock_get_default_user.return_value = mock_default_user
     ontology_content = (
         b"<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'></rdf:RDF>"
     )
@@ -46,7 +53,7 @@ def test_upload_ontology_success(mock_get_default_user, client, mock_default_use
     response = client.post(
         "/api/v1/ontologies",
         files=[("ontology_file", ("test.owl", ontology_content, "application/xml"))],
-        data={"ontology_key": json.dumps([unique_key]), "description": json.dumps(["Test"])},
+        data={"ontology_key": unique_key, "description": "Test"},
     )
 
     assert response.status_code == 200
@@ -55,10 +62,8 @@ def test_upload_ontology_success(mock_get_default_user, client, mock_default_use
     assert "uploaded_at" in data["uploaded_ontologies"][0]
 
 
-@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock)
-def test_upload_ontology_invalid_file(mock_get_default_user, client, mock_default_user):
+def test_upload_ontology_invalid_file(client):
     """Test 400 response for non-.owl files"""
-    mock_get_default_user.return_value = mock_default_user
     unique_key = f"test_ontology_{uuid.uuid4().hex[:8]}"
     response = client.post(
         "/api/v1/ontologies",
@@ -68,14 +73,10 @@ def test_upload_ontology_invalid_file(mock_get_default_user, client, mock_defaul
     assert response.status_code == 400
 
 
-@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock)
-def test_upload_ontology_missing_data(mock_get_default_user, client, mock_default_user):
+def test_upload_ontology_missing_data(client):
     """Test 400 response for missing file or key"""
-    import json
-
-    mock_get_default_user.return_value = mock_default_user
     # Missing file
-    response = client.post("/api/v1/ontologies", data={"ontology_key": json.dumps(["test"])})
+    response = client.post("/api/v1/ontologies", data={"ontology_key": "test"})
     assert response.status_code == 400
 
     # Missing key
@@ -85,34 +86,25 @@ def test_upload_ontology_missing_data(mock_get_default_user, client, mock_defaul
     assert response.status_code == 400
 
 
-@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock)
-def test_upload_ontology_unauthorized(mock_get_default_user, client, mock_default_user):
-    """Test behavior when default user is provided (no explicit authentication)"""
-    import json
-
+def test_upload_ontology_without_auth_header(client):
+    """Test behavior when no explicit authentication header is provided."""
     unique_key = f"test_ontology_{uuid.uuid4().hex[:8]}"
-    mock_get_default_user.return_value = mock_default_user
     response = client.post(
         "/api/v1/ontologies",
         files=[("ontology_file", ("test.owl", b"<rdf></rdf>", "application/xml"))],
-        data={"ontology_key": json.dumps([unique_key])},
+        data={"ontology_key": unique_key},
     )
 
-    # The current system provides a default user when no explicit authentication is given
-    # This test verifies the system works with conditional authentication
     assert response.status_code == 200
     data = response.json()
     assert data["uploaded_ontologies"][0]["ontology_key"] == unique_key
     assert "uploaded_at" in data["uploaded_ontologies"][0]
 
 
-@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock)
-def test_upload_multiple_ontologies(mock_get_default_user, client, mock_default_user):
-    """Test uploading multiple ontology files in single request"""
+def test_upload_multiple_ontologies_in_single_request_is_rejected(client):
+    """Uploading multiple ontology files in a single request should fail."""
     import io
 
-    mock_get_default_user.return_value = mock_default_user
-    # Create mock files
     file1_content = b"<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'></rdf:RDF>"
     file2_content = b"<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'></rdf:RDF>"
 
@@ -120,45 +112,34 @@ def test_upload_multiple_ontologies(mock_get_default_user, client, mock_default_
         ("ontology_file", ("vehicles.owl", io.BytesIO(file1_content), "application/xml")),
         ("ontology_file", ("manufacturers.owl", io.BytesIO(file2_content), "application/xml")),
     ]
-    data = {
-        "ontology_key": '["vehicles", "manufacturers"]',
-        "descriptions": '["Base vehicles", "Car manufacturers"]',
-    }
+    data = {"ontology_key": "vehicles", "description": "Base vehicles"}
 
     response = client.post("/api/v1/ontologies", files=files, data=data)
 
-    assert response.status_code == 200
-    result = response.json()
-    assert "uploaded_ontologies" in result
-    assert len(result["uploaded_ontologies"]) == 2
-    assert result["uploaded_ontologies"][0]["ontology_key"] == "vehicles"
-    assert result["uploaded_ontologies"][1]["ontology_key"] == "manufacturers"
+    assert response.status_code == 400
+    assert "Only one ontology_file is allowed" in response.json()["error"]
 
 
-@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock)
-def test_upload_endpoint_accepts_arrays(mock_get_default_user, client, mock_default_user):
-    """Test that upload endpoint accepts array parameters"""
+def test_upload_endpoint_rejects_array_style_fields(client):
+    """Array-style form values should be rejected (no backwards compatibility)."""
     import io
     import json
 
-    mock_get_default_user.return_value = mock_default_user
     file_content = b"<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'></rdf:RDF>"
 
     files = [("ontology_file", ("single.owl", io.BytesIO(file_content), "application/xml"))]
     data = {
         "ontology_key": json.dumps(["single_key"]),
-        "descriptions": json.dumps(["Single ontology"]),
+        "description": json.dumps(["Single ontology"]),
     }
 
     response = client.post("/api/v1/ontologies", files=files, data=data)
 
-    assert response.status_code == 200
-    result = response.json()
-    assert result["uploaded_ontologies"][0]["ontology_key"] == "single_key"
+    assert response.status_code == 400
+    assert "ontology_key must be a string" in response.json()["error"]
 
 
-@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock)
-def test_cognify_with_multiple_ontologies(mock_get_default_user, client, mock_default_user):
+def test_cognify_with_multiple_ontologies(client):
     """Test cognify endpoint accepts multiple ontology keys"""
     payload = {
         "datasets": ["test_dataset"],
@@ -172,14 +153,11 @@ def test_cognify_with_multiple_ontologies(mock_get_default_user, client, mock_de
     assert response.status_code in [200, 400, 409]  # May fail for other reasons, not type
 
 
-@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock)
-def test_complete_multifile_workflow(mock_get_default_user, client, mock_default_user):
-    """Test complete workflow: upload multiple ontologies → cognify with multiple keys"""
+def test_complete_multifile_workflow(client):
+    """Test workflow: upload ontologies one-by-one → cognify with multiple keys"""
     import io
-    import json
 
-    mock_get_default_user.return_value = mock_default_user
-    # Step 1: Upload multiple ontologies
+    # Step 1: Upload two ontologies (one-by-one)
     file1_content = b"""<?xml version="1.0"?>
     <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
              xmlns:owl="http://www.w3.org/2002/07/owl#">
@@ -192,17 +170,21 @@ def test_complete_multifile_workflow(mock_get_default_user, client, mock_default
         <owl:Class rdf:ID="Manufacturer"/>
     </rdf:RDF>"""
 
-    files = [
-        ("ontology_file", ("vehicles.owl", io.BytesIO(file1_content), "application/xml")),
-        ("ontology_file", ("manufacturers.owl", io.BytesIO(file2_content), "application/xml")),
-    ]
-    data = {
-        "ontology_key": json.dumps(["vehicles", "manufacturers"]),
-        "descriptions": json.dumps(["Vehicle ontology", "Manufacturer ontology"]),
-    }
+    upload_response_1 = client.post(
+        "/api/v1/ontologies",
+        files=[("ontology_file", ("vehicles.owl", io.BytesIO(file1_content), "application/xml"))],
+        data={"ontology_key": "vehicles", "description": "Vehicle ontology"},
+    )
+    assert upload_response_1.status_code == 200
 
-    upload_response = client.post("/api/v1/ontologies", files=files, data=data)
-    assert upload_response.status_code == 200
+    upload_response_2 = client.post(
+        "/api/v1/ontologies",
+        files=[
+            ("ontology_file", ("manufacturers.owl", io.BytesIO(file2_content), "application/xml"))
+        ],
+        data={"ontology_key": "manufacturers", "description": "Manufacturer ontology"},
+    )
+    assert upload_response_2.status_code == 200
 
     # Step 2: Verify ontologies are listed
     list_response = client.get("/api/v1/ontologies")
@@ -223,44 +205,42 @@ def test_complete_multifile_workflow(mock_get_default_user, client, mock_default
     assert cognify_response.status_code != 400  # Not a validation error
 
 
-@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock)
-def test_multifile_error_handling(mock_get_default_user, client, mock_default_user):
-    """Test error handling for invalid multifile uploads"""
+def test_upload_error_handling(client):
+    """Test error handling for invalid uploads (single-file endpoint)."""
     import io
     import json
 
-    # Test mismatched array lengths
+    # Array-style key should be rejected
     file_content = b"<rdf:RDF></rdf:RDF>"
     files = [("ontology_file", ("test.owl", io.BytesIO(file_content), "application/xml"))]
     data = {
-        "ontology_key": json.dumps(["key1", "key2"]),  # 2 keys, 1 file
-        "descriptions": json.dumps(["desc1"]),
+        "ontology_key": json.dumps(["key1", "key2"]),
+        "description": "desc1",
     }
 
     response = client.post("/api/v1/ontologies", files=files, data=data)
     assert response.status_code == 400
-    assert "Number of keys must match number of files" in response.json()["error"]
+    assert "ontology_key must be a string" in response.json()["error"]
 
-    # Test duplicate keys
-    files = [
-        ("ontology_file", ("test1.owl", io.BytesIO(file_content), "application/xml")),
-        ("ontology_file", ("test2.owl", io.BytesIO(file_content), "application/xml")),
-    ]
-    data = {
-        "ontology_key": json.dumps(["duplicate", "duplicate"]),
-        "descriptions": json.dumps(["desc1", "desc2"]),
-    }
+    # Duplicate key should be rejected
+    response_1 = client.post(
+        "/api/v1/ontologies",
+        files=[("ontology_file", ("test1.owl", io.BytesIO(file_content), "application/xml"))],
+        data={"ontology_key": "duplicate", "description": "desc1"},
+    )
+    assert response_1.status_code == 200
 
-    response = client.post("/api/v1/ontologies", files=files, data=data)
-    assert response.status_code == 400
-    assert "Duplicate ontology keys not allowed" in response.json()["error"]
+    response_2 = client.post(
+        "/api/v1/ontologies",
+        files=[("ontology_file", ("test2.owl", io.BytesIO(file_content), "application/xml"))],
+        data={"ontology_key": "duplicate", "description": "desc2"},
+    )
+    assert response_2.status_code == 400
+    assert "already exists" in response_2.json()["error"]
 
 
-@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock)
-def test_cognify_missing_ontology_key(mock_get_default_user, client, mock_default_user):
+def test_cognify_missing_ontology_key(client):
     """Test cognify with non-existent ontology key"""
-    mock_get_default_user.return_value = mock_default_user
-
     payload = {
         "datasets": ["test_dataset"],
         "ontology_key": ["nonexistent_key"],

From 67af8a7cb46f65c0075b0af5ea35f0607f026b9d Mon Sep 17 00:00:00 2001
From: Pavel Zorin <pazonec@yandex.ru>
Date: Mon, 15 Dec 2025 18:36:15 +0100
Subject: [PATCH 8/9] Bump version from 0.5.0.dev0 to 0.5.0.dev1

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 8e4ed8a0d..cf2081d0a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "cognee"
 
-version = "0.5.0.dev0"
+version = "0.5.0.dev1"
 description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning."
 authors = [
     { name = "Vasilije Markovic" },

From 78028b819f0b9293ec60b5894c8e7155284c5fcd Mon Sep 17 00:00:00 2001
From: Pavel Zorin <pazonec@yandex.ru>
Date: Mon, 15 Dec 2025 18:42:02 +0100
Subject: [PATCH 9/9] update dev uv.lock

---
 uv.lock | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/uv.lock b/uv.lock
index fccab8c40..884fb63be 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 2
+revision = 3
 requires-python = ">=3.10, <3.14"
 resolution-markers = [
     "python_full_version >= '3.13' and platform_python_implementation != 'PyPy' and sys_platform == 'darwin'",
@@ -946,7 +946,7 @@ wheels = [
 
 [[package]]
 name = "cognee"
-version = "0.5.0.dev0"
+version = "0.5.0.dev1"
 source = { editable = "." }
 dependencies = [
     { name = "aiofiles" },