From 0a4b1068a253df8fb4e39a93ee18a73c911ee49e Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Mon, 17 Nov 2025 17:42:22 +0100 Subject: [PATCH 1/9] feat: add kwargs to openai adapter functions --- .../litellm_instructor/llm/openai/adapter.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py index 305b426b8..152f43e33 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py @@ -108,7 +108,7 @@ class OpenAIAdapter(LLMInterface): reraise=True, ) async def acreate_structured_output( - self, text_input: str, system_prompt: str, response_model: Type[BaseModel] + self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs ) -> BaseModel: """ Generate a response from a user query. @@ -149,6 +149,7 @@ class OpenAIAdapter(LLMInterface): api_version=self.api_version, response_model=response_model, max_retries=self.MAX_RETRIES, + **kwargs, ) except ( ContentFilterFinishReasonError, @@ -174,6 +175,7 @@ class OpenAIAdapter(LLMInterface): # api_base=self.fallback_endpoint, response_model=response_model, max_retries=self.MAX_RETRIES, + **kwargs, ) except ( ContentFilterFinishReasonError, @@ -199,7 +201,7 @@ class OpenAIAdapter(LLMInterface): reraise=True, ) def create_structured_output( - self, text_input: str, system_prompt: str, response_model: Type[BaseModel] + self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs ) -> BaseModel: """ Generate a response from a user query. 
@@ -239,6 +241,7 @@ class OpenAIAdapter(LLMInterface): api_version=self.api_version, response_model=response_model, max_retries=self.MAX_RETRIES, + **kwargs, ) @retry( @@ -248,7 +251,7 @@ class OpenAIAdapter(LLMInterface): before_sleep=before_sleep_log(logger, logging.DEBUG), reraise=True, ) - async def create_transcript(self, input): + async def create_transcript(self, input, **kwargs): """ Generate an audio transcript from a user query. @@ -275,6 +278,7 @@ class OpenAIAdapter(LLMInterface): api_base=self.endpoint, api_version=self.api_version, max_retries=self.MAX_RETRIES, + **kwargs, ) return transcription @@ -286,7 +290,7 @@ class OpenAIAdapter(LLMInterface): before_sleep=before_sleep_log(logger, logging.DEBUG), reraise=True, ) - async def transcribe_image(self, input) -> BaseModel: + async def transcribe_image(self, input, **kwargs) -> BaseModel: """ Generate a transcription of an image from a user query. @@ -331,4 +335,5 @@ class OpenAIAdapter(LLMInterface): api_version=self.api_version, max_completion_tokens=300, max_retries=self.MAX_RETRIES, + **kwargs, ) From aa8afefe8a7ae4233e82edc71ee9441f0b68d325 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Thu, 27 Nov 2025 17:05:37 +0100 Subject: [PATCH 2/9] feat: add kwargs to cognify and related tasks --- cognee/api/v1/cognify/cognify.py | 4 ++++ cognee/infrastructure/llm/LLMGateway.py | 4 ++-- .../llm/extraction/knowledge_graph/extract_content_graph.py | 4 ++-- cognee/tasks/graph/extract_graph_from_data.py | 3 ++- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index 0fa345176..bb2ebe86e 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -53,6 +53,7 @@ async def cognify( custom_prompt: Optional[str] = None, temporal_cognify: bool = False, data_per_batch: int = 20, + **kwargs ): """ Transform ingested data into a structured knowledge graph. 
@@ -224,6 +225,7 @@ async def cognify( config=config, custom_prompt=custom_prompt, chunks_per_batch=chunks_per_batch, + **kwargs, ) # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for @@ -251,6 +253,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's config: Config = None, custom_prompt: Optional[str] = None, chunks_per_batch: int = 100, + **kwargs, ) -> list[Task]: if config is None: ontology_config = get_ontology_env_config() @@ -286,6 +289,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's config=config, custom_prompt=custom_prompt, task_config={"batch_size": chunks_per_batch}, + **kwargs, ), # Generate knowledge graphs from the document chunks. Task( summarize_text, diff --git a/cognee/infrastructure/llm/LLMGateway.py b/cognee/infrastructure/llm/LLMGateway.py index ab5bb35d7..fd42eb55e 100644 --- a/cognee/infrastructure/llm/LLMGateway.py +++ b/cognee/infrastructure/llm/LLMGateway.py @@ -11,7 +11,7 @@ class LLMGateway: @staticmethod def acreate_structured_output( - text_input: str, system_prompt: str, response_model: Type[BaseModel] + text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs ) -> Coroutine: llm_config = get_llm_config() if llm_config.structured_output_framework.upper() == "BAML": @@ -31,7 +31,7 @@ class LLMGateway: llm_client = get_llm_client() return llm_client.acreate_structured_output( - text_input=text_input, system_prompt=system_prompt, response_model=response_model + text_input=text_input, system_prompt=system_prompt, response_model=response_model, **kwargs ) @staticmethod diff --git a/cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py b/cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py index 59e6f563a..4a40979f4 100644 --- a/cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py 
+++ b/cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py @@ -10,7 +10,7 @@ from cognee.infrastructure.llm.config import ( async def extract_content_graph( - content: str, response_model: Type[BaseModel], custom_prompt: Optional[str] = None + content: str, response_model: Type[BaseModel], custom_prompt: Optional[str] = None, **kwargs ): if custom_prompt: system_prompt = custom_prompt @@ -30,7 +30,7 @@ async def extract_content_graph( system_prompt = render_prompt(prompt_path, {}, base_directory=base_directory) content_graph = await LLMGateway.acreate_structured_output( - content, system_prompt, response_model + content, system_prompt, response_model, **kwargs ) return content_graph diff --git a/cognee/tasks/graph/extract_graph_from_data.py b/cognee/tasks/graph/extract_graph_from_data.py index 49b51af2d..965214677 100644 --- a/cognee/tasks/graph/extract_graph_from_data.py +++ b/cognee/tasks/graph/extract_graph_from_data.py @@ -99,6 +99,7 @@ async def extract_graph_from_data( graph_model: Type[BaseModel], config: Config = None, custom_prompt: Optional[str] = None, + **kwargs, ) -> List[DocumentChunk]: """ Extracts and integrates a knowledge graph from the text content of document chunks using a specified graph model. 
@@ -113,7 +114,7 @@ async def extract_graph_from_data( chunk_graphs = await asyncio.gather( *[ - extract_content_graph(chunk.text, graph_model, custom_prompt=custom_prompt) + extract_content_graph(chunk.text, graph_model, custom_prompt=custom_prompt, **kwargs) for chunk in data_chunks ] ) From af8c5bedcc48e18c3723a2fbfa8afba3de242cbb Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Thu, 11 Dec 2025 17:47:23 +0100 Subject: [PATCH 3/9] feat: add kwargs to other adapters --- .../litellm_instructor/llm/anthropic/adapter.py | 2 +- .../litellm_instructor/llm/gemini/adapter.py | 2 +- .../litellm_instructor/llm/generic_llm_api/adapter.py | 2 +- .../litellm_instructor/llm/mistral/adapter.py | 2 +- .../litellm_instructor/llm/ollama/adapter.py | 6 +++--- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py index dbf0dfbea..46e2b2736 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py @@ -51,7 +51,7 @@ class AnthropicAdapter(LLMInterface): reraise=True, ) async def acreate_structured_output( - self, text_input: str, system_prompt: str, response_model: Type[BaseModel] + self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs ) -> BaseModel: """ Generate a response from a user query. 
diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py index 226f291d7..66d53b842 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py @@ -79,7 +79,7 @@ class GeminiAdapter(LLMInterface): reraise=True, ) async def acreate_structured_output( - self, text_input: str, system_prompt: str, response_model: Type[BaseModel] + self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs ) -> BaseModel: """ Generate a response from a user query. diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py index 9d7f25fc5..3049b3c4f 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py @@ -79,7 +79,7 @@ class GenericAPIAdapter(LLMInterface): reraise=True, ) async def acreate_structured_output( - self, text_input: str, system_prompt: str, response_model: Type[BaseModel] + self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs ) -> BaseModel: """ Generate a response from a user query. 
diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py index 355cdae0b..146d0a07a 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py @@ -68,7 +68,7 @@ class MistralAdapter(LLMInterface): reraise=True, ) async def acreate_structured_output( - self, text_input: str, system_prompt: str, response_model: Type[BaseModel] + self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs ) -> BaseModel: """ Generate a response from the user query. diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py index aabd19867..5ae09a4ac 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py @@ -74,7 +74,7 @@ class OllamaAPIAdapter(LLMInterface): reraise=True, ) async def acreate_structured_output( - self, text_input: str, system_prompt: str, response_model: Type[BaseModel] + self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs ) -> BaseModel: """ Generate a structured output from the LLM using the provided text and system prompt. @@ -121,7 +121,7 @@ class OllamaAPIAdapter(LLMInterface): before_sleep=before_sleep_log(logger, logging.DEBUG), reraise=True, ) - async def create_transcript(self, input_file: str) -> str: + async def create_transcript(self, input_file: str, **kwargs) -> str: """ Generate an audio transcript from a user query. 
@@ -160,7 +160,7 @@ class OllamaAPIAdapter(LLMInterface): before_sleep=before_sleep_log(logger, logging.DEBUG), reraise=True, ) - async def transcribe_image(self, input_file: str) -> str: + async def transcribe_image(self, input_file: str, **kwargs) -> str: """ Transcribe content from an image using base64 encoding. From 14ff94f269599140df6e830761ef3b6f2c99eb28 Mon Sep 17 00:00:00 2001 From: Pavel Zorin Date: Thu, 11 Dec 2025 12:38:19 +0100 Subject: [PATCH 4/9] Initial release pipeline --- .github/workflows/release.yml | 154 ++++++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 000000000..a19635628 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,154 @@ +name: release.yml +on: + workflow_dispatch: + inputs: + flavour: + required: true + default: dev + type: choice + options: + - dev + - main + description: Dev or Main release + test_mode: + required: true + type: boolean + description: Aka Dry Run. 
If true, it won't affect public indices or repositories + +jobs: + release-github: + name: Create GitHub Release from ${{ inputs.flavour }} + outputs: + tag: ${{ steps.create_tag.outputs.tag }} + version: ${{ steps.create_tag.outputs.version }} + permissions: + contents: write + runs-on: ubuntu-latest + + steps: + - name: Check out ${{ inputs.flavour }} + uses: actions/checkout@v4 + with: + ref: ${{ inputs.flavour }} + - name: Install uv + uses: astral-sh/setup-uv@v7 + + - name: Create and push git tag + id: create_tag + env: + TEST_MODE: ${{ inputs.test_mode }} + run: | + VERSION="$(uv version --short)" + TAG="v${VERSION}" + + echo "Tag to create: ${TAG}" + + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + + echo "tag=${TAG}" >> "$GITHUB_OUTPUT" + echo "version=${VERSION}" >> "$GITHUB_OUTPUT" + + if [ "$TEST_MODE" = "false" ]; then + git tag "${TAG}" + git push origin "${TAG}" + else + echo "Test mode is enabled. Skipping tag creation and push." 
+ fi + + - name: Create GitHub Release + uses: softprops/action-gh-release@v2 + with: + tag_name: ${{ steps.create_tag.outputs.tag }} + generate_release_notes: true + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + release-pypi-package: + needs: release-github + name: Release PyPI Package from ${{ inputs.flavour }} + permissions: + contents: read + runs-on: ubuntu-latest + + steps: + - name: Check out ${{ inputs.flavour }} + uses: actions/checkout@v4 + with: + ref: ${{ inputs.flavour }} + + - name: Install uv + uses: astral-sh/setup-uv@v7 + + - name: Install Python + run: uv python install + + - name: Install dependencies + run: uv sync --locked --all-extras + + - name: Build distributions + run: uv build + + - name: Publish ${{ inputs.flavour }} release to TestPyPI + if: ${{ !inputs.test_mode }} + env: + UV_PUBLISH_TOKEN: ${{ secrets.TEST_PYPI_TOKEN }} + run: uv publish --publish-url https://test.pypi.org/legacy/ + + - name: Publish ${{ inputs.flavour }} release to PyPI + if: ${{ !inputs.test_mode }} + env: + UV_PUBLISH_TOKEN: ${{ secrets.PYPI_TOKEN }} + run: uv publish + + release-docker-image: + needs: release-github + name: Release Docker Image from ${{ inputs.flavour }} + permissions: + contents: read + runs-on: ubuntu-latest + + steps: + - name: Check out ${{ inputs.flavour }} + uses: actions/checkout@v4 + with: + ref: ${{ inputs.flavour }} + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Build and push Dev Docker Image + if: ${{ inputs.flavour == 'dev' }} + uses: docker/build-push-action@v5 + with: + context: . 
+ platforms: linux/amd64,linux/arm64 + push: ${{ !inputs.test_mode }} + tags: cognee/cognee:${{ needs.release-github.outputs.version }} + labels: | + version=${{ needs.release-github.outputs.version }} + flavour=${{ inputs.flavour }} + cache-from: type=registry,ref=cognee/cognee:buildcache + cache-to: type=registry,ref=cognee/cognee:buildcache,mode=max + + - name: Build and push Main Docker Image + if: ${{ inputs.flavour == 'main' }} + uses: docker/build-push-action@v5 + with: + context: . + platforms: linux/amd64,linux/arm64 + push: ${{ !inputs.test_mode }} + tags: | + cognee/cognee:${{ needs.release-github.outputs.version }} + cognee/cognee:latest + labels: | + version=${{ needs.release-github.outputs.version }} + flavour=${{ inputs.flavour }} + cache-from: type=registry,ref=cognee/cognee:buildcache + cache-to: type=registry,ref=cognee/cognee:buildcache,mode=max From a6bc27afaaeb901e5e771a84ca5e9ba2af473aba Mon Sep 17 00:00:00 2001 From: Pavel Zorin Date: Fri, 12 Dec 2025 17:31:54 +0100 Subject: [PATCH 5/9] Cleanup --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index a19635628..ff2f809f3 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -90,7 +90,7 @@ jobs: run: uv build - name: Publish ${{ inputs.flavour }} release to TestPyPI - if: ${{ !inputs.test_mode }} + if: ${{ inputs.test_mode }} env: UV_PUBLISH_TOKEN: ${{ secrets.TEST_PYPI_TOKEN }} run: uv publish --publish-url https://test.pypi.org/legacy/ From 14d9540d1b9d1aa3504baad0a026d7f92556c2e4 Mon Sep 17 00:00:00 2001 From: Igor Ilic <30923996+dexters1@users.noreply.github.com> Date: Mon, 15 Dec 2025 18:15:48 +0100 Subject: [PATCH 6/9] feat: Add database deletion on dataset delete (#1893) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Description - Add support for database deletion when dataset is deleted - Simplify 
dataset handler usage in Cognee ## Type of Change - [x] Bug fix (non-breaking change that fixes an issue) - [ ] New feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [ ] Code refactoring - [ ] Performance improvement - [ ] Other (please specify): ## Screenshots/Videos (if applicable) ## Pre-submission Checklist - [ ] **I have tested my changes thoroughly before submitting this PR** - [ ] **This PR contains minimal changes necessary to address the issue/feature** - [ ] My code follows the project's coding standards and style guidelines - [ ] I have added tests that prove my fix is effective or that my feature works - [ ] I have added necessary documentation (if applicable) - [ ] All new and existing tests pass - [ ] I have searched existing PRs to ensure this change hasn't been submitted already - [ ] I have linked any relevant issues in the description - [ ] My commits have clear and descriptive messages ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. ## Summary by CodeRabbit * **Bug Fixes** * Improved dataset deletion: stronger authorization checks and reliable removal of associated graph and vector storage. * **Tests** * Added end-to-end test to verify complete dataset deletion and cleanup of all related storage components. ✏️ Tip: You can customize this high-level summary in your review settings. 
--- .github/workflows/e2e_tests.yml | 25 ++++++ cognee/api/v1/cognify/cognify.py | 2 +- .../datasets/routers/get_datasets_router.py | 6 +- .../databases/utils/__init__.py | 2 + .../get_graph_dataset_database_handler.py | 10 +++ .../get_vector_dataset_database_handler.py | 10 +++ ...esolve_dataset_database_connection_info.py | 34 ++++----- cognee/infrastructure/llm/LLMGateway.py | 5 +- cognee/modules/data/deletion/prune_system.py | 38 +++------- cognee/modules/data/methods/delete_dataset.py | 26 +++++++ cognee/tests/test_dataset_delete.py | 76 +++++++++++++++++++ 11 files changed, 183 insertions(+), 51 deletions(-) create mode 100644 cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py create mode 100644 cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py create mode 100644 cognee/tests/test_dataset_delete.py diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml index cb69e9ef6..8cd62910c 100644 --- a/.github/workflows/e2e_tests.yml +++ b/.github/workflows/e2e_tests.yml @@ -237,6 +237,31 @@ jobs: EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} run: uv run python ./cognee/tests/test_dataset_database_handler.py + test-dataset-database-deletion: + name: Test dataset database deletion in Cognee + runs-on: ubuntu-22.04 + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Cognee Setup + uses: ./.github/actions/cognee_setup + with: + python-version: '3.11.x' + + - name: Run dataset databases deletion test + env: + ENV: 'dev' + LLM_MODEL: ${{ secrets.LLM_MODEL }} + LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} + LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }} + EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }} + EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} + EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} + EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} + run: uv run python 
./cognee/tests/test_dataset_delete.py + test-permissions: name: Test permissions with different situations in Cognee runs-on: ubuntu-22.04 diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index 9371f7ffd..ffc903d68 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -53,7 +53,7 @@ async def cognify( custom_prompt: Optional[str] = None, temporal_cognify: bool = False, data_per_batch: int = 20, - **kwargs + **kwargs, ): """ Transform ingested data into a structured knowledge graph. diff --git a/cognee/api/v1/datasets/routers/get_datasets_router.py b/cognee/api/v1/datasets/routers/get_datasets_router.py index eff87b3af..ca738dfbe 100644 --- a/cognee/api/v1/datasets/routers/get_datasets_router.py +++ b/cognee/api/v1/datasets/routers/get_datasets_router.py @@ -208,14 +208,14 @@ def get_datasets_router() -> APIRouter: }, ) - from cognee.modules.data.methods import get_dataset, delete_dataset + from cognee.modules.data.methods import delete_dataset - dataset = await get_dataset(user.id, dataset_id) + dataset = await get_authorized_existing_datasets([dataset_id], "delete", user) if dataset is None: raise DatasetNotFoundError(message=f"Dataset ({str(dataset_id)}) not found.") - await delete_dataset(dataset) + await delete_dataset(dataset[0]) @router.delete( "/{dataset_id}/data/{data_id}", diff --git a/cognee/infrastructure/databases/utils/__init__.py b/cognee/infrastructure/databases/utils/__init__.py index f31d1e0dc..3907b4325 100644 --- a/cognee/infrastructure/databases/utils/__init__.py +++ b/cognee/infrastructure/databases/utils/__init__.py @@ -1,2 +1,4 @@ from .get_or_create_dataset_database import get_or_create_dataset_database from .resolve_dataset_database_connection_info import resolve_dataset_database_connection_info +from .get_graph_dataset_database_handler import get_graph_dataset_database_handler +from .get_vector_dataset_database_handler import get_vector_dataset_database_handler diff --git 
a/cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py b/cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py new file mode 100644 index 000000000..d88685b48 --- /dev/null +++ b/cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py @@ -0,0 +1,10 @@ +from cognee.modules.users.models.DatasetDatabase import DatasetDatabase + + +def get_graph_dataset_database_handler(dataset_database: DatasetDatabase) -> dict: + from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import ( + supported_dataset_database_handlers, + ) + + handler = supported_dataset_database_handlers[dataset_database.graph_dataset_database_handler] + return handler diff --git a/cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py b/cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py new file mode 100644 index 000000000..5d1152c04 --- /dev/null +++ b/cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py @@ -0,0 +1,10 @@ +from cognee.modules.users.models.DatasetDatabase import DatasetDatabase + + +def get_vector_dataset_database_handler(dataset_database: DatasetDatabase) -> dict: + from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import ( + supported_dataset_database_handlers, + ) + + handler = supported_dataset_database_handlers[dataset_database.vector_dataset_database_handler] + return handler diff --git a/cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py b/cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py index d33169642..561268eaf 100644 --- a/cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +++ b/cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py @@ -1,24 +1,12 @@ +from cognee.infrastructure.databases.utils.get_graph_dataset_database_handler 
import ( + get_graph_dataset_database_handler, +) +from cognee.infrastructure.databases.utils.get_vector_dataset_database_handler import ( + get_vector_dataset_database_handler, +) from cognee.modules.users.models.DatasetDatabase import DatasetDatabase -async def _get_vector_db_connection_info(dataset_database: DatasetDatabase) -> DatasetDatabase: - from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import ( - supported_dataset_database_handlers, - ) - - handler = supported_dataset_database_handlers[dataset_database.vector_dataset_database_handler] - return await handler["handler_instance"].resolve_dataset_connection_info(dataset_database) - - -async def _get_graph_db_connection_info(dataset_database: DatasetDatabase) -> DatasetDatabase: - from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import ( - supported_dataset_database_handlers, - ) - - handler = supported_dataset_database_handlers[dataset_database.graph_dataset_database_handler] - return await handler["handler_instance"].resolve_dataset_connection_info(dataset_database) - - async def resolve_dataset_database_connection_info( dataset_database: DatasetDatabase, ) -> DatasetDatabase: @@ -31,6 +19,12 @@ async def resolve_dataset_database_connection_info( Returns: DatasetDatabase instance with resolved connection info """ - dataset_database = await _get_vector_db_connection_info(dataset_database) - dataset_database = await _get_graph_db_connection_info(dataset_database) + vector_dataset_database_handler = get_vector_dataset_database_handler(dataset_database) + graph_dataset_database_handler = get_graph_dataset_database_handler(dataset_database) + dataset_database = await vector_dataset_database_handler[ + "handler_instance" + ].resolve_dataset_connection_info(dataset_database) + dataset_database = await graph_dataset_database_handler[ + "handler_instance" + ].resolve_dataset_connection_info(dataset_database) return 
dataset_database diff --git a/cognee/infrastructure/llm/LLMGateway.py b/cognee/infrastructure/llm/LLMGateway.py index fd42eb55e..7bec9ca01 100644 --- a/cognee/infrastructure/llm/LLMGateway.py +++ b/cognee/infrastructure/llm/LLMGateway.py @@ -31,7 +31,10 @@ class LLMGateway: llm_client = get_llm_client() return llm_client.acreate_structured_output( - text_input=text_input, system_prompt=system_prompt, response_model=response_model, **kwargs + text_input=text_input, + system_prompt=system_prompt, + response_model=response_model, + **kwargs, ) @staticmethod diff --git a/cognee/modules/data/deletion/prune_system.py b/cognee/modules/data/deletion/prune_system.py index 645e1a223..22a0fde5f 100644 --- a/cognee/modules/data/deletion/prune_system.py +++ b/cognee/modules/data/deletion/prune_system.py @@ -5,6 +5,10 @@ from cognee.context_global_variables import backend_access_control_enabled from cognee.infrastructure.databases.vector import get_vector_engine from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_engine from cognee.infrastructure.databases.relational import get_relational_engine +from cognee.infrastructure.databases.utils import ( + get_graph_dataset_database_handler, + get_vector_dataset_database_handler, +) from cognee.shared.cache import delete_cache from cognee.modules.users.models import DatasetDatabase from cognee.shared.logging_utils import get_logger @@ -13,22 +17,13 @@ logger = get_logger() async def prune_graph_databases(): - async def _prune_graph_db(dataset_database: DatasetDatabase) -> dict: - from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import ( - supported_dataset_database_handlers, - ) - - handler = supported_dataset_database_handlers[ - dataset_database.graph_dataset_database_handler - ] - return await handler["handler_instance"].delete_dataset(dataset_database) - db_engine = get_relational_engine() try: - data = await 
db_engine.get_all_data_from_table("dataset_database") + dataset_databases = await db_engine.get_all_data_from_table("dataset_database") # Go through each dataset database and delete the graph database - for data_item in data: - await _prune_graph_db(data_item) + for dataset_database in dataset_databases: + handler = get_graph_dataset_database_handler(dataset_database) + await handler["handler_instance"].delete_dataset(dataset_database) except (OperationalError, EntityNotFoundError) as e: logger.debug( "Skipping pruning of graph DB. Error when accessing dataset_database table: %s", @@ -38,22 +33,13 @@ async def prune_graph_databases(): async def prune_vector_databases(): - async def _prune_vector_db(dataset_database: DatasetDatabase) -> dict: - from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import ( - supported_dataset_database_handlers, - ) - - handler = supported_dataset_database_handlers[ - dataset_database.vector_dataset_database_handler - ] - return await handler["handler_instance"].delete_dataset(dataset_database) - db_engine = get_relational_engine() try: - data = await db_engine.get_all_data_from_table("dataset_database") + dataset_databases = await db_engine.get_all_data_from_table("dataset_database") # Go through each dataset database and delete the vector database - for data_item in data: - await _prune_vector_db(data_item) + for dataset_database in dataset_databases: + handler = get_vector_dataset_database_handler(dataset_database) + await handler["handler_instance"].delete_dataset(dataset_database) except (OperationalError, EntityNotFoundError) as e: logger.debug( "Skipping pruning of vector DB. 
Error when accessing dataset_database table: %s", diff --git a/cognee/modules/data/methods/delete_dataset.py b/cognee/modules/data/methods/delete_dataset.py index ff20ff9e7..dea10e741 100644 --- a/cognee/modules/data/methods/delete_dataset.py +++ b/cognee/modules/data/methods/delete_dataset.py @@ -1,8 +1,34 @@ +from cognee.modules.users.models import DatasetDatabase +from sqlalchemy import select + from cognee.modules.data.models import Dataset +from cognee.infrastructure.databases.utils.get_vector_dataset_database_handler import ( + get_vector_dataset_database_handler, +) +from cognee.infrastructure.databases.utils.get_graph_dataset_database_handler import ( + get_graph_dataset_database_handler, +) from cognee.infrastructure.databases.relational import get_relational_engine async def delete_dataset(dataset: Dataset): db_engine = get_relational_engine() + async with db_engine.get_async_session() as session: + stmt = select(DatasetDatabase).where( + DatasetDatabase.dataset_id == dataset.id, + ) + dataset_database: DatasetDatabase = await session.scalar(stmt) + if dataset_database: + graph_dataset_database_handler = get_graph_dataset_database_handler(dataset_database) + vector_dataset_database_handler = get_vector_dataset_database_handler(dataset_database) + await graph_dataset_database_handler["handler_instance"].delete_dataset( + dataset_database + ) + await vector_dataset_database_handler["handler_instance"].delete_dataset( + dataset_database + ) + # TODO: Remove dataset from pipeline_run_status in Data objects related to dataset as well + # This blocks recreation of the dataset with the same name and data after deletion as + # it's marked as completed and will be just skipped even though it's empty. 
return await db_engine.delete_entity_by_id(dataset.__tablename__, dataset.id) diff --git a/cognee/tests/test_dataset_delete.py b/cognee/tests/test_dataset_delete.py new file mode 100644 index 000000000..372945bdb --- /dev/null +++ b/cognee/tests/test_dataset_delete.py @@ -0,0 +1,76 @@ +import os +import asyncio +import pathlib +from uuid import UUID + +import cognee +from cognee.shared.logging_utils import setup_logging, ERROR +from cognee.modules.data.methods.delete_dataset import delete_dataset +from cognee.modules.data.methods.get_dataset import get_dataset +from cognee.modules.users.methods import get_default_user + + +async def main(): + # Set data and system directory paths + data_directory_path = str( + pathlib.Path( + os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_dataset_delete") + ).resolve() + ) + cognee.config.data_root_directory(data_directory_path) + cognee_directory_path = str( + pathlib.Path( + os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_dataset_delete") + ).resolve() + ) + cognee.config.system_root_directory(cognee_directory_path) + + # Create a clean slate for cognee -- reset data and system state + print("Resetting cognee data...") + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + print("Data reset complete.\n") + + # cognee knowledge graph will be created based on this text + text = """ + Natural language processing (NLP) is an interdisciplinary + subfield of computer science and information retrieval. 
+ """ + + # Add the text, and make it available for cognify + await cognee.add(text, "nlp_dataset") + await cognee.add("Quantum computing is the study of quantum computers.", "quantum_dataset") + + # Use LLMs and cognee to create knowledge graph + ret_val = await cognee.cognify() + user = await get_default_user() + + for val in ret_val: + dataset_id = str(val) + vector_db_path = os.path.join( + cognee_directory_path, "databases", str(user.id), dataset_id + ".lance.db" + ) + graph_db_path = os.path.join( + cognee_directory_path, "databases", str(user.id), dataset_id + ".pkl" + ) + + # Check if databases are properly created and exist before deletion + assert os.path.exists(graph_db_path), "Graph database file not found." + assert os.path.exists(vector_db_path), "Vector database file not found." + + dataset = await get_dataset(user_id=user.id, dataset_id=UUID(dataset_id)) + await delete_dataset(dataset) + + # Confirm databases have been deleted + assert not os.path.exists(graph_db_path), "Graph database file found." + assert not os.path.exists(vector_db_path), "Vector database file found." + + +if __name__ == "__main__": + logger = setup_logging(log_level=ERROR) + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + loop.run_until_complete(main()) + finally: + loop.run_until_complete(loop.shutdown_asyncgens()) From 622f8fa79e459d4cec8000de0cbf704957405b05 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Mon, 15 Dec 2025 18:30:35 +0100 Subject: [PATCH 7/9] chore: introduces 1 file upload in ontology endpoint (#1899) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Description This PR fixes the ontology upload endpoint by allowing only one file upload at a time. Tests are adjusted in both the server start test and the ontology endpoint unit tests. The API was tested. Do not merge it together with https://github.com/topoteretes/cognee/pull/1898; it's either that one or this one.
## Type of Change - [x] Bug fix (non-breaking change that fixes an issue) - [ ] New feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [ ] Code refactoring - [ ] Performance improvement - [ ] Other (please specify): ## Screenshots/Videos (if applicable) ## Pre-submission Checklist - [x] **I have tested my changes thoroughly before submitting this PR** - [x] **This PR contains minimal changes necessary to address the issue/feature** - [x] My code follows the project's coding standards and style guidelines - [x] I have added tests that prove my fix is effective or that my feature works - [x] I have added necessary documentation (if applicable) - [x] All new and existing tests pass - [x] I have searched existing PRs to ensure this change hasn't been submitted already - [x] I have linked any relevant issues in the description - [x] My commits have clear and descriptive messages ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. ## Summary by CodeRabbit * **API Changes** * Ontology upload now accepts exactly one file per request; field renamed from "descriptions" to "description" and validated as a plain string. * Stricter form validation and tighter 400/500 error handling for malformed submissions. * **Tests** * Tests converted to real HTTP-style interactions using a shared test client and dependency overrides. * Payloads now use plain string fields; added coverage for single-file constraints and specific error responses. * **Style** * Minor formatting cleanups with no functional impact. ✏️ Tip: You can customize this high-level summary in your review settings. 
--- .../ontologies/routers/get_ontology_router.py | 48 ++--- cognee/tests/test_cognee_server_start.py | 4 +- .../tests/unit/api/test_ontology_endpoint.py | 166 ++++++++---------- 3 files changed, 100 insertions(+), 118 deletions(-) diff --git a/cognee/api/v1/ontologies/routers/get_ontology_router.py b/cognee/api/v1/ontologies/routers/get_ontology_router.py index ee31c683f..77667d88d 100644 --- a/cognee/api/v1/ontologies/routers/get_ontology_router.py +++ b/cognee/api/v1/ontologies/routers/get_ontology_router.py @@ -1,4 +1,4 @@ -from fastapi import APIRouter, File, Form, UploadFile, Depends, HTTPException +from fastapi import APIRouter, File, Form, UploadFile, Depends, Request from fastapi.responses import JSONResponse from typing import Optional, List @@ -15,28 +15,25 @@ def get_ontology_router() -> APIRouter: @router.post("", response_model=dict) async def upload_ontology( + request: Request, ontology_key: str = Form(...), - ontology_file: List[UploadFile] = File(...), - descriptions: Optional[str] = Form(None), + ontology_file: UploadFile = File(...), + description: Optional[str] = Form(None), user: User = Depends(get_authenticated_user), ): """ - Upload ontology files with their respective keys for later use in cognify operations. - - Supports both single and multiple file uploads: - - Single file: ontology_key=["key"], ontology_file=[file] - - Multiple files: ontology_key=["key1", "key2"], ontology_file=[file1, file2] + Upload a single ontology file for later use in cognify operations. ## Request Parameters - - **ontology_key** (str): JSON array string of user-defined identifiers for the ontologies - - **ontology_file** (List[UploadFile]): OWL format ontology files - - **descriptions** (Optional[str]): JSON array string of optional descriptions + - **ontology_key** (str): User-defined identifier for the ontology. + - **ontology_file** (UploadFile): Single OWL format ontology file + - **description** (Optional[str]): Optional description for the ontology. 
## Response - Returns metadata about uploaded ontologies including keys, filenames, sizes, and upload timestamps. + Returns metadata about the uploaded ontology including key, filename, size, and upload timestamp. ## Error Codes - - **400 Bad Request**: Invalid file format, duplicate keys, array length mismatches, file size exceeded + - **400 Bad Request**: Invalid file format, duplicate key, multiple files uploaded - **500 Internal Server Error**: File system or processing errors """ send_telemetry( @@ -49,16 +46,22 @@ def get_ontology_router() -> APIRouter: ) try: - import json + # Enforce: exactly one uploaded file for "ontology_file" + form = await request.form() + uploaded_files = form.getlist("ontology_file") + if len(uploaded_files) != 1: + raise ValueError("Only one ontology_file is allowed") - ontology_keys = json.loads(ontology_key) - description_list = json.loads(descriptions) if descriptions else None + if ontology_key.strip().startswith(("[", "{")): + raise ValueError("ontology_key must be a string") + if description is not None and description.strip().startswith(("[", "{")): + raise ValueError("description must be a string") - if not isinstance(ontology_keys, list): - raise ValueError("ontology_key must be a JSON array") - - results = await ontology_service.upload_ontologies( - ontology_keys, ontology_file, user, description_list + result = await ontology_service.upload_ontology( + ontology_key=ontology_key, + file=ontology_file, + user=user, + description=description, ) return { @@ -70,10 +73,9 @@ def get_ontology_router() -> APIRouter: "uploaded_at": result.uploaded_at, "description": result.description, } - for result in results ] } - except (json.JSONDecodeError, ValueError) as e: + except ValueError as e: return JSONResponse(status_code=400, content={"error": str(e)}) except Exception as e: return JSONResponse(status_code=500, content={"error": str(e)}) diff --git a/cognee/tests/test_cognee_server_start.py 
b/cognee/tests/test_cognee_server_start.py index fece88240..a626088a3 100644 --- a/cognee/tests/test_cognee_server_start.py +++ b/cognee/tests/test_cognee_server_start.py @@ -148,8 +148,8 @@ class TestCogneeServerStart(unittest.TestCase): headers=headers, files=[("ontology_file", ("test.owl", ontology_content, "application/xml"))], data={ - "ontology_key": json.dumps([ontology_key]), - "description": json.dumps(["Test ontology"]), + "ontology_key": ontology_key, + "description": "Test ontology", }, ) self.assertEqual(ontology_response.status_code, 200) diff --git a/cognee/tests/unit/api/test_ontology_endpoint.py b/cognee/tests/unit/api/test_ontology_endpoint.py index af3a4d90e..e072ceda8 100644 --- a/cognee/tests/unit/api/test_ontology_endpoint.py +++ b/cognee/tests/unit/api/test_ontology_endpoint.py @@ -1,17 +1,28 @@ import pytest import uuid from fastapi.testclient import TestClient -from unittest.mock import patch, Mock, AsyncMock +from unittest.mock import Mock from types import SimpleNamespace -import importlib from cognee.api.client import app +from cognee.modules.users.methods import get_authenticated_user -gau_mod = importlib.import_module("cognee.modules.users.methods.get_authenticated_user") + +@pytest.fixture(scope="session") +def test_client(): + # Keep a single TestClient (and event loop) for the whole module. + # Re-creating TestClient repeatedly can break async DB connections (asyncpg loop mismatch). 
+ with TestClient(app) as c: + yield c @pytest.fixture -def client(): - return TestClient(app) +def client(test_client, mock_default_user): + async def override_get_authenticated_user(): + return mock_default_user + + app.dependency_overrides[get_authenticated_user] = override_get_authenticated_user + yield test_client + app.dependency_overrides.pop(get_authenticated_user, None) @pytest.fixture @@ -32,12 +43,8 @@ def mock_default_user(): ) -@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) -def test_upload_ontology_success(mock_get_default_user, client, mock_default_user): +def test_upload_ontology_success(client): """Test successful ontology upload""" - import json - - mock_get_default_user.return_value = mock_default_user ontology_content = ( b"" ) @@ -46,7 +53,7 @@ def test_upload_ontology_success(mock_get_default_user, client, mock_default_use response = client.post( "/api/v1/ontologies", files=[("ontology_file", ("test.owl", ontology_content, "application/xml"))], - data={"ontology_key": json.dumps([unique_key]), "description": json.dumps(["Test"])}, + data={"ontology_key": unique_key, "description": "Test"}, ) assert response.status_code == 200 @@ -55,10 +62,8 @@ def test_upload_ontology_success(mock_get_default_user, client, mock_default_use assert "uploaded_at" in data["uploaded_ontologies"][0] -@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) -def test_upload_ontology_invalid_file(mock_get_default_user, client, mock_default_user): +def test_upload_ontology_invalid_file(client): """Test 400 response for non-.owl files""" - mock_get_default_user.return_value = mock_default_user unique_key = f"test_ontology_{uuid.uuid4().hex[:8]}" response = client.post( "/api/v1/ontologies", @@ -68,14 +73,10 @@ def test_upload_ontology_invalid_file(mock_get_default_user, client, mock_defaul assert response.status_code == 400 -@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) -def 
test_upload_ontology_missing_data(mock_get_default_user, client, mock_default_user): +def test_upload_ontology_missing_data(client): """Test 400 response for missing file or key""" - import json - - mock_get_default_user.return_value = mock_default_user # Missing file - response = client.post("/api/v1/ontologies", data={"ontology_key": json.dumps(["test"])}) + response = client.post("/api/v1/ontologies", data={"ontology_key": "test"}) assert response.status_code == 400 # Missing key @@ -85,34 +86,25 @@ def test_upload_ontology_missing_data(mock_get_default_user, client, mock_defaul assert response.status_code == 400 -@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) -def test_upload_ontology_unauthorized(mock_get_default_user, client, mock_default_user): - """Test behavior when default user is provided (no explicit authentication)""" - import json - +def test_upload_ontology_without_auth_header(client): + """Test behavior when no explicit authentication header is provided.""" unique_key = f"test_ontology_{uuid.uuid4().hex[:8]}" - mock_get_default_user.return_value = mock_default_user response = client.post( "/api/v1/ontologies", files=[("ontology_file", ("test.owl", b"", "application/xml"))], - data={"ontology_key": json.dumps([unique_key])}, + data={"ontology_key": unique_key}, ) - # The current system provides a default user when no explicit authentication is given - # This test verifies the system works with conditional authentication assert response.status_code == 200 data = response.json() assert data["uploaded_ontologies"][0]["ontology_key"] == unique_key assert "uploaded_at" in data["uploaded_ontologies"][0] -@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) -def test_upload_multiple_ontologies(mock_get_default_user, client, mock_default_user): - """Test uploading multiple ontology files in single request""" +def test_upload_multiple_ontologies_in_single_request_is_rejected(client): + """Uploading multiple ontology files in 
a single request should fail.""" import io - mock_get_default_user.return_value = mock_default_user - # Create mock files file1_content = b"" file2_content = b"" @@ -120,45 +112,34 @@ def test_upload_multiple_ontologies(mock_get_default_user, client, mock_default_ ("ontology_file", ("vehicles.owl", io.BytesIO(file1_content), "application/xml")), ("ontology_file", ("manufacturers.owl", io.BytesIO(file2_content), "application/xml")), ] - data = { - "ontology_key": '["vehicles", "manufacturers"]', - "descriptions": '["Base vehicles", "Car manufacturers"]', - } + data = {"ontology_key": "vehicles", "description": "Base vehicles"} response = client.post("/api/v1/ontologies", files=files, data=data) - assert response.status_code == 200 - result = response.json() - assert "uploaded_ontologies" in result - assert len(result["uploaded_ontologies"]) == 2 - assert result["uploaded_ontologies"][0]["ontology_key"] == "vehicles" - assert result["uploaded_ontologies"][1]["ontology_key"] == "manufacturers" + assert response.status_code == 400 + assert "Only one ontology_file is allowed" in response.json()["error"] -@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) -def test_upload_endpoint_accepts_arrays(mock_get_default_user, client, mock_default_user): - """Test that upload endpoint accepts array parameters""" +def test_upload_endpoint_rejects_array_style_fields(client): + """Array-style form values should be rejected (no backwards compatibility).""" import io import json - mock_get_default_user.return_value = mock_default_user file_content = b"" files = [("ontology_file", ("single.owl", io.BytesIO(file_content), "application/xml"))] data = { "ontology_key": json.dumps(["single_key"]), - "descriptions": json.dumps(["Single ontology"]), + "description": json.dumps(["Single ontology"]), } response = client.post("/api/v1/ontologies", files=files, data=data) - assert response.status_code == 200 - result = response.json() - assert 
result["uploaded_ontologies"][0]["ontology_key"] == "single_key" + assert response.status_code == 400 + assert "ontology_key must be a string" in response.json()["error"] -@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) -def test_cognify_with_multiple_ontologies(mock_get_default_user, client, mock_default_user): +def test_cognify_with_multiple_ontologies(client): """Test cognify endpoint accepts multiple ontology keys""" payload = { "datasets": ["test_dataset"], @@ -172,14 +153,11 @@ def test_cognify_with_multiple_ontologies(mock_get_default_user, client, mock_de assert response.status_code in [200, 400, 409] # May fail for other reasons, not type -@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) -def test_complete_multifile_workflow(mock_get_default_user, client, mock_default_user): - """Test complete workflow: upload multiple ontologies → cognify with multiple keys""" +def test_complete_multifile_workflow(client): + """Test workflow: upload ontologies one-by-one → cognify with multiple keys""" import io - import json - mock_get_default_user.return_value = mock_default_user - # Step 1: Upload multiple ontologies + # Step 1: Upload two ontologies (one-by-one) file1_content = b""" @@ -192,17 +170,21 @@ def test_complete_multifile_workflow(mock_get_default_user, client, mock_default """ - files = [ - ("ontology_file", ("vehicles.owl", io.BytesIO(file1_content), "application/xml")), - ("ontology_file", ("manufacturers.owl", io.BytesIO(file2_content), "application/xml")), - ] - data = { - "ontology_key": json.dumps(["vehicles", "manufacturers"]), - "descriptions": json.dumps(["Vehicle ontology", "Manufacturer ontology"]), - } + upload_response_1 = client.post( + "/api/v1/ontologies", + files=[("ontology_file", ("vehicles.owl", io.BytesIO(file1_content), "application/xml"))], + data={"ontology_key": "vehicles", "description": "Vehicle ontology"}, + ) + assert upload_response_1.status_code == 200 - upload_response = 
client.post("/api/v1/ontologies", files=files, data=data) - assert upload_response.status_code == 200 + upload_response_2 = client.post( + "/api/v1/ontologies", + files=[ + ("ontology_file", ("manufacturers.owl", io.BytesIO(file2_content), "application/xml")) + ], + data={"ontology_key": "manufacturers", "description": "Manufacturer ontology"}, + ) + assert upload_response_2.status_code == 200 # Step 2: Verify ontologies are listed list_response = client.get("/api/v1/ontologies") @@ -223,44 +205,42 @@ def test_complete_multifile_workflow(mock_get_default_user, client, mock_default assert cognify_response.status_code != 400 # Not a validation error -@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) -def test_multifile_error_handling(mock_get_default_user, client, mock_default_user): - """Test error handling for invalid multifile uploads""" +def test_upload_error_handling(client): + """Test error handling for invalid uploads (single-file endpoint).""" import io import json - # Test mismatched array lengths + # Array-style key should be rejected file_content = b"" files = [("ontology_file", ("test.owl", io.BytesIO(file_content), "application/xml"))] data = { - "ontology_key": json.dumps(["key1", "key2"]), # 2 keys, 1 file - "descriptions": json.dumps(["desc1"]), + "ontology_key": json.dumps(["key1", "key2"]), + "description": "desc1", } response = client.post("/api/v1/ontologies", files=files, data=data) assert response.status_code == 400 - assert "Number of keys must match number of files" in response.json()["error"] + assert "ontology_key must be a string" in response.json()["error"] - # Test duplicate keys - files = [ - ("ontology_file", ("test1.owl", io.BytesIO(file_content), "application/xml")), - ("ontology_file", ("test2.owl", io.BytesIO(file_content), "application/xml")), - ] - data = { - "ontology_key": json.dumps(["duplicate", "duplicate"]), - "descriptions": json.dumps(["desc1", "desc2"]), - } + # Duplicate key should be rejected + 
response_1 = client.post( + "/api/v1/ontologies", + files=[("ontology_file", ("test1.owl", io.BytesIO(file_content), "application/xml"))], + data={"ontology_key": "duplicate", "description": "desc1"}, + ) + assert response_1.status_code == 200 - response = client.post("/api/v1/ontologies", files=files, data=data) - assert response.status_code == 400 - assert "Duplicate ontology keys not allowed" in response.json()["error"] + response_2 = client.post( + "/api/v1/ontologies", + files=[("ontology_file", ("test2.owl", io.BytesIO(file_content), "application/xml"))], + data={"ontology_key": "duplicate", "description": "desc2"}, + ) + assert response_2.status_code == 400 + assert "already exists" in response_2.json()["error"] -@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) -def test_cognify_missing_ontology_key(mock_get_default_user, client, mock_default_user): +def test_cognify_missing_ontology_key(client): """Test cognify with non-existent ontology key""" - mock_get_default_user.return_value = mock_default_user - payload = { "datasets": ["test_dataset"], "ontology_key": ["nonexistent_key"], From 67af8a7cb46f65c0075b0af5ea35f0607f026b9d Mon Sep 17 00:00:00 2001 From: Pavel Zorin Date: Mon, 15 Dec 2025 18:36:15 +0100 Subject: [PATCH 8/9] Bump version from 0.5.0.dev0 to 0.5.0.dev1 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8e4ed8a0d..cf2081d0a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "cognee" -version = "0.5.0.dev0" +version = "0.5.0.dev1" description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning." 
authors = [ { name = "Vasilije Markovic" }, From 78028b819f0b9293ec60b5894c8e7155284c5fcd Mon Sep 17 00:00:00 2001 From: Pavel Zorin Date: Mon, 15 Dec 2025 18:42:02 +0100 Subject: [PATCH 9/9] update dev uv.lock --- uv.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/uv.lock b/uv.lock index fccab8c40..884fb63be 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.10, <3.14" resolution-markers = [ "python_full_version >= '3.13' and platform_python_implementation != 'PyPy' and sys_platform == 'darwin'", @@ -946,7 +946,7 @@ wheels = [ [[package]] name = "cognee" -version = "0.5.0.dev0" +version = "0.5.0.dev1" source = { editable = "." } dependencies = [ { name = "aiofiles" },