From 9ce27f2d0120de05ea59fd6ffa8a2f7d6e22f862 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 16 Sep 2025 14:45:29 +0200 Subject: [PATCH 1/6] fix: Resolve issue with wrong error message serialization for cognify pipeline in backend --- cognee/api/v1/cognify/routers/get_cognify_router.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cognee/api/v1/cognify/routers/get_cognify_router.py b/cognee/api/v1/cognify/routers/get_cognify_router.py index d40345f8e..9e4bdbbfd 100644 --- a/cognee/api/v1/cognify/routers/get_cognify_router.py +++ b/cognee/api/v1/cognify/routers/get_cognify_router.py @@ -3,6 +3,7 @@ import asyncio from uuid import UUID from pydantic import Field from typing import List, Optional +from fastapi.encoders import jsonable_encoder from fastapi.responses import JSONResponse from fastapi import APIRouter, WebSocket, Depends, WebSocketDisconnect from starlette.status import WS_1000_NORMAL_CLOSURE, WS_1008_POLICY_VIOLATION @@ -119,7 +120,7 @@ def get_cognify_router() -> APIRouter: # If any cognify run errored return JSONResponse with proper error status code if any(isinstance(v, PipelineRunErrored) for v in cognify_run.values()): - return JSONResponse(status_code=420, content=cognify_run) + return JSONResponse(status_code=420, content=jsonable_encoder(cognify_run)) return cognify_run except Exception as error: return JSONResponse(status_code=409, content={"error": str(error)}) From e849e125676e5ff8db5a52656ffede153e5ab41d Mon Sep 17 00:00:00 2001 From: Igor Ilic <30923996+dexters1@users.noreply.github.com> Date: Wed, 17 Sep 2025 10:56:35 +0200 Subject: [PATCH 2/6] refactor: make cypher query optional (#1418) ## Description Make Cypher search optional ## Type of Change - [ ] Bug fix (non-breaking change that fixes an issue) - [ ] New feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [ ] Code refactoring - [ ] 
Performance improvement - [x] Other (please specify): Security upgrade ## Changes Made Made cypher search optional for security reasons ## Testing Tried running cypher queries with feature turned on and off ## Screenshots/Videos (if applicable) ## Pre-submission Checklist - [ ] **I have tested my changes thoroughly before submitting this PR** - [ ] **This PR contains minimal changes necessary to address the issue/feature** - [ ] My code follows the project's coding standards and style guidelines - [ ] I have added tests that prove my fix is effective or that my feature works - [ ] I have added necessary documentation (if applicable) - [ ] All new and existing tests pass - [ ] I have searched existing PRs to ensure this change hasn't been submitted already - [ ] I have linked any relevant issues in the description - [ ] My commits have clear and descriptive messages ## Related Issues ## Additional Notes ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. --- .env.template | 3 +++ cognee/modules/search/methods/get_search_type_tools.py | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/.env.template b/.env.template index e9e9fb571..916a1ef76 100644 --- a/.env.template +++ b/.env.template @@ -121,6 +121,9 @@ ACCEPT_LOCAL_FILE_PATH=True # This protects against Server Side Request Forgery when proper infrastructure is not in place. ALLOW_HTTP_REQUESTS=True +# When set to false don't allow cypher search to be used in Cognee. 
+ALLOW_CYPHER_QUERY=True + # When set to False errors during data processing will be returned as info but not raised to allow handling of faulty documents RAISE_INCREMENTAL_LOADING_ERRORS=True diff --git a/cognee/modules/search/methods/get_search_type_tools.py b/cognee/modules/search/methods/get_search_type_tools.py index e671a7db3..551f77a16 100644 --- a/cognee/modules/search/methods/get_search_type_tools.py +++ b/cognee/modules/search/methods/get_search_type_tools.py @@ -1,3 +1,4 @@ +import os from typing import Callable, List, Optional, Type from cognee.modules.engine.models.node_set import NodeSet @@ -160,6 +161,12 @@ async def get_search_type_tools( if query_type is SearchType.FEELING_LUCKY: query_type = await select_search_type(query_text) + if ( + query_type in [SearchType.CYPHER, SearchType.NATURAL_LANGUAGE] + and os.getenv("ALLOW_CYPHER_QUERY", "true").lower() == "false" + ): + raise UnsupportedSearchTypeError("Cypher query search types are disabled.") + search_type_tools = search_tasks.get(query_type) if not search_type_tools: From f016bc6d367ca5380c55d6a11eccf987e6cc514c Mon Sep 17 00:00:00 2001 From: Igor Ilic <30923996+dexters1@users.noreply.github.com> Date: Wed, 17 Sep 2025 10:59:24 +0200 Subject: [PATCH 3/6] fix: Resolve issue with server start test (#1408) ## Description ## Type of Change - [ ] Bug fix (non-breaking change that fixes an issue) - [ ] New feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [ ] Code refactoring - [ ] Performance improvement - [ ] Other (please specify): ## Changes Made - - - ## Testing ## Screenshots/Videos (if applicable) ## Pre-submission Checklist - [ ] **I have tested my changes thoroughly before submitting this PR** - [ ] **This PR contains minimal changes necessary to address the issue/feature** - [ ] My code follows the project's coding standards and style guidelines - [ ] I have added tests 
that prove my fix is effective or that my feature works - [ ] I have added necessary documentation (if applicable) - [ ] All new and existing tests pass - [ ] I have searched existing PRs to ensure this change hasn't been submitted already - [ ] I have linked any relevant issues in the description - [ ] My commits have clear and descriptive messages ## Related Issues ## Additional Notes ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. --- cognee/tests/test_cognee_server_start.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cognee/tests/test_cognee_server_start.py b/cognee/tests/test_cognee_server_start.py index 1681b7867..40ae96548 100644 --- a/cognee/tests/test_cognee_server_start.py +++ b/cognee/tests/test_cognee_server_start.py @@ -48,7 +48,7 @@ class TestCogneeServerStart(unittest.TestCase): """Test that the server is running and can accept connections.""" # Test health endpoint health_response = requests.get("http://localhost:8000/health", timeout=15) - self.assertIn(health_response.status_code, [200, 503]) + self.assertIn(health_response.status_code, [200]) # Test root endpoint root_response = requests.get("http://localhost:8000/", timeout=15) @@ -88,7 +88,7 @@ class TestCogneeServerStart(unittest.TestCase): payload = {"datasets": [dataset_name]} add_response = requests.post(url, headers=headers, data=form_data, files=file, timeout=50) - if add_response.status_code not in [200, 201, 409]: + if add_response.status_code not in [200, 201]: add_response.raise_for_status() # Cognify request @@ -99,7 +99,7 @@ class TestCogneeServerStart(unittest.TestCase): } cognify_response = requests.post(url, headers=headers, json=payload, timeout=150) - if cognify_response.status_code not in [200, 201, 409]: + if cognify_response.status_code not in [200, 201]: cognify_response.raise_for_status() # TODO: Add test to verify cognify pipeline is 
complete before testing search @@ -115,7 +115,7 @@ class TestCogneeServerStart(unittest.TestCase): payload = {"searchType": "GRAPH_COMPLETION", "query": "What's in the document?"} search_response = requests.post(url, headers=headers, json=payload, timeout=50) - if search_response.status_code not in [200, 201, 409]: + if search_response.status_code not in [200, 201]: search_response.raise_for_status() From f93b7686c3e59e0f6636d1dd8dda3325fa5e275a Mon Sep 17 00:00:00 2001 From: Igor Ilic <30923996+dexters1@users.noreply.github.com> Date: Wed, 17 Sep 2025 11:14:05 +0200 Subject: [PATCH 4/6] fix: Resolve issue with Data object serialization (#1407) ## Description Enable Data object serialization when returning PipelineRunInfo type objects in backend ## Type of Change - [ ] Bug fix (non-breaking change that fixes an issue) - [ ] New feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [ ] Code refactoring - [ ] Performance improvement - [ ] Other (please specify): ## Changes Made - - - ## Testing ## Screenshots/Videos (if applicable) ## Pre-submission Checklist - [ ] **I have tested my changes thoroughly before submitting this PR** - [ ] **This PR contains minimal changes necessary to address the issue/feature** - [ ] My code follows the project's coding standards and style guidelines - [ ] I have added tests that prove my fix is effective or that my feature works - [ ] I have added necessary documentation (if applicable) - [ ] All new and existing tests pass - [ ] I have searched existing PRs to ensure this change hasn't been submitted already - [ ] I have linked any relevant issues in the description - [ ] My commits have clear and descriptive messages ## Related Issues ## Additional Notes ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. 
--- cognee/modules/pipelines/models/PipelineRunInfo.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cognee/modules/pipelines/models/PipelineRunInfo.py b/cognee/modules/pipelines/models/PipelineRunInfo.py index 5f5a91c34..2a1da34cc 100644 --- a/cognee/modules/pipelines/models/PipelineRunInfo.py +++ b/cognee/modules/pipelines/models/PipelineRunInfo.py @@ -1,6 +1,7 @@ -from typing import Any, Optional +from typing import Any, Optional, List, Union from uuid import UUID from pydantic import BaseModel +from cognee.modules.data.models.Data import Data class PipelineRunInfo(BaseModel): @@ -8,11 +9,15 @@ class PipelineRunInfo(BaseModel): pipeline_run_id: UUID dataset_id: UUID dataset_name: str - payload: Optional[Any] = None + # Data must be mentioned in typing to allow custom encoders for Data to be activated + payload: Optional[Union[Any, List[Data]]] = None data_ingestion_info: Optional[list] = None model_config = { "arbitrary_types_allowed": True, + "from_attributes": True, + # Add custom encoding handler for Data ORM model + "json_encoders": {Data: lambda d: d.to_json()}, } From 88770b1b111fe43d72f50e8c6507f65820b145f6 Mon Sep 17 00:00:00 2001 From: Igor Ilic <30923996+dexters1@users.noreply.github.com> Date: Wed, 17 Sep 2025 11:16:20 +0200 Subject: [PATCH 5/6] refactor: Use awaitable attrs for getting roles (#1405) ## Description Resolve issue with getting role objects for user ## Type of Change - [ ] Bug fix (non-breaking change that fixes an issue) - [ ] New feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [ ] Code refactoring - [ ] Performance improvement - [ ] Other (please specify): ## Changes Made - - - ## Testing ## Screenshots/Videos (if applicable) ## Pre-submission Checklist - [ ] **I have tested my changes thoroughly before submitting this PR** - [ ] **This PR contains minimal changes necessary to address 
the issue/feature** - [ ] My code follows the project's coding standards and style guidelines - [ ] I have added tests that prove my fix is effective or that my feature works - [ ] I have added necessary documentation (if applicable) - [ ] All new and existing tests pass - [ ] I have searched existing PRs to ensure this change hasn't been submitted already - [ ] I have linked any relevant issues in the description - [ ] My commits have clear and descriptive messages ## Related Issues ## Additional Notes ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. Co-authored-by: Boris --- .../infrastructure/databases/relational/ModelBase.py | 3 ++- .../methods/get_all_user_permission_datasets.py | 11 +++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/cognee/infrastructure/databases/relational/ModelBase.py b/cognee/infrastructure/databases/relational/ModelBase.py index a4d3a1a19..3a2054207 100644 --- a/cognee/infrastructure/databases/relational/ModelBase.py +++ b/cognee/infrastructure/databases/relational/ModelBase.py @@ -1,7 +1,8 @@ from sqlalchemy.orm import DeclarativeBase +from sqlalchemy.ext.asyncio import AsyncAttrs -class Base(DeclarativeBase): +class Base(AsyncAttrs, DeclarativeBase): """ Represents a base class for declarative models using SQLAlchemy. 
diff --git a/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py b/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py index 5b242baa4..a8731a773 100644 --- a/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +++ b/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py @@ -1,3 +1,5 @@ +from types import SimpleNamespace + from cognee.shared.logging_utils import get_logger from ...models.User import User @@ -17,9 +19,14 @@ async def get_all_user_permission_datasets(user: User, permission_type: str) -> # Get all datasets all tenants have access to tenant = await get_tenant(user.tenant_id) datasets.extend(await get_principal_datasets(tenant, permission_type)) + # Get all datasets Users roles have access to - for role_name in user.roles: - role = await get_role(user.tenant_id, role_name) + if isinstance(user, SimpleNamespace): + # If simple namespace use roles defined in user + roles = user.roles + else: + roles = await user.awaitable_attrs.roles + for role in roles: datasets.extend(await get_principal_datasets(role, permission_type)) # Deduplicate datasets with same ID From c01e78240ac466e15a0e2b07c4e48a51c5a981ad Mon Sep 17 00:00:00 2001 From: Igor Ilic <30923996+dexters1@users.noreply.github.com> Date: Wed, 17 Sep 2025 12:00:06 +0200 Subject: [PATCH 6/6] fix: Resolve issue with file path name [COG-2937] (#1411) ## Description Resolves issue with file path handling of some uploaded files in Cognee ## Type of Change - [x] Bug fix (non-breaking change that fixes an issue) - [ ] New feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [ ] Code refactoring - [ ] Performance improvement - [ ] Other (please specify): ## Testing Uploaded file that had issue and was successfully resolved ## Pre-submission Checklist - [x] **I have tested my changes thoroughly 
before submitting this PR** - [x] **This PR contains minimal changes necessary to address the issue/feature** - [x] My code follows the project's coding standards and style guidelines - [x] I have added tests that prove my fix is effective or that my feature works - [x] I have added necessary documentation (if applicable) - [x] All new and existing tests pass - [x] I have searched existing PRs to ensure this change hasn't been submitted already - [x] I have linked any relevant issues in the description - [x] My commits have clear and descriptive messages ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. Co-authored-by: Boris --- cognee/infrastructure/files/utils/get_file_metadata.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cognee/infrastructure/files/utils/get_file_metadata.py b/cognee/infrastructure/files/utils/get_file_metadata.py index 1eb7a1f79..23b10a6df 100644 --- a/cognee/infrastructure/files/utils/get_file_metadata.py +++ b/cognee/infrastructure/files/utils/get_file_metadata.py @@ -56,7 +56,12 @@ async def get_file_metadata(file: BinaryIO) -> FileMetadata: file_type = guess_file_type(file) file_path = getattr(file, "name", None) or getattr(file, "full_name", None) - file_name = Path(file_path).stem if file_path else None + + if isinstance(file_path, str): + file_name = Path(file_path).stem if file_path else None + else: + # In case file_path does not exist or is an integer return None + file_name = None # Get file size pos = file.tell() # remember current pointer