From 30df102656bdaacee4c86359d4b09c0cbe0eb6d7 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Wed, 17 Sep 2025 10:42:11 +0200 Subject: [PATCH 01/37] docs: Add docstrings for permission related functions. --- .../data/methods/create_authorized_dataset.py | 9 +++++++++ .../modules/data/methods/get_authorized_dataset.py | 2 +- .../data/methods/get_authorized_dataset_by_name.py | 11 +++++++++++ .../layers/resolve_authorized_user_dataset.py | 13 +++++++++++++ .../layers/resolve_authorized_user_datasets.py | 2 +- .../authorized_give_permission_on_datasets.py | 12 ++++++++++++ .../methods/check_permission_on_dataset.py | 11 +++++++++++ .../methods/get_all_user_permission_datasets.py | 10 ++++++++++ .../methods/get_document_ids_for_user.py | 10 ++++++++++ .../users/permissions/methods/get_principal.py | 9 +++++++++ .../permissions/methods/get_principal_datasets.py | 11 +++++++++++ .../modules/users/permissions/methods/get_role.py | 10 ++++++++++ .../get_specific_user_permission_datasets.py | 6 +++--- .../modules/users/permissions/methods/get_tenant.py | 9 +++++++++ .../methods/give_default_permission_to_role.py | 9 +++++++++ .../methods/give_default_permission_to_tenant.py | 9 +++++++++ .../methods/give_default_permission_to_user.py | 9 +++++++++ .../methods/give_permission_on_dataset.py | 10 ++++++++++ .../modules/users/roles/methods/add_user_to_role.py | 11 +++++++++++ cognee/modules/users/roles/methods/create_role.py | 10 ++++++++++ .../users/tenants/methods/add_user_to_tenant.py | 12 ++++++++++++ .../modules/users/tenants/methods/create_tenant.py | 10 ++++++++++ 22 files changed, 200 insertions(+), 5 deletions(-) diff --git a/cognee/modules/data/methods/create_authorized_dataset.py b/cognee/modules/data/methods/create_authorized_dataset.py index e43381b35..08057a6bd 100644 --- a/cognee/modules/data/methods/create_authorized_dataset.py +++ b/cognee/modules/data/methods/create_authorized_dataset.py @@ -6,6 +6,15 @@ from .create_dataset import create_dataset async 
def create_authorized_dataset(dataset_name: str, user: User) -> Dataset: + """ + Create a new dataset and give all permissions on this dataset to the given user. + Args: + dataset_name: Name of the dataset. + user: The user object. + + Returns: + Dataset: The new authorized dataset. + """ db_engine = get_relational_engine() async with db_engine.get_async_session() as session: diff --git a/cognee/modules/data/methods/get_authorized_dataset.py b/cognee/modules/data/methods/get_authorized_dataset.py index 0e30b7e0e..6c97322c8 100644 --- a/cognee/modules/data/methods/get_authorized_dataset.py +++ b/cognee/modules/data/methods/get_authorized_dataset.py @@ -15,7 +15,7 @@ async def get_authorized_dataset( Get a specific dataset with permissions for a user. Args: - user_id (UUID): user id + user: User object dataset_id (UUID): dataset id permission_type (str): permission type(read, write, delete, share), default is read diff --git a/cognee/modules/data/methods/get_authorized_dataset_by_name.py b/cognee/modules/data/methods/get_authorized_dataset_by_name.py index 654dcb630..5dc1d86a0 100644 --- a/cognee/modules/data/methods/get_authorized_dataset_by_name.py +++ b/cognee/modules/data/methods/get_authorized_dataset_by_name.py @@ -11,6 +11,17 @@ from ..models import Dataset async def get_authorized_dataset_by_name( dataset_name: str, user: User, permission_type: str ) -> Optional[Dataset]: + """ + Get a specific dataset with the given name, with permissions for a given user. + + Args: + dataset_name: Name of the dataset. + user: User object. 
+ permission_type (str): permission type(read, write, delete, share), default is read + + Returns: + Optional[Dataset]: dataset with permissions + """ authorized_datasets = await get_authorized_existing_datasets([], permission_type, user) return next((dataset for dataset in authorized_datasets if dataset.name == dataset_name), None) diff --git a/cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py b/cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py index 30d0fef71..e135b8351 100644 --- a/cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +++ b/cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py @@ -11,6 +11,19 @@ from cognee.modules.data.methods import ( async def resolve_authorized_user_dataset(dataset_id: UUID, dataset_name: str, user: User): + """ + Function handles creation and dataset authorization if dataset already exist for Cognee. + Verifies that provided user has necessary permission for provided Dataset. + If Dataset does not exist creates the Dataset and gives permission for the user creating the dataset. + + Args: + dataset_id: Id of the dataset. + dataset_name: Name of the dataset. + user: Cognee User request is being processed for, if None default user will be used. + + Returns: + Tuple[User, Dataset]: A tuple containing the user and the authorized dataset. + """ if not user: user = await get_default_user() diff --git a/cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py b/cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py index 4f6fb8254..f91064995 100644 --- a/cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +++ b/cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py @@ -25,7 +25,7 @@ async def resolve_authorized_user_datasets( datasets: Dataset names or Dataset UUID (in case Datasets already exist) Returns: - + Tuple[User, List[Dataset]]: A tuple containing the user and the list of authorized datasets. 
""" # If no user is provided use default user if user is None: diff --git a/cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py b/cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py index d8a3777b7..7960eb756 100644 --- a/cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py +++ b/cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py @@ -9,6 +9,18 @@ from uuid import UUID async def authorized_give_permission_on_datasets( principal_id: UUID, dataset_ids: Union[List[UUID], UUID], permission_name: str, owner_id: UUID ): + """ + Give permission to certain datasets to a user. + The request owner must have the necessary permission to share the datasets. + Args: + principal_id: Id of user to whom datasets are shared + dataset_ids: Ids of datasets to share + permission_name: Name of permission to give + owner_id: Id of the request owner + + Returns: + None + """ # If only a single dataset UUID is provided transform it to a list if not isinstance(dataset_ids, list): dataset_ids = [dataset_ids] diff --git a/cognee/modules/users/permissions/methods/check_permission_on_dataset.py b/cognee/modules/users/permissions/methods/check_permission_on_dataset.py index 467da7154..d489417e0 100644 --- a/cognee/modules/users/permissions/methods/check_permission_on_dataset.py +++ b/cognee/modules/users/permissions/methods/check_permission_on_dataset.py @@ -10,6 +10,17 @@ logger = get_logger() async def check_permission_on_dataset(user: User, permission_type: str, dataset_id: UUID): + """ + Check if a user has a specific permission on a dataset. 
+ Args: + user: User whose permission is checked + permission_type: Type of permission to check + dataset_id: Id of the dataset + + Returns: + None + + """ if user is None: user = await get_default_user() diff --git a/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py b/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py index 5b242baa4..a2a2b5fdd 100644 --- a/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +++ b/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py @@ -9,6 +9,16 @@ logger = get_logger() async def get_all_user_permission_datasets(user: User, permission_type: str) -> list[Dataset]: + """ + Return a list of datasets the user has permission for. + If the user is part of a tenant, return datasets his roles have permission for. + Args: + user + permission_type + + Returns: + list[Dataset]: List of datasets user has permission for + """ datasets = list() # Get all datasets User has explicit access to datasets.extend(await get_principal_datasets(user, permission_type)) diff --git a/cognee/modules/users/permissions/methods/get_document_ids_for_user.py b/cognee/modules/users/permissions/methods/get_document_ids_for_user.py index 3b053d8e7..9b1db024e 100644 --- a/cognee/modules/users/permissions/methods/get_document_ids_for_user.py +++ b/cognee/modules/users/permissions/methods/get_document_ids_for_user.py @@ -8,6 +8,16 @@ from ...models import ACL, Permission async def get_document_ids_for_user(user_id: UUID, datasets: list[str] = None) -> list[str]: + """ + Return a list of documents ids for which the user has read permission. + If datasets are specified, return only documents from those datasets. 
+ Args: + user_id: Id of the user + datasets: List of datasets + + Returns: + list[str]: List of documents for which the user has read permission + """ db_engine = get_relational_engine() async with db_engine.get_async_session() as session: diff --git a/cognee/modules/users/permissions/methods/get_principal.py b/cognee/modules/users/permissions/methods/get_principal.py index 53d39651a..245190cf8 100644 --- a/cognee/modules/users/permissions/methods/get_principal.py +++ b/cognee/modules/users/permissions/methods/get_principal.py @@ -6,6 +6,15 @@ from ...models.Principal import Principal async def get_principal(principal_id: UUID): + """ + Return information about a user based on their id + Args: + principal_id: Id of the user + + Returns: + principal: Information about the user (principal) + + """ db_engine = get_relational_engine() async with db_engine.get_async_session() as session: diff --git a/cognee/modules/users/permissions/methods/get_principal_datasets.py b/cognee/modules/users/permissions/methods/get_principal_datasets.py index b2385182f..a9adb8f00 100644 --- a/cognee/modules/users/permissions/methods/get_principal_datasets.py +++ b/cognee/modules/users/permissions/methods/get_principal_datasets.py @@ -9,6 +9,17 @@ from ...models.ACL import ACL async def get_principal_datasets(principal: Principal, permission_type: str) -> list[Dataset]: + """ + Return a list of datasets for which the user (principal) has a certain permission. + Args: + principal: Information about the user + permission_type: Type of permission + + Returns: + list[Dataset]: List of datasets for which the user (principal) + has the permission (permission_type). 
+ + """ db_engine = get_relational_engine() async with db_engine.get_async_session() as session: diff --git a/cognee/modules/users/permissions/methods/get_role.py b/cognee/modules/users/permissions/methods/get_role.py index 007044c43..a703fc9f9 100644 --- a/cognee/modules/users/permissions/methods/get_role.py +++ b/cognee/modules/users/permissions/methods/get_role.py @@ -9,6 +9,16 @@ from ...models.Role import Role async def get_role(tenant_id: UUID, role_name: str): + """ + Return the role with the name role_name of the given tenant. + Args: + tenant_id: Id of the given tenant + role_name: Name of the role + + Returns + The role for the given tenant. + + """ db_engine = get_relational_engine() async with db_engine.get_async_session() as session: diff --git a/cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py b/cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py index b6ad1291d..8dee4d782 100644 --- a/cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +++ b/cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py @@ -15,9 +15,9 @@ async def get_specific_user_permission_datasets( Return a list of datasets user has given permission for. If a list of datasets is provided, verify for which datasets user has appropriate permission for and return list of datasets he has permission for. Args: - user_id: - permission_type: - dataset_ids: + user_id: Id of the user. + permission_type: Type of the permission. 
+ dataset_ids: Ids of the provided datasets Returns: list[Dataset]: List of datasets user has permission for diff --git a/cognee/modules/users/permissions/methods/get_tenant.py b/cognee/modules/users/permissions/methods/get_tenant.py index c5bf1a633..832ff71b8 100644 --- a/cognee/modules/users/permissions/methods/get_tenant.py +++ b/cognee/modules/users/permissions/methods/get_tenant.py @@ -8,6 +8,15 @@ from ...models.Tenant import Tenant async def get_tenant(tenant_id: UUID): + """ + Return information about the tenant based on the given id. + Args: + tenant_id: Id of the given tenant + + Returns + Information about the given tenant. + + """ db_engine = get_relational_engine() async with db_engine.get_async_session() as session: diff --git a/cognee/modules/users/permissions/methods/give_default_permission_to_role.py b/cognee/modules/users/permissions/methods/give_default_permission_to_role.py index bf3b6a9c7..9d9b41c1b 100644 --- a/cognee/modules/users/permissions/methods/give_default_permission_to_role.py +++ b/cognee/modules/users/permissions/methods/give_default_permission_to_role.py @@ -16,6 +16,15 @@ from cognee.modules.users.models import ( async def give_default_permission_to_role(role_id: UUID, permission_name: str): + """ + Give the permission with given name to the role with the given id as a default permission. 
+ Args: + role_id: Id of the role + permission_name: Name of the permission + + Returns: + None + """ db_engine = get_relational_engine() async with db_engine.get_async_session() as session: diff --git a/cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py b/cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py index 57049ae2e..7baa8c244 100644 --- a/cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py +++ b/cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py @@ -16,6 +16,15 @@ from cognee.modules.users.models import ( async def give_default_permission_to_tenant(tenant_id: UUID, permission_name: str): + """ + Give the permission with given name to the tenant with the given id as a default permission. + Args: + tenant_id: Id of the tenant + permission_name: Name of the permission + + Returns: + None + """ db_engine = get_relational_engine() async with db_engine.get_async_session() as session: tenant = ( diff --git a/cognee/modules/users/permissions/methods/give_default_permission_to_user.py b/cognee/modules/users/permissions/methods/give_default_permission_to_user.py index 40913ff12..545122fd0 100644 --- a/cognee/modules/users/permissions/methods/give_default_permission_to_user.py +++ b/cognee/modules/users/permissions/methods/give_default_permission_to_user.py @@ -16,6 +16,15 @@ from cognee.modules.users.models import ( async def give_default_permission_to_user(user_id: UUID, permission_name: str): + """ + Give the permission with given name to the user with the given id as a default permission. 
+ Args: + user_id: Id of the user + permission_name: Name of the permission + + Returns: + None + """ db_engine = get_relational_engine() async with db_engine.get_async_session() as session: user = (await session.execute(select(User).where(User.id == user_id))).scalars().first() diff --git a/cognee/modules/users/permissions/methods/give_permission_on_dataset.py b/cognee/modules/users/permissions/methods/give_permission_on_dataset.py index 0ed536981..6d0272192 100644 --- a/cognee/modules/users/permissions/methods/give_permission_on_dataset.py +++ b/cognee/modules/users/permissions/methods/give_permission_on_dataset.py @@ -24,6 +24,16 @@ async def give_permission_on_dataset( dataset_id: UUID, permission_name: str, ): + """ + Give a specific permission on a dataset to a user. + Args: + principal: User who is being given the permission on the dataset + dataset_id: Id of the dataset + permission_name: Name of permission to give + + Returns: + None + """ db_engine = get_relational_engine() async with db_engine.get_async_session() as session: diff --git a/cognee/modules/users/roles/methods/add_user_to_role.py b/cognee/modules/users/roles/methods/add_user_to_role.py index c6d8fdb63..de5e47775 100644 --- a/cognee/modules/users/roles/methods/add_user_to_role.py +++ b/cognee/modules/users/roles/methods/add_user_to_role.py @@ -21,6 +21,17 @@ from cognee.modules.users.models import ( async def add_user_to_role(user_id: UUID, role_id: UUID, owner_id: UUID): + """ + Add a user with the given id to the role with the given id. + Args: + user_id: Id of the user. + role_id: Id of the role. + owner_id: Id of the request owner. 
+ + Returns: + None + + """ db_engine = get_relational_engine() async with db_engine.get_async_session() as session: user = (await session.execute(select(User).where(User.id == user_id))).scalars().first() diff --git a/cognee/modules/users/roles/methods/create_role.py b/cognee/modules/users/roles/methods/create_role.py index 897c42394..bdba4ad31 100644 --- a/cognee/modules/users/roles/methods/create_role.py +++ b/cognee/modules/users/roles/methods/create_role.py @@ -16,6 +16,16 @@ async def create_role( role_name: str, owner_id: UUID, ): + """ + Create a new role with the given name, if the request owner with the given id + has the necessary permission. + Args: + role_name: Name of the new role. + owner_id: Id of the request owner. + + Returns: + None + """ db_engine = get_relational_engine() async with db_engine.get_async_session() as session: user = await get_user(owner_id) diff --git a/cognee/modules/users/tenants/methods/add_user_to_tenant.py b/cognee/modules/users/tenants/methods/add_user_to_tenant.py index cf0ad0535..1374067a7 100644 --- a/cognee/modules/users/tenants/methods/add_user_to_tenant.py +++ b/cognee/modules/users/tenants/methods/add_user_to_tenant.py @@ -13,6 +13,18 @@ from cognee.modules.users.exceptions import ( async def add_user_to_tenant(user_id: UUID, tenant_id: UUID, owner_id: UUID): + """ + Add a user with the given id to the tenant with the given id. + This can only be successful if the request owner with the given id is the tenant owner. + Args: + user_id: Id of the user. + tenant_id: Id of the tenant. + owner_id: Id of the request owner. 
+ + Returns: + None + + """ db_engine = get_relational_engine() async with db_engine.get_async_session() as session: user = await get_user(user_id) diff --git a/cognee/modules/users/tenants/methods/create_tenant.py b/cognee/modules/users/tenants/methods/create_tenant.py index 5d68e8110..bd8abadd1 100644 --- a/cognee/modules/users/tenants/methods/create_tenant.py +++ b/cognee/modules/users/tenants/methods/create_tenant.py @@ -8,6 +8,16 @@ from cognee.modules.users.methods import get_user async def create_tenant(tenant_name: str, user_id: UUID): + """ + Create a new tenant with the given name, for the user with the given id. + This user is the owner of the tenant. + Args: + tenant_name: Name of the new tenant. + user_id: Id of the user. + + Returns: + None + """ db_engine = get_relational_engine() async with db_engine.get_async_session() as session: try: From 293a0e0053759686cd519e061a18e92e21ce32b7 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Wed, 17 Sep 2025 10:45:36 +0200 Subject: [PATCH 02/37] Fix formatiing --- .../embeddings/OllamaEmbeddingEngine.py | 6 +----- .../methods/get_authorized_dataset_by_name.py | 16 +++++++-------- .../layers/resolve_authorized_user_dataset.py | 20 +++++++++---------- 3 files changed, 19 insertions(+), 23 deletions(-) diff --git a/cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py b/cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py index 29c57ed2e..3ecc7dbe8 100644 --- a/cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +++ b/cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py @@ -94,11 +94,7 @@ class OllamaEmbeddingEngine(EmbeddingEngine): """ Internal method to call the Ollama embeddings endpoint for a single prompt. 
""" - payload = { - "model": self.model, - "prompt": prompt, - "input": prompt - } + payload = {"model": self.model, "prompt": prompt, "input": prompt} headers = {} api_key = os.getenv("LLM_API_KEY") if api_key: diff --git a/cognee/modules/data/methods/get_authorized_dataset_by_name.py b/cognee/modules/data/methods/get_authorized_dataset_by_name.py index 5dc1d86a0..ad50e25e9 100644 --- a/cognee/modules/data/methods/get_authorized_dataset_by_name.py +++ b/cognee/modules/data/methods/get_authorized_dataset_by_name.py @@ -12,16 +12,16 @@ async def get_authorized_dataset_by_name( dataset_name: str, user: User, permission_type: str ) -> Optional[Dataset]: """ - Get a specific dataset with the given name, with permissions for a given user. + Get a specific dataset with the given name, with permissions for a given user. - Args: - dataset_name: Name of the dataset. - user: User object. - permission_type (str): permission type(read, write, delete, share), default is read + Args: + dataset_name: Name of the dataset. + user: User object. 
+ permission_type (str): permission type(read, write, delete, share), default is read - Returns: - Optional[Dataset]: dataset with permissions - """ + Returns: + Optional[Dataset]: dataset with permissions + """ authorized_datasets = await get_authorized_existing_datasets([], permission_type, user) return next((dataset for dataset in authorized_datasets if dataset.name == dataset_name), None) diff --git a/cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py b/cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py index e135b8351..7e3d1c124 100644 --- a/cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +++ b/cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py @@ -12,18 +12,18 @@ from cognee.modules.data.methods import ( async def resolve_authorized_user_dataset(dataset_id: UUID, dataset_name: str, user: User): """ - Function handles creation and dataset authorization if dataset already exist for Cognee. - Verifies that provided user has necessary permission for provided Dataset. - If Dataset does not exist creates the Dataset and gives permission for the user creating the dataset. + Function handles creation and dataset authorization if dataset already exist for Cognee. + Verifies that provided user has necessary permission for provided Dataset. + If Dataset does not exist creates the Dataset and gives permission for the user creating the dataset. - Args: - dataset_id: Id of the dataset. - dataset_name: Name of the dataset. - user: Cognee User request is being processed for, if None default user will be used. + Args: + dataset_id: Id of the dataset. + dataset_name: Name of the dataset. + user: Cognee User request is being processed for, if None default user will be used. - Returns: - Tuple[User, Dataset]: A tuple containing the user and the authorized dataset. - """ + Returns: + Tuple[User, Dataset]: A tuple containing the user and the authorized dataset. 
+ """ if not user: user = await get_default_user() From 475749b8decb2b20d7ebd3440c4af2bf5e1b961c Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Wed, 17 Sep 2025 11:47:37 +0200 Subject: [PATCH 03/37] docs: Updated some docs, not a lot was necessary --- cognee/modules/graph/utils/retrieve_existing_edges.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/cognee/modules/graph/utils/retrieve_existing_edges.py b/cognee/modules/graph/utils/retrieve_existing_edges.py index 20cb30a26..f0aefacd4 100644 --- a/cognee/modules/graph/utils/retrieve_existing_edges.py +++ b/cognee/modules/graph/utils/retrieve_existing_edges.py @@ -23,8 +23,6 @@ async def retrieve_existing_edges( chunk_graphs (list[KnowledgeGraph]): List of knowledge graphs corresponding to each data chunk. Each graph contains nodes (entities) and edges (relationships) that were extracted from the chunk content. - graph_engine (GraphDBInterface): Interface to the graph database that will be queried - to check for existing edges. Must implement the has_edges() method. Returns: dict[str, bool]: A mapping of edge keys to boolean values indicating existence. 
From f651991c86d6fdc9dce6362100c3719dc9c2f5f6 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 17 Sep 2025 12:02:38 +0200 Subject: [PATCH 04/37] feat: adds base class + renames rdflib implementation --- cognee/api/v1/cognify/cognify.py | 4 +-- .../get_default_tasks_by_indices.py | 4 +-- .../utils/expand_with_nodes_and_edges.py | 16 ++++----- .../ontology/base_ontology_resolver.py | 30 ++++++++++++++++ cognee/modules/ontology/models.py | 20 +++++++++++ .../ontology/rdf_xml/OntologyResolver.py | 35 +++++++------------ cognee/tasks/graph/extract_graph_from_data.py | 6 ++-- .../tasks/graph/extract_graph_from_data_v2.py | 4 +-- .../modules/ontology/test_ontology_adapter.py | 21 +++++------ 9 files changed, 90 insertions(+), 50 deletions(-) create mode 100644 cognee/modules/ontology/base_ontology_resolver.py create mode 100644 cognee/modules/ontology/models.py diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index e4f91b44c..a35658691 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -10,7 +10,7 @@ from cognee.infrastructure.llm import get_max_chunk_tokens from cognee.modules.pipelines import run_pipeline from cognee.modules.pipelines.tasks.task import Task from cognee.modules.chunking.TextChunker import TextChunker -from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver +from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver from cognee.modules.users.models import User from cognee.tasks.documents import ( @@ -230,7 +230,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's Task( extract_graph_from_data, graph_model=graph_model, - ontology_adapter=OntologyResolver(ontology_file=ontology_file_path), + ontology_adapter=RDFLibOntologyResolver(ontology_file=ontology_file_path), custom_prompt=custom_prompt, task_config={"batch_size": 10}, ), # Generate knowledge graphs from the 
document chunks. diff --git a/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py b/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py index be532232f..677090a58 100644 --- a/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +++ b/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py @@ -5,7 +5,7 @@ from cognee.modules.chunking.TextChunker import TextChunker from cognee.tasks.graph import extract_graph_from_data from cognee.tasks.storage import add_data_points from cognee.shared.data_models import KnowledgeGraph -from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver +from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver async def get_default_tasks_by_indices( @@ -33,7 +33,7 @@ async def get_no_summary_tasks( # Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks) base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker) - ontology_adapter = OntologyResolver(ontology_file=ontology_file_path) + ontology_adapter = RDFLibOntologyResolver(ontology_file=ontology_file_path) graph_task = Task( extract_graph_from_data, diff --git a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py index 125f59e72..3bd62e6e0 100644 --- a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py +++ b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py @@ -8,7 +8,7 @@ from cognee.modules.engine.utils import ( generate_node_name, ) from cognee.shared.data_models import KnowledgeGraph -from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver +from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver def _create_node_key(node_id: str, category: str) -> str: @@ -83,7 +83,7 @@ def _process_ontology_edges( def _create_type_node( node_type: str, - ontology_resolver: 
OntologyResolver, + ontology_resolver: RDFLibOntologyResolver, added_nodes_map: dict, added_ontology_nodes_map: dict, name_mapping: dict, @@ -141,7 +141,7 @@ def _create_entity_node( node_name: str, node_description: str, type_node: EntityType, - ontology_resolver: OntologyResolver, + ontology_resolver: RDFLibOntologyResolver, added_nodes_map: dict, added_ontology_nodes_map: dict, name_mapping: dict, @@ -198,7 +198,7 @@ def _create_entity_node( def _process_graph_nodes( data_chunk: DocumentChunk, graph: KnowledgeGraph, - ontology_resolver: OntologyResolver, + ontology_resolver: RDFLibOntologyResolver, added_nodes_map: dict, added_ontology_nodes_map: dict, name_mapping: dict, @@ -277,7 +277,7 @@ def _process_graph_edges( def expand_with_nodes_and_edges( data_chunks: list[DocumentChunk], chunk_graphs: list[KnowledgeGraph], - ontology_resolver: OntologyResolver = None, + ontology_resolver: RDFLibOntologyResolver = None, existing_edges_map: Optional[dict[str, bool]] = None, ): """ @@ -296,8 +296,8 @@ def expand_with_nodes_and_edges( chunk_graphs (list[KnowledgeGraph]): List of knowledge graphs corresponding to each data chunk. Each graph contains nodes (entities) and edges (relationships) extracted from the chunk content. - ontology_resolver (OntologyResolver, optional): Resolver for validating entities and - types against an ontology. If None, a default OntologyResolver is created. + ontology_resolver (RDFLibOntologyResolver, optional): Resolver for validating entities and + types against an ontology. If None, a default RDFLibOntologyResolver is created. Defaults to None. existing_edges_map (dict[str, bool], optional): Mapping of existing edge keys to prevent duplicate edge creation. Keys are formatted as "{source_id}_{target_id}_{relation}". 
@@ -320,7 +320,7 @@ def expand_with_nodes_and_edges( existing_edges_map = {} if ontology_resolver is None: - ontology_resolver = OntologyResolver() + ontology_resolver = RDFLibOntologyResolver() added_nodes_map = {} added_ontology_nodes_map = {} diff --git a/cognee/modules/ontology/base_ontology_resolver.py b/cognee/modules/ontology/base_ontology_resolver.py new file mode 100644 index 000000000..55826bfb0 --- /dev/null +++ b/cognee/modules/ontology/base_ontology_resolver.py @@ -0,0 +1,30 @@ +from abc import ABC, abstractmethod +from typing import List, Tuple, Optional + +from cognee.modules.ontology.models import AttachedOntologyNode + + +class BaseOntologyResolver(ABC): + """Abstract base class for ontology resolvers.""" + + @abstractmethod + def build_lookup(self) -> None: + """Build the lookup dictionary for ontology entities.""" + pass + + @abstractmethod + def refresh_lookup(self) -> None: + """Refresh the lookup dictionary.""" + pass + + @abstractmethod + def find_closest_match(self, name: str, category: str) -> Optional[str]: + """Find the closest match for a given name in the specified category.""" + pass + + @abstractmethod + def get_subgraph( + self, node_name: str, node_type: str = "individuals", directed: bool = True + ) -> Tuple[List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]]: + """Get a subgraph for the given node.""" + pass diff --git a/cognee/modules/ontology/models.py b/cognee/modules/ontology/models.py new file mode 100644 index 000000000..eefa9e5dd --- /dev/null +++ b/cognee/modules/ontology/models.py @@ -0,0 +1,20 @@ +from typing import Any + + +class AttachedOntologyNode: + """Lightweight wrapper to be able to parse any ontology solution and generalize cognee interface.""" + + def __init__(self, uri: Any, category: str): + self.uri = uri + self.name = self._extract_name(uri) + self.category = category + + @staticmethod + def _extract_name(uri: Any) -> str: + uri_str = str(uri) + if "#" in uri_str: + 
return uri_str.split("#")[-1] + return uri_str.rstrip("/").split("/")[-1] + + def __repr__(self): + return f"AttachedOntologyNode(name={self.name}, category={self.category})" diff --git a/cognee/modules/ontology/rdf_xml/OntologyResolver.py b/cognee/modules/ontology/rdf_xml/OntologyResolver.py index 7f3fa004d..3c1a55b5a 100644 --- a/cognee/modules/ontology/rdf_xml/OntologyResolver.py +++ b/cognee/modules/ontology/rdf_xml/OntologyResolver.py @@ -10,30 +10,19 @@ from cognee.modules.ontology.exceptions import ( FindClosestMatchError, GetSubgraphError, ) +from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver +from cognee.modules.ontology.models import AttachedOntologyNode logger = get_logger("OntologyAdapter") -class AttachedOntologyNode: - """Lightweight wrapper to be able to parse any ontology solution and generalize cognee interface.""" - - def __init__(self, uri: URIRef, category: str): - self.uri = uri - self.name = self._extract_name(uri) - self.category = category - - @staticmethod - def _extract_name(uri: URIRef) -> str: - uri_str = str(uri) - if "#" in uri_str: - return uri_str.split("#")[-1] - return uri_str.rstrip("/").split("/")[-1] - - def __repr__(self): - return f"AttachedOntologyNode(name={self.name}, category={self.category})" - - -class OntologyResolver: +class RDFLibOntologyResolver(BaseOntologyResolver): + """RDFLib-based ontology resolver implementation. + + This implementation uses RDFLib to parse and work with RDF/OWL ontology files. + It provides fuzzy matching and subgraph extraction capabilities for ontology entities. 
+ """ + def __init__(self, ontology_file: Optional[str] = None): self.ontology_file = ontology_file try: @@ -60,7 +49,7 @@ class OntologyResolver: name = uri_str.rstrip("/").split("/")[-1] return name.lower().replace(" ", "_").strip() - def build_lookup(self): + def build_lookup(self) -> None: try: classes: Dict[str, URIRef] = {} individuals: Dict[str, URIRef] = {} @@ -97,7 +86,7 @@ class OntologyResolver: logger.error("Failed to build lookup dictionary: %s", str(e)) raise RuntimeError("Lookup build failed") from e - def refresh_lookup(self): + def refresh_lookup(self) -> None: self.build_lookup() logger.info("Ontology lookup refreshed.") @@ -125,7 +114,7 @@ class OntologyResolver: def get_subgraph( self, node_name: str, node_type: str = "individuals", directed: bool = True - ) -> Tuple[List[Any], List[Tuple[str, str, str]], Optional[Any]]: + ) -> Tuple[List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]]: nodes_set = set() edges: List[Tuple[str, str, str]] = [] visited = set() diff --git a/cognee/tasks/graph/extract_graph_from_data.py b/cognee/tasks/graph/extract_graph_from_data.py index d81516206..2ad32f308 100644 --- a/cognee/tasks/graph/extract_graph_from_data.py +++ b/cognee/tasks/graph/extract_graph_from_data.py @@ -4,7 +4,7 @@ from pydantic import BaseModel from cognee.infrastructure.databases.graph import get_graph_engine from cognee.tasks.storage.add_data_points import add_data_points -from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver +from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver from cognee.modules.chunking.models.DocumentChunk import DocumentChunk from cognee.modules.graph.utils import ( expand_with_nodes_and_edges, @@ -24,7 +24,7 @@ async def integrate_chunk_graphs( data_chunks: list[DocumentChunk], chunk_graphs: list, graph_model: Type[BaseModel], - ontology_adapter: OntologyResolver, + ontology_adapter: RDFLibOntologyResolver, ) -> 
List[DocumentChunk]: """Updates DocumentChunk objects, integrates data points and edges into databases.""" @@ -70,7 +70,7 @@ async def integrate_chunk_graphs( async def extract_graph_from_data( data_chunks: List[DocumentChunk], graph_model: Type[BaseModel], - ontology_adapter: OntologyResolver = None, + ontology_adapter: RDFLibOntologyResolver = None, custom_prompt: Optional[str] = None, ) -> List[DocumentChunk]: """ diff --git a/cognee/tasks/graph/extract_graph_from_data_v2.py b/cognee/tasks/graph/extract_graph_from_data_v2.py index c1f43df5c..ce69f9b0e 100644 --- a/cognee/tasks/graph/extract_graph_from_data_v2.py +++ b/cognee/tasks/graph/extract_graph_from_data_v2.py @@ -3,7 +3,7 @@ from typing import List from cognee.modules.chunking.models.DocumentChunk import DocumentChunk from cognee.shared.data_models import KnowledgeGraph -from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver +from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver from cognee.tasks.graph.cascade_extract.utils.extract_nodes import extract_nodes from cognee.tasks.graph.cascade_extract.utils.extract_content_nodes_and_relationship_names import ( extract_content_nodes_and_relationship_names, @@ -17,7 +17,7 @@ from cognee.tasks.graph.extract_graph_from_data import integrate_chunk_graphs async def extract_graph_from_data( data_chunks: List[DocumentChunk], n_rounds: int = 2, - ontology_adapter: OntologyResolver = None, + ontology_adapter: RDFLibOntologyResolver = None, ) -> List[DocumentChunk]: """Extract and update graph data from document chunks in multiple steps.""" chunk_nodes = await asyncio.gather( diff --git a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py index 8b406e53a..e0a6f1402 100644 --- a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py +++ b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py @@ -1,12 +1,13 @@ import pytest from rdflib 
import Graph, Namespace, RDF, OWL, RDFS -from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver, AttachedOntologyNode +from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver +from cognee.modules.ontology.models import AttachedOntologyNode def test_ontology_adapter_initialization_success(): """Test successful initialization of OntologyAdapter.""" - adapter = OntologyResolver() + adapter = RDFLibOntologyResolver() adapter.build_lookup() assert isinstance(adapter.lookup, dict) @@ -14,7 +15,7 @@ def test_ontology_adapter_initialization_success(): def test_ontology_adapter_initialization_file_not_found(): """Test OntologyAdapter initialization with nonexistent file.""" - adapter = OntologyResolver(ontology_file="nonexistent.owl") + adapter = RDFLibOntologyResolver(ontology_file="nonexistent.owl") assert adapter.graph is None @@ -27,7 +28,7 @@ def test_build_lookup(): g.add((ns.Audi, RDF.type, ns.Car)) - resolver = OntologyResolver() + resolver = RDFLibOntologyResolver() resolver.graph = g resolver.build_lookup() @@ -50,7 +51,7 @@ def test_find_closest_match_exact(): g.add((ns.Car, RDF.type, OWL.Class)) g.add((ns.Audi, RDF.type, ns.Car)) - resolver = OntologyResolver() + resolver = RDFLibOntologyResolver() resolver.graph = g resolver.build_lookup() @@ -71,7 +72,7 @@ def test_find_closest_match_fuzzy(): g.add((ns.Audi, RDF.type, ns.Car)) g.add((ns.BMW, RDF.type, ns.Car)) - resolver = OntologyResolver() + resolver = RDFLibOntologyResolver() resolver.graph = g resolver.build_lookup() @@ -92,7 +93,7 @@ def test_find_closest_match_no_match(): g.add((ns.Audi, RDF.type, ns.Car)) g.add((ns.BMW, RDF.type, ns.Car)) - resolver = OntologyResolver() + resolver = RDFLibOntologyResolver() resolver.graph = g resolver.build_lookup() @@ -105,7 +106,7 @@ def test_get_subgraph_no_match_rdflib(): """Test get_subgraph returns empty results for a non-existent node.""" g = Graph() - resolver = OntologyResolver() + resolver = 
RDFLibOntologyResolver() resolver.graph = g resolver.build_lookup() @@ -138,7 +139,7 @@ def test_get_subgraph_success_rdflib(): g.add((ns.VW, owns, ns.Audi)) g.add((ns.VW, owns, ns.Porsche)) - resolver = OntologyResolver() + resolver = RDFLibOntologyResolver() resolver.graph = g resolver.build_lookup() @@ -163,7 +164,7 @@ def test_refresh_lookup_rdflib(): """Test that refresh_lookup rebuilds the lookup dict into a new object.""" g = Graph() - resolver = OntologyResolver() + resolver = RDFLibOntologyResolver() resolver.graph = g resolver.build_lookup() From 93a383b56a4e774a863a84847b4eb62ce61789cf Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 17 Sep 2025 12:23:30 +0200 Subject: [PATCH 05/37] feat: adds matching strategies and moves resolver --- cognee/api/v1/cognify/cognify.py | 2 +- .../get_default_tasks_by_indices.py | 2 +- .../utils/expand_with_nodes_and_edges.py | 2 +- .../ontology/base_ontology_resolver.py | 10 ++++ .../modules/ontology/matching_strategies.py | 55 +++++++++++++++++++ ...yResolver.py => RDFLibOntologyResolver.py} | 13 ++--- cognee/tasks/graph/extract_graph_from_data.py | 2 +- .../tasks/graph/extract_graph_from_data_v2.py | 2 +- .../modules/ontology/test_ontology_adapter.py | 2 +- 9 files changed, 76 insertions(+), 14 deletions(-) create mode 100644 cognee/modules/ontology/matching_strategies.py rename cognee/modules/ontology/rdf_xml/{OntologyResolver.py => RDFLibOntologyResolver.py} (95%) diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index a35658691..e933bafd8 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -10,7 +10,7 @@ from cognee.infrastructure.llm import get_max_chunk_tokens from cognee.modules.pipelines import run_pipeline from cognee.modules.pipelines.tasks.task import Task from cognee.modules.chunking.TextChunker import TextChunker -from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver 
+from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.users.models import User from cognee.tasks.documents import ( diff --git a/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py b/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py index 677090a58..fb10c7eed 100644 --- a/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +++ b/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py @@ -5,7 +5,7 @@ from cognee.modules.chunking.TextChunker import TextChunker from cognee.tasks.graph import extract_graph_from_data from cognee.tasks.storage import add_data_points from cognee.shared.data_models import KnowledgeGraph -from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver +from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver async def get_default_tasks_by_indices( diff --git a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py index 3bd62e6e0..bc6205d41 100644 --- a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py +++ b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py @@ -8,7 +8,7 @@ from cognee.modules.engine.utils import ( generate_node_name, ) from cognee.shared.data_models import KnowledgeGraph -from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver +from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver def _create_node_key(node_id: str, category: str) -> str: diff --git a/cognee/modules/ontology/base_ontology_resolver.py b/cognee/modules/ontology/base_ontology_resolver.py index 55826bfb0..86f51fcb7 100644 --- a/cognee/modules/ontology/base_ontology_resolver.py +++ b/cognee/modules/ontology/base_ontology_resolver.py @@ -2,10 +2,20 @@ from abc import ABC, abstractmethod from typing 
import List, Tuple, Optional from cognee.modules.ontology.models import AttachedOntologyNode +from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyMatchingStrategy class BaseOntologyResolver(ABC): """Abstract base class for ontology resolvers.""" + + def __init__(self, matching_strategy: Optional[MatchingStrategy] = None): + """Initialize the ontology resolver with a matching strategy. + + Args: + matching_strategy: The strategy to use for entity matching. + Defaults to FuzzyMatchingStrategy if None. + """ + self.matching_strategy = matching_strategy or FuzzyMatchingStrategy() @abstractmethod def build_lookup(self) -> None: diff --git a/cognee/modules/ontology/matching_strategies.py b/cognee/modules/ontology/matching_strategies.py new file mode 100644 index 000000000..c576bf6e2 --- /dev/null +++ b/cognee/modules/ontology/matching_strategies.py @@ -0,0 +1,55 @@ +import difflib +from abc import ABC, abstractmethod +from typing import List, Optional + + +class MatchingStrategy(ABC): + """Abstract base class for ontology entity matching strategies.""" + + @abstractmethod + def find_match(self, name: str, candidates: List[str]) -> Optional[str]: + """Find the best match for a given name from a list of candidates. + + Args: + name: The name to match + candidates: List of candidate names to match against + + Returns: + The best matching candidate name, or None if no match found + """ + pass + + +class FuzzyMatchingStrategy(MatchingStrategy): + """Fuzzy matching strategy using difflib for approximate string matching.""" + + def __init__(self, cutoff: float = 0.8): + """Initialize fuzzy matching strategy. + + Args: + cutoff: Minimum similarity score (0.0 to 1.0) for a match to be considered valid + """ + self.cutoff = cutoff + + def find_match(self, name: str, candidates: List[str]) -> Optional[str]: + """Find the closest fuzzy match for a given name. 
+ + Args: + name: The normalized name to match + candidates: List of normalized candidate names + + Returns: + The best matching candidate name, or None if no match meets the cutoff + """ + if not candidates: + return None + + # Check for exact match first + if name in candidates: + return name + + # Find fuzzy match + best_match = difflib.get_close_matches( + name, candidates, n=1, cutoff=self.cutoff + ) + return best_match[0] if best_match else None diff --git a/cognee/modules/ontology/rdf_xml/OntologyResolver.py b/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py similarity index 95% rename from cognee/modules/ontology/rdf_xml/OntologyResolver.py rename to cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py index 3c1a55b5a..d8de5794a 100644 --- a/cognee/modules/ontology/rdf_xml/OntologyResolver.py +++ b/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py @@ -12,6 +12,7 @@ from cognee.modules.ontology.exceptions import ( ) from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver from cognee.modules.ontology.models import AttachedOntologyNode +from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyMatchingStrategy logger = get_logger("OntologyAdapter") @@ -23,7 +24,8 @@ class RDFLibOntologyResolver(BaseOntologyResolver): It provides fuzzy matching and subgraph extraction capabilities for ontology entities. 
""" - def __init__(self, ontology_file: Optional[str] = None): + def __init__(self, ontology_file: Optional[str] = None, matching_strategy: Optional[MatchingStrategy] = None): + super().__init__(matching_strategy) self.ontology_file = ontology_file try: if ontology_file and os.path.exists(ontology_file): @@ -94,13 +96,8 @@ class RDFLibOntologyResolver(BaseOntologyResolver): try: normalized_name = name.lower().replace(" ", "_").strip() possible_matches = list(self.lookup.get(category, {}).keys()) - if normalized_name in possible_matches: - return normalized_name - - best_match = difflib.get_close_matches( - normalized_name, possible_matches, n=1, cutoff=0.8 - ) - return best_match[0] if best_match else None + + return self.matching_strategy.find_match(normalized_name, possible_matches) except Exception as e: logger.error("Error in find_closest_match: %s", str(e)) raise FindClosestMatchError() from e diff --git a/cognee/tasks/graph/extract_graph_from_data.py b/cognee/tasks/graph/extract_graph_from_data.py index 2ad32f308..22cbc70fe 100644 --- a/cognee/tasks/graph/extract_graph_from_data.py +++ b/cognee/tasks/graph/extract_graph_from_data.py @@ -4,7 +4,7 @@ from pydantic import BaseModel from cognee.infrastructure.databases.graph import get_graph_engine from cognee.tasks.storage.add_data_points import add_data_points -from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver +from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.chunking.models.DocumentChunk import DocumentChunk from cognee.modules.graph.utils import ( expand_with_nodes_and_edges, diff --git a/cognee/tasks/graph/extract_graph_from_data_v2.py b/cognee/tasks/graph/extract_graph_from_data_v2.py index ce69f9b0e..d2b4924c7 100644 --- a/cognee/tasks/graph/extract_graph_from_data_v2.py +++ b/cognee/tasks/graph/extract_graph_from_data_v2.py @@ -3,7 +3,7 @@ from typing import List from cognee.modules.chunking.models.DocumentChunk 
import DocumentChunk from cognee.shared.data_models import KnowledgeGraph -from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver +from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.tasks.graph.cascade_extract.utils.extract_nodes import extract_nodes from cognee.tasks.graph.cascade_extract.utils.extract_content_nodes_and_relationship_names import ( extract_content_nodes_and_relationship_names, diff --git a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py index e0a6f1402..051cb3556 100644 --- a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py +++ b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py @@ -1,6 +1,6 @@ import pytest from rdflib import Graph, Namespace, RDF, OWL, RDFS -from cognee.modules.ontology.rdf_xml.OntologyResolver import RDFLibOntologyResolver +from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.ontology.models import AttachedOntologyNode From 00c3ba3a0ccbad28b203938c5d8a47eb7594b492 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 17 Sep 2025 12:24:39 +0200 Subject: [PATCH 06/37] ruff fix --- .../ontology/base_ontology_resolver.py | 10 ++++---- .../modules/ontology/matching_strategies.py | 24 +++++++++---------- .../rdf_xml/RDFLibOntologyResolver.py | 16 +++++++++---- 3 files changed, 28 insertions(+), 22 deletions(-) diff --git a/cognee/modules/ontology/base_ontology_resolver.py b/cognee/modules/ontology/base_ontology_resolver.py index 86f51fcb7..7005e6981 100644 --- a/cognee/modules/ontology/base_ontology_resolver.py +++ b/cognee/modules/ontology/base_ontology_resolver.py @@ -7,12 +7,12 @@ from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyM class BaseOntologyResolver(ABC): """Abstract base class for ontology resolvers.""" - + def 
__init__(self, matching_strategy: Optional[MatchingStrategy] = None): """Initialize the ontology resolver with a matching strategy. - + Args: - matching_strategy: The strategy to use for entity matching. + matching_strategy: The strategy to use for entity matching. Defaults to FuzzyMatchingStrategy if None. """ self.matching_strategy = matching_strategy or FuzzyMatchingStrategy() @@ -35,6 +35,8 @@ class BaseOntologyResolver(ABC): @abstractmethod def get_subgraph( self, node_name: str, node_type: str = "individuals", directed: bool = True - ) -> Tuple[List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]]: + ) -> Tuple[ + List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode] + ]: """Get a subgraph for the given node.""" pass diff --git a/cognee/modules/ontology/matching_strategies.py b/cognee/modules/ontology/matching_strategies.py index c576bf6e2..0e8ba7b96 100644 --- a/cognee/modules/ontology/matching_strategies.py +++ b/cognee/modules/ontology/matching_strategies.py @@ -5,15 +5,15 @@ from typing import List, Optional class MatchingStrategy(ABC): """Abstract base class for ontology entity matching strategies.""" - + @abstractmethod def find_match(self, name: str, candidates: List[str]) -> Optional[str]: """Find the best match for a given name from a list of candidates. - + Args: name: The name to match candidates: List of candidate names to match against - + Returns: The best matching candidate name, or None if no match found """ @@ -22,34 +22,32 @@ class MatchingStrategy(ABC): class FuzzyMatchingStrategy(MatchingStrategy): """Fuzzy matching strategy using difflib for approximate string matching.""" - + def __init__(self, cutoff: float = 0.8): """Initialize fuzzy matching strategy. 
- + Args: cutoff: Minimum similarity score (0.0 to 1.0) for a match to be considered valid """ self.cutoff = cutoff - + def find_match(self, name: str, candidates: List[str]) -> Optional[str]: """Find the closest fuzzy match for a given name. - + Args: name: The normalized name to match candidates: List of normalized candidate names - + Returns: The best matching candidate name, or None if no match meets the cutoff """ if not candidates: return None - + # Check for exact match first if name in candidates: return name - + # Find fuzzy match - best_match = difflib.get_close_matches( - name, candidates, n=1, cutoff=self.cutoff - ) + best_match = difflib.get_close_matches(name, candidates, n=1, cutoff=self.cutoff) return best_match[0] if best_match else None diff --git a/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py b/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py index d8de5794a..c6b3e22be 100644 --- a/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +++ b/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py @@ -19,12 +19,16 @@ logger = get_logger("OntologyAdapter") class RDFLibOntologyResolver(BaseOntologyResolver): """RDFLib-based ontology resolver implementation. - + This implementation uses RDFLib to parse and work with RDF/OWL ontology files. It provides fuzzy matching and subgraph extraction capabilities for ontology entities. 
""" - - def __init__(self, ontology_file: Optional[str] = None, matching_strategy: Optional[MatchingStrategy] = None): + + def __init__( + self, + ontology_file: Optional[str] = None, + matching_strategy: Optional[MatchingStrategy] = None, + ): super().__init__(matching_strategy) self.ontology_file = ontology_file try: @@ -96,7 +100,7 @@ class RDFLibOntologyResolver(BaseOntologyResolver): try: normalized_name = name.lower().replace(" ", "_").strip() possible_matches = list(self.lookup.get(category, {}).keys()) - + return self.matching_strategy.find_match(normalized_name, possible_matches) except Exception as e: logger.error("Error in find_closest_match: %s", str(e)) @@ -111,7 +115,9 @@ class RDFLibOntologyResolver(BaseOntologyResolver): def get_subgraph( self, node_name: str, node_type: str = "individuals", directed: bool = True - ) -> Tuple[List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]]: + ) -> Tuple[ + List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode] + ]: nodes_set = set() edges: List[Tuple[str, str, str]] = [] visited = set() From 631b2f37ce0b8bad90bc5cdb3bcdc7d35d4c9f0e Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 17 Sep 2025 12:28:44 +0200 Subject: [PATCH 07/37] fix: deletes old ontology resolver instance --- cognee/tasks/graph/extract_graph_from_data.py | 2 +- cognee/tasks/graph/extract_graph_from_data_v2.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cognee/tasks/graph/extract_graph_from_data.py b/cognee/tasks/graph/extract_graph_from_data.py index 22cbc70fe..1ae28ca89 100644 --- a/cognee/tasks/graph/extract_graph_from_data.py +++ b/cognee/tasks/graph/extract_graph_from_data.py @@ -102,5 +102,5 @@ async def extract_graph_from_data( ] return await integrate_chunk_graphs( - data_chunks, chunk_graphs, graph_model, ontology_adapter or OntologyResolver() + data_chunks, chunk_graphs, graph_model, 
ontology_adapter ) diff --git a/cognee/tasks/graph/extract_graph_from_data_v2.py b/cognee/tasks/graph/extract_graph_from_data_v2.py index d2b4924c7..5a4194fb1 100644 --- a/cognee/tasks/graph/extract_graph_from_data_v2.py +++ b/cognee/tasks/graph/extract_graph_from_data_v2.py @@ -44,5 +44,5 @@ async def extract_graph_from_data( data_chunks=data_chunks, chunk_graphs=chunk_graphs, graph_model=KnowledgeGraph, - ontology_adapter=ontology_adapter or OntologyResolver(), + ontology_adapter=ontology_adapter, ) From 75bef6e9299677f9e569f4ff2096b032578f7ae8 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 17 Sep 2025 12:30:19 +0200 Subject: [PATCH 08/37] ruff fix --- cognee/tasks/graph/extract_graph_from_data.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cognee/tasks/graph/extract_graph_from_data.py b/cognee/tasks/graph/extract_graph_from_data.py index 1ae28ca89..6681dd975 100644 --- a/cognee/tasks/graph/extract_graph_from_data.py +++ b/cognee/tasks/graph/extract_graph_from_data.py @@ -101,6 +101,4 @@ async def extract_graph_from_data( if edge.source_node_id in valid_node_ids and edge.target_node_id in valid_node_ids ] - return await integrate_chunk_graphs( - data_chunks, chunk_graphs, graph_model, ontology_adapter - ) + return await integrate_chunk_graphs(data_chunks, chunk_graphs, graph_model, ontology_adapter) From 6261fca0c4fe57ab9bfe6d66dc7a9c7e2550c608 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 17 Sep 2025 12:44:04 +0200 Subject: [PATCH 09/37] feat: adds default ontology resolver --- .../graph/utils/expand_with_nodes_and_edges.py | 3 ++- .../ontology/get_default_ontology_resolver.py | 18 ++++++++++++++++++ .../modules/ontology/test_ontology_adapter.py | 7 ++++--- 3 files changed, 24 insertions(+), 4 deletions(-) create mode 100644 cognee/modules/ontology/get_default_ontology_resolver.py diff --git 
a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py index bc6205d41..b3e8e8029 100644 --- a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py +++ b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py @@ -9,6 +9,7 @@ from cognee.modules.engine.utils import ( ) from cognee.shared.data_models import KnowledgeGraph from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver +from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver def _create_node_key(node_id: str, category: str) -> str: @@ -320,7 +321,7 @@ def expand_with_nodes_and_edges( existing_edges_map = {} if ontology_resolver is None: - ontology_resolver = RDFLibOntologyResolver() + ontology_resolver = get_default_ontology_resolver() added_nodes_map = {} added_ontology_nodes_map = {} diff --git a/cognee/modules/ontology/get_default_ontology_resolver.py b/cognee/modules/ontology/get_default_ontology_resolver.py new file mode 100644 index 000000000..9dc5c59ba --- /dev/null +++ b/cognee/modules/ontology/get_default_ontology_resolver.py @@ -0,0 +1,18 @@ +from typing import Optional + +from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver +from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver +from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy + + +def get_default_ontology_resolver(ontology_file: Optional[str] = None) -> BaseOntologyResolver: + """Get the default ontology resolver (RDFLib with fuzzy matching). 
+ + Args: + ontology_file: Optional path to ontology file + + Returns: + Default RDFLib ontology resolver with fuzzy matching strategy + """ + fuzzy_strategy = FuzzyMatchingStrategy() + return RDFLibOntologyResolver(ontology_file=ontology_file, matching_strategy=fuzzy_strategy) diff --git a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py index 051cb3556..401c6dc02 100644 --- a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py +++ b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py @@ -2,12 +2,13 @@ import pytest from rdflib import Graph, Namespace, RDF, OWL, RDFS from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.ontology.models import AttachedOntologyNode +from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver def test_ontology_adapter_initialization_success(): """Test successful initialization of OntologyAdapter.""" - adapter = RDFLibOntologyResolver() + adapter = get_default_ontology_resolver() adapter.build_lookup() assert isinstance(adapter.lookup, dict) @@ -106,7 +107,7 @@ def test_get_subgraph_no_match_rdflib(): """Test get_subgraph returns empty results for a non-existent node.""" g = Graph() - resolver = RDFLibOntologyResolver() + resolver = get_default_ontology_resolver() resolver.graph = g resolver.build_lookup() @@ -164,7 +165,7 @@ def test_refresh_lookup_rdflib(): """Test that refresh_lookup rebuilds the lookup dict into a new object.""" g = Graph() - resolver = RDFLibOntologyResolver() + resolver = get_default_ontology_resolver() resolver.graph = g resolver.build_lookup() From 7c046eafab20e8714ee985bb1cf9873c4e9ae3bf Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 17 Sep 2025 13:28:11 +0200 Subject: [PATCH 10/37] feat: adds get_ontology_resolver + typed dict to hold params --- 
.../utils/expand_with_nodes_and_edges.py | 5 +-- .../ontology/get_default_ontology_resolver.py | 18 ---------- .../modules/ontology/get_ontology_resolver.py | 36 +++++++++++++++++++ cognee/modules/ontology/ontology_config.py | 16 +++++++++ 4 files changed, 55 insertions(+), 20 deletions(-) delete mode 100644 cognee/modules/ontology/get_default_ontology_resolver.py create mode 100644 cognee/modules/ontology/get_ontology_resolver.py create mode 100644 cognee/modules/ontology/ontology_config.py diff --git a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py index b3e8e8029..e18860744 100644 --- a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py +++ b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py @@ -9,7 +9,7 @@ from cognee.modules.engine.utils import ( ) from cognee.shared.data_models import KnowledgeGraph from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver -from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver +from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver def _create_node_key(node_id: str, category: str) -> str: @@ -321,7 +321,8 @@ def expand_with_nodes_and_edges( existing_edges_map = {} if ontology_resolver is None: - ontology_resolver = get_default_ontology_resolver() + config = get_ontology_resolver() + ontology_resolver = config["resolver"] added_nodes_map = {} added_ontology_nodes_map = {} diff --git a/cognee/modules/ontology/get_default_ontology_resolver.py b/cognee/modules/ontology/get_default_ontology_resolver.py deleted file mode 100644 index 9dc5c59ba..000000000 --- a/cognee/modules/ontology/get_default_ontology_resolver.py +++ /dev/null @@ -1,18 +0,0 @@ -from typing import Optional - -from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver -from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver 
-from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy - - -def get_default_ontology_resolver(ontology_file: Optional[str] = None) -> BaseOntologyResolver: - """Get the default ontology resolver (RDFLib with fuzzy matching). - - Args: - ontology_file: Optional path to ontology file - - Returns: - Default RDFLib ontology resolver with fuzzy matching strategy - """ - fuzzy_strategy = FuzzyMatchingStrategy() - return RDFLibOntologyResolver(ontology_file=ontology_file, matching_strategy=fuzzy_strategy) diff --git a/cognee/modules/ontology/get_ontology_resolver.py b/cognee/modules/ontology/get_ontology_resolver.py new file mode 100644 index 000000000..01377c162 --- /dev/null +++ b/cognee/modules/ontology/get_ontology_resolver.py @@ -0,0 +1,36 @@ +from typing import Optional + +from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver +from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver +from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyMatchingStrategy +from cognee.modules.ontology.ontology_config import OntologyConfig + + +def get_ontology_resolver( + resolver: Optional[BaseOntologyResolver] = None, + matching_strategy: Optional[MatchingStrategy] = None, +) -> OntologyConfig: + """Get ontology resolver configuration with default or custom objects. 
+ + Args: + resolver: Optional pre-configured ontology resolver instance + matching_strategy: Optional matching strategy instance + + Returns: + Ontology configuration with default RDFLib resolver and fuzzy matching strategy, + or custom objects if provided + """ + config: OntologyConfig = {} + + if resolver is not None: + config["resolver"] = resolver + else: + default_strategy = matching_strategy or FuzzyMatchingStrategy() + config["resolver"] = RDFLibOntologyResolver( + ontology_file=None, matching_strategy=default_strategy + ) + + if matching_strategy is not None and resolver is None: + config["matching_strategy"] = matching_strategy + + return config diff --git a/cognee/modules/ontology/ontology_config.py b/cognee/modules/ontology/ontology_config.py new file mode 100644 index 000000000..e28da9f92 --- /dev/null +++ b/cognee/modules/ontology/ontology_config.py @@ -0,0 +1,16 @@ +from typing import TypedDict, Optional + +from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver +from cognee.modules.ontology.matching_strategies import MatchingStrategy + + +class OntologyConfig(TypedDict, total=False): + """Configuration for ontology resolver. 
+ + Attributes: + resolver: The ontology resolver instance to use + matching_strategy: The matching strategy to use + """ + + resolver: Optional[BaseOntologyResolver] + matching_strategy: Optional[MatchingStrategy] From 142d8068e12fc0380db4b596e3dc8af63dc1e88d Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 17 Sep 2025 13:30:20 +0200 Subject: [PATCH 11/37] chore: updates default empty ontology resolver tests --- .../unit/modules/ontology/test_ontology_adapter.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py index 401c6dc02..74383d5df 100644 --- a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py +++ b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py @@ -2,13 +2,14 @@ import pytest from rdflib import Graph, Namespace, RDF, OWL, RDFS from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.ontology.models import AttachedOntologyNode -from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver +from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver def test_ontology_adapter_initialization_success(): """Test successful initialization of OntologyAdapter.""" - adapter = get_default_ontology_resolver() + config = get_ontology_resolver() + adapter = config["resolver"] adapter.build_lookup() assert isinstance(adapter.lookup, dict) @@ -107,7 +108,8 @@ def test_get_subgraph_no_match_rdflib(): """Test get_subgraph returns empty results for a non-existent node.""" g = Graph() - resolver = get_default_ontology_resolver() + config = get_ontology_resolver() + resolver = config["resolver"] resolver.graph = g resolver.build_lookup() @@ -165,7 +167,8 @@ def test_refresh_lookup_rdflib(): """Test that refresh_lookup rebuilds the lookup dict into a new 
object.""" g = Graph() - resolver = get_default_ontology_resolver() + config = get_ontology_resolver() + resolver = config["resolver"] resolver.graph = g resolver.build_lookup() From e815a3fc140d79fb0f6987b7ef730b2a4cd437b6 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 17 Sep 2025 14:12:47 +0200 Subject: [PATCH 12/37] chore: changes ontology file path parameter to the new config structure --- cognee/api/v1/cognify/cognify.py | 14 +++++++++----- cognee/tasks/graph/extract_graph_from_data.py | 10 +++++++++- examples/python/ontology_demo_example.py | 9 ++++++++- examples/python/ontology_demo_example_2.py | 8 +++++++- 4 files changed, 33 insertions(+), 8 deletions(-) diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index e933bafd8..f90e487e0 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -10,7 +10,8 @@ from cognee.infrastructure.llm import get_max_chunk_tokens from cognee.modules.pipelines import run_pipeline from cognee.modules.pipelines.tasks.task import Task from cognee.modules.chunking.TextChunker import TextChunker -from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver +from cognee.modules.ontology.ontology_config import OntologyConfig +from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver from cognee.modules.users.models import User from cognee.tasks.documents import ( @@ -39,7 +40,7 @@ async def cognify( graph_model: BaseModel = KnowledgeGraph, chunker=TextChunker, chunk_size: int = None, - ontology_file_path: Optional[str] = None, + ontology_config: OntologyConfig = None, vector_db_config: dict = None, graph_db_config: dict = None, run_in_background: bool = False, @@ -188,11 +189,14 @@ async def cognify( - LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False) - LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60) """ + if ontology_config is None: + 
ontology_config = get_ontology_resolver() + if temporal_cognify: tasks = await get_temporal_tasks(user, chunker, chunk_size) else: tasks = await get_default_tasks( - user, graph_model, chunker, chunk_size, ontology_file_path, custom_prompt + user, graph_model, chunker, chunk_size, ontology_config, custom_prompt ) # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for @@ -216,7 +220,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's graph_model: BaseModel = KnowledgeGraph, chunker=TextChunker, chunk_size: int = None, - ontology_file_path: Optional[str] = None, + ontology_config: OntologyConfig = get_ontology_resolver(), custom_prompt: Optional[str] = None, ) -> list[Task]: default_tasks = [ @@ -230,7 +234,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's Task( extract_graph_from_data, graph_model=graph_model, - ontology_adapter=RDFLibOntologyResolver(ontology_file=ontology_file_path), + ontology_config=ontology_config, custom_prompt=custom_prompt, task_config={"batch_size": 10}, ), # Generate knowledge graphs from the document chunks. 
diff --git a/cognee/tasks/graph/extract_graph_from_data.py b/cognee/tasks/graph/extract_graph_from_data.py index 6681dd975..f0ef9c7f9 100644 --- a/cognee/tasks/graph/extract_graph_from_data.py +++ b/cognee/tasks/graph/extract_graph_from_data.py @@ -4,6 +4,8 @@ from pydantic import BaseModel from cognee.infrastructure.databases.graph import get_graph_engine from cognee.tasks.storage.add_data_points import add_data_points +from cognee.modules.ontology.ontology_config import OntologyConfig +from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver from cognee.modules.chunking.models.DocumentChunk import DocumentChunk from cognee.modules.graph.utils import ( @@ -70,7 +72,7 @@ async def integrate_chunk_graphs( async def extract_graph_from_data( data_chunks: List[DocumentChunk], graph_model: Type[BaseModel], - ontology_adapter: RDFLibOntologyResolver = None, + ontology_config: OntologyConfig = None, custom_prompt: Optional[str] = None, ) -> List[DocumentChunk]: """ @@ -101,4 +103,10 @@ async def extract_graph_from_data( if edge.source_node_id in valid_node_ids and edge.target_node_id in valid_node_ids ] + # Extract resolver from config if provided, otherwise get default + if ontology_config is None: + ontology_config = get_ontology_resolver() + + ontology_adapter = ontology_config["resolver"] + return await integrate_chunk_graphs(data_chunks, chunk_graphs, graph_model, ontology_adapter) diff --git a/examples/python/ontology_demo_example.py b/examples/python/ontology_demo_example.py index 8243faef5..ea1ab8b72 100644 --- a/examples/python/ontology_demo_example.py +++ b/examples/python/ontology_demo_example.py @@ -5,6 +5,8 @@ import cognee from cognee.api.v1.search import SearchType from cognee.api.v1.visualize.visualize import visualize_graph from cognee.shared.logging_utils import setup_logging +from cognee.modules.ontology.get_ontology_resolver import 
get_ontology_resolver +from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver text_1 = """ 1. Audi @@ -60,7 +62,12 @@ async def main(): os.path.dirname(os.path.abspath(__file__)), "ontology_input_example/basic_ontology.owl" ) - await cognee.cognify(ontology_file_path=ontology_path) + # Create ontology config with custom ontology file + ontology_config = get_ontology_resolver( + resolver=RDFLibOntologyResolver(ontology_file=ontology_path) + ) + + await cognee.cognify(ontology_config=ontology_config) print("Knowledge with ontology created.") # Step 4: Query insights diff --git a/examples/python/ontology_demo_example_2.py b/examples/python/ontology_demo_example_2.py index 22fb19862..e897da2e5 100644 --- a/examples/python/ontology_demo_example_2.py +++ b/examples/python/ontology_demo_example_2.py @@ -5,6 +5,8 @@ import os import textwrap from cognee.api.v1.search import SearchType from cognee.api.v1.visualize.visualize import visualize_graph +from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver +from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver async def run_pipeline(ontology_path=None): @@ -17,7 +19,11 @@ async def run_pipeline(ontology_path=None): await cognee.add(scientific_papers_dir) - pipeline_run = await cognee.cognify(ontology_file_path=ontology_path) + ontology_config = get_ontology_resolver( + resolver=RDFLibOntologyResolver(ontology_file=ontology_path) + ) + + pipeline_run = await cognee.cognify(ontology_config=ontology_config) return pipeline_run From d2c7980e8317d7a3af79a2b3bbcd3fb77c786bbf Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 17 Sep 2025 14:14:39 +0200 Subject: [PATCH 13/37] chore: updates mutable default param --- cognee/api/v1/cognify/cognify.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index 
f90e487e0..2cb844d12 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -220,9 +220,11 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's graph_model: BaseModel = KnowledgeGraph, chunker=TextChunker, chunk_size: int = None, - ontology_config: OntologyConfig = get_ontology_resolver(), + ontology_config: OntologyConfig = None, custom_prompt: Optional[str] = None, ) -> list[Task]: + if ontology_config is None: + ontology_config = get_ontology_resolver() default_tasks = [ Task(classify_documents), Task(check_permissions_on_dataset, user=user, permissions=["write"]), From f4c70cc315dbb73aa8a2463ed8a085119034d535 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 17 Sep 2025 14:39:43 +0200 Subject: [PATCH 14/37] feat: adds tests for the new logic + fixes the case when only matching is provided --- .../modules/ontology/get_ontology_resolver.py | 5 +- .../modules/ontology/test_ontology_adapter.py | 207 ++++++++++++++++++ 2 files changed, 209 insertions(+), 3 deletions(-) diff --git a/cognee/modules/ontology/get_ontology_resolver.py b/cognee/modules/ontology/get_ontology_resolver.py index 01377c162..d75928af9 100644 --- a/cognee/modules/ontology/get_ontology_resolver.py +++ b/cognee/modules/ontology/get_ontology_resolver.py @@ -24,13 +24,12 @@ def get_ontology_resolver( if resolver is not None: config["resolver"] = resolver + config["matching_strategy"] = matching_strategy or resolver.matching_strategy else: default_strategy = matching_strategy or FuzzyMatchingStrategy() config["resolver"] = RDFLibOntologyResolver( ontology_file=None, matching_strategy=default_strategy ) - - if matching_strategy is not None and resolver is None: - config["matching_strategy"] = matching_strategy + config["matching_strategy"] = default_strategy return config diff --git a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py 
b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py index 74383d5df..9b7eeeae0 100644 --- a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py +++ b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py @@ -177,3 +177,210 @@ def test_refresh_lookup_rdflib(): resolver.refresh_lookup() assert resolver.lookup is not original_lookup + + +def test_fuzzy_matching_strategy_exact_match(): + """Test FuzzyMatchingStrategy finds exact matches.""" + from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy + + strategy = FuzzyMatchingStrategy() + candidates = ["audi", "bmw", "mercedes"] + + result = strategy.find_match("audi", candidates) + assert result == "audi" + + +def test_fuzzy_matching_strategy_fuzzy_match(): + """Test FuzzyMatchingStrategy finds fuzzy matches.""" + from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy + + strategy = FuzzyMatchingStrategy(cutoff=0.6) + candidates = ["audi", "bmw", "mercedes"] + + result = strategy.find_match("audii", candidates) + assert result == "audi" + + +def test_fuzzy_matching_strategy_no_match(): + """Test FuzzyMatchingStrategy returns None when no match meets cutoff.""" + from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy + + strategy = FuzzyMatchingStrategy(cutoff=0.9) + candidates = ["audi", "bmw", "mercedes"] + + result = strategy.find_match("completely_different", candidates) + assert result is None + + +def test_fuzzy_matching_strategy_empty_candidates(): + """Test FuzzyMatchingStrategy handles empty candidates list.""" + from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy + + strategy = FuzzyMatchingStrategy() + + result = strategy.find_match("audi", []) + assert result is None + + +def test_base_ontology_resolver_initialization(): + """Test BaseOntologyResolver initialization with default matching strategy.""" + from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver + from 
cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy + + class TestOntologyResolver(BaseOntologyResolver): + def build_lookup(self): + pass + + def refresh_lookup(self): + pass + + def find_closest_match(self, name, category): + return None + + def get_subgraph(self, node_name, node_type="individuals", directed=True): + return [], [], None + + resolver = TestOntologyResolver() + assert isinstance(resolver.matching_strategy, FuzzyMatchingStrategy) + + +def test_base_ontology_resolver_custom_matching_strategy(): + """Test BaseOntologyResolver initialization with custom matching strategy.""" + from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver + from cognee.modules.ontology.matching_strategies import MatchingStrategy + + class CustomMatchingStrategy(MatchingStrategy): + def find_match(self, name, candidates): + return "custom_match" + + class TestOntologyResolver(BaseOntologyResolver): + def build_lookup(self): + pass + + def refresh_lookup(self): + pass + + def find_closest_match(self, name, category): + return None + + def get_subgraph(self, node_name, node_type="individuals", directed=True): + return [], [], None + + custom_strategy = CustomMatchingStrategy() + resolver = TestOntologyResolver(matching_strategy=custom_strategy) + assert resolver.matching_strategy == custom_strategy + + +def test_ontology_config_structure(): + """Test OntologyConfig TypedDict structure.""" + from cognee.modules.ontology.ontology_config import OntologyConfig + from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver + from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy + + resolver = RDFLibOntologyResolver() + matching_strategy = FuzzyMatchingStrategy() + + config: OntologyConfig = {"resolver": resolver, "matching_strategy": matching_strategy} + + assert config["resolver"] == resolver + assert config["matching_strategy"] == matching_strategy + + +def 
test_get_ontology_resolver_default(): + """Test get_ontology_resolver returns default configuration.""" + from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver + from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver + from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy + + config = get_ontology_resolver() + + assert isinstance(config["resolver"], RDFLibOntologyResolver) + assert isinstance(config["matching_strategy"], FuzzyMatchingStrategy) + assert config["resolver"].matching_strategy == config["matching_strategy"] + + +def test_get_ontology_resolver_custom_resolver(): + """Test get_ontology_resolver with custom resolver.""" + from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver + from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver + from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy + + custom_resolver = RDFLibOntologyResolver(ontology_file="test.owl") + config = get_ontology_resolver(resolver=custom_resolver) + + assert config["resolver"] == custom_resolver + assert config["matching_strategy"] == custom_resolver.matching_strategy + assert isinstance(config["matching_strategy"], FuzzyMatchingStrategy) + + +def test_get_ontology_resolver_custom_matching_strategy(): + """Test get_ontology_resolver with custom matching strategy.""" + from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver + from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver + from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy + + custom_strategy = FuzzyMatchingStrategy(cutoff=0.9) + config = get_ontology_resolver(matching_strategy=custom_strategy) + + assert isinstance(config["resolver"], RDFLibOntologyResolver) + assert config["matching_strategy"] == custom_strategy + assert config["resolver"].matching_strategy == custom_strategy + + 
+def test_get_ontology_resolver_both_custom(): + """Test get_ontology_resolver with both custom resolver and matching strategy.""" + from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver + from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver + from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy + + custom_resolver = RDFLibOntologyResolver(ontology_file="test.owl") + custom_strategy = FuzzyMatchingStrategy(cutoff=0.9) + config = get_ontology_resolver(resolver=custom_resolver, matching_strategy=custom_strategy) + + assert config["resolver"] == custom_resolver + assert config["matching_strategy"] == custom_strategy + + +def test_get_ontology_resolver_only_resolver_uses_resolver_strategy(): + """Test that when only resolver is passed, it uses the resolver's matching strategy.""" + from cognee.modules.ontology.get_ontology_resolver import get_ontology_resolver + from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver + from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy + + custom_strategy = FuzzyMatchingStrategy(cutoff=0.8) + custom_resolver = RDFLibOntologyResolver(matching_strategy=custom_strategy) + + config = get_ontology_resolver(resolver=custom_resolver) + + assert config["resolver"] == custom_resolver + assert config["matching_strategy"] == custom_strategy + assert config["matching_strategy"] == custom_resolver.matching_strategy + + +def test_rdflib_ontology_resolver_uses_matching_strategy(): + """Test that RDFLibOntologyResolver uses the provided matching strategy.""" + from cognee.modules.ontology.matching_strategies import MatchingStrategy + + class TestMatchingStrategy(MatchingStrategy): + def find_match(self, name, candidates): + return "test_match" if candidates else None + + ns = Namespace("http://example.org/test#") + g = Graph() + g.add((ns.Car, RDF.type, OWL.Class)) + g.add((ns.Audi, RDF.type, ns.Car)) + + 
resolver = RDFLibOntologyResolver(matching_strategy=TestMatchingStrategy()) + resolver.graph = g + resolver.build_lookup() + + result = resolver.find_closest_match("Audi", "individuals") + assert result == "test_match" + + +def test_rdflib_ontology_resolver_default_matching_strategy(): + """Test that RDFLibOntologyResolver uses FuzzyMatchingStrategy by default.""" + from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy + + resolver = RDFLibOntologyResolver() + assert isinstance(resolver.matching_strategy, FuzzyMatchingStrategy) From 46e3fca2280a37f6ecc675eaeebdf5b3f1b304f2 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 17 Sep 2025 16:11:09 +0200 Subject: [PATCH 15/37] Fix: fixes infinite loop introduced in combined search refactor (#1434) ## Description Fixes infinite loop introduced in combined search refactor ## Type of Change - [x] Bug fix (non-breaking change that fixes an issue) - [ ] New feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [ ] Code refactoring - [ ] Performance improvement - [ ] Other (please specify): ## Changes Made Fixes infinite loop introduced in combined search refactor ## Testing Manual ## Screenshots/Videos (if applicable) None ## Pre-submission Checklist - [x] **I have tested my changes thoroughly before submitting this PR** - [x] **This PR contains minimal changes necessary to address the issue/feature** - [x] My code follows the project's coding standards and style guidelines - [x] I have added tests that prove my fix is effective or that my feature works - [x] I have added necessary documentation (if applicable) - [x] All new and existing tests pass - [x] I have searched existing PRs to ensure this change hasn't been submitted already - [x] I have linked any relevant issues in the description - [x] My commits have clear and descriptive 
messages ## Related Issues https://github.com/topoteretes/cognee/pull/1341 ## Additional Notes None ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. --- cognee/modules/retrieval/temporal_retriever.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cognee/modules/retrieval/temporal_retriever.py b/cognee/modules/retrieval/temporal_retriever.py index c5fe2094c..f080ed18f 100644 --- a/cognee/modules/retrieval/temporal_retriever.py +++ b/cognee/modules/retrieval/temporal_retriever.py @@ -113,7 +113,7 @@ class TemporalRetriever(GraphCompletionRetriever): logger.info( "No timestamps identified based on the query, performing retrieval using triplet search on events and entities." ) - triplets = await self.get_context(query) + triplets = await self.get_triplets(query) return await self.resolve_edges_to_text(triplets) if ids: @@ -122,7 +122,7 @@ class TemporalRetriever(GraphCompletionRetriever): logger.info( "No events identified based on timestamp filtering, performing retrieval using triplet search on events and entities." ) - triplets = await self.get_context(query) + triplets = await self.get_triplets(query) return await self.resolve_edges_to_text(triplets) vector_engine = get_vector_engine() From 6f60ac76fd94582bb69b049271d0f9a011e2401f Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Thu, 18 Sep 2025 10:47:34 +0100 Subject: [PATCH 16/37] fix: Add S3 URL handling in ensure_absolute_path function (#1438) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary The `root_dir.py/ensure_absolute_path` validation in the `GraphConfig` model currently enforces that all paths start with `/`. This works for local file system paths but breaks when using S3 storage, since S3 paths do not begin with `/` and fail validation. 
## Fix This PR updates the `ensure_absolute_path` function to recognize S3 URLs (paths starting with `s3://`) and return them unchanged, since S3 URLs are absolute by definition. ## Logs before ``` (.venv) daulet@Mac cognee-claude % cognee-cli -ui 2025-09-18T01:30:39.768877 [info ] Deleted old log file: /Users/daulet/Desktop/dev/cognee-claude/logs/2025-09-18_02-15-14.log [cognee.shared.logging_utils] 2025-09-18T01:30:40.391407 [error ] Exception [cognee.shared.logging_utils] exception_message="1 validation error for GraphConfig\n Value error, Path must be absolute. Got relative path: s3://daulet-personal-dev/cognee/data [type=value_error, input_value={'data_root_directory': '...45daea63bc5392d85746fb'}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.11/v/value_error" traceback=True ``` ## Logs after ``` (.venv) daulet@Mac cognee-claude % cognee-cli -ui 2025-09-18T01:34:34.404642 [info ] Deleted old log file: /Users/daulet/Desktop/dev/cognee-claude/logs/2025-09-18_02-17-55.log [cognee.shared.logging_utils] 2025-09-18T01:34:35.026078 [info ] Logging initialized [cognee.shared.logging_utils] cognee_version=0.3.4.dev1-local database_path=s3://daulet-personal-dev/cognee/system/databases graph_database_name= os_info='Darwin 24.5.0 (Darwin Kernel Version 24.5.0: Tue Apr 22 19:54:43 PDT 2025; root:xnu-11417.121.6~2/RELEASE_ARM64_T8132)' python_version=3.10.11 relational_config=cognee_db structlog_version=25.4.0 vector_config=lancedb 2025-09-18T01:34:35.026223 [info ] Database storage: s3://daulet-personal-dev/cognee/system/databases [cognee.shared.logging_utils] Starting cognee UI... 2025-09-18T01:34:36.105617 [info ] Starting cognee UI... [cognee.shared.logging_utils] 2025-09-18T01:34:36.105756 [info ] Starting cognee backend API server... 
[cognee.shared.logging_utils] 2025-09-18T01:34:37.522194 [info ] Logging initialized [cognee.shared.logging_utils] cognee_version=0.3.4.dev1-local database_path=s3://daulet-personal-dev/cognee/system/databases graph_database_name= os_info='Darwin 24.5.0 (Darwin Kernel Version 24.5.0: Tue Apr 22 19:54:43 PDT 2025; root:xnu-11417.121.6~2/RELEASE_ARM64_T8132)' python_version=3.10.11 relational_config=cognee_db structlog_version=25.4.0 vector_config=lancedb 2025-09-18T01:34:37.522376 [info ] Database storage: s3://daulet-personal-dev/cognee/system/databases [cognee.shared.logging_utils] 2025-09-18T01:34:38.115247 [info ] ✓ Backend API started at http://localhost:8000 [cognee.shared.logging_utils] 2025-09-18T01:34:38.198637 [info ] Starting frontend server at http://localhost:3000 [cognee.shared.logging_utils] 2025-09-18T01:34:38.198879 [info ] This may take a moment to compile and start... [cognee.shared.logging_utils] INFO: Started server process [83608] INFO: Waiting for application startup. 2025-09-18T01:34:39.781430 [warning ] Kuzu S3 storage file not found: s3://daulet-personal-dev/cognee/system/databases/cognee_graph_kuzu [cognee.shared.logging_utils] 2025-09-18T01:34:39.802516 [info ] Loaded JSON extension [cognee.shared.logging_utils] 2025-09-18T01:34:40.197857 [info ] Deleted Kuzu database files at s3://daulet-personal-dev/cognee/system/databases/cognee_graph_kuzu [cognee.shared.logging_utils] 2025-09-18T01:34:41.211523 [info ] ✓ Cognee UI is starting up... [cognee.shared.logging_utils] 2025-09-18T01:34:41.212561 [info ] ✓ Open your browser to: http://localhost:3000 [cognee.shared.logging_utils] 2025-09-18T01:34:41.212814 [info ] ✓ The UI will be available once Next.js finishes compiling [cognee.shared.logging_utils] Success: UI server started successfully! The interface is available at: http://localhost:3000 The API backend is available at: http://localhost:8000 Note: Press Ctrl+C to stop the server... 
``` ## Description ## Type of Change - [ ] Bug fix (non-breaking change that fixes an issue) - [ ] New feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [ ] Code refactoring - [ ] Performance improvement - [ ] Other (please specify): ## Changes Made - - - ## Testing ## Screenshots/Videos (if applicable) ## Pre-submission Checklist - [ ] **I have tested my changes thoroughly before submitting this PR** - [ ] **This PR contains minimal changes necessary to address the issue/feature** - [ ] My code follows the project's coding standards and style guidelines - [ ] I have added tests that prove my fix is effective or that my feature works - [ ] I have added necessary documentation (if applicable) - [ ] All new and existing tests pass - [ ] I have searched existing PRs to ensure this change hasn't been submitted already - [ ] I have linked any relevant issues in the description - [ ] My commits have clear and descriptive messages ## Related Issues ## Additional Notes ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. 
--- cognee/root_dir.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cognee/root_dir.py b/cognee/root_dir.py index 46d8fcb69..b10f7507c 100644 --- a/cognee/root_dir.py +++ b/cognee/root_dir.py @@ -20,6 +20,11 @@ def ensure_absolute_path(path: str) -> str: """ if path is None: raise ValueError("Path cannot be None") + + # Check if it's an S3 URL - S3 URLs are absolute by definition + if path.startswith("s3://"): + return path + path_obj = Path(path).expanduser() if path_obj.is_absolute(): return str(path_obj.resolve()) From f567098743dad2ffb3dce188e301415a829696cb Mon Sep 17 00:00:00 2001 From: Boris Date: Thu, 18 Sep 2025 13:53:27 +0200 Subject: [PATCH 17/37] fix: UI fixes (#1435) ## Description ## Type of Change - [x] Bug fix (non-breaking change that fixes an issue) - [x] New feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [ ] Code refactoring - [ ] Performance improvement - [ ] Other (please specify): ## Changes Made - - - ## Testing ## Screenshots/Videos (if applicable) ## Pre-submission Checklist - [ ] **I have tested my changes thoroughly before submitting this PR** - [ ] **This PR contains minimal changes necessary to address the issue/feature** - [ ] My code follows the project's coding standards and style guidelines - [ ] I have added tests that prove my fix is effective or that my feature works - [ ] I have added necessary documentation (if applicable) - [ ] All new and existing tests pass - [ ] I have searched existing PRs to ensure this change hasn't been submitted already - [ ] I have linked any relevant issues in the description - [ ] My commits have clear and descriptive messages ## Related Issues ## Additional Notes ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. 
--- cognee-frontend/src/app/account/Account.tsx | 10 +- .../src/app/dashboard/AddDataToCognee.tsx | 37 +-- .../src/app/dashboard/Dashboard.tsx | 18 +- .../dashboard/InstanceDatasetsAccordion.tsx | 3 +- .../src/app/dashboard/NotebooksAccordion.tsx | 9 +- cognee-frontend/src/app/globals.css | 3 +- cognee-frontend/src/app/plan/Plan.tsx | 125 +++++----- cognee-frontend/src/ui/Layout/Header.tsx | 9 +- .../src/ui/elements/GhostButton.tsx | 2 +- .../src/ui/elements/Modal/useModal.ts | 13 +- .../src/ui/elements/Notebook/Notebook.tsx | 215 +++++++++++------- .../elements/Notebook/NotebookCellHeader.tsx | 38 ++-- cognee-frontend/src/utils/fetch.ts | 34 ++- .../src/utils/handleServerErrors.ts | 9 +- .../api/v1/cloud/routers/get_checks_router.py | 2 +- 15 files changed, 311 insertions(+), 216 deletions(-) diff --git a/cognee-frontend/src/app/account/Account.tsx b/cognee-frontend/src/app/account/Account.tsx index c5cca42b7..067f302f4 100644 --- a/cognee-frontend/src/app/account/Account.tsx +++ b/cognee-frontend/src/app/account/Account.tsx @@ -13,8 +13,8 @@ export default function Account() { }; return ( -
- */} -
+
@@ -42,7 +42,7 @@ export default function Account() {
Plan
You are using open-source version. Subscribe to get access to hosted cognee with your data!
- Select a plan + Select a plan
diff --git a/cognee-frontend/src/app/dashboard/AddDataToCognee.tsx b/cognee-frontend/src/app/dashboard/AddDataToCognee.tsx index 60e2a4204..e0296d9c6 100644 --- a/cognee-frontend/src/app/dashboard/AddDataToCognee.tsx +++ b/cognee-frontend/src/app/dashboard/AddDataToCognee.tsx @@ -2,7 +2,7 @@ import { FormEvent, useCallback, useState } from "react"; import { LoadingIndicator } from "@/ui/App"; import { useModal } from "@/ui/elements/Modal"; -import { CloseIcon, PlusIcon } from "@/ui/Icons"; +import { CloseIcon, MinusIcon, PlusIcon } from "@/ui/Icons"; import { CTAButton, GhostButton, IconButton, Modal, NeutralButton, Select } from "@/ui/elements"; import addData from "@/modules/ingestion/addData"; @@ -16,16 +16,22 @@ interface AddDataToCogneeProps { } export default function AddDataToCognee({ datasets, refreshDatasets, useCloud = false }: AddDataToCogneeProps) { - const [filesForUpload, setFilesForUpload] = useState(null); + const [filesForUpload, setFilesForUpload] = useState([]); - const prepareFiles = useCallback((event: FormEvent) => { + const addFiles = useCallback((event: FormEvent) => { const formElements = event.currentTarget; - const files = formElements.files; + const newFiles = formElements.files; - setFilesForUpload(files); + if (newFiles?.length) { + setFilesForUpload((oldFiles) => [...oldFiles, ...Array.from(newFiles)]); + } }, []); - const processDataWithCognee = useCallback((state: object, event?: FormEvent) => { + const removeFile = useCallback((file: File) => { + setFilesForUpload((oldFiles) => oldFiles.filter((f) => f !== file)); + }, []); + + const processDataWithCognee = useCallback((state?: object, event?: FormEvent) => { event!.preventDefault(); if (!filesForUpload) { @@ -41,7 +47,7 @@ export default function AddDataToCognee({ datasets, refreshDatasets, useCloud = } : { name: "main_dataset", }, - Array.from(filesForUpload), + filesForUpload, useCloud ) .then(({ dataset_id, dataset_name }) => { @@ -57,7 +63,7 @@ export default function 
AddDataToCognee({ datasets, refreshDatasets, useCloud = useCloud, ) .then(() => { - setFilesForUpload(null); + setFilesForUpload([]); }); }); }, [filesForUpload, refreshDatasets, useCloud]); @@ -86,24 +92,25 @@ export default function AddDataToCognee({ datasets, refreshDatasets, useCloud =
Please select a {useCloud ? "cloud" : "local"} dataset to add data in.
If you don't have any, don't worry, we will create one for you.
- {!datasets.length && } - {datasets.map((dataset: Dataset, index) => ( - + {datasets.map((dataset: Dataset) => ( + ))} - + select files - {filesForUpload?.length && ( + {!!filesForUpload.length && (
selected files:
- {Array.from(filesForUpload || []).map((file) => ( -
+ {filesForUpload.map((file) => ( +
{file.name} +
))}
diff --git a/cognee-frontend/src/app/dashboard/Dashboard.tsx b/cognee-frontend/src/app/dashboard/Dashboard.tsx index 6da587ca7..75e3d7518 100644 --- a/cognee-frontend/src/app/dashboard/Dashboard.tsx +++ b/cognee-frontend/src/app/dashboard/Dashboard.tsx @@ -4,7 +4,7 @@ import { useCallback, useEffect, useRef, useState } from "react"; import { Header } from "@/ui/Layout"; import { SearchIcon } from "@/ui/Icons"; -import { Notebook } from "@/ui/elements"; +import { CTAButton, Notebook } from "@/ui/elements"; import { fetch, isCloudEnvironment } from "@/utils"; import { Notebook as NotebookType } from "@/ui/elements/Notebook/types"; import { useAuthenticatedUser } from "@/modules/auth"; @@ -111,8 +111,8 @@ export default function Dashboard({ accessToken }: DashboardProps) { const isCloudEnv = isCloudEnvironment(); return ( -
- */}
-
-
+
+
@@ -152,6 +152,12 @@ export default function Dashboard({ accessToken }: DashboardProps) { />
+ +
diff --git a/cognee-frontend/src/app/dashboard/InstanceDatasetsAccordion.tsx b/cognee-frontend/src/app/dashboard/InstanceDatasetsAccordion.tsx index f83dd79a2..f094f7caf 100644 --- a/cognee-frontend/src/app/dashboard/InstanceDatasetsAccordion.tsx +++ b/cognee-frontend/src/app/dashboard/InstanceDatasetsAccordion.tsx @@ -36,7 +36,8 @@ export default function InstanceDatasetsAccordion({ onDatasetsChange }: Instance }; checkConnectionToLocalCognee(); - }, [setCloudCogneeConnected, setLocalCogneeConnected]); + checkConnectionToCloudCognee(); + }, [checkConnectionToCloudCognee, setCloudCogneeConnected, setLocalCogneeConnected]); const { value: isCloudConnectedModalOpen, diff --git a/cognee-frontend/src/app/dashboard/NotebooksAccordion.tsx b/cognee-frontend/src/app/dashboard/NotebooksAccordion.tsx index 6c4e569cc..ffb4d0fa8 100644 --- a/cognee-frontend/src/app/dashboard/NotebooksAccordion.tsx +++ b/cognee-frontend/src/app/dashboard/NotebooksAccordion.tsx @@ -5,8 +5,8 @@ import { useBoolean } from "@/utils"; import { Accordion, CTAButton, GhostButton, IconButton, Input, Modal } from "@/ui/elements"; import { CloseIcon, MinusIcon, NotebookIcon, PlusIcon } from "@/ui/Icons"; import { Notebook } from "@/ui/elements/Notebook/types"; -import { LoadingIndicator } from "@/ui/App"; import { useModal } from "@/ui/elements/Modal"; +import { LoadingIndicator } from "@/ui/App"; interface NotebooksAccordionProps { notebooks: Notebook[]; @@ -60,7 +60,7 @@ export default function NotebooksAccordion({ .finally(() => setNotebookToRemove(null)); }; - const handleNotebookAdd = useCallback((_: object, formEvent?: FormEvent) => { + const handleNotebookAdd = useCallback((_: Notebook, formEvent?: FormEvent) => { if (!formEvent) { return; } @@ -71,6 +71,7 @@ export default function NotebooksAccordion({ const notebookName = formElements.notebookName.value.trim(); return addNotebook(notebookName) + .then(() => {}); }, [addNotebook]); const { @@ -79,7 +80,7 @@ export default function 
NotebooksAccordion({ closeModal: closeNewNotebookModal, confirmAction: handleNewNotebookSubmit, isActionLoading: isNewDatasetLoading, - } = useModal(false, handleNotebookAdd); + } = useModal(false, handleNotebookAdd); return ( <> @@ -91,7 +92,7 @@ export default function NotebooksAccordion({ tools={isNewDatasetLoading ? ( ) : ( - + openNewNotebookModal()}> )} > {notebooks.length === 0 && ( diff --git a/cognee-frontend/src/app/globals.css b/cognee-frontend/src/app/globals.css index 1007b2fb7..b49bc7dcf 100644 --- a/cognee-frontend/src/app/globals.css +++ b/cognee-frontend/src/app/globals.css @@ -11,7 +11,7 @@ --global-color-primary-active: #500cc5 !important; --global-color-primary-text: white !important; --global-color-secondary: #0DFF00 !important; - --global-background-default: #0D051C; + --global-background-default: #F4F4F4; --textarea-default-color: #0D051C !important; } @@ -20,6 +20,7 @@ body { height: 100%; max-width: 100vw; overflow-x: hidden; + background-color: var(--global-background-default); } a { diff --git a/cognee-frontend/src/app/plan/Plan.tsx b/cognee-frontend/src/app/plan/Plan.tsx index 1fc3477ea..3fd181270 100644 --- a/cognee-frontend/src/app/plan/Plan.tsx +++ b/cognee-frontend/src/app/plan/Plan.tsx @@ -1,12 +1,17 @@ +"use client"; + import Link from "next/link"; import { BackIcon, CheckIcon } from "@/ui/Icons"; import { CTAButton, NeutralButton } from "@/ui/elements"; import Header from "@/ui/Layout/Header"; +import { useAuthenticatedUser } from "@/modules/auth"; export default function Plan() { + const { user } = useAuthenticatedUser(); + return ( -
- */} -
+
- - - back - +
+ + + back + + + {/* */} +
-
+
Affordable and transparent pricing
-
+
Basic
-
Free
+
Free
-
+
+
Most Popular
On-prem Subscription
-
$2470 /per month
-
Save 20% yearly
+
$2470 /per month
+
Save 20% yearly
-
+
Cloud Subscription
-
$25 /per month
-
(beta pricing)
+
$25 /per month
+
(beta pricing)
-
-
Everything in the free plan, plus...
+
+
Everything in the free plan, plus...
-
License to use Cognee open source
-
Cognee tasks and pipelines
-
Custom schema and ontology generation
-
Integrated evaluations
-
More than 28 data sources supported
+
License to use Cognee open source
+
Cognee tasks and pipelines
+
Custom schema and ontology generation
+
Integrated evaluations
+
More than 28 data sources supported
-
-
Everything in the free plan, plus...
-
-
License to use Cognee open source and Cognee Platform
-
1 day SLA
-
On-prem deployment
-
Hands-on support
-
Architecture review
-
Roadmap prioritization
-
Knowledge transfer
+
+
Everything in the free plan, plus...
+
+
License to use Cognee open source and Cognee Platform
+
1 day SLA
+
On-prem deployment
+
Hands-on support
+
Architecture review
+
Roadmap prioritization
+
Knowledge transfer
-
-
Everything in the free plan, plus...
-
-
Fully hosted cloud platform
-
Multi-tenant architecture
-
Comprehensive API endpoints
-
Automated scaling and parallel processing
-
Ability to group memories per user and domain
-
Automatic updates and priority support
-
1 GB ingestion + 10,000 API calls
+
+
Everything in the free plan, plus...
+
+
Fully hosted cloud platform
+
Multi-tenant architecture
+
Comprehensive API endpoints
+
Automated scaling and parallel processing
+
Ability to group memories per user and domain
+
Automatic updates and priority support
+
1 GB ingestion + 10,000 API calls
- Try for free + + Try for free +
- Talk to us + + Talk to us +
- Sign up for Cogwit Beta + + Sign up for Cogwit Beta +
@@ -106,7 +127,7 @@ export default function Plan() {
On-prem
Cloud
-
+
Data Sources
28+
28+
@@ -134,19 +155,19 @@ export default function Plan() {
-
+
Can I change my plan anytime?
Yes, you can upgrade or downgrade your plan at any time. Changes take effect immediately.
-
+
What happens to my data if I downgrade?
Your data is preserved, but features may be limited based on your new plan constraints.
-
+
Do you offer educational discounts?
Yes, we offer special pricing for educational institutions and students. Contact us for details.
-
+
Is there a free trial for paid plans?
All new accounts start with a 14-day free trial of our Pro plan features.
diff --git a/cognee-frontend/src/ui/Layout/Header.tsx b/cognee-frontend/src/ui/Layout/Header.tsx index 2f26433e9..7a1d2e906 100644 --- a/cognee-frontend/src/ui/Layout/Header.tsx +++ b/cognee-frontend/src/ui/Layout/Header.tsx @@ -32,20 +32,21 @@ export default function Header({ user }: HeaderProps) { return ( <> -
+
Cognee Local
- +
Sync
- - Premium + + Premium + API keys {/*
*/} diff --git a/cognee-frontend/src/ui/elements/GhostButton.tsx b/cognee-frontend/src/ui/elements/GhostButton.tsx index a27a0ff94..dc252decd 100644 --- a/cognee-frontend/src/ui/elements/GhostButton.tsx +++ b/cognee-frontend/src/ui/elements/GhostButton.tsx @@ -1,7 +1,7 @@ import classNames from "classnames"; import { ButtonHTMLAttributes } from "react"; -export default function CTAButton({ children, className, ...props }: ButtonHTMLAttributes) { +export default function GhostButton({ children, className, ...props }: ButtonHTMLAttributes) { return ( ); diff --git a/cognee-frontend/src/ui/elements/Modal/useModal.ts b/cognee-frontend/src/ui/elements/Modal/useModal.ts index 4947d32ca..d28333995 100644 --- a/cognee-frontend/src/ui/elements/Modal/useModal.ts +++ b/cognee-frontend/src/ui/elements/Modal/useModal.ts @@ -1,8 +1,8 @@ import { FormEvent, useCallback, useState } from "react"; import { useBoolean } from "@/utils"; -export default function useModal(initiallyOpen?: boolean, confirmCallback?: (state: object, event?: FormEvent) => Promise | ConfirmActionReturnType) { - const [modalState, setModalState] = useState({}); +export default function useModal>(initiallyOpen?: boolean, confirmCallback?: (state: ModalState, event?: ConfirmActionEvent) => Promise | void) { + const [modalState, setModalState] = useState(); const [isActionLoading, setLoading] = useState(false); const { @@ -11,7 +11,7 @@ export default function useModal(initiallyOpen?: setFalse: closeModalInternal, } = useBoolean(initiallyOpen || false); - const openModal = useCallback((state?: object) => { + const openModal = useCallback((state?: ModalState) => { if (state) { setModalState(state); } @@ -20,20 +20,21 @@ export default function useModal(initiallyOpen?: const closeModal = useCallback(() => { closeModalInternal(); - setModalState({}); + setModalState({} as ModalState); }, [closeModalInternal]); - const confirmAction = useCallback((event?: FormEvent) => { + const confirmAction = useCallback((event?: 
ConfirmActionEvent) => { if (confirmCallback) { setLoading(true); - const maybePromise = confirmCallback(modalState, event); + const maybePromise = confirmCallback(modalState as ModalState, event); if (maybePromise instanceof Promise) { return maybePromise .finally(closeModal) .finally(() => setLoading(false)); } else { + closeModal(); return maybePromise; // Not a promise. } } diff --git a/cognee-frontend/src/ui/elements/Notebook/Notebook.tsx b/cognee-frontend/src/ui/elements/Notebook/Notebook.tsx index 31c716b96..1401f95b4 100644 --- a/cognee-frontend/src/ui/elements/Notebook/Notebook.tsx +++ b/cognee-frontend/src/ui/elements/Notebook/Notebook.tsx @@ -2,10 +2,11 @@ import { v4 as uuid4 } from "uuid"; import classNames from "classnames"; -import { Fragment, MutableRefObject, useCallback, useEffect, useRef, useState } from "react"; +import { Fragment, MouseEvent, MutableRefObject, useCallback, useEffect, useRef, useState } from "react"; -import { CaretIcon, PlusIcon } from "@/ui/Icons"; -import { IconButton, PopupMenu, TextArea } from "@/ui/elements"; +import { useModal } from "@/ui/elements/Modal"; +import { CaretIcon, CloseIcon, PlusIcon } from "@/ui/Icons"; +import { IconButton, PopupMenu, TextArea, Modal, GhostButton, CTAButton } from "@/ui/elements"; import { GraphControlsAPI } from "@/app/(graph)/GraphControls"; import GraphVisualization, { GraphVisualizationAPI } from "@/app/(graph)/GraphVisualization"; @@ -60,13 +61,26 @@ export default function Notebook({ notebook, updateNotebook, runCell }: Notebook updateNotebook(newNotebook); }, [notebook, updateNotebook]); - const handleCellRemove = useCallback((cell: Cell) => { + const removeCell = useCallback((cell: Cell, event?: MouseEvent) => { + event?.preventDefault(); + updateNotebook({ ...notebook, cells: notebook.cells.filter((c: Cell) => c.id !== cell.id), }); }, [notebook, updateNotebook]); + const { + isModalOpen: isRemoveCellConfirmModalOpen, + openModal: openCellRemoveConfirmModal, + closeModal: 
closeCellRemoveConfirmModal, + confirmAction: handleCellRemoveConfirm, + } = useModal(false, removeCell); + + const handleCellRemove = useCallback((cell: Cell) => { + openCellRemoveConfirmModal(cell); + }, [openCellRemoveConfirmModal]); + const handleCellInputChange = useCallback((notebook: NotebookType, cell: Cell, value: string) => { const newCell = {...cell, content: value }; @@ -134,100 +148,133 @@ export default function Notebook({ notebook, updateNotebook, runCell }: Notebook }; return ( -
-
{notebook.name}
+ <> +
+
{notebook.name}
- {notebook.cells.map((cell: Cell, index) => ( - -
-
- {cell.type === "code" ? ( - <> -
- - - -
+ {notebook.cells.map((cell: Cell, index) => ( + +
+
+ {cell.type === "code" ? ( + <> +
+ + + +
- + - {openCells.has(cell.id) && ( - <> + {openCells.has(cell.id) && ( + <> +