From f3ce7be88588e1b5c126285b1cb39ca85dc234db Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 11 Dec 2024 14:31:54 +0100 Subject: [PATCH 01/16] feat: Add ability to send directories with data to cognee Add ability to send data directories to cognee Feature COG-656 --- cognee/tasks/ingestion/__init__.py | 1 + .../ingestion/resolve_data_directories.py | 61 +++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 cognee/tasks/ingestion/resolve_data_directories.py diff --git a/cognee/tasks/ingestion/__init__.py b/cognee/tasks/ingestion/__init__.py index f569267a1..8b873b273 100644 --- a/cognee/tasks/ingestion/__init__.py +++ b/cognee/tasks/ingestion/__init__.py @@ -3,3 +3,4 @@ from .save_data_to_storage import save_data_to_storage from .save_data_item_to_storage import save_data_item_to_storage from .save_data_item_with_metadata_to_storage import save_data_item_with_metadata_to_storage from .ingest_data_with_metadata import ingest_data_with_metadata +from .resolve_data_directories import resolve_data_directories diff --git a/cognee/tasks/ingestion/resolve_data_directories.py b/cognee/tasks/ingestion/resolve_data_directories.py new file mode 100644 index 000000000..5cde12642 --- /dev/null +++ b/cognee/tasks/ingestion/resolve_data_directories.py @@ -0,0 +1,61 @@ +# import os +# from typing import List, Union, BinaryIO +# +# def resolve_data_directories(data: Union[BinaryIO, List[BinaryIO], str, List[str]]): +# # We want to work with lists from now on +# if type(data) is not list: +# data = [data] +# +# # Check if data item in list is a directory +# for item in data: +# if type(item) is str: +# # If it's a directory add all files inside the directory to data list instead +# if os.path.isdir(item): +# pass + +import os +from typing import List, Union, BinaryIO + +def resolve_data_directories(data: Union[BinaryIO, List[BinaryIO], str, List[str]], include_subdirectories: bool = True): + """ + Resolves directories by replacing them with their contained files. + + Args: + data: A single file, directory, or binary stream, or a list of such items. + include_subdirectories: Whether to include files in subdirectories recursively. + + Returns: + A list of resolved files and binary streams. + """ + # Ensure `data` is a list + if not isinstance(data, list): + data = [data] + + resolved_data = [] + + for item in data: + if isinstance(item, str): # Check if the item is a path + if os.path.isdir(item): # If it's a directory + if include_subdirectories: + # Recursively add all files in the directory and subdirectories + for root, _, files in os.walk(item): + resolved_data.extend([os.path.join(root, f) for f in files]) + else: + # Add all files (not subdirectories) in the directory + resolved_data.extend( + [os.path.join(item, f) for f in os.listdir(item) if os.path.isfile(os.path.join(item, f))] + ) + elif os.path.isfile(item): # If it's a file, add it to the resolved_data list + resolved_data.append(item) + else: + raise ValueError(f"Path '{item}' is neither a file nor a directory.") + elif isinstance(item, BinaryIO): # If it's a binary stream, add it directly + resolved_data.append(item) + else: + raise TypeError(f"Unsupported type: {type(item)}. Expected str or BinaryIO.") + + return resolved_data + +# Example usage: +# files = resolve_data_directories(["/path/to/dir", "/path/to/file.txt"], include_subdirectories=True) +# print(files) From d4e2eb717afaa5d2bacbdef6c54a0d71166ad2e4 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 11 Dec 2024 16:04:31 +0100 Subject: [PATCH 02/16] fix: fix existing edge check Resolve issue with UUID concat by casting to string Fix COG-656 --- cognee/modules/graph/utils/retrieve_existing_edges.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/modules/graph/utils/retrieve_existing_edges.py b/cognee/modules/graph/utils/retrieve_existing_edges.py index 50b5f3d7c..0959e79d8 100644 --- a/cognee/modules/graph/utils/retrieve_existing_edges.py +++ b/cognee/modules/graph/utils/retrieve_existing_edges.py @@ -53,6 +53,6 @@ async def retrieve_existing_edges( existing_edges_map = {} for edge in existing_edges: - existing_edges_map[edge[0] + edge[1] + edge[2]] = True + existing_edges_map[str(edge[0]) + str(edge[1]) + edge[2]] = True return existing_edges_map From d9d90d91ae1fccabc9dd5556750c5cbcbe33e60c Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 11 Dec 2024 16:49:34 +0100 Subject: [PATCH 03/16] chore: Remove comments from code Remove code comments that are not needed Chore COG-656 --- .../ingestion/resolve_data_directories.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/cognee/tasks/ingestion/resolve_data_directories.py b/cognee/tasks/ingestion/resolve_data_directories.py index 5cde12642..4c7350c06 100644 --- a/cognee/tasks/ingestion/resolve_data_directories.py +++ b/cognee/tasks/ingestion/resolve_data_directories.py @@ -1,18 +1,3 @@ -# import os -# from typing import List, Union, BinaryIO -# -# def resolve_data_directories(data: Union[BinaryIO, List[BinaryIO], str, List[str]]): -# # We want to work with lists from now on -# if type(data) is not list: -# data = [data] -# -# # Check if data item in list is a directory -# for item in data: -# if type(item) is str: -# # If it's a directory add all files inside the directory to data list instead -# if os.path.isdir(item): -# pass - import os from typing import List, Union, BinaryIO @@ -55,7 +40,3 @@ def resolve_data_directories(data: Union[BinaryIO, List[BinaryIO], str, List[str raise TypeError(f"Unsupported type: {type(item)}. Expected str or BinaryIO.") return resolved_data - -# Example usage: -# files = resolve_data_directories(["/path/to/dir", "/path/to/file.txt"], include_subdirectories=True) -# print(files) From ff9fd90cf1bf4083925be991b22512e3bcf06474 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 11 Dec 2024 17:33:51 +0100 Subject: [PATCH 04/16] feat: Add directory resolution as step in cognee add function Added directory resolution as step in cognee add function Feature COG-656 --- cognee/api/v1/add/add_v2.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cognee/api/v1/add/add_v2.py b/cognee/api/v1/add/add_v2.py index 631d963e5..9d07f31e4 100644 --- a/cognee/api/v1/add/add_v2.py +++ b/cognee/api/v1/add/add_v2.py @@ -2,7 +2,7 @@ from typing import Union, BinaryIO from cognee.modules.users.models import User from cognee.modules.users.methods import get_default_user from cognee.modules.pipelines import run_tasks, Task -from cognee.tasks.ingestion import ingest_data_with_metadata +from cognee.tasks.ingestion import ingest_data_with_metadata, resolve_data_directories from cognee.infrastructure.databases.relational import create_db_and_tables as create_relational_db_and_tables from cognee.infrastructure.databases.vector.pgvector import create_db_and_tables as create_pgvector_db_and_tables @@ -13,6 +13,9 @@ async def add(data: Union[BinaryIO, list[BinaryIO], str, list[str]], dataset_nam if user is None: user = await get_default_user() + # Resolve all directories from data to files + data = resolve_data_directories(data) + tasks = [ Task(ingest_data_with_metadata, dataset_name, user) ] From 9b4af854742de51cce5db4c97c668c07411e09e7 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 12 Dec 2024 13:31:20 +0100 Subject: [PATCH 05/16] fix: Resolve issue with text being submitted as data Add support for text data to resolving data directory task Fix COG-656 --- cognee/tasks/ingestion/resolve_data_directories.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cognee/tasks/ingestion/resolve_data_directories.py b/cognee/tasks/ingestion/resolve_data_directories.py index 4c7350c06..52ab27111 100644 --- a/cognee/tasks/ingestion/resolve_data_directories.py +++ b/cognee/tasks/ingestion/resolve_data_directories.py @@ -32,8 +32,8 @@ def resolve_data_directories(data: Union[BinaryIO, List[BinaryIO], str, List[str ) elif os.path.isfile(item): # If it's a file, add it to the resolved_data list resolved_data.append(item) - else: - raise ValueError(f"Path '{item}' is neither a file nor a directory.") + else: # If it's just text add it directly + resolved_data.append(item) elif isinstance(item, BinaryIO): # If it's a binary stream, add it directly resolved_data.append(item) else: From 7100a4994a5dd2b3343c99a136c436f549eba5c8 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 12 Dec 2024 17:04:49 +0100 Subject: [PATCH 06/16] feat: Add resolving of directories as task for the add pipeline Add resolving of directories as task for the add pipeline Feature COG-656 --- cognee/api/v1/add/add_v2.py | 4 +--- cognee/tasks/ingestion/resolve_data_directories.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/cognee/api/v1/add/add_v2.py b/cognee/api/v1/add/add_v2.py index 9d07f31e4..637c4a187 100644 --- a/cognee/api/v1/add/add_v2.py +++ b/cognee/api/v1/add/add_v2.py @@ -13,10 +13,8 @@ async def add(data: Union[BinaryIO, list[BinaryIO], str, list[str]], dataset_nam if user is None: user = await get_default_user() - # Resolve all directories from data to files - data = resolve_data_directories(data) - tasks = [ + Task(resolve_data_directories), Task(ingest_data_with_metadata, dataset_name, user) ] diff --git a/cognee/tasks/ingestion/resolve_data_directories.py b/cognee/tasks/ingestion/resolve_data_directories.py index 52ab27111..599a9b342 100644 --- a/cognee/tasks/ingestion/resolve_data_directories.py +++ b/cognee/tasks/ingestion/resolve_data_directories.py @@ -1,7 +1,7 @@ import os from typing import List, Union, BinaryIO -def resolve_data_directories(data: Union[BinaryIO, List[BinaryIO], str, List[str]], include_subdirectories: bool = True): +async def resolve_data_directories(data: Union[BinaryIO, List[BinaryIO], str, List[str]], include_subdirectories: bool = True): """ Resolves directories by replacing them with their contained files. From 92d0122b467d3d11538246ed6d8652bd238f4ae8 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 13 Dec 2024 09:55:47 +0100 Subject: [PATCH 07/16] fix: Remove data handling based on type in resolving directory function No need to handle different data types in resolving directories, focus on just handling case when it's a directory Fix COG-656 --- cognee/tasks/ingestion/resolve_data_directories.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/cognee/tasks/ingestion/resolve_data_directories.py b/cognee/tasks/ingestion/resolve_data_directories.py index 599a9b342..980756805 100644 --- a/cognee/tasks/ingestion/resolve_data_directories.py +++ b/cognee/tasks/ingestion/resolve_data_directories.py @@ -30,13 +30,8 @@ async def resolve_data_directories(data: Union[BinaryIO, List[BinaryIO], str, Li resolved_data.extend( [os.path.join(item, f) for f in os.listdir(item) if os.path.isfile(os.path.join(item, f))] ) - elif os.path.isfile(item): # If it's a file, add it to the resolved_data list + else: # If it's a file or text add it directly resolved_data.append(item) - else: # If it's just text add it directly - resolved_data.append(item) - elif isinstance(item, BinaryIO): # If it's a binary stream, add it directly + else: # If it's not a string add it directly resolved_data.append(item) - else: - raise TypeError(f"Unsupported type: {type(item)}. Expected str or BinaryIO.") - return resolved_data From eddfc17861361e394d6c758906700a4226030963 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 13 Dec 2024 12:13:42 +0100 Subject: [PATCH 08/16] fix: Rewrite endpoint to add users to groups Rewrote endpoint which adds users to groups Fix COG-656 --- .../routers/get_permissions_router.py | 21 ++++++++++++------- cognee/modules/users/models/__init__.py | 1 + 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/cognee/api/v1/permissions/routers/get_permissions_router.py b/cognee/api/v1/permissions/routers/get_permissions_router.py index 8d012d600..77ae5e4e6 100644 --- a/cognee/api/v1/permissions/routers/get_permissions_router.py +++ b/cognee/api/v1/permissions/routers/get_permissions_router.py @@ -1,16 +1,18 @@ -from fastapi import APIRouter, Depends, HTTPException +from fastapi import APIRouter, Depends from fastapi.responses import JSONResponse from sqlalchemy.orm import Session +from sqlalchemy.future import select +from sqlalchemy import insert from cognee.modules.users.exceptions import UserNotFoundError, GroupNotFoundError from cognee.modules.users import get_user_db -from cognee.modules.users.models import User, Group, Permission +from cognee.modules.users.models import User, Group, Permission, UserGroup def get_permissions_router() -> APIRouter: permissions_router = APIRouter() @permissions_router.post("/groups/{group_id}/permissions") - async def give_permission_to_group(group_id: int, permission: str, db: Session = Depends(get_user_db)): + async def give_permission_to_group(group_id: str, permission: str, db: Session = Depends(get_user_db)): group = db.query(Group).filter(Group.id == group_id).first() if not group: @@ -29,18 +31,21 @@ def get_permissions_router() -> APIRouter: return JSONResponse(status_code = 200, content = {"message": "Permission assigned to group"}) @permissions_router.post("/users/{user_id}/groups") - async def add_user_to_group(user_id: int, group_id: int, db: Session = Depends(get_user_db)): - user = db.query(User).filter(User.id == user_id).first() - group = db.query(Group).filter(Group.id == group_id).first() + async def add_user_to_group(user_id: str, group_id: str, db: Session = Depends(get_user_db)): + user = (await db.session.execute(select(User).where(User.id == user_id))).scalars().first() + group = (await db.session.execute(select(Group).where(Group.id == group_id))).scalars().first() if not user: raise UserNotFoundError elif not group: raise GroupNotFoundError - user.groups.append(group) + # Add association directly to the association table + stmt = insert(UserGroup).values(user_id=user_id, group_id=group_id) + await db.session.execute(stmt) + #user.groups.append(group) - db.commit() + await db.session.commit() return JSONResponse(status_code = 200, content = {"message": "User added to group"}) diff --git a/cognee/modules/users/models/__init__.py b/cognee/modules/users/models/__init__.py index 7dc1bf8ca..3d359da76 100644 --- a/cognee/modules/users/models/__init__.py +++ b/cognee/modules/users/models/__init__.py @@ -1,5 +1,6 @@ from .User import User from .Group import Group +from .UserGroup import UserGroup from .Resource import Resource from .Permission import Permission from .ACL import ACL From b8ba436dba2a17d17ab166ac6916a7f3d4758869 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 13 Dec 2024 12:37:01 +0100 Subject: [PATCH 09/16] fix: Resolve issue with adding permissions to groups Resolve issue with adding permissions to groups Fix COG-656 --- .../routers/get_permissions_router.py | 22 +++++++++++-------- .../modules/users/models/GroupPermission.py | 11 ++++++++++ cognee/modules/users/models/__init__.py | 1 + 3 files changed, 25 insertions(+), 9 deletions(-) create mode 100644 cognee/modules/users/models/GroupPermission.py diff --git a/cognee/api/v1/permissions/routers/get_permissions_router.py b/cognee/api/v1/permissions/routers/get_permissions_router.py index 77ae5e4e6..2999b8a27 100644 --- a/cognee/api/v1/permissions/routers/get_permissions_router.py +++ b/cognee/api/v1/permissions/routers/get_permissions_router.py @@ -6,27 +6,32 @@ from sqlalchemy import insert from cognee.modules.users.exceptions import UserNotFoundError, GroupNotFoundError from cognee.modules.users import get_user_db -from cognee.modules.users.models import User, Group, Permission, UserGroup +from cognee.modules.users.models import User, Group, Permission, UserGroup, GroupPermission def get_permissions_router() -> APIRouter: permissions_router = APIRouter() @permissions_router.post("/groups/{group_id}/permissions") async def give_permission_to_group(group_id: str, permission: str, db: Session = Depends(get_user_db)): - group = db.query(Group).filter(Group.id == group_id).first() + group = (await db.session.execute(select(Group).where(Group.id == group_id))).scalars().first() if not group: raise GroupNotFoundError - permission = db.query(Permission).filter(Permission.name == permission).first() + permission_entity = ( + await db.session.execute(select(Permission).where(Permission.name == permission))).scalars().first() - if not permission: - permission = Permission(name = permission) - db.add(permission) + if not permission_entity: + stmt = insert(Permission).values(name=permission) + ret_val = await db.session.execute(stmt) - group.permissions.append(permission) + permission_entity = ( + await db.session.execute(select(Permission).where(Permission.name == permission))).scalars().first() - db.commit() + # add permission to group + await db.session.execute(insert(GroupPermission).values(group_id=group.id, permission_id=permission_entity.id)) + + await db.session.commit() return JSONResponse(status_code = 200, content = {"message": "Permission assigned to group"}) @@ -43,7 +48,6 @@ def get_permissions_router() -> APIRouter: # Add association directly to the association table stmt = insert(UserGroup).values(user_id=user_id, group_id=group_id) await db.session.execute(stmt) - #user.groups.append(group) await db.session.commit() diff --git a/cognee/modules/users/models/GroupPermission.py b/cognee/modules/users/models/GroupPermission.py new file mode 100644 index 000000000..eaf3630b4 --- /dev/null +++ b/cognee/modules/users/models/GroupPermission.py @@ -0,0 +1,11 @@ +from datetime import datetime, timezone +from sqlalchemy import Column, ForeignKey, DateTime, UUID +from cognee.infrastructure.databases.relational import Base + +class GroupPermission(Base): + __tablename__ = "group_permissions" + + created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc)) + + group_id = Column(UUID, ForeignKey("groups.id"), primary_key = True) + permission_id = Column(UUID, ForeignKey("permissions.id"), primary_key = True) diff --git a/cognee/modules/users/models/__init__.py b/cognee/modules/users/models/__init__.py index 3d359da76..a713798d5 100644 --- a/cognee/modules/users/models/__init__.py +++ b/cognee/modules/users/models/__init__.py @@ -1,6 +1,7 @@ from .User import User from .Group import Group from .UserGroup import UserGroup +from .GroupPermission import GroupPermission from .Resource import Resource from .Permission import Permission from .ACL import ACL From 11808394699a813c495f5cd798e42d30bc453e02 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 13 Dec 2024 12:49:57 +0100 Subject: [PATCH 10/16] feat: Add error handling in case user is already part of database and permission already given to group Added error handling in case permission is already given to group and user is already part of group Feature COG-656 --- .../routers/get_permissions_router.py | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/cognee/api/v1/permissions/routers/get_permissions_router.py b/cognee/api/v1/permissions/routers/get_permissions_router.py index 2999b8a27..2b30f62fd 100644 --- a/cognee/api/v1/permissions/routers/get_permissions_router.py +++ b/cognee/api/v1/permissions/routers/get_permissions_router.py @@ -3,7 +3,9 @@ from fastapi.responses import JSONResponse from sqlalchemy.orm import Session from sqlalchemy.future import select from sqlalchemy import insert +from sqlalchemy.exc import IntegrityError +from cognee.infrastructure.databases.exceptions import EntityAlreadyExistsError from cognee.modules.users.exceptions import UserNotFoundError, GroupNotFoundError from cognee.modules.users import get_user_db from cognee.modules.users.models import User, Group, Permission, UserGroup, GroupPermission @@ -23,13 +25,16 @@ def get_permissions_router() -> APIRouter: if not permission_entity: stmt = insert(Permission).values(name=permission) - ret_val = await db.session.execute(stmt) + await db.session.execute(stmt) + permission_entity = ( + await db.session.execute(select(Permission).where(Permission.name == permission))).scalars().first() - permission_entity = ( - await db.session.execute(select(Permission).where(Permission.name == permission))).scalars().first() - - # add permission to group - await db.session.execute(insert(GroupPermission).values(group_id=group.id, permission_id=permission_entity.id)) + try: + # add permission to group + await db.session.execute( + insert(GroupPermission).values(group_id=group.id, permission_id=permission_entity.id)) + except IntegrityError as e: + raise EntityAlreadyExistsError(message="Group permission already exists.") await db.session.commit() @@ -45,9 +50,12 @@ def get_permissions_router() -> APIRouter: elif not group: raise GroupNotFoundError - # Add association directly to the association table - stmt = insert(UserGroup).values(user_id=user_id, group_id=group_id) - await db.session.execute(stmt) + try: + # Add association directly to the association table + stmt = insert(UserGroup).values(user_id=user_id, group_id=group_id) + await db.session.execute(stmt) + except IntegrityError as e: + raise EntityAlreadyExistsError(message="User is already part of group.") await db.session.commit() From 43187e4d63f9d1604ca439a346401faad57b1e27 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 13 Dec 2024 13:54:45 +0100 Subject: [PATCH 11/16] feat: Add user verification for accessing data Verify user has access to data before returning it Feature COG-656 --- cognee/api/v1/datasets/routers/get_datasets_router.py | 8 ++++++-- cognee/modules/data/methods/get_data.py | 6 +++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/cognee/api/v1/datasets/routers/get_datasets_router.py b/cognee/api/v1/datasets/routers/get_datasets_router.py index 31e3fa67d..1ba96a232 100644 --- a/cognee/api/v1/datasets/routers/get_datasets_router.py +++ b/cognee/api/v1/datasets/routers/get_datasets_router.py @@ -76,7 +76,7 @@ def get_datasets_router() -> APIRouter: message=f"Dataset ({dataset_id}) not found." ) - data = await get_data(data_id) + data = await get_data(user.id, data_id) if data is None: raise EntityNotFoundError( @@ -141,6 +141,7 @@ def get_datasets_router() -> APIRouter: @router.get("/{dataset_id}/data/{data_id}/raw", response_class=FileResponse) async def get_raw_data(dataset_id: str, data_id: str, user: User = Depends(get_authenticated_user)): + from cognee.modules.data.methods import get_data from cognee.modules.data.methods import get_dataset, get_dataset_data dataset = await get_dataset(user.id, dataset_id) @@ -164,7 +165,10 @@ def get_datasets_router() -> APIRouter: if len(matching_data) == 0: raise EntityNotFoundError(message= f"Data ({data_id}) not found in dataset ({dataset_id}).") - data = matching_data[0] + data = await get_data(user.id, data_id) + + if data is None: + raise EntityNotFoundError(message=f"Data ({data_id}) not found in dataset ({dataset_id}).") return data.raw_data_location diff --git a/cognee/modules/data/methods/get_data.py b/cognee/modules/data/methods/get_data.py index b07401463..9161e3df2 100644 --- a/cognee/modules/data/methods/get_data.py +++ b/cognee/modules/data/methods/get_data.py @@ -3,10 +3,11 @@ from typing import Optional from cognee.infrastructure.databases.relational import get_relational_engine from ..models import Data -async def get_data(data_id: UUID) -> Optional[Data]: +async def get_data(user_id: UUID, data_id: UUID) -> Optional[Data]: """Retrieve data by ID. Args: + user_id (UUID): user ID data_id (UUID): ID of the data to retrieve Returns: @@ -17,4 +18,7 @@ async def get_data(data_id: UUID) -> Optional[Data]: async with db_engine.get_async_session() as session: data = await session.get(Data, data_id) + if data and data.owner_id != user_id: + return None + return data \ No newline at end of file From 9c3e2422f305a7da481b09b1bdd6756caa99d168 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 13 Dec 2024 15:18:33 +0100 Subject: [PATCH 12/16] feat: Add compute search to cognee Add compute search to cognee which makes searches human readable Feature COG-656 --- cognee/api/v1/search/search_v2.py | 3 ++ cognee/tasks/compute/__init__.py | 1 + cognee/tasks/compute/exceptions/__init__.py | 9 +++++ cognee/tasks/compute/exceptions/exceptions.py | 11 ++++++ cognee/tasks/compute/query_compute.py | 36 +++++++++++++++++++ 5 files changed, 60 insertions(+) create mode 100644 cognee/tasks/compute/__init__.py create mode 100644 cognee/tasks/compute/exceptions/__init__.py create mode 100644 cognee/tasks/compute/exceptions/exceptions.py create mode 100644 cognee/tasks/compute/query_compute.py diff --git a/cognee/api/v1/search/search_v2.py b/cognee/api/v1/search/search_v2.py index d77aa5fa8..516c7b11d 100644 --- a/cognee/api/v1/search/search_v2.py +++ b/cognee/api/v1/search/search_v2.py @@ -14,11 +14,13 @@ from cognee.modules.users.permissions.methods import get_document_ids_for_user from cognee.tasks.chunks import query_chunks from cognee.tasks.graph import query_graph_connections from cognee.tasks.summarization import query_summaries +from cognee.tasks.compute import query_compute class SearchType(Enum): SUMMARIES = "SUMMARIES" INSIGHTS = "INSIGHTS" CHUNKS = "CHUNKS" + COMPUTE = "COMPUTE" async def search(query_type: SearchType, query_text: str, user: User = None) -> list: if user is None: @@ -50,6 +52,7 @@ async def specific_search(query_type: SearchType, query: str, user) -> list: SearchType.SUMMARIES: query_summaries, SearchType.INSIGHTS: query_graph_connections, SearchType.CHUNKS: query_chunks, + SearchType.COMPUTE: query_compute, } search_task = search_tasks.get(query_type) diff --git a/cognee/tasks/compute/__init__.py b/cognee/tasks/compute/__init__.py new file mode 100644 index 000000000..7f6ed58fd --- /dev/null +++ b/cognee/tasks/compute/__init__.py @@ -0,0 +1 @@ +from .query_compute import query_compute \ No newline at end of file diff --git a/cognee/tasks/compute/exceptions/__init__.py b/cognee/tasks/compute/exceptions/__init__.py new file mode 100644 index 000000000..5f80e6ecc --- /dev/null +++ b/cognee/tasks/compute/exceptions/__init__.py @@ -0,0 +1,9 @@ +""" +Custom exceptions for the Cognee API. + +This module defines a set of exceptions for handling various compute errors +""" + +from .exceptions import ( + NoRelevantDataFound, +) \ No newline at end of file diff --git a/cognee/tasks/compute/exceptions/exceptions.py b/cognee/tasks/compute/exceptions/exceptions.py new file mode 100644 index 000000000..9b64c01a6 --- /dev/null +++ b/cognee/tasks/compute/exceptions/exceptions.py @@ -0,0 +1,11 @@ +from cognee.exceptions import CogneeApiError +from fastapi import status + +class NoRelevantDataFound(CogneeApiError): + def __init__( + self, + message: str = "Search did not find any data.", + name: str = "NoRelevantDataFound", + status_code=status.HTTP_404_NOT_FOUND, + ): + super().__init__(message, name, status_code) \ No newline at end of file diff --git a/cognee/tasks/compute/query_compute.py b/cognee/tasks/compute/query_compute.py new file mode 100644 index 000000000..1686701fe --- /dev/null +++ b/cognee/tasks/compute/query_compute.py @@ -0,0 +1,36 @@ +from cognee.infrastructure.databases.vector import get_vector_engine +from cognee.tasks.compute.exceptions import NoRelevantDataFound +from cognee.infrastructure.llm.get_llm_client import get_llm_client +from cognee.infrastructure.llm.prompts import read_query_prompt, render_prompt + + +async def query_compute(query: str) -> list: + """ + Parameters: + - query (str): The query string to compute. + + Returns: + - list: Answer to the query. + """ + vector_engine = get_vector_engine() + + found_chunks = await vector_engine.search("document_chunk_text", query, limit = 1) + + if len(found_chunks) == 0: + raise NoRelevantDataFound + + args = { + "question": query, + "context": found_chunks[0].payload["text"], + } + user_prompt = render_prompt("context_for_question.txt", args) + system_prompt = read_query_prompt("answer_hotpot_using_cognee_search.txt") + + llm_client = get_llm_client() + computed_answer = await llm_client.acreate_structured_output( + text_input=user_prompt, + system_prompt=system_prompt, + response_model=str, + ) + + return [computed_answer] From 67585d0ab1eee7151547d092671469d06d1041b2 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 13 Dec 2024 15:30:24 +0100 Subject: [PATCH 13/16] feat: Add simple instruction for system prompt Add simple instruction for system prompt Feature COG-656 --- cognee/infrastructure/llm/prompts/answer_simple_question.txt | 1 + cognee/tasks/compute/query_compute.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 cognee/infrastructure/llm/prompts/answer_simple_question.txt diff --git a/cognee/infrastructure/llm/prompts/answer_simple_question.txt b/cognee/infrastructure/llm/prompts/answer_simple_question.txt new file mode 100644 index 000000000..351e1e5e9 --- /dev/null +++ b/cognee/infrastructure/llm/prompts/answer_simple_question.txt @@ -0,0 +1 @@ +Answer the question using the provided context. Be as brief as possible. \ No newline at end of file diff --git a/cognee/tasks/compute/query_compute.py b/cognee/tasks/compute/query_compute.py index 1686701fe..cd606ddc5 100644 --- a/cognee/tasks/compute/query_compute.py +++ b/cognee/tasks/compute/query_compute.py @@ -24,7 +24,7 @@ async def query_compute(query: str) -> list: "context": found_chunks[0].payload["text"], } user_prompt = render_prompt("context_for_question.txt", args) - system_prompt = read_query_prompt("answer_hotpot_using_cognee_search.txt") + system_prompt = read_query_prompt("answer_simple_question.txt") llm_client = get_llm_client() computed_answer = await llm_client.acreate_structured_output( From 11634cb58d495cd8ead5caabc27beab6958ff1dd Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 13 Dec 2024 16:54:53 +0100 Subject: [PATCH 14/16] feat: Add unauth access error to getting data Raise unauth access error when trying to read data without access Feature COG-656 --- cognee/modules/data/exceptions/__init__.py | 1 + cognee/modules/data/exceptions/exceptions.py | 9 +++++++++ cognee/modules/data/methods/get_data.py | 3 ++- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/cognee/modules/data/exceptions/__init__.py b/cognee/modules/data/exceptions/__init__.py index fa8468c88..6f74c627e 100644 --- a/cognee/modules/data/exceptions/__init__.py +++ b/cognee/modules/data/exceptions/__init__.py @@ -6,4 +6,5 @@ This module defines a set of exceptions for handling various data errors from .exceptions import ( UnstructuredLibraryImportError, + UnauthorizedDataAccessError, ) \ No newline at end of file diff --git a/cognee/modules/data/exceptions/exceptions.py b/cognee/modules/data/exceptions/exceptions.py index 3b1aac52c..7d7bbd632 100644 --- a/cognee/modules/data/exceptions/exceptions.py +++ b/cognee/modules/data/exceptions/exceptions.py @@ -7,5 +7,14 @@ class UnstructuredLibraryImportError(CogneeApiError): message: str = "Import error. Unstructured library is not installed.", name: str = "UnstructuredModuleImportError", status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + ): + super().__init__(message, name, status_code) + +class UnauthorizedDataAccessError(CogneeApiError): + def __init__( + self, + message: str = "Usesr does not have permission to access this data.", + name: str = "UnauthorizedDataAccessError", + status_code=status.HTTP_401_UNAUTHORIZED, ): super().__init__(message, name, status_code) \ No newline at end of file diff --git a/cognee/modules/data/methods/get_data.py b/cognee/modules/data/methods/get_data.py index 9161e3df2..d7daff29b 100644 --- a/cognee/modules/data/methods/get_data.py +++ b/cognee/modules/data/methods/get_data.py @@ -1,6 +1,7 @@ from uuid import UUID from typing import Optional from cognee.infrastructure.databases.relational import get_relational_engine +from ..exceptions import UnauthorizedDataAccessError from ..models import Data async def get_data(user_id: UUID, data_id: UUID) -> Optional[Data]: @@ -19,6 +20,6 @@ async def get_data(user_id: UUID, data_id: UUID) -> Optional[Data]: data = await session.get(Data, data_id) if data and data.owner_id != user_id: - return None + raise UnauthorizedDataAccessError(message=f"User {user_id} is not authorized to access data {data_id}") return data \ No newline at end of file From 924759a599e7be2f26977a1cfd95d87ac121092d Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 13 Dec 2024 17:03:38 +0100 Subject: [PATCH 15/16] refactor: Rename query compute to query completion Rename searching type from compute to completion Refactor COG-656 --- cognee/api/v1/search/search_v2.py | 6 +++--- cognee/tasks/completion/__init__.py | 1 + cognee/tasks/{compute => completion}/exceptions/__init__.py | 0 .../tasks/{compute => completion}/exceptions/exceptions.py | 0 .../query_compute.py => completion/query_completion.py} | 4 ++-- cognee/tasks/compute/__init__.py | 1 - 6 files changed, 6 insertions(+), 6 deletions(-) create mode 100644 cognee/tasks/completion/__init__.py rename cognee/tasks/{compute => completion}/exceptions/__init__.py (100%) rename cognee/tasks/{compute => completion}/exceptions/exceptions.py (100%) rename cognee/tasks/{compute/query_compute.py => completion/query_completion.py} (89%) delete mode 100644 cognee/tasks/compute/__init__.py diff --git a/cognee/api/v1/search/search_v2.py b/cognee/api/v1/search/search_v2.py index 516c7b11d..6a5da4648 100644 --- a/cognee/api/v1/search/search_v2.py +++ b/cognee/api/v1/search/search_v2.py @@ -14,13 +14,13 @@ from cognee.modules.users.permissions.methods import get_document_ids_for_user from cognee.tasks.chunks import query_chunks from cognee.tasks.graph import query_graph_connections from cognee.tasks.summarization import query_summaries -from cognee.tasks.compute import query_compute +from cognee.tasks.completion import query_completion class SearchType(Enum): SUMMARIES = "SUMMARIES" INSIGHTS = "INSIGHTS" CHUNKS = "CHUNKS" - COMPUTE = "COMPUTE" + COMPLETION = "COMPLETION" async def search(query_type: SearchType, query_text: str, user: User = None) -> list: if user is None: @@ -52,7 +52,7 @@ async def specific_search(query_type: SearchType, query: str, user) -> list: SearchType.SUMMARIES: query_summaries, SearchType.INSIGHTS: query_graph_connections, SearchType.CHUNKS: query_chunks, - SearchType.COMPUTE: query_compute, + SearchType.COMPLETION: query_completion, } search_task = search_tasks.get(query_type) diff --git a/cognee/tasks/completion/__init__.py b/cognee/tasks/completion/__init__.py new file mode 100644 index 000000000..1bf0fa6bb --- /dev/null +++ b/cognee/tasks/completion/__init__.py @@ -0,0 +1 @@ +from .query_completion import query_completion \ No newline at end of file diff --git a/cognee/tasks/compute/exceptions/__init__.py b/cognee/tasks/completion/exceptions/__init__.py similarity index 100% rename from cognee/tasks/compute/exceptions/__init__.py rename to cognee/tasks/completion/exceptions/__init__.py diff --git a/cognee/tasks/compute/exceptions/exceptions.py b/cognee/tasks/completion/exceptions/exceptions.py similarity index 100% rename from cognee/tasks/compute/exceptions/exceptions.py rename to cognee/tasks/completion/exceptions/exceptions.py diff --git a/cognee/tasks/compute/query_compute.py b/cognee/tasks/completion/query_completion.py similarity index 89% rename from cognee/tasks/compute/query_compute.py rename to cognee/tasks/completion/query_completion.py index cd606ddc5..5324676f8 100644 --- a/cognee/tasks/compute/query_compute.py +++ b/cognee/tasks/completion/query_completion.py @@ -1,10 +1,10 @@ from cognee.infrastructure.databases.vector import get_vector_engine -from cognee.tasks.compute.exceptions import NoRelevantDataFound +from cognee.tasks.completion.exceptions import NoRelevantDataFound from cognee.infrastructure.llm.get_llm_client import get_llm_client from cognee.infrastructure.llm.prompts import read_query_prompt, render_prompt -async def query_compute(query: str) -> list: +async def query_completion(query: str) -> list: """ Parameters: - query (str): The query string to compute. diff --git a/cognee/tasks/compute/__init__.py b/cognee/tasks/compute/__init__.py deleted file mode 100644 index 7f6ed58fd..000000000 --- a/cognee/tasks/compute/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .query_compute import query_compute \ No newline at end of file From 35b1f7d26afad735bdebf5b0de396031d49690bf Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 13 Dec 2024 17:08:05 +0100 Subject: [PATCH 16/16] chore: Update typo in code Update typo in string in code Chore COG-656 --- cognee/modules/data/exceptions/exceptions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/modules/data/exceptions/exceptions.py b/cognee/modules/data/exceptions/exceptions.py index 7d7bbd632..5117f3cac 100644 --- a/cognee/modules/data/exceptions/exceptions.py +++ b/cognee/modules/data/exceptions/exceptions.py @@ -13,7 +13,7 @@ class UnstructuredLibraryImportError(CogneeApiError): class UnauthorizedDataAccessError(CogneeApiError): def __init__( self, - message: str = "Usesr does not have permission to access this data.", + message: str = "User does not have permission to access this data.", name: str = "UnauthorizedDataAccessError", status_code=status.HTTP_401_UNAUTHORIZED, ):