Merge pull request #368 from topoteretes/COG-656-deployment-state
Cog 656 deployment state
This commit is contained in:
commit
2508727f54
16 changed files with 168 additions and 19 deletions
|
|
@ -2,7 +2,7 @@ from typing import Union, BinaryIO
|
||||||
from cognee.modules.users.models import User
|
from cognee.modules.users.models import User
|
||||||
from cognee.modules.users.methods import get_default_user
|
from cognee.modules.users.methods import get_default_user
|
||||||
from cognee.modules.pipelines import run_tasks, Task
|
from cognee.modules.pipelines import run_tasks, Task
|
||||||
from cognee.tasks.ingestion import ingest_data_with_metadata
|
from cognee.tasks.ingestion import ingest_data_with_metadata, resolve_data_directories
|
||||||
from cognee.infrastructure.databases.relational import create_db_and_tables as create_relational_db_and_tables
|
from cognee.infrastructure.databases.relational import create_db_and_tables as create_relational_db_and_tables
|
||||||
from cognee.infrastructure.databases.vector.pgvector import create_db_and_tables as create_pgvector_db_and_tables
|
from cognee.infrastructure.databases.vector.pgvector import create_db_and_tables as create_pgvector_db_and_tables
|
||||||
|
|
||||||
|
|
@ -14,6 +14,7 @@ async def add(data: Union[BinaryIO, list[BinaryIO], str, list[str]], dataset_nam
|
||||||
user = await get_default_user()
|
user = await get_default_user()
|
||||||
|
|
||||||
tasks = [
|
tasks = [
|
||||||
|
Task(resolve_data_directories),
|
||||||
Task(ingest_data_with_metadata, dataset_name, user)
|
Task(ingest_data_with_metadata, dataset_name, user)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -76,7 +76,7 @@ def get_datasets_router() -> APIRouter:
|
||||||
message=f"Dataset ({dataset_id}) not found."
|
message=f"Dataset ({dataset_id}) not found."
|
||||||
)
|
)
|
||||||
|
|
||||||
data = await get_data(data_id)
|
data = await get_data(user.id, data_id)
|
||||||
|
|
||||||
if data is None:
|
if data is None:
|
||||||
raise EntityNotFoundError(
|
raise EntityNotFoundError(
|
||||||
|
|
@ -141,6 +141,7 @@ def get_datasets_router() -> APIRouter:
|
||||||
|
|
||||||
@router.get("/{dataset_id}/data/{data_id}/raw", response_class=FileResponse)
|
@router.get("/{dataset_id}/data/{data_id}/raw", response_class=FileResponse)
|
||||||
async def get_raw_data(dataset_id: str, data_id: str, user: User = Depends(get_authenticated_user)):
|
async def get_raw_data(dataset_id: str, data_id: str, user: User = Depends(get_authenticated_user)):
|
||||||
|
from cognee.modules.data.methods import get_data
|
||||||
from cognee.modules.data.methods import get_dataset, get_dataset_data
|
from cognee.modules.data.methods import get_dataset, get_dataset_data
|
||||||
|
|
||||||
dataset = await get_dataset(user.id, dataset_id)
|
dataset = await get_dataset(user.id, dataset_id)
|
||||||
|
|
@ -164,7 +165,10 @@ def get_datasets_router() -> APIRouter:
|
||||||
if len(matching_data) == 0:
|
if len(matching_data) == 0:
|
||||||
raise EntityNotFoundError(message= f"Data ({data_id}) not found in dataset ({dataset_id}).")
|
raise EntityNotFoundError(message= f"Data ({data_id}) not found in dataset ({dataset_id}).")
|
||||||
|
|
||||||
data = matching_data[0]
|
data = await get_data(user.id, data_id)
|
||||||
|
|
||||||
|
if data is None:
|
||||||
|
raise EntityNotFoundError(message=f"Data ({data_id}) not found in dataset ({dataset_id}).")
|
||||||
|
|
||||||
return data.raw_data_location
|
return data.raw_data_location
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,46 +1,63 @@
|
||||||
from fastapi import APIRouter, Depends, HTTPException
|
from fastapi import APIRouter, Depends
|
||||||
from fastapi.responses import JSONResponse
|
from fastapi.responses import JSONResponse
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
from sqlalchemy.future import select
|
||||||
|
from sqlalchemy import insert
|
||||||
|
from sqlalchemy.exc import IntegrityError
|
||||||
|
|
||||||
|
from cognee.infrastructure.databases.exceptions import EntityAlreadyExistsError
|
||||||
from cognee.modules.users.exceptions import UserNotFoundError, GroupNotFoundError
|
from cognee.modules.users.exceptions import UserNotFoundError, GroupNotFoundError
|
||||||
from cognee.modules.users import get_user_db
|
from cognee.modules.users import get_user_db
|
||||||
from cognee.modules.users.models import User, Group, Permission
|
from cognee.modules.users.models import User, Group, Permission, UserGroup, GroupPermission
|
||||||
|
|
||||||
def get_permissions_router() -> APIRouter:
|
def get_permissions_router() -> APIRouter:
|
||||||
permissions_router = APIRouter()
|
permissions_router = APIRouter()
|
||||||
|
|
||||||
@permissions_router.post("/groups/{group_id}/permissions")
|
@permissions_router.post("/groups/{group_id}/permissions")
|
||||||
async def give_permission_to_group(group_id: int, permission: str, db: Session = Depends(get_user_db)):
|
async def give_permission_to_group(group_id: str, permission: str, db: Session = Depends(get_user_db)):
|
||||||
group = db.query(Group).filter(Group.id == group_id).first()
|
group = (await db.session.execute(select(Group).where(Group.id == group_id))).scalars().first()
|
||||||
|
|
||||||
if not group:
|
if not group:
|
||||||
raise GroupNotFoundError
|
raise GroupNotFoundError
|
||||||
|
|
||||||
permission = db.query(Permission).filter(Permission.name == permission).first()
|
permission_entity = (
|
||||||
|
await db.session.execute(select(Permission).where(Permission.name == permission))).scalars().first()
|
||||||
|
|
||||||
if not permission:
|
if not permission_entity:
|
||||||
permission = Permission(name = permission)
|
stmt = insert(Permission).values(name=permission)
|
||||||
db.add(permission)
|
await db.session.execute(stmt)
|
||||||
|
permission_entity = (
|
||||||
|
await db.session.execute(select(Permission).where(Permission.name == permission))).scalars().first()
|
||||||
|
|
||||||
group.permissions.append(permission)
|
try:
|
||||||
|
# add permission to group
|
||||||
|
await db.session.execute(
|
||||||
|
insert(GroupPermission).values(group_id=group.id, permission_id=permission_entity.id))
|
||||||
|
except IntegrityError as e:
|
||||||
|
raise EntityAlreadyExistsError(message="Group permission already exists.")
|
||||||
|
|
||||||
db.commit()
|
await db.session.commit()
|
||||||
|
|
||||||
return JSONResponse(status_code = 200, content = {"message": "Permission assigned to group"})
|
return JSONResponse(status_code = 200, content = {"message": "Permission assigned to group"})
|
||||||
|
|
||||||
@permissions_router.post("/users/{user_id}/groups")
|
@permissions_router.post("/users/{user_id}/groups")
|
||||||
async def add_user_to_group(user_id: int, group_id: int, db: Session = Depends(get_user_db)):
|
async def add_user_to_group(user_id: str, group_id: str, db: Session = Depends(get_user_db)):
|
||||||
user = db.query(User).filter(User.id == user_id).first()
|
user = (await db.session.execute(select(User).where(User.id == user_id))).scalars().first()
|
||||||
group = db.query(Group).filter(Group.id == group_id).first()
|
group = (await db.session.execute(select(Group).where(Group.id == group_id))).scalars().first()
|
||||||
|
|
||||||
if not user:
|
if not user:
|
||||||
raise UserNotFoundError
|
raise UserNotFoundError
|
||||||
elif not group:
|
elif not group:
|
||||||
raise GroupNotFoundError
|
raise GroupNotFoundError
|
||||||
|
|
||||||
user.groups.append(group)
|
try:
|
||||||
|
# Add association directly to the association table
|
||||||
|
stmt = insert(UserGroup).values(user_id=user_id, group_id=group_id)
|
||||||
|
await db.session.execute(stmt)
|
||||||
|
except IntegrityError as e:
|
||||||
|
raise EntityAlreadyExistsError(message="User is already part of group.")
|
||||||
|
|
||||||
db.commit()
|
await db.session.commit()
|
||||||
|
|
||||||
return JSONResponse(status_code = 200, content = {"message": "User added to group"})
|
return JSONResponse(status_code = 200, content = {"message": "User added to group"})
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -14,11 +14,13 @@ from cognee.modules.users.permissions.methods import get_document_ids_for_user
|
||||||
from cognee.tasks.chunks import query_chunks
|
from cognee.tasks.chunks import query_chunks
|
||||||
from cognee.tasks.graph import query_graph_connections
|
from cognee.tasks.graph import query_graph_connections
|
||||||
from cognee.tasks.summarization import query_summaries
|
from cognee.tasks.summarization import query_summaries
|
||||||
|
from cognee.tasks.completion import query_completion
|
||||||
|
|
||||||
class SearchType(Enum):
|
class SearchType(Enum):
|
||||||
SUMMARIES = "SUMMARIES"
|
SUMMARIES = "SUMMARIES"
|
||||||
INSIGHTS = "INSIGHTS"
|
INSIGHTS = "INSIGHTS"
|
||||||
CHUNKS = "CHUNKS"
|
CHUNKS = "CHUNKS"
|
||||||
|
COMPLETION = "COMPLETION"
|
||||||
|
|
||||||
async def search(query_type: SearchType, query_text: str, user: User = None) -> list:
|
async def search(query_type: SearchType, query_text: str, user: User = None) -> list:
|
||||||
if user is None:
|
if user is None:
|
||||||
|
|
@ -50,6 +52,7 @@ async def specific_search(query_type: SearchType, query: str, user) -> list:
|
||||||
SearchType.SUMMARIES: query_summaries,
|
SearchType.SUMMARIES: query_summaries,
|
||||||
SearchType.INSIGHTS: query_graph_connections,
|
SearchType.INSIGHTS: query_graph_connections,
|
||||||
SearchType.CHUNKS: query_chunks,
|
SearchType.CHUNKS: query_chunks,
|
||||||
|
SearchType.COMPLETION: query_completion,
|
||||||
}
|
}
|
||||||
|
|
||||||
search_task = search_tasks.get(query_type)
|
search_task = search_tasks.get(query_type)
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
Answer the question using the provided context. Be as brief as possible.
|
||||||
|
|
@ -6,4 +6,5 @@ This module defines a set of exceptions for handling various data errors
|
||||||
|
|
||||||
from .exceptions import (
|
from .exceptions import (
|
||||||
UnstructuredLibraryImportError,
|
UnstructuredLibraryImportError,
|
||||||
|
UnauthorizedDataAccessError,
|
||||||
)
|
)
|
||||||
|
|
@ -7,5 +7,14 @@ class UnstructuredLibraryImportError(CogneeApiError):
|
||||||
message: str = "Import error. Unstructured library is not installed.",
|
message: str = "Import error. Unstructured library is not installed.",
|
||||||
name: str = "UnstructuredModuleImportError",
|
name: str = "UnstructuredModuleImportError",
|
||||||
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
|
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
|
||||||
|
):
|
||||||
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
class UnauthorizedDataAccessError(CogneeApiError):
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
message: str = "User does not have permission to access this data.",
|
||||||
|
name: str = "UnauthorizedDataAccessError",
|
||||||
|
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||||
):
|
):
|
||||||
super().__init__(message, name, status_code)
|
super().__init__(message, name, status_code)
|
||||||
|
|
@ -1,12 +1,14 @@
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from cognee.infrastructure.databases.relational import get_relational_engine
|
from cognee.infrastructure.databases.relational import get_relational_engine
|
||||||
|
from ..exceptions import UnauthorizedDataAccessError
|
||||||
from ..models import Data
|
from ..models import Data
|
||||||
|
|
||||||
async def get_data(data_id: UUID) -> Optional[Data]:
|
async def get_data(user_id: UUID, data_id: UUID) -> Optional[Data]:
|
||||||
"""Retrieve data by ID.
|
"""Retrieve data by ID.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
user_id (UUID): user ID
|
||||||
data_id (UUID): ID of the data to retrieve
|
data_id (UUID): ID of the data to retrieve
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
|
|
@ -17,4 +19,7 @@ async def get_data(data_id: UUID) -> Optional[Data]:
|
||||||
async with db_engine.get_async_session() as session:
|
async with db_engine.get_async_session() as session:
|
||||||
data = await session.get(Data, data_id)
|
data = await session.get(Data, data_id)
|
||||||
|
|
||||||
|
if data and data.owner_id != user_id:
|
||||||
|
raise UnauthorizedDataAccessError(message=f"User {user_id} is not authorized to access data {data_id}")
|
||||||
|
|
||||||
return data
|
return data
|
||||||
11
cognee/modules/users/models/GroupPermission.py
Normal file
11
cognee/modules/users/models/GroupPermission.py
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from sqlalchemy import Column, ForeignKey, DateTime, UUID
|
||||||
|
from cognee.infrastructure.databases.relational import Base
|
||||||
|
|
||||||
|
class GroupPermission(Base):
|
||||||
|
__tablename__ = "group_permissions"
|
||||||
|
|
||||||
|
created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc))
|
||||||
|
|
||||||
|
group_id = Column(UUID, ForeignKey("groups.id"), primary_key = True)
|
||||||
|
permission_id = Column(UUID, ForeignKey("permissions.id"), primary_key = True)
|
||||||
|
|
@ -1,5 +1,7 @@
|
||||||
from .User import User
|
from .User import User
|
||||||
from .Group import Group
|
from .Group import Group
|
||||||
|
from .UserGroup import UserGroup
|
||||||
|
from .GroupPermission import GroupPermission
|
||||||
from .Resource import Resource
|
from .Resource import Resource
|
||||||
from .Permission import Permission
|
from .Permission import Permission
|
||||||
from .ACL import ACL
|
from .ACL import ACL
|
||||||
|
|
|
||||||
1
cognee/tasks/completion/__init__.py
Normal file
1
cognee/tasks/completion/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
from .query_completion import query_completion
|
||||||
9
cognee/tasks/completion/exceptions/__init__.py
Normal file
9
cognee/tasks/completion/exceptions/__init__.py
Normal file
|
|
@ -0,0 +1,9 @@
|
||||||
|
"""
|
||||||
|
Custom exceptions for the Cognee API.
|
||||||
|
|
||||||
|
This module defines a set of exceptions for handling various compute errors
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .exceptions import (
|
||||||
|
NoRelevantDataFound,
|
||||||
|
)
|
||||||
11
cognee/tasks/completion/exceptions/exceptions.py
Normal file
11
cognee/tasks/completion/exceptions/exceptions.py
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
from cognee.exceptions import CogneeApiError
|
||||||
|
from fastapi import status
|
||||||
|
|
||||||
|
class NoRelevantDataFound(CogneeApiError):
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
message: str = "Search did not find any data.",
|
||||||
|
name: str = "NoRelevantDataFound",
|
||||||
|
status_code=status.HTTP_404_NOT_FOUND,
|
||||||
|
):
|
||||||
|
super().__init__(message, name, status_code)
|
||||||
36
cognee/tasks/completion/query_completion.py
Normal file
36
cognee/tasks/completion/query_completion.py
Normal file
|
|
@ -0,0 +1,36 @@
|
||||||
|
from cognee.infrastructure.databases.vector import get_vector_engine
|
||||||
|
from cognee.tasks.completion.exceptions import NoRelevantDataFound
|
||||||
|
from cognee.infrastructure.llm.get_llm_client import get_llm_client
|
||||||
|
from cognee.infrastructure.llm.prompts import read_query_prompt, render_prompt
|
||||||
|
|
||||||
|
|
||||||
|
async def query_completion(query: str) -> list:
|
||||||
|
"""
|
||||||
|
Parameters:
|
||||||
|
- query (str): The query string to compute.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
- list: Answer to the query.
|
||||||
|
"""
|
||||||
|
vector_engine = get_vector_engine()
|
||||||
|
|
||||||
|
found_chunks = await vector_engine.search("document_chunk_text", query, limit = 1)
|
||||||
|
|
||||||
|
if len(found_chunks) == 0:
|
||||||
|
raise NoRelevantDataFound
|
||||||
|
|
||||||
|
args = {
|
||||||
|
"question": query,
|
||||||
|
"context": found_chunks[0].payload["text"],
|
||||||
|
}
|
||||||
|
user_prompt = render_prompt("context_for_question.txt", args)
|
||||||
|
system_prompt = read_query_prompt("answer_simple_question.txt")
|
||||||
|
|
||||||
|
llm_client = get_llm_client()
|
||||||
|
computed_answer = await llm_client.acreate_structured_output(
|
||||||
|
text_input=user_prompt,
|
||||||
|
system_prompt=system_prompt,
|
||||||
|
response_model=str,
|
||||||
|
)
|
||||||
|
|
||||||
|
return [computed_answer]
|
||||||
|
|
@ -3,3 +3,4 @@ from .save_data_to_storage import save_data_to_storage
|
||||||
from .save_data_item_to_storage import save_data_item_to_storage
|
from .save_data_item_to_storage import save_data_item_to_storage
|
||||||
from .save_data_item_with_metadata_to_storage import save_data_item_with_metadata_to_storage
|
from .save_data_item_with_metadata_to_storage import save_data_item_with_metadata_to_storage
|
||||||
from .ingest_data_with_metadata import ingest_data_with_metadata
|
from .ingest_data_with_metadata import ingest_data_with_metadata
|
||||||
|
from .resolve_data_directories import resolve_data_directories
|
||||||
|
|
|
||||||
37
cognee/tasks/ingestion/resolve_data_directories.py
Normal file
37
cognee/tasks/ingestion/resolve_data_directories.py
Normal file
|
|
@ -0,0 +1,37 @@
|
||||||
|
import os
|
||||||
|
from typing import List, Union, BinaryIO
|
||||||
|
|
||||||
|
async def resolve_data_directories(data: Union[BinaryIO, List[BinaryIO], str, List[str]], include_subdirectories: bool = True):
|
||||||
|
"""
|
||||||
|
Resolves directories by replacing them with their contained files.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
data: A single file, directory, or binary stream, or a list of such items.
|
||||||
|
include_subdirectories: Whether to include files in subdirectories recursively.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A list of resolved files and binary streams.
|
||||||
|
"""
|
||||||
|
# Ensure `data` is a list
|
||||||
|
if not isinstance(data, list):
|
||||||
|
data = [data]
|
||||||
|
|
||||||
|
resolved_data = []
|
||||||
|
|
||||||
|
for item in data:
|
||||||
|
if isinstance(item, str): # Check if the item is a path
|
||||||
|
if os.path.isdir(item): # If it's a directory
|
||||||
|
if include_subdirectories:
|
||||||
|
# Recursively add all files in the directory and subdirectories
|
||||||
|
for root, _, files in os.walk(item):
|
||||||
|
resolved_data.extend([os.path.join(root, f) for f in files])
|
||||||
|
else:
|
||||||
|
# Add all files (not subdirectories) in the directory
|
||||||
|
resolved_data.extend(
|
||||||
|
[os.path.join(item, f) for f in os.listdir(item) if os.path.isfile(os.path.join(item, f))]
|
||||||
|
)
|
||||||
|
else: # If it's a file or text add it directly
|
||||||
|
resolved_data.append(item)
|
||||||
|
else: # If it's not a string add it directly
|
||||||
|
resolved_data.append(item)
|
||||||
|
return resolved_data
|
||||||
Loading…
Add table
Reference in a new issue