diff --git a/.pylintrc b/.pylintrc index 5dd663ba8..aba6f148c 100644 --- a/.pylintrc +++ b/.pylintrc @@ -433,8 +433,9 @@ disable=raw-checker-failed, use-implicit-booleaness-not-comparison-to-zero, missing-module-docstring, missing-function-docstring, - missing-class-docstring - + missing-class-docstring, + relative-beyond-top-level + # Enable the message, report, category or checker with the given id(s). You can # either give multiple identifier separated by comma (,) or put this option diff --git a/cognee-frontend/src/modules/datasets/cognifyDataset.ts b/cognee-frontend/src/modules/datasets/cognifyDataset.ts index 424abd64f..ae7a8e827 100644 --- a/cognee-frontend/src/modules/datasets/cognifyDataset.ts +++ b/cognee-frontend/src/modules/datasets/cognifyDataset.ts @@ -1,5 +1,5 @@ export default function cognifyDataset(dataset: { id: string }) { - return fetch('http://0.0.0.0:8000/cognify', { + return fetch('http://127.0.0.1:8000/cognify', { method: 'POST', headers: { 'Content-Type': 'application/json', diff --git a/cognee-frontend/src/modules/datasets/deleteDataset.ts b/cognee-frontend/src/modules/datasets/deleteDataset.ts index dfc686536..d449942e6 100644 --- a/cognee-frontend/src/modules/datasets/deleteDataset.ts +++ b/cognee-frontend/src/modules/datasets/deleteDataset.ts @@ -1,5 +1,5 @@ export default function deleteDataset(dataset: { id: string }) { - return fetch(`http://0.0.0.0:8000/datasets/${dataset.id}`, { + return fetch(`http://127.0.0.1:8000/datasets/${dataset.id}`, { method: 'DELETE', }) } diff --git a/cognee-frontend/src/modules/datasets/getDatasetData.ts b/cognee-frontend/src/modules/datasets/getDatasetData.ts index aa5138383..c2d480c54 100644 --- a/cognee-frontend/src/modules/datasets/getDatasetData.ts +++ b/cognee-frontend/src/modules/datasets/getDatasetData.ts @@ -1,4 +1,4 @@ export default function getDatasetData(dataset: { id: string }) { - return fetch(`http://0.0.0.0:8000/datasets/${dataset.id}/data`) + return fetch(`http://127.0.0.1:8000/datasets/${dataset.id}/data`) .then((response) => response.json()); } diff --git a/cognee-frontend/src/modules/exploration/getExplorationGraphUrl.ts b/cognee-frontend/src/modules/exploration/getExplorationGraphUrl.ts index aa83b1520..6af086d57 100644 --- a/cognee-frontend/src/modules/exploration/getExplorationGraphUrl.ts +++ b/cognee-frontend/src/modules/exploration/getExplorationGraphUrl.ts @@ -1,5 +1,5 @@ export default function getExplorationGraphUrl(dataset: { id: string }) { - return fetch(`http://0.0.0.0:8000/datasets/${dataset.id}/graph`) + return fetch(`http://127.0.0.1:8000/datasets/${dataset.id}/graph`) .then(async (response) => { if (response.status !== 200) { throw new Error((await response.text()).replaceAll("\"", "")); diff --git a/cognee-frontend/src/modules/ingestion/DataView/DataView.tsx b/cognee-frontend/src/modules/ingestion/DataView/DataView.tsx index d9e66639e..288241e43 100644 --- a/cognee-frontend/src/modules/ingestion/DataView/DataView.tsx +++ b/cognee-frontend/src/modules/ingestion/DataView/DataView.tsx @@ -36,7 +36,7 @@ export default function DataView({ datasetId, data, onClose, onDataAdd }: DataVi const showRawData = useCallback((dataItem: Data) => { setSelectedData(dataItem); - fetch(`http://0.0.0.0:8000/datasets/${datasetId}/data/${dataItem.id}/raw`) + fetch(`http://127.0.0.1:8000/datasets/${datasetId}/data/${dataItem.id}/raw`) .then((response) => response.arrayBuffer()) .then(setRawData); diff --git a/cognee-frontend/src/modules/ingestion/addData.ts b/cognee-frontend/src/modules/ingestion/addData.ts index 
3087f5e7d..b89160755 100644 --- a/cognee-frontend/src/modules/ingestion/addData.ts +++ b/cognee-frontend/src/modules/ingestion/addData.ts @@ -5,7 +5,7 @@ export default function addData(dataset: { id: string }, files: File[]) { }) formData.append('datasetId', dataset.id); - return fetch('http://0.0.0.0:8000/add', { + return fetch('http://127.0.0.1:8000/add', { method: 'POST', body: formData, }).then((response) => response.json()); diff --git a/cognee-frontend/src/modules/ingestion/useDatasets.ts b/cognee-frontend/src/modules/ingestion/useDatasets.ts index c76f51401..402b25a07 100644 --- a/cognee-frontend/src/modules/ingestion/useDatasets.ts +++ b/cognee-frontend/src/modules/ingestion/useDatasets.ts @@ -14,7 +14,7 @@ function useDatasets() { const statusTimeout = useRef(null); const fetchDatasetStatuses = useCallback((datasets: Dataset[]) => { - fetch(`http://0.0.0.0:8000/datasets/status?dataset=${datasets.map(d => d.id).join('&dataset=')}`) + fetch(`http://127.0.0.1:8000/datasets/status?dataset=${datasets.map(d => d.id).join('&dataset=')}`) .then((response) => response.json()) .then((statuses) => setDatasets( (datasets) => ( @@ -65,7 +65,7 @@ function useDatasets() { }, []); const fetchDatasets = useCallback(() => { - fetch('http://0.0.0.0:8000/datasets') + fetch('http://127.0.0.1:8000/datasets') .then((response) => response.json()) .then((datasets) => datasets.map((dataset: string) => ({ id: dataset, name: dataset }))) .then((datasets) => { diff --git a/cognee-frontend/src/ui/Partials/SettingsModal/Settings.tsx b/cognee-frontend/src/ui/Partials/SettingsModal/Settings.tsx index 134f2164c..05b7b30a5 100644 --- a/cognee-frontend/src/ui/Partials/SettingsModal/Settings.tsx +++ b/cognee-frontend/src/ui/Partials/SettingsModal/Settings.tsx @@ -75,7 +75,7 @@ export default function Settings({ onDone = () => {}, submitButtonText = 'Save' startSaving(); - fetch('http://0.0.0.0:8000/settings', { + fetch('http://127.0.0.1:8000/settings', { method: 'POST', headers: { 'Content-Type': 'application/json', @@ -138,7 +138,7 @@ export default function Settings({ onDone = () => {}, submitButtonText = 'Save' useEffect(() => { const fetchConfig = async () => { - const response = await fetch('http://0.0.0.0:8000/settings'); + const response = await fetch('http://127.0.0.1:8000/settings'); const settings = await response.json(); if (!settings.llm.model) { diff --git a/cognee/api/client.py b/cognee/api/client.py index b9d8cd27f..50252319e 100644 --- a/cognee/api/client.py +++ b/cognee/api/client.py @@ -3,7 +3,6 @@ import os import aiohttp import uvicorn import json -import asyncio import logging import sentry_sdk from typing import Dict, Any, List, Union, Optional, Literal @@ -13,7 +12,7 @@ from fastapi.responses import JSONResponse, FileResponse from fastapi.middleware.cors import CORSMiddleware from pydantic import BaseModel -from cognee.infrastructure.databases.relational.user_authentication.routers import permission_router +from cognee.infrastructure.databases.relational import create_db_and_tables # Set up logging logging.basicConfig( @@ -28,13 +27,16 @@ if os.getenv("ENV") == "prod": traces_sample_rate = 1.0, profiles_sample_rate = 1.0, ) + from contextlib import asynccontextmanager + @asynccontextmanager async def lifespan(app: FastAPI): # Not needed if you set up a migration system like Alembic await create_db_and_tables() yield -app = FastAPI(debug = os.getenv("ENV") != "prod", lifespan=lifespan) + +app = FastAPI(debug = os.getenv("ENV") != "prod", lifespan = lifespan) origins = [ "http://frontend:3000",
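
The hunk above replaces client.py's import-time database setup with FastAPI's lifespan hook, so tables are created once at startup. A minimal, self-contained sketch of that pattern (the create_db_and_tables body here is a stub; in the PR the real helper is imported from cognee.infrastructure.databases.relational):

from contextlib import asynccontextmanager

from fastapi import FastAPI

async def create_db_and_tables():
    # Stub standing in for cognee's real table-creation helper.
    print("creating tables")

@asynccontextmanager
async def lifespan(app: FastAPI):
    # Everything before `yield` runs once at startup, before the first
    # request is served; code after `yield` would run at shutdown.
    await create_db_and_tables()
    yield

app = FastAPI(lifespan = lifespan)
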
@@ -50,70 +52,46 @@ app.add_middleware( allow_headers=["*"], ) -from contextlib import asynccontextmanager +from cognee.api.v1.users.routers import get_auth_router, get_register_router,\ + get_reset_password_router, get_verify_router, get_users_router -from fastapi import Depends, FastAPI - -from cognee.infrastructure.databases.relational.user_authentication.authentication_db import User, create_db_and_tables -from cognee.infrastructure.databases.relational.user_authentication.schemas import UserCreate, UserRead, UserUpdate -from cognee.infrastructure.databases.relational.user_authentication.users import auth_backend, current_active_user, fastapi_users +from cognee.api.v1.permissions.get_permissions_router import get_permissions_router app.include_router( - fastapi_users.get_auth_router(auth_backend), prefix="/auth/jwt", tags=["auth"] -) -app.include_router( - fastapi_users.get_register_router(UserRead, UserCreate), - prefix="/auth", - tags=["auth"], -) -app.include_router( - fastapi_users.get_reset_password_router(), - prefix="/auth", - tags=["auth"], -) -app.include_router( - fastapi_users.get_verify_router(UserRead), - prefix="/auth", - tags=["auth"], -) -app.include_router( - fastapi_users.get_users_router(UserRead, UserUpdate), - prefix="/users", - tags=["users"], + get_auth_router(), + prefix = "/auth/jwt", + tags = ["auth"] ) -app.include_router(permission_router, prefix="/manage", tags=["management"]) - -@asynccontextmanager -async def lifespan(app: FastAPI): - # Not needed if you setup a migration system like Alembic - await create_db_and_tables() - yield app.include_router( - fastapi_users.get_auth_router(auth_backend), prefix="/auth/jwt", tags=["auth"] -) -app.include_router( - fastapi_users.get_register_router(UserRead, UserCreate), - prefix="/auth", - tags=["auth"], -) -app.include_router( - fastapi_users.get_reset_password_router(), - prefix="/auth", - tags=["auth"], -) -app.include_router( - fastapi_users.get_verify_router(UserRead), - prefix="/auth", - tags=["auth"], -) -app.include_router( - fastapi_users.get_users_router(UserRead, UserUpdate), - prefix="/users", - tags=["users"], + get_register_router(), + prefix = "/auth", + tags = ["auth"], ) +app.include_router( + get_reset_password_router(), + prefix = "/auth", + tags = ["auth"], +) +app.include_router( + get_verify_router(), + prefix = "/auth", + tags = ["auth"], +) + +app.include_router( + get_users_router(), + prefix = "/users", + tags = ["users"], +) + +app.include_router( + get_permissions_router(), + prefix = "/permissions", + tags = ["permissions"], +) @app.get("/") async def root(): @@ -135,7 +113,7 @@ class Payload(BaseModel): @app.get("/datasets", response_model=list) async def get_datasets(): from cognee.api.v1.datasets.datasets import datasets - return datasets.list_datasets() + return await datasets.list_datasets() @app.delete("/datasets/{dataset_id}", response_model=dict) async def delete_dataset(dataset_id: str): @@ -294,8 +272,8 @@ async def search(payload: SearchPayload): @app.get("/settings", response_model=dict) async def get_settings(): - from cognee.modules.settings import get_settings - return get_settings() + from cognee.modules.settings import get_settings as get_cognee_settings + return get_cognee_settings() class LLMConfig(BaseModel): provider: Union[Literal["openai"], Literal["ollama"], Literal["anthropic"]] @@ -334,13 +312,10 @@ def start_api_server(host: str = "0.0.0.0", port: int = 8000): try: logger.info("Starting server at %s:%s", host, port) - from 
cognee.infrastructure.databases.relational import get_relationaldb_config - relational_config = get_relationaldb_config() - relational_config.create_engine() - + import asyncio from cognee.modules.data.deletion import prune_system, prune_data - # asyncio.run(prune_data()) - # asyncio.run(prune_system(metadata = True)) + asyncio.run(prune_data()) + asyncio.run(prune_system(metadata = True)) uvicorn.run(app, host = host, port = port) except Exception as e: diff --git a/cognee/api/v1/add/add.py b/cognee/api/v1/add/add.py index cd488c708..8a4d90352 100644 --- a/cognee/api/v1/add/add.py +++ b/cognee/api/v1/add/add.py @@ -3,18 +3,20 @@ from os import path import asyncio import dlt import duckdb -from fastapi_users import fastapi_users import cognee.modules.ingestion as ingestion -from cognee.infrastructure.databases.relational.user_authentication.users import give_permission_document, \ - get_async_session_context, current_active_user, create_default_user from cognee.infrastructure.files.storage import LocalStorage from cognee.modules.ingestion import get_matched_datasets, save_data_to_file from cognee.shared.utils import send_telemetry from cognee.base_config import get_base_config -from cognee.infrastructure.databases.relational.config import get_relationaldb_config +from cognee.infrastructure.databases.relational import get_relational_config, create_db_and_tables +from cognee.modules.users.methods import create_default_user +from cognee.modules.users.permissions.methods import give_permission_on_document +from cognee.modules.users.models import User + +async def add(data: Union[BinaryIO, List[BinaryIO], str, List[str]], dataset_name: str = "main_dataset", user: User = None): + await create_db_and_tables() -async def add(data: Union[BinaryIO, List[BinaryIO], str, List[str]], dataset_name: str = "main_dataset"): if isinstance(data, str): if "data://" in data: # data is a data directory path @@ -48,11 +50,11 @@ async def add(data: Union[BinaryIO, List[BinaryIO], str, List[str]], dataset_nam file_paths.append(save_data_to_file(data_item, dataset_name)) if len(file_paths) > 0: - return await add_files(file_paths, dataset_name) + return await add_files(file_paths, dataset_name, user) return [] -async def add_files(file_paths: List[str], dataset_name: str, user_id: str = "default_user"): +async def add_files(file_paths: List[str], dataset_name: str, user): base_config = get_base_config() data_directory_path = base_config.data_root_directory @@ -73,12 +75,24 @@ async def add_files(file_paths: List[str], dataset_name: str, user_id: str = "d else: processed_file_paths.append(file_path) - relational_config = get_relationaldb_config() - db = duckdb.connect(relational_config.db_file_path) + relational_config = get_relational_config() - destination = dlt.destinations.duckdb( - credentials = db, - ) + if relational_config.db_provider == "duckdb": + db = duckdb.connect(relational_config.db_file_path) + + destination = dlt.destinations.duckdb( + credentials = db, + ) + else: + destination = dlt.destinations.postgres( + credentials = { + "host": relational_config.db_host, + "port": relational_config.db_port, + "user": relational_config.db_user, + "password": relational_config.db_password, + "database": relational_config.db_name, + }, + ) pipeline = dlt.pipeline( pipeline_name = "file_load_from_filesystem", @@ -86,17 +100,18 @@ async def add_files(file_paths: List[str], dataset_name: str, user_id: str = "d ) @dlt.resource(standalone = True, merge_key = "id") - async def data_resources(file_paths: str, user_id: 
str = user_id): + async def data_resources(file_paths: str, user: User): for file_path in file_paths: with open(file_path.replace("file://", ""), mode = "rb") as file: classified_data = ingestion.classify(file) data_id = ingestion.identify(classified_data) - async with get_async_session_context() as session: - if user_id is None: - current_active_user = await create_default_user() - await give_permission_document(current_active_user, data_id, "write", session= session) + if user is None: + user = await create_default_user() + + await give_permission_on_document(user, data_id, "read") + await give_permission_on_document(user, data_id, "write") file_metadata = classified_data.get_metadata() @@ -109,7 +124,7 @@ async def add_files(file_paths: List[str], dataset_name: str, user_id: str = "d } run_info = pipeline.run( - data_resources(processed_file_paths), + data_resources(processed_file_paths, user), table_name = "file_metadata", dataset_name = dataset_name.replace(" ", "_").replace(".", "_") if dataset_name is not None else "main_dataset", write_disposition = "merge", diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index 632d52c04..77f19f1ec 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -32,7 +32,7 @@ from cognee.shared.SourceCodeGraph import SourceCodeGraph from cognee.modules.tasks import get_task_status from cognee.infrastructure.data.chunking.config import get_chunk_config from cognee.modules.cognify.config import get_cognify_config -from cognee.infrastructure.databases.relational.config import get_relationaldb_config +from cognee.infrastructure.databases.relational import get_relational_engine USER_ID = "default_user" @@ -49,11 +49,10 @@ async def cognify(datasets: Union[str, List[str]] = None): graph_client = await get_graph_engine() - relational_config = get_relationaldb_config() - db_engine = relational_config.database_engine + db_engine = get_relational_engine() if datasets is None or len(datasets) == 0: - datasets = db_engine.get_datasets() + datasets = await db_engine.get_datasets() awaitables = [] @@ -83,7 +82,7 @@ async def cognify(datasets: Union[str, List[str]] = None): graphs = await asyncio.gather(*awaitables) return graphs[0] - added_datasets = db_engine.get_datasets() + added_datasets = await db_engine.get_datasets() # datasets is a dataset name string dataset_files = [] @@ -167,7 +166,7 @@ async def cognify(datasets: Union[str, List[str]] = None): else: document_id = await add_document_node( graph_client, - parent_node_id = file_metadata['id'], + parent_node_id = file_metadata["id"], document_metadata = file_metadata, ) @@ -226,7 +225,7 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi if cognify_config.connect_documents is True: - db_engine = get_relationaldb_config().database_engine + db_engine = get_relational_engine() relevant_documents_to_connect = db_engine.fetch_cognify_data(excluded_document_id = document_id) list_of_nodes = [] diff --git a/cognee/api/v1/cognify/cognify_v2.py b/cognee/api/v1/cognify/cognify_v2.py index 589c5331d..6b2502c46 100644 --- a/cognee/api/v1/cognify/cognify_v2.py +++ b/cognee/api/v1/cognify/cognify_v2.py @@ -1,13 +1,10 @@ import asyncio import logging -import uuid from typing import Union from cognee.infrastructure.databases.graph import get_graph_config -from cognee.infrastructure.databases.relational.user_authentication.users import has_permission_document, \ - get_async_session_context, fast_api_users_init from 
cognee.modules.cognify.config import get_cognify_config -from cognee.infrastructure.databases.relational.config import get_relationaldb_config +from cognee.infrastructure.databases.relational import get_relational_engine from cognee.modules.data.processing.document_types.AudioDocument import AudioDocument from cognee.modules.data.processing.document_types.ImageDocument import ImageDocument from cognee.shared.data_models import KnowledgeGraph @@ -23,6 +20,9 @@ from cognee.modules.data.extraction.knowledge_graph.establish_graph_topology imp from cognee.modules.pipelines.tasks.Task import Task from cognee.modules.pipelines import run_tasks, run_tasks_parallel from cognee.modules.tasks import create_task_status_table, update_task_status, get_task_status +from cognee.modules.users.models import User +from cognee.modules.users.methods import get_default_user +from cognee.modules.users.permissions.methods import check_permissions_on_documents logger = logging.getLogger("cognify.v2") @@ -33,113 +33,98 @@ class PermissionDeniedException(Exception): self.message = message super().__init__(self.message) -async def cognify(datasets: Union[str, list[str]] = None, root_node_id: str = None): - - relational_config = get_relationaldb_config() - db_engine = relational_config.database_engine +async def cognify(datasets: Union[str, list[str]] = None, user: User = None): + db_engine = get_relational_engine() create_task_status_table() if datasets is None or len(datasets) == 0: - return await cognify(db_engine.get_datasets()) + return await cognify(await db_engine.get_datasets()) + if user is None: + user = await get_default_user() async def run_cognify_pipeline(dataset_name: str, files: list[dict]): + documents = [ + PdfDocument(title=f"{file['name']}.{file['extension']}", file_path=file["file_path"]) if file["extension"] == "pdf" else + AudioDocument(title=f"{file['name']}.{file['extension']}", file_path=file["file_path"]) if file["extension"] == "audio" else + ImageDocument(title=f"{file['name']}.{file['extension']}", file_path=file["file_path"]) if file["extension"] == "image" else + TextDocument(title=f"{file['name']}.{file['extension']}", file_path=file["file_path"]) + for file in files + ] - for file in files: - file["id"] = str(uuid.uuid4()) - file["name"] = file["name"].replace(" ", "_") + await check_permissions_on_documents(user, "read", [document.id for document in documents]) - async with get_async_session_context() as session: - active_user = await fast_api_users_init() + async with update_status_lock: + task_status = get_task_status([dataset_name]) - out = await has_permission_document(active_user.current_user(active=True), file["id"], "write", session) + if dataset_name in task_status and task_status[dataset_name] == "DATASET_PROCESSING_STARTED": + logger.info(f"Dataset {dataset_name} is being processed.") + return - if out: - async with update_status_lock: - task_status = get_task_status([dataset_name]) + update_task_status(dataset_name, "DATASET_PROCESSING_STARTED") + try: + cognee_config = get_cognify_config() + graph_config = get_graph_config() + root_node_id = None - if dataset_name in task_status and task_status[dataset_name] == "DATASET_PROCESSING_STARTED": - logger.info(f"Dataset {dataset_name} is being processed.") - return + if graph_config.infer_graph_topology and graph_config.graph_topology_task: + from cognee.modules.topology.topology import TopologyEngine + topology_engine = TopologyEngine(infer=graph_config.infer_graph_topology) + root_node_id = await 
topology_engine.add_graph_topology(files = files) + elif not graph_config.infer_graph_topology and graph_config.graph_topology_task: + from cognee.modules.topology.topology import TopologyEngine + topology_engine = TopologyEngine(infer=graph_config.infer_graph_topology) + await topology_engine.add_graph_topology(graph_config.topology_file_path) + elif not graph_config.graph_topology_task: + root_node_id = "ROOT" - update_task_status(dataset_name, "DATASET_PROCESSING_STARTED") - try: - cognee_config = get_cognify_config() - graph_config = get_graph_config() - root_node_id = None + tasks = [ + Task(process_documents, parent_node_id = root_node_id), # Classify documents and save them as nodes in graph db, extract text chunks based on the document type + Task(establish_graph_topology, topology_model = KnowledgeGraph, task_config = { "batch_size": 10 }), # Set the graph topology for the document chunk data + Task(expand_knowledge_graph, graph_model = KnowledgeGraph, collection_name = "entities"), # Generate knowledge graphs from the document chunks and attach them to chunk nodes + Task(filter_affected_chunks, collection_name = "chunks"), # Find all affected chunks, so we don't process unchanged chunks + Task( + save_data_chunks, + collection_name = "chunks", + ), # Save the document chunks in vector db and as nodes in graph db (connected to the document node and between each other) + run_tasks_parallel([ + Task( + summarize_text_chunks, + summarization_model = cognee_config.summarization_model, + collection_name = "chunk_summaries", + ), # Summarize the document chunks + Task( + classify_text_chunks, + classification_model = cognee_config.classification_model, + ), + ]), + Task(remove_obsolete_chunks), # Remove the obsolete document chunks. + ] - if graph_config.infer_graph_topology and graph_config.graph_topology_task: - from cognee.modules.topology.topology import TopologyEngine - topology_engine = TopologyEngine(infer=graph_config.infer_graph_topology) - root_node_id = await topology_engine.add_graph_topology(files = files) - elif graph_config.infer_graph_topology and not graph_config.infer_graph_topology: - from cognee.modules.topology.topology import TopologyEngine - topology_engine = TopologyEngine(infer=graph_config.infer_graph_topology) - await topology_engine.add_graph_topology(graph_config.topology_file_path) - elif not graph_config.graph_topology_task: - root_node_id = "ROOT" + pipeline = run_tasks(tasks, documents) - tasks = [ - Task(process_documents, parent_node_id = root_node_id), # Classify documents and save them as a nodes in graph db, extract text chunks based on the document type - Task(establish_graph_topology, topology_model = KnowledgeGraph, task_config = { "batch_size": 10 }), # Set the graph topology for the document chunk data - Task(expand_knowledge_graph, graph_model = KnowledgeGraph, collection_name = "entities"), # Generate knowledge graphs from the document chunks and attach it to chunk nodes - Task(filter_affected_chunks, collection_name = "chunks"), # Find all affected chunks, so we don't process unchanged chunks - Task( - save_data_chunks, - collection_name = "chunks", - ), # Save the document chunks in vector db and as nodes in graph db (connected to the document node and between each other) - run_tasks_parallel([ - Task( - summarize_text_chunks, - summarization_model = cognee_config.summarization_model, - collection_name = "chunk_summaries", - ), # Summarize the document chunks - Task( - classify_text_chunks, - classification_model =
cognee_config.classification_model, - ), - ]), - Task(remove_obsolete_chunks), # Remove the obsolete document chunks. - ] + async for result in pipeline: + print(result) - pipeline = run_tasks(tasks, [ - PdfDocument(title=f"{file['name']}.{file['extension']}", file_path=file["file_path"]) if file["extension"] == "pdf" else - AudioDocument(title=f"{file['name']}.{file['extension']}", file_path=file["file_path"]) if file["extension"] == "audio" else - ImageDocument(title=f"{file['name']}.{file['extension']}", file_path=file["file_path"]) if file["extension"] == "image" else - TextDocument(title=f"{file['name']}.{file['extension']}", file_path=file["file_path"]) - for file in files - ]) - - async for result in pipeline: - print(result) - - update_task_status(dataset_name, "DATASET_PROCESSING_FINISHED") - except Exception as error: - update_task_status(dataset_name, "DATASET_PROCESSING_ERROR") - raise error + update_task_status(dataset_name, "DATASET_PROCESSING_FINISHED") + except Exception as error: + update_task_status(dataset_name, "DATASET_PROCESSING_ERROR") + raise error - existing_datasets = db_engine.get_datasets() + existing_datasets = await db_engine.get_datasets() + awaitables = [] - awaitables = [] + for dataset in datasets: + dataset_name = generate_dataset_name(dataset) + if dataset_name in existing_datasets: + awaitables.append(run_cognify_pipeline(dataset, db_engine.get_files_metadata(dataset_name))) - # dataset_files = [] - # dataset_name = datasets.replace(".", "_").replace(" ", "_") - - # for added_dataset in existing_datasets: - # if dataset_name in added_dataset: - # dataset_files.append((added_dataset, db_engine.get_files_metadata(added_dataset))) - - for dataset in datasets: - if dataset in existing_datasets: - # for file_metadata in files: - # if root_node_id is None: - # root_node_id=file_metadata['id'] - awaitables.append(run_cognify_pipeline(dataset, db_engine.get_files_metadata(dataset))) - - return await asyncio.gather(*awaitables) + return await asyncio.gather(*awaitables) +def generate_dataset_name(dataset_name: str) -> str: + return dataset_name.replace(".", "_").replace(" ", "_") # # if __name__ == "__main__": diff --git a/cognee/api/v1/config/config.py b/cognee/api/v1/config/config.py index b284c622f..2a9eb63d0 100644 --- a/cognee/api/v1/config/config.py +++ b/cognee/api/v1/config/config.py @@ -5,16 +5,17 @@ from cognee.modules.cognify.config import get_cognify_config from cognee.infrastructure.data.chunking.config import get_chunk_config from cognee.infrastructure.databases.vector import get_vectordb_config from cognee.infrastructure.databases.graph.config import get_graph_config -from cognee.infrastructure.databases.relational import get_relationaldb_config +from cognee.infrastructure.databases.relational import get_relational_config +from cognee.infrastructure.files.storage import LocalStorage class config(): @staticmethod def system_root_directory(system_root_directory: str): databases_directory_path = os.path.join(system_root_directory, "databases") - relational_config = get_relationaldb_config() + relational_config = get_relational_config() relational_config.db_path = databases_directory_path - relational_config.create_engine() + LocalStorage.ensure_directory_exists(databases_directory_path) graph_config = get_graph_config() graph_config.graph_file_path = os.path.join(databases_directory_path, "cognee.graph") diff --git a/cognee/api/v1/create_user/create_user.py b/cognee/api/v1/create_user/create_user.py deleted file mode 100644 index 3901aa593..000000000 
--- a/cognee/api/v1/create_user/create_user.py +++ /dev/null @@ -1,22 +0,0 @@ -from cognee.infrastructure.databases.relational.user_authentication.users import create_user_method - - - -async def create_user(email: str, password: str, is_superuser: bool = False): - output = await create_user_method(email=email, password=password, is_superuser=is_superuser) - return output - - -if __name__ == "__main__": - import asyncio - # Define an example user - example_email = "example@example.com" - example_password = "securepassword123" - example_is_superuser = False - - # Create an event loop and run the create_user function - loop = asyncio.get_event_loop() - result = loop.run_until_complete(create_user(example_email, example_password, example_is_superuser)) - - # Print the result - print(result) \ No newline at end of file diff --git a/cognee/api/v1/datasets/datasets.py b/cognee/api/v1/datasets/datasets.py index 92ae14ce9..d46fd2413 100644 --- a/cognee/api/v1/datasets/datasets.py +++ b/cognee/api/v1/datasets/datasets.py @@ -1,14 +1,13 @@ from duckdb import CatalogException from cognee.modules.ingestion import discover_directory_datasets from cognee.modules.tasks import get_task_status -from cognee.infrastructure.databases.relational.config import get_relationaldb_config +from cognee.infrastructure.databases.relational import get_relational_engine class datasets(): @staticmethod - def list_datasets(): - relational_config = get_relationaldb_config() - db = relational_config.database_engine - return db.get_datasets() + async def list_datasets(): + db = get_relational_engine() + return await db.get_datasets() @staticmethod def discover_datasets(directory_path: str): @@ -16,8 +15,7 @@ class datasets(): @staticmethod def list_data(dataset_name: str): - relational_config = get_relationaldb_config() - db = relational_config.database_engine + db = get_relational_engine() try: return db.get_files_metadata(dataset_name) except CatalogException: @@ -32,8 +30,7 @@ class datasets(): @staticmethod def delete_dataset(dataset_id: str): - relational_config = get_relationaldb_config() - db = relational_config.database_engine + db = get_relational_engine() try: return db.delete_table(dataset_id) except CatalogException: diff --git a/cognee/api/v1/permissions/get_permissions_router.py b/cognee/api/v1/permissions/get_permissions_router.py new file mode 100644 index 000000000..ab20fb1a2 --- /dev/null +++ b/cognee/api/v1/permissions/get_permissions_router.py @@ -0,0 +1,43 @@ +from fastapi import APIRouter, Depends, HTTPException +from fastapi.responses import JSONResponse +from sqlalchemy.orm import Session +from cognee.modules.users import get_user_db +from cognee.modules.users.models import User, Group, Permission + +def get_permissions_router() -> APIRouter: + permissions_router = APIRouter() + + @permissions_router.post("/groups/{group_id}/permissions") + async def give_permission_to_group(group_id: int, permission: str, db: Session = Depends(get_user_db)): + group = db.query(Group).filter(Group.id == group_id).first() + + if not group: + raise HTTPException(status_code = 404, detail = "Group not found") + + permission_entity = db.query(Permission).filter(Permission.name == permission).first() + + if not permission_entity: + permission_entity = Permission(name = permission) + db.add(permission_entity) + + group.permissions.append(permission_entity) + + db.commit() + + return JSONResponse(status_code = 200, content = {"message": "Permission assigned to group"}) + + @permissions_router.post("/users/{user_id}/groups") + async def
add_user_to_group(user_id: int, group_id: int, db: Session = Depends(get_user_db)): + user = db.query(User).filter(User.id == user_id).first() + group = db.query(Group).filter(Group.id == group_id).first() + + if not user or not group: + raise HTTPException(status_code = 404, detail = "User or group not found") + + user.groups.append(group) + + db.commit() + + return JSONResponse(status_code = 200, content = {"message": "User added to group"}) + + return permissions_router diff --git a/cognee/api/v1/reset_user_password/__init__.py b/cognee/api/v1/reset_user_password/__init__.py deleted file mode 100644 index aad3147da..000000000 --- a/cognee/api/v1/reset_user_password/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .create_user import create_user diff --git a/cognee/api/v1/search/search.py b/cognee/api/v1/search/search.py index b0c64f9ed..08c325576 100644 --- a/cognee/api/v1/search/search.py +++ b/cognee/api/v1/search/search.py @@ -4,14 +4,15 @@ from enum import Enum from typing import Dict, Any, Callable, List from pydantic import BaseModel, field_validator -from cognee.infrastructure.databases.relational.user_authentication.users import fast_api_users_init, \ - has_permission_document, get_async_session_context, get_document_ids_for_user from cognee.modules.search.graph import search_cypher from cognee.modules.search.graph.search_adjacent import search_adjacent from cognee.modules.search.vector.search_traverse import search_traverse from cognee.modules.search.graph.search_summary import search_summary from cognee.modules.search.graph.search_similarity import search_similarity from cognee.shared.utils import send_telemetry +from cognee.modules.users.permissions.methods import get_document_ids_for_user +from cognee.modules.users.methods import get_default_user +from cognee.modules.users.models import User class SearchType(Enum): ADJACENT = "ADJACENT" @@ -41,19 +42,19 @@ class SearchParameters(BaseModel): return value -async def search(search_type: str, params: Dict[str, Any]) -> List: - active_user = await fast_api_users_init() - async with get_async_session_context() as session: +async def search(search_type: str, params: Dict[str, Any], user: User = None) -> List: + if user is None: + user = await get_default_user() + + extract_documents = await get_document_ids_for_user(user.id) + search_params = SearchParameters(search_type = search_type, params = params) + searches = await specific_search([search_params]) - extract_documents = await get_document_ids_for_user(active_user.current_user(active=True), session=session) - search_params = SearchParameters(search_type = search_type, params = params) - searches = await specific_search([search_params]) - - filtered_searches =[] - for document in searches: - for document_id in extract_documents: - if document_id in document: - filtered_searches.append(document) + filtered_searches = [] + for document in searches: + for document_id in extract_documents: + if document_id in document: + filtered_searches.append(document) return filtered_searches diff --git a/cognee/api/v1/create_user/__init__.py b/cognee/api/v1/users/__init__.py similarity index 100% rename from cognee/api/v1/create_user/__init__.py rename to cognee/api/v1/users/__init__.py diff --git a/cognee/api/v1/users/create_user.py b/cognee/api/v1/users/create_user.py new file mode 100644 index 000000000..eba7a6e89 --- /dev/null +++ b/cognee/api/v1/users/create_user.py @@ -0,0 +1,12 @@ +from cognee.modules.users.methods import create_user as create_user_method + + +async def create_user(email: 
str, password: str, is_superuser: bool = False): + user = await create_user_method( + email = email, + password = password, + is_superuser = is_superuser, + is_verified = True, + ) + + return user diff --git a/cognee/api/v1/users/routers/__init__.py b/cognee/api/v1/users/routers/__init__.py new file mode 100644 index 000000000..482aac265 --- /dev/null +++ b/cognee/api/v1/users/routers/__init__.py @@ -0,0 +1,5 @@ +from .get_auth_router import get_auth_router +from .get_register_router import get_register_router +from .get_reset_password_router import get_reset_password_router +from .get_users_router import get_users_router +from .get_verify_router import get_verify_router diff --git a/cognee/api/v1/users/routers/get_auth_router.py b/cognee/api/v1/users/routers/get_auth_router.py new file mode 100644 index 000000000..8a65cde35 --- /dev/null +++ b/cognee/api/v1/users/routers/get_auth_router.py @@ -0,0 +1,6 @@ +from cognee.modules.users.get_fastapi_users import get_fastapi_users +from cognee.modules.users.authentication.get_auth_backend import get_auth_backend + +def get_auth_router(): + auth_backend = get_auth_backend() + return get_fastapi_users().get_auth_router(auth_backend) diff --git a/cognee/api/v1/users/routers/get_register_router.py b/cognee/api/v1/users/routers/get_register_router.py new file mode 100644 index 000000000..a1152c01c --- /dev/null +++ b/cognee/api/v1/users/routers/get_register_router.py @@ -0,0 +1,5 @@ +from cognee.modules.users.get_fastapi_users import get_fastapi_users +from cognee.modules.users.models.User import UserRead, UserCreate + +def get_register_router(): + return get_fastapi_users().get_register_router(UserRead, UserCreate) diff --git a/cognee/api/v1/users/routers/get_reset_password_router.py b/cognee/api/v1/users/routers/get_reset_password_router.py new file mode 100644 index 000000000..c058abe2a --- /dev/null +++ b/cognee/api/v1/users/routers/get_reset_password_router.py @@ -0,0 +1,4 @@ +from cognee.modules.users.get_fastapi_users import get_fastapi_users + +def get_reset_password_router(): + return get_fastapi_users().get_reset_password_router() diff --git a/cognee/api/v1/users/routers/get_users_router.py b/cognee/api/v1/users/routers/get_users_router.py new file mode 100644 index 000000000..b81be73b0 --- /dev/null +++ b/cognee/api/v1/users/routers/get_users_router.py @@ -0,0 +1,5 @@ +from cognee.modules.users.get_fastapi_users import get_fastapi_users +from cognee.modules.users.models.User import UserRead, UserUpdate + +def get_users_router(): + return get_fastapi_users().get_users_router(UserRead, UserUpdate) diff --git a/cognee/api/v1/users/routers/get_verify_router.py b/cognee/api/v1/users/routers/get_verify_router.py new file mode 100644 index 000000000..0c18b08c2 --- /dev/null +++ b/cognee/api/v1/users/routers/get_verify_router.py @@ -0,0 +1,5 @@ +from cognee.modules.users.get_fastapi_users import get_fastapi_users +from cognee.modules.users.models.User import UserRead + +def get_verify_router(): + return get_fastapi_users().get_verify_router(UserRead) diff --git a/cognee/api/v1/verify_user_token/__init__.py b/cognee/api/v1/verify_user_token/__init__.py deleted file mode 100644 index 45fda4dba..000000000 --- a/cognee/api/v1/verify_user_token/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .verify_user_token import verify_user_token diff --git a/cognee/api/v1/verify_user_token/verify_user_token.py b/cognee/api/v1/verify_user_token/verify_user_token.py deleted file mode 100644 index b5a0f2228..000000000 --- 
a/cognee/api/v1/verify_user_token/verify_user_token.py +++ /dev/null @@ -1,8 +0,0 @@ -from cognee.infrastructure.databases.relational.user_authentication.users import user_check_token - - - -async def verify_user_token(token: str): - - output = await user_check_token(token=token) - return output \ No newline at end of file diff --git a/cognee/infrastructure/data/models/DatasetData.py b/cognee/infrastructure/data/models/DatasetData.py index 9e586dcef..c84d890ae 100644 --- a/cognee/infrastructure/data/models/DatasetData.py +++ b/cognee/infrastructure/data/models/DatasetData.py @@ -1,19 +1,12 @@ from uuid import uuid4 from datetime import datetime, timezone -from sqlalchemy import Column, DateTime, UUID, ForeignKey, PrimaryKeyConstraint, UniqueConstraint -from sqlalchemy.orm import relationship - +from sqlalchemy import Column, DateTime, UUID, ForeignKey from cognee.infrastructure.databases.relational import Base class DatasetData(Base): __tablename__ = "dataset_data" id = Column(UUID, primary_key=True, default=uuid4) - created_at = Column(DateTime, default=datetime.now(timezone.utc)) + dataset_id = Column(UUID, ForeignKey("dataset.id"), nullable=False) data_id = Column(UUID, ForeignKey("data.id"), nullable=False) - __table_args__ = ( - UniqueConstraint('dataset_id', 'data_id', name='uix_dataset_data'), - ) - - acls = relationship('ACL', back_populates='document') diff --git a/cognee/infrastructure/databases/relational/FakeAsyncSession.py b/cognee/infrastructure/databases/relational/FakeAsyncSession.py new file mode 100644 index 000000000..cd7e4b6a3 --- /dev/null +++ b/cognee/infrastructure/databases/relational/FakeAsyncSession.py @@ -0,0 +1,29 @@ +import inspect +from typing import Any + +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.orm import Session + +class FakeAsyncSession: + def __init__(self, session: Session): + self.session = session + + def run_sync(self, *args, **kwargs): + return self.execute(*args, **kwargs) + + def __getattr__(self, name: str) -> Any: + """ + If the method being called is async in AsyncSession, create a fake async version + for Session so callers can `await` as usual. Think `commit`, `refresh`, + `delete`, etc. 
+ """ + async_session_attr = getattr(AsyncSession, name, None) + session_attr = getattr(self.session, name) + + if not inspect.iscoroutinefunction(async_session_attr): + return session_attr + + async def async_wrapper(*args, **kwargs): + return session_attr(*args, **kwargs) + + return async_wrapper diff --git a/cognee/infrastructure/databases/relational/ModelBase.py b/cognee/infrastructure/databases/relational/ModelBase.py index 6e60c638f..f75ec448b 100644 --- a/cognee/infrastructure/databases/relational/ModelBase.py +++ b/cognee/infrastructure/databases/relational/ModelBase.py @@ -1,7 +1,4 @@ - from sqlalchemy.orm import DeclarativeBase - class Base(DeclarativeBase): pass - diff --git a/cognee/infrastructure/databases/relational/__init__.py b/cognee/infrastructure/databases/relational/__init__.py index 214559ccb..61ff4ef85 100644 --- a/cognee/infrastructure/databases/relational/__init__.py +++ b/cognee/infrastructure/databases/relational/__init__.py @@ -2,4 +2,6 @@ from .ModelBase import Base from .DatabaseEngine import DatabaseEngine from .sqlite.SqliteEngine import SqliteEngine from .duckdb.DuckDBAdapter import DuckDBAdapter -from .config import get_relationaldb_config +from .config import get_relational_config +from .create_db_and_tables import create_db_and_tables +from .get_relational_engine import get_relational_engine diff --git a/cognee/infrastructure/databases/relational/config.py b/cognee/infrastructure/databases/relational/config.py index 746cce2ae..e5e4854e2 100644 --- a/cognee/infrastructure/databases/relational/config.py +++ b/cognee/infrastructure/databases/relational/config.py @@ -2,27 +2,21 @@ import os from functools import lru_cache from pydantic_settings import BaseSettings, SettingsConfigDict from cognee.root_dir import get_absolute_path -from .create_relational_engine import create_relational_engine class RelationalConfig(BaseSettings): db_path: str = os.path.join(get_absolute_path(".cognee_system"), "databases") db_name: str = "cognee_db" - db_host: str = "localhost" + db_host: str = "127.0.0.1" db_port: str = "5432" db_user: str = "cognee" db_password: str = "cognee" db_provider: str = "postgresql+asyncpg" - # database_engine: object = create_relational_engine(db_path, db_name, db_provider) + # db_provider: str = "duckdb" db_file_path: str = os.path.join(db_path, db_name) model_config = SettingsConfigDict(env_file = ".env", extra = "allow") - def create_engine(self): - self.db_file_path = os.path.join(self.db_path, self.db_name) - self.database_engine = create_relational_engine(self.db_path, self.db_name, self.db_provider, self.db_host, self.db_port, self.db_user, self.db_password) - return self.database_engine - def to_dict(self) -> dict: return { "db_path": self.db_path, @@ -31,10 +25,9 @@ class RelationalConfig(BaseSettings): "db_port": self.db_port, "db_user": self.db_user, "db_password": self.db_password, - "db_engine": self.database_engine, "db_provider": self.db_provider, } @lru_cache -def get_relationaldb_config(): +def get_relational_config(): return RelationalConfig() diff --git a/cognee/infrastructure/databases/relational/create_db_and_tables.py b/cognee/infrastructure/databases/relational/create_db_and_tables.py new file mode 100644 index 000000000..d6cf22f87 --- /dev/null +++ b/cognee/infrastructure/databases/relational/create_db_and_tables.py @@ -0,0 +1,9 @@ +from .ModelBase import Base +from .get_relational_engine import get_relational_engine + +async def create_db_and_tables(): + relational_engine = get_relational_engine() + + async with 
relational_engine.engine.begin() as connection: + if len(Base.metadata.tables.keys()) > 0: + await connection.run_sync(Base.metadata.create_all) diff --git a/cognee/infrastructure/databases/relational/create_relational_engine.py b/cognee/infrastructure/databases/relational/create_relational_engine.py index 5bbfa8ee3..713614eab 100644 --- a/cognee/infrastructure/databases/relational/create_relational_engine.py +++ b/cognee/infrastructure/databases/relational/create_relational_engine.py @@ -1,40 +1,21 @@ -from enum import Enum - -from cognee.infrastructure.databases.relational.sqlalchemy.SqlAlchemyAdapter import SQLAlchemyAdapter -from cognee.infrastructure.files.storage import LocalStorage -from cognee.infrastructure.databases.relational import DuckDBAdapter +from .sqlalchemy.SqlAlchemyAdapter import SQLAlchemyAdapter -class DBProvider(Enum): - DUCKDB = "duckdb" - POSTGRES = "postgresql+asyncpg" - -def create_relational_engine(db_path: str, db_name: str, db_provider:str, db_host:str, db_port:str, db_user:str, db_password:str): - LocalStorage.ensure_directory_exists(db_path) - - provider = DBProvider(db_provider) - - if provider == DBProvider.DUCKDB: - # return DuckDBAdapter( - # db_name = db_name, - # db_path = db_path, - # ) - return SQLAlchemyAdapter( - db_name = db_name, - db_path = db_path, - db_type = db_provider, - db_host=db_host, - db_port=db_port, - db_user=db_user, - db_password=db_password - ) - elif provider == DBProvider.POSTGRES: - return SQLAlchemyAdapter( - db_name = db_name, - db_path = db_path, - db_type = db_provider, - db_host= db_host, - db_port= db_port, - db_user= db_user, - db_password= db_password - ) \ No newline at end of file +def create_relational_engine( + db_path: str, + db_name: str, + db_provider: str, + db_host: str, + db_port: str, + db_user: str, + db_password: str, +): + return SQLAlchemyAdapter( + db_name = db_name, + db_path = db_path, + db_type = db_provider, + db_host = db_host, + db_port = db_port, + db_user = db_user, + db_password = db_password + ) diff --git a/cognee/infrastructure/databases/relational/duckdb/DuckDBAdapter.py b/cognee/infrastructure/databases/relational/duckdb/DuckDBAdapter.py index 1a0fbde3e..2695e04cc 100644 --- a/cognee/infrastructure/databases/relational/duckdb/DuckDBAdapter.py +++ b/cognee/infrastructure/databases/relational/duckdb/DuckDBAdapter.py @@ -79,7 +79,6 @@ class DuckDBAdapter(): connection.execute(""" CREATE TABLE IF NOT EXISTS cognify ( document_id STRING, - layer_id STRING, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT NULL, processed BOOLEAN DEFAULT FALSE, @@ -89,8 +88,8 @@ class DuckDBAdapter(): # Prepare the insert statement insert_query = """ - INSERT INTO cognify (document_id, layer_id) - VALUES (?, ?); + INSERT INTO cognify (document_id) + VALUES (?); """ # Insert each record into the "cognify" table @@ -98,7 +97,6 @@ class DuckDBAdapter(): with self.get_connection() as connection: connection.execute(insert_query, [ record.get("document_id"), - record.get("layer_id") ]) def fetch_cognify_data(self, excluded_document_id: str): @@ -106,7 +104,6 @@ class DuckDBAdapter(): create_table_sql = """ CREATE TABLE IF NOT EXISTS cognify ( document_id STRING, - layer_id STRING, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT NULL, processed BOOLEAN DEFAULT FALSE, @@ -118,7 +115,7 @@ class DuckDBAdapter(): connection.execute(create_table_sql) # SQL command to select data from the "cognify" table - select_data_sql = f"SELECT document_id, layer_id, created_at, 
updated_at, processed FROM cognify WHERE document_id != '{excluded_document_id}' AND processed = FALSE;" + select_data_sql = f"SELECT document_id, created_at, updated_at, processed FROM cognify WHERE document_id != '{excluded_document_id}' AND processed = FALSE;" with self.get_connection() as connection: # Execute the query and fetch the results @@ -144,7 +141,6 @@ class DuckDBAdapter(): create_table_sql = """ CREATE TABLE IF NOT EXISTS cognify ( document_id STRING, - layer_id STRING, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT NULL, processed BOOLEAN DEFAULT FALSE, @@ -166,7 +162,8 @@ class DuckDBAdapter(): def delete_database(self): from cognee.infrastructure.files.storage import LocalStorage - LocalStorage.remove(self.db_location) + if LocalStorage.file_exists(self.db_location): + LocalStorage.remove(self.db_location) if LocalStorage.file_exists(self.db_location + ".wal"): LocalStorage.remove(self.db_location + ".wal") diff --git a/cognee/infrastructure/databases/relational/get_relational_engine.py b/cognee/infrastructure/databases/relational/get_relational_engine.py new file mode 100644 index 000000000..6024c7bd0 --- /dev/null +++ b/cognee/infrastructure/databases/relational/get_relational_engine.py @@ -0,0 +1,8 @@ +from .config import get_relational_config +from .create_relational_engine import create_relational_engine + + +def get_relational_engine(): + relational_config = get_relational_config() + + return create_relational_engine(**relational_config.to_dict()) diff --git a/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py b/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py index dbdc44c40..e518dda38 100644 --- a/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +++ b/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py @@ -1,22 +1,37 @@ import os +import asyncio +from typing import AsyncGenerator +from contextlib import asynccontextmanager from sqlalchemy import create_engine, MetaData, Table, Column, String, Boolean, TIMESTAMP, text from sqlalchemy.orm import sessionmaker -from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine +from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine, async_sessionmaker +from cognee.infrastructure.files.storage import LocalStorage +from cognee.infrastructure.databases.relational.FakeAsyncSession import FakeAsyncSession + +def make_async_sessionmaker(sessionmaker): + @asynccontextmanager + async def async_session_maker(): + await asyncio.sleep(0.1) + yield FakeAsyncSession(sessionmaker()) + + return async_session_maker class SQLAlchemyAdapter(): - def __init__(self, db_type: str, db_path: str, db_name: str, db_user:str, db_password:str, db_host:str, db_port:str): + def __init__(self, db_type: str, db_path: str, db_name: str, db_user: str, db_password: str, db_host: str, db_port: str): self.db_location = os.path.abspath(os.path.join(db_path, db_name)) - # self.engine = create_engine(f"{db_type}:///{self.db_location}") - if db_type == "duckdb": - self.engine = create_engine(f"duckdb:///{self.db_location}") - self.sessionmaker = sessionmaker(bind=self.engine) + if db_type == "duckdb": + LocalStorage.ensure_directory_exists(db_path) + + self.engine = create_engine(f"duckdb:///{self.db_location}") + self.sessionmaker = make_async_sessionmaker(sessionmaker(bind = self.engine)) else: self.engine = create_async_engine(f"postgresql+asyncpg://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}") - 
self.sessionmaker = sessionmaker(bind=self.engine, class_=AsyncSession, expire_on_commit=False) + self.sessionmaker = async_sessionmaker(bind = self.engine, expire_on_commit = False) - async def get_async_session(self): + @asynccontextmanager + async def get_async_session(self) -> AsyncGenerator[AsyncSession, None]: async_session_maker = self.sessionmaker async with async_session_maker() as session: yield session @@ -26,10 +41,10 @@ class SQLAlchemyAdapter(): with session_maker() as session: yield session - def get_datasets(self): - with self.engine.connect() as connection: - result = connection.execute(text("SELECT DISTINCT schema_name FROM information_schema.tables;")) - tables = [row['schema_name'] for row in result] + async def get_datasets(self): + async with self.engine.connect() as connection: + result = await connection.execute(text("SELECT DISTINCT schema_name FROM information_schema.tables;")) + tables = [row["schema_name"] for row in result] return list( filter( lambda schema_name: not schema_name.endswith("staging") and schema_name != "cognee", @@ -83,17 +98,20 @@ class SQLAlchemyAdapter(): def load_cognify_data(self, data): metadata = MetaData() + cognify_table = Table( - 'cognify', metadata, - Column('document_id', String), - Column('layer_id', String), - Column('created_at', TIMESTAMP, server_default=text('CURRENT_TIMESTAMP')), - Column('updated_at', TIMESTAMP, nullable=True, default=None), - Column('processed', Boolean, default=False), - Column('document_id_target', String, nullable=True) + "cognify", + metadata, + Column("document_id", String), + Column("created_at", TIMESTAMP, server_default=text("CURRENT_TIMESTAMP")), + Column("updated_at", TIMESTAMP, nullable=True, default=None), + Column("processed", Boolean, default=False), + Column("document_id_target", String, nullable=True) ) + metadata.create_all(self.engine) - insert_query = cognify_table.insert().values(document_id=text(':document_id'), layer_id=text(':layer_id')) + + insert_query = cognify_table.insert().values(document_id=text(":document_id")) with self.engine.connect() as connection: connection.execute(insert_query, data) @@ -102,23 +120,23 @@ class SQLAlchemyAdapter(): connection.execute(text(""" CREATE TABLE IF NOT EXISTS cognify ( document_id STRING, - layer_id STRING, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT NULL, processed BOOLEAN DEFAULT FALSE, document_id_target STRING NULL ); """)) - query = text(f""" - SELECT document_id, layer_id, created_at, updated_at, processed + query = text(""" + SELECT document_id, created_at, updated_at, processed FROM cognify WHERE document_id != :excluded_document_id AND processed = FALSE; """) - records = connection.execute(query, {'excluded_document_id': excluded_document_id}).fetchall() + records = connection.execute(query, {"excluded_document_id": excluded_document_id}).fetchall() + if records: - document_ids = tuple(record['document_id'] for record in records) - update_query = text(f"UPDATE cognify SET processed = TRUE WHERE document_id IN :document_ids;") - connection.execute(update_query, {'document_ids': document_ids}) + document_ids = tuple(record["document_id"] for record in records) + update_query = text("UPDATE cognify SET processed = TRUE WHERE document_id IN :document_ids;") + connection.execute(update_query, {"document_ids": document_ids}) return [dict(record) for record in records] def delete_cognify_data(self): @@ -126,7 +144,6 @@ class SQLAlchemyAdapter(): connection.execute(text(""" CREATE TABLE IF NOT EXISTS cognify ( 
document_id STRING, - layer_id STRING, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT NULL, processed BOOLEAN DEFAULT FALSE, @@ -136,8 +153,7 @@ class SQLAlchemyAdapter(): connection.execute(text("DELETE FROM cognify;")) connection.execute(text("DROP TABLE cognify;")) - def delete_database(self): - from cognee.infrastructure.files.storage import LocalStorage - LocalStorage.remove(self.db_location) - if LocalStorage.file_exists(self.db_location + ".wal"): - LocalStorage.remove(self.db_location + ".wal") + async def delete_database(self): + async with self.engine.begin() as connection: + from ..ModelBase import Base + await connection.run_sync(Base.metadata.drop_all) diff --git a/cognee/infrastructure/databases/relational/user_authentication/authentication_db.py b/cognee/infrastructure/databases/relational/user_authentication/authentication_db.py deleted file mode 100644 index bbc0a01f7..000000000 --- a/cognee/infrastructure/databases/relational/user_authentication/authentication_db.py +++ /dev/null @@ -1,79 +0,0 @@ -import hashlib -import uuid -from typing import AsyncGenerator, Generator, Optional - -from fastapi import Depends -from fastapi_users.db import SQLAlchemyBaseUserTableUUID, SQLAlchemyUserDatabase -from sqlalchemy import create_engine, UUID -from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine -from sqlalchemy.orm import DeclarativeBase, sessionmaker, Session -from cognee.infrastructure.databases.relational import get_relationaldb_config, Base -from sqlalchemy import Column, String, ForeignKey, Table, Integer -from contextlib import asynccontextmanager -from fastapi_users.exceptions import UserAlreadyExists -from fastapi_users.authentication import JWTStrategy -from sqlalchemy.orm import DeclarativeBase, sessionmaker, Session, relationship -from cognee.infrastructure.databases.relational.user_authentication.schemas import UserRead, UserCreate -# from cognee.infrastructure.databases.relational.user_authentication.users import get_user_manager, get_jwt_strategy -from fastapi.security import OAuth2PasswordRequestForm -# Association table for many-to-many relationship between users and groups -user_group = Table('user_group', Base.metadata, - Column('user_id', UUID, ForeignKey('users.id')), - Column('group_id', UUID, ForeignKey('groups.id'))) - -# Association table for many-to-many relationship between groups and permissions -group_permission = Table('group_permission', Base.metadata, - Column('group_id', UUID, ForeignKey('groups.id')), - Column('permission_id', UUID, ForeignKey('permissions.id'))) - - - -class User(SQLAlchemyBaseUserTableUUID, Base): - __tablename__ = 'users' - groups = relationship('Group', secondary=user_group, back_populates='users') - acls = relationship('ACL', back_populates='user') - -class Group(Base): - __tablename__ = 'groups' - id = Column(UUID, primary_key=True, index=True, default=uuid.uuid4) - name = Column(String, unique=True, index=True) - users = relationship('User', secondary=user_group, back_populates='groups') - permissions = relationship('Permission', secondary=group_permission, back_populates='groups') - acls = relationship('ACL', back_populates='group') -class Permission(Base): - __tablename__ = 'permissions' - id = Column(UUID, primary_key=True, index=True) - name = Column(String, unique=True, index=True) - groups = relationship('Group', secondary=group_permission, back_populates='permissions') - -class ACL(Base): - __tablename__ = 'acls' - id = Column(UUID, primary_key=True, 
index=True, default=uuid.uuid4) - document_id = Column(UUID, ForeignKey('dataset_data.id')) - user_id = Column(UUID, ForeignKey('users.id'), nullable=True) - group_id = Column(UUID, ForeignKey('groups.id'), nullable=True) - permission = Column(String) # 'read', 'write', 'execute' - document = relationship('DatasetData', back_populates='acls') - user = relationship('User', back_populates='acls') - group = relationship('Group', back_populates='acls') - -relational_config = get_relationaldb_config() - - - -engine = relational_config.create_engine() -async_session_maker = async_sessionmaker(engine.engine, expire_on_commit=False) - -async def create_db_and_tables(): - async with engine.engine.begin() as conn: - await conn.run_sync(Base.metadata.create_all) - -async def get_async_session() -> AsyncGenerator[AsyncSession, None]: - async with async_session_maker() as session: - yield session - # yield async_session_maker - -async def get_user_db(session: AsyncSession = Depends(get_async_session)): - yield SQLAlchemyUserDatabase(session, User) - - diff --git a/cognee/infrastructure/databases/relational/user_authentication/routers.py b/cognee/infrastructure/databases/relational/user_authentication/routers.py deleted file mode 100644 index 42843db6f..000000000 --- a/cognee/infrastructure/databases/relational/user_authentication/routers.py +++ /dev/null @@ -1,28 +0,0 @@ -from fastapi import APIRouter, Depends, HTTPException -from sqlalchemy.orm import Session -from cognee.infrastructure.databases.relational.user_authentication.authentication_db import get_user_db, User, Group, Permission - -permission_router = APIRouter() - -@permission_router.post("/groups/{group_id}/permissions") -async def assign_permission_to_group(group_id: int, permission: str, db: Session = Depends(get_user_db)): - group = db.query(Group).filter(Group.id == group_id).first() - if not group: - raise HTTPException(status_code=404, detail="Group not found") - perm = db.query(Permission).filter(Permission.name == permission).first() - if not perm: - perm = Permission(name=permission) - db.add(perm) - group.permissions.append(perm) - db.commit() - return {"msg": "Permission added to group"} - -@permission_router.post("/users/{user_id}/groups") -async def add_user_to_group(user_id: int, group_id: int, db: Session = Depends(get_user_db)): - user = db.query(User).filter(User.id == user_id).first() - group = db.query(Group).filter(Group.id == group_id).first() - if not user or not group: - raise HTTPException(status_code=404, detail="User or group not found") - user.groups.append(group) - db.commit() - return {"msg": "User added to group"} \ No newline at end of file diff --git a/cognee/infrastructure/databases/relational/user_authentication/schemas.py b/cognee/infrastructure/databases/relational/user_authentication/schemas.py deleted file mode 100644 index d7156223f..000000000 --- a/cognee/infrastructure/databases/relational/user_authentication/schemas.py +++ /dev/null @@ -1,15 +0,0 @@ -import uuid - -from fastapi_users import schemas - - -class UserRead(schemas.BaseUser[uuid.UUID]): - pass - - -class UserCreate(schemas.BaseUserCreate): - pass - - -class UserUpdate(schemas.BaseUserUpdate): - pass \ No newline at end of file diff --git a/cognee/infrastructure/databases/relational/user_authentication/users.py b/cognee/infrastructure/databases/relational/user_authentication/users.py deleted file mode 100644 index 7435eaf5b..000000000 --- a/cognee/infrastructure/databases/relational/user_authentication/users.py +++ /dev/null @@ -1,252 +0,0 
@@ -import hashlib -import uuid -from typing import Optional - -from fastapi import Depends, Request -from fastapi_users import BaseUserManager, FastAPIUsers, UUIDIDMixin, models -from fastapi_users.authentication import ( - AuthenticationBackend, - BearerTransport, - JWTStrategy, -) -from fastapi_users.exceptions import UserAlreadyExists -from fastapi_users.db import SQLAlchemyUserDatabase -from fastapi import Depends, HTTPException, status -from sqlalchemy import select -from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy.orm import Session -from cognee.infrastructure.databases.relational.user_authentication.authentication_db import User, get_user_db, \ - get_async_session, ACL -from fastapi.security import OAuth2PasswordRequestForm -from fastapi_users.authentication import JWTStrategy -from cognee.infrastructure.databases.relational.user_authentication.schemas import UserRead, UserCreate -from contextlib import asynccontextmanager - -SECRET = "SECRET" - - -class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]): - reset_password_token_secret = SECRET - verification_token_secret = SECRET - - async def on_after_register(self, user: User, request: Optional[Request] = None): - print(f"User {user.id} has registered.") - - async def on_after_forgot_password( - self, user: User, token: str, request: Optional[Request] = None - ): - print(f"User {user.id} has forgot their password. Reset token: {token}") - - async def on_after_request_verify( - self, user: User, token: str, request: Optional[Request] = None - ): - print(f"Verification requested for user {user.id}. Verification token: {token}") - - -async def get_user_manager(user_db: SQLAlchemyUserDatabase = Depends(get_user_db)): - yield UserManager(user_db) - - -bearer_transport = BearerTransport(tokenUrl="auth/jwt/login") - - -def get_jwt_strategy() -> JWTStrategy[models.UP, models.ID]: - return JWTStrategy(secret=SECRET, lifetime_seconds=3600) - - -auth_backend = AuthenticationBackend( - name="jwt", - transport=bearer_transport, - get_strategy=get_jwt_strategy, -) - -fastapi_users = FastAPIUsers[User, uuid.UUID](get_user_manager, [auth_backend]) - -async def fast_api_users_init(): - return fastapi_users - -current_active_user = fastapi_users.current_user(active=True) - - -async def get_user_permissions(user: User, session: Session): - permissions = set() - for group in user.groups: - permissions.update(permission.name for permission in group.permissions) - return permissions - -def has_permission(permission: str): - async def permission_checker(user: User = Depends(current_active_user), session: Session = Depends(get_user_db)): - user_permissions = await get_user_permissions(user, session) - if permission not in user_permissions: - raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Not enough permissions") - return True - return Depends(permission_checker) - - -async def hash_password(password: str) -> str: - return hashlib.sha256(password.encode()).hexdigest() - -# Define context managers for dependencies -get_async_session_context = asynccontextmanager(get_async_session) -get_user_db_context = asynccontextmanager(get_user_db) -get_user_manager_context = asynccontextmanager(get_user_manager) - -async def create_user_method(email: str, password: str, is_superuser: bool = False, is_active: bool = True): - try: - async with get_async_session_context() as session: - async with get_user_db_context(session) as user_db: - async with get_user_manager_context(user_db) as user_manager: - user = await 
user_manager.create( - UserCreate(email=email, password=password, is_superuser=is_superuser, is_active=is_active) - ) - print(f"User created: {user.email}") - except UserAlreadyExists: - print(f"User {email} already exists") - -async def authenticate_user_method(email: str, password: str) -> Optional[User]: - try: - async with get_async_session_context() as session: - async with get_user_db_context(session) as user_db: - async with get_user_manager_context(user_db) as user_manager: - credentials = OAuth2PasswordRequestForm(username=email, password=password) - user = await user_manager.authenticate(credentials) - if user is None or not user.is_active: - return None - return user - except Exception as e: - print(f"Error during authentication: {e}") - return None - -async def reset_user_password_method(email: str, new_password: str) -> bool: - async with get_async_session_context() as session: - async with get_user_db_context(session) as user_db: - user = await user_db.get_by_email(email) - if not user: - return False - user.hashed_password = await hash_password(new_password) - await user_db.update(user) - return True - -# async def generate_verification_token(email: str, tokens_db: dict) -> str: -# async with get_async_session_context() as session: -# async with get_user_db_context(session) as user_db: -# if not await user_db.get_by_email(email): -# raise ValueError("User does not exist") -# token = str(uuid.uuid4()) -# tokens_db[token] = email -# return token - -# async def verify_user_method(token: str, tokens_db: dict) -> bool: -# async with get_async_session_context() as session: -# async with get_user_db_context(session) as user_db: -# email = tokens_db.get(token) -# if not email or not await user_db.get_by_email(email): -# return False -# user = await user_db.get_by_email(email) -# user.is_verified = True -# await user_db.update(user) -# return True - - -async def user_create_token(user: User) -> Optional[str]: - try: - async with get_async_session_context() as session: - async with get_user_db_context(session) as user_db: - async with get_user_manager_context(user_db) as user_manager: - if user is None: - return None - strategy = get_jwt_strategy() - token = await strategy.write_token(user) - if token is not None: - return token - else: - return None - except: - return None - -async def user_check_token(token: str) -> bool: - try: - async with get_async_session_context() as session: - async with get_user_db_context(session) as user_db: - async with get_user_manager_context(user_db) as user_manager: - if token is None: - return False - strategy = get_jwt_strategy() - user = await strategy.read_token(token, user_manager) - if user is None or not user.is_active: - return False - else: - return True - except: - return False - -async def has_permission_document(user: User, document_id: str, permission: str, session: AsyncSession) -> bool: - # Check if the user has the specified permission for the document - acl_entry = await session.execute( - """ - SELECT 1 FROM acls - WHERE user_id = :user_id AND document_id = :document_id AND permission = :permission - """, - {'user_id': str(user.id), 'document_id': str(document_id), 'permission': permission} - ) - if acl_entry.scalar_one_or_none(): - return True - - # Check if any of the user's groups have the specified permission for the document - group_acl_entry = await session.execute( - """ - SELECT 1 FROM acls - JOIN user_group ON acls.group_id = user_group.group_id - WHERE user_group.user_id = :user_id AND acls.document_id = :document_id AND 
acls.permission = :permission - """, - {'user_id': str(user.id), 'document_id': str(document_id), 'permission': permission} - ) - if group_acl_entry.scalar_one_or_none(): - return True - - return False - -async def create_default_user(): - async with get_async_session_context() as session: - default_user_email = "default_user@example.com" - default_user_password = "default_password" - - user = await create_user_method( - email=default_user_email, - password=await hash_password(default_user_password), - is_superuser=True, - is_active=True) - session.add(user) - out = await session.commit() - await session.refresh(user) - return out.id - -async def give_permission_document(user: Optional[User], document_id: str, permission: str, - session: AsyncSession): - - acl_entry = ACL( - document_id=document_id, - user_id=user.id, - permission=permission - ) - session.add(acl_entry) - await session.commit() - - - if user.is_superuser: - permission = 'all_permissions' # Example permission, change as needed - acl_entry = ACL( - document_id=document_id, - user_id=user.id, - permission=permission - ) - session.add(acl_entry) - await session.commit() - - -async def get_document_ids_for_user(user_id: uuid.UUID, session: AsyncSession) -> list[str]: - result = await session.execute( - select(ACL.document_id).filter_by(user_id=user_id) - ) - document_ids = [row[0] for row in result.fetchall()] - return document_ids \ No newline at end of file diff --git a/cognee/modules/data/deletion/prune_system.py b/cognee/modules/data/deletion/prune_system.py index 70627bdc7..38c625558 100644 --- a/cognee/modules/data/deletion/prune_system.py +++ b/cognee/modules/data/deletion/prune_system.py @@ -1,6 +1,6 @@ from cognee.infrastructure.databases.vector import get_vector_engine from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_engine -from cognee.infrastructure.databases.relational import get_relationaldb_config +from cognee.infrastructure.databases.relational import get_relational_engine async def prune_system(graph = True, vector = True, metadata = False): if graph: @@ -12,6 +12,5 @@ async def prune_system(graph = True, vector = True, metadata = False): await vector_engine.prune() if metadata: - db_config = get_relationaldb_config() - db_engine = db_config.database_engine - db_engine.delete_database() + db_engine = get_relational_engine() + await db_engine.delete_database() diff --git a/cognee/modules/data/extraction/knowledge_graph/expand_knowledge_graph.py b/cognee/modules/data/extraction/knowledge_graph/expand_knowledge_graph.py index 3735b41b9..0a959df52 100644 --- a/cognee/modules/data/extraction/knowledge_graph/expand_knowledge_graph.py +++ b/cognee/modules/data/extraction/knowledge_graph/expand_knowledge_graph.py @@ -1,7 +1,7 @@ import json import asyncio from uuid import uuid5, NAMESPACE_OID -from datetime import datetime +from datetime import datetime, timezone from typing import Type from pydantic import BaseModel from cognee.infrastructure.databases.graph import get_graph_engine @@ -91,8 +91,8 @@ async def expand_knowledge_graph(data_chunks: list[DocumentChunk], graph_model: name = node_name, type = node_name, description = node.description, - created_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - updated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + created_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S"), + updated_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S"), ) graph_nodes.append(( @@ -145,8 +145,8 @@ async def 
expand_knowledge_graph(data_chunks: list[DocumentChunk], graph_model: name = type_node_name, type = type_node_id, description = type_node_name, - created_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - updated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + created_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S"), + updated_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S"), ) graph_nodes.append((type_node_id, dict( diff --git a/cognee/modules/pipelines/models/Pipeline.py b/cognee/modules/pipelines/models/Pipeline.py index 7666b8bbf..8005844be 100644 --- a/cognee/modules/pipelines/models/Pipeline.py +++ b/cognee/modules/pipelines/models/Pipeline.py @@ -1,22 +1,22 @@ -from typing import List from uuid import uuid4 from datetime import datetime, timezone from sqlalchemy import Column, UUID, DateTime, String, Text from sqlalchemy.orm import relationship, Mapped -from cognee.infrastructure.databases.relational import ModelBase +from cognee.infrastructure.databases.relational import Base from .PipelineTask import PipelineTask -class Pipeline(ModelBase): +class Pipeline(Base): __tablename__ = "pipelines" - id = Column(UUID, primary_key = True, default = uuid4()) + id = Column(UUID, primary_key = True, default = uuid4) + name = Column(String) description = Column(Text, nullable = True) - created_at = Column(DateTime, default = datetime.now(timezone.utc)) - updated_at = Column(DateTime, onupdate = datetime.now(timezone.utc)) + created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc)) + updated_at = Column(DateTime(timezone = True), onupdate = lambda: datetime.now(timezone.utc)) - tasks = Mapped[List["Task"]] = relationship( + tasks = Mapped[list["Task"]] = relationship( secondary = PipelineTask.__tablename__, back_populates = "pipeline", ) diff --git a/cognee/modules/pipelines/models/PipelineRun.py b/cognee/modules/pipelines/models/PipelineRun.py new file mode 100644 index 000000000..bc5f890d9 --- /dev/null +++ b/cognee/modules/pipelines/models/PipelineRun.py @@ -0,0 +1,15 @@ +from uuid import uuid4 +from datetime import datetime, timezone +from sqlalchemy import Column, UUID, DateTime, String, JSON +from cognee.infrastructure.databases.relational import Base + +class PipelineRun(Base): + __tablename__ = "pipeline_runs" + + id = Column(UUID, primary_key = True, default = uuid4) + + dataset_name = Column(String) + + created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc)) + + run_info = Column(JSON) diff --git a/cognee/modules/pipelines/models/PipelineTask.py b/cognee/modules/pipelines/models/PipelineTask.py index 82b618368..0c70baddc 100644 --- a/cognee/modules/pipelines/models/PipelineTask.py +++ b/cognee/modules/pipelines/models/PipelineTask.py @@ -1,14 +1,14 @@ from uuid import uuid4 from datetime import datetime, timezone from sqlalchemy import Column, DateTime, UUID, ForeignKey -from cognee.infrastructure.databases.relational import ModelBase +from cognee.infrastructure.databases.relational import Base -class PipelineTask(ModelBase): +class PipelineTask(Base): __tablename__ = "pipeline_task" - id = Column(UUID, primary_key = True, default = uuid4()) + id = Column(UUID, primary_key = True, default = uuid4) - created_at = Column(DateTime, default = datetime.now(timezone.utc)) + created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc)) pipeline_id = Column("pipeline", UUID, ForeignKey("pipeline.id"), primary_key = True) task_id = Column("task", UUID, 
ForeignKey("task.id"), primary_key = True) diff --git a/cognee/modules/pipelines/models/Task.py b/cognee/modules/pipelines/models/Task.py index 34a193dc7..c3ee89e41 100644 --- a/cognee/modules/pipelines/models/Task.py +++ b/cognee/modules/pipelines/models/Task.py @@ -1,24 +1,24 @@ from uuid import uuid4 -from typing import List from datetime import datetime, timezone from sqlalchemy.orm import relationship, Mapped from sqlalchemy import Column, String, DateTime, UUID, Text -from cognee.infrastructure.databases.relational import ModelBase +from cognee.infrastructure.databases.relational import Base from .PipelineTask import PipelineTask -class Task(ModelBase): +class Task(Base): __tablename__ = "tasks" - id = Column(UUID, primary_key = True, default = uuid4()) + id = Column(UUID, primary_key = True, default = uuid4) + name = Column(String) description = Column(Text, nullable = True) executable = Column(Text) - created_at = Column(DateTime, default = datetime.now(timezone.utc)) - updated_at = Column(DateTime, onupdate = datetime.now(timezone.utc)) + created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc)) + updated_at = Column(DateTime(timezone = True), onupdate = lambda: datetime.now(timezone.utc)) - datasets: Mapped[List["Pipeline"]] = relationship( + datasets: Mapped[list["Pipeline"]] = relationship( secondary = PipelineTask.__tablename__, back_populates = "task" ) diff --git a/cognee/infrastructure/databases/relational/user_authentication/__init__.py b/cognee/modules/pipelines/models/TaskStatus.py similarity index 100% rename from cognee/infrastructure/databases/relational/user_authentication/__init__.py rename to cognee/modules/pipelines/models/TaskStatus.py diff --git a/cognee/modules/tasks/create_task_status_table.py b/cognee/modules/tasks/create_task_status_table.py index f92709f06..42b6a0333 100644 --- a/cognee/modules/tasks/create_task_status_table.py +++ b/cognee/modules/tasks/create_task_status_table.py @@ -1,8 +1,7 @@ -from cognee.infrastructure.databases.relational.config import get_relationaldb_config +from cognee.infrastructure.databases.relational import get_relational_engine def create_task_status_table(): - config = get_relationaldb_config() - db_engine = config.database_engine + db_engine = get_relational_engine() db_engine.create_table("cognee.cognee", "cognee_task_status", [ dict(name = "data_id", type = "STRING"), diff --git a/cognee/modules/tasks/get_task_status.py b/cognee/modules/tasks/get_task_status.py index 863ada164..17b796084 100644 --- a/cognee/modules/tasks/get_task_status.py +++ b/cognee/modules/tasks/get_task_status.py @@ -1,8 +1,7 @@ -from cognee.infrastructure.databases.relational.config import get_relationaldb_config +from cognee.infrastructure.databases.relational import get_relational_engine def get_task_status(data_ids: [str]): - relational_config = get_relationaldb_config() - db_engine = relational_config.database_engine + db_engine = get_relational_engine() formatted_data_ids = ", ".join([f"'{data_id}'" for data_id in data_ids]) diff --git a/cognee/modules/tasks/update_task_status.py b/cognee/modules/tasks/update_task_status.py index 1efb3823e..0676a5a92 100644 --- a/cognee/modules/tasks/update_task_status.py +++ b/cognee/modules/tasks/update_task_status.py @@ -1,6 +1,5 @@ -from cognee.infrastructure.databases.relational.config import get_relationaldb_config +from cognee.infrastructure.databases.relational import get_relational_engine def update_task_status(data_id: str, status: str): - config = 
get_relationaldb_config() - db_engine = config.database_engine + db_engine = get_relational_engine() db_engine.insert_data("cognee.cognee", "cognee_task_status", [dict(data_id = data_id, status = status)]) diff --git a/cognee/modules/topology/topology.py b/cognee/modules/topology/topology.py index c733763c2..95ee510e5 100644 --- a/cognee/modules/topology/topology.py +++ b/cognee/modules/topology/topology.py @@ -14,7 +14,7 @@ from pydantic import BaseModel from cognee.infrastructure.data.chunking.config import get_chunk_config from cognee.infrastructure.data.chunking.get_chunking_engine import get_chunk_engine from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_engine -from cognee.infrastructure.databases.relational import get_relationaldb_config +from cognee.infrastructure.databases.relational import get_relational_engine from cognee.infrastructure.files.utils.extract_text_from_file import extract_text_from_file from cognee.infrastructure.files.utils.guess_file_type import guess_file_type, FileTypeException from cognee.modules.cognify.config import get_cognify_config @@ -162,11 +162,9 @@ async def main(): await add(f"data://{data_dir}", dataset_name="explanations") - relational_config = get_relationaldb_config() - db_engine = relational_config.database_engine + db_engine = get_relational_engine() - - datasets = db_engine.get_datasets() + datasets = await db_engine.get_datasets() dataset_files =[] for added_dataset in datasets: @@ -180,7 +178,7 @@ async def main(): file_path = "example_data.json" # or 'example_data.csv' # # # Adding graph topology - graph = await topology_engine.add_graph_topology(file_path, dataset_files=dataset_files) + graph = await topology_engine.add_graph_topology(file_path, files = dataset_files) print(graph) # Run the main function diff --git a/cognee/modules/users/__init__.py b/cognee/modules/users/__init__.py index e69de29bb..2fe7c570c 100644 --- a/cognee/modules/users/__init__.py +++ b/cognee/modules/users/__init__.py @@ -0,0 +1 @@ +from .get_user_db import get_user_db diff --git a/cognee/modules/users/authentication/get_auth_backend.py b/cognee/modules/users/authentication/get_auth_backend.py new file mode 100644 index 000000000..3e5017ffa --- /dev/null +++ b/cognee/modules/users/authentication/get_auth_backend.py @@ -0,0 +1,24 @@ +import os +from functools import lru_cache +from fastapi_users import models +from fastapi_users.authentication import ( + AuthenticationBackend, + BearerTransport, + JWTStrategy, +) + +@lru_cache +def get_auth_backend(): + bearer_transport = BearerTransport(tokenUrl = "auth/jwt/login") + + def get_jwt_strategy() -> JWTStrategy[models.UP, models.ID]: + secret = os.getenv("FASTAPI_USERS_JWT_SECRET", "super_secret") + return JWTStrategy(secret, lifetime_seconds = 3600) + + auth_backend = AuthenticationBackend( + name = "jwt", + transport = bearer_transport, + get_strategy = get_jwt_strategy, + ) + + return auth_backend diff --git a/cognee/modules/users/authentication/methods/authenticate_user.py b/cognee/modules/users/authentication/methods/authenticate_user.py new file mode 100644 index 000000000..5095092f4 --- /dev/null +++ b/cognee/modules/users/authentication/methods/authenticate_user.py @@ -0,0 +1,21 @@ +from fastapi.security import OAuth2PasswordRequestForm +from fastapi_users.exceptions import UserNotExists +from cognee.infrastructure.databases.relational import get_relational_engine +from ...get_user_manager import get_user_manager_context +from ...get_user_db import get_user_db_context + +async def 
authenticate_user(email: str, password: str): + try: + relational_engine = get_relational_engine() + + async with relational_engine.get_async_session() as session: + async with get_user_db_context(session) as user_db: + async with get_user_manager_context(user_db) as user_manager: + credentials = OAuth2PasswordRequestForm(username = email, password = password) + user = await user_manager.authenticate(credentials) + if user is None or not user.is_active: + return None + return user + except UserNotExists as error: + print(f"User {email} doesn't exist") + raise error diff --git a/cognee/modules/users/get_fastapi_users.py b/cognee/modules/users/get_fastapi_users.py new file mode 100644 index 000000000..81abe3551 --- /dev/null +++ b/cognee/modules/users/get_fastapi_users.py @@ -0,0 +1,15 @@ +import uuid +from functools import lru_cache +from fastapi_users import FastAPIUsers +from .authentication.get_auth_backend import get_auth_backend + +from .get_user_manager import get_user_manager +from .models.User import User + +@lru_cache +def get_fastapi_users(): + auth_backend = get_auth_backend() + + fastapi_users = FastAPIUsers[User, uuid.UUID](get_user_manager, [auth_backend]) + + return fastapi_users diff --git a/cognee/modules/users/get_user_db.py b/cognee/modules/users/get_user_db.py new file mode 100644 index 000000000..fb99defef --- /dev/null +++ b/cognee/modules/users/get_user_db.py @@ -0,0 +1,10 @@ +# from fastapi import Depends +from fastapi_users.db import SQLAlchemyUserDatabase +# from cognee.infrastructure.databases.relational import get_relational_engine +from .models.User import User + +async def get_user_db(session): + yield SQLAlchemyUserDatabase(session, User) + +from contextlib import asynccontextmanager +get_user_db_context = asynccontextmanager(get_user_db) diff --git a/cognee/modules/users/get_user_manager.py b/cognee/modules/users/get_user_manager.py new file mode 100644 index 000000000..b9d69e947 --- /dev/null +++ b/cognee/modules/users/get_user_manager.py @@ -0,0 +1,32 @@ +import os +import uuid +from typing import Optional +from fastapi import Depends, Request +from fastapi_users import BaseUserManager, UUIDIDMixin +from fastapi_users.db import SQLAlchemyUserDatabase + +from .get_user_db import get_user_db +from .models import User + +class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]): + reset_password_token_secret = os.getenv("FASTAPI_USERS_RESET_PASSWORD_TOKEN_SECRET", "super_secret") + verification_token_secret = os.getenv("FASTAPI_USERS_VERIFICATION_TOKEN_SECRET", "super_secret") + + async def on_after_register(self, user: User, request: Optional[Request] = None): + print(f"User {user.id} has registered.") + + async def on_after_forgot_password( + self, user: User, token: str, request: Optional[Request] = None + ): + print(f"User {user.id} has forgot their password. Reset token: {token}") + + async def on_after_request_verify( + self, user: User, token: str, request: Optional[Request] = None + ): + print(f"Verification requested for user {user.id}. 
Verification token: {token}") + +async def get_user_manager(user_db: SQLAlchemyUserDatabase = Depends(get_user_db)): + yield UserManager(user_db) + +from contextlib import asynccontextmanager +get_user_manager_context = asynccontextmanager(get_user_manager) diff --git a/cognee/modules/users/memory/__init__.py b/cognee/modules/users/memory/__init__.py deleted file mode 100644 index 736ec61f6..000000000 --- a/cognee/modules/users/memory/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .is_existing_memory import is_existing_memory -from .register_memory_for_user import register_memory_for_user -from .create_information_points import create_information_points diff --git a/cognee/modules/users/memory/create_information_points.py b/cognee/modules/users/memory/create_information_points.py deleted file mode 100644 index 748f70706..000000000 --- a/cognee/modules/users/memory/create_information_points.py +++ /dev/null @@ -1,23 +0,0 @@ -import uuid -from typing import List -from qdrant_client.models import PointStruct -from cognee.infrastructure.databases.vector.get_vector_database import get_vector_database -from cognee.infrastructure.llm.openai.openai_tools import async_get_embedding_with_backoff - -async def create_information_points(memory_name: str, payload: List[str]): - vector_db = get_vector_database() - - data_points = list() - for point in map(create_data_point, payload): - data_points.append(await point) - - return await vector_db.create_data_points(memory_name, data_points) - -async def create_data_point(data: str) -> PointStruct: - return PointStruct( - id = str(uuid.uuid4()), - vector = await async_get_embedding_with_backoff(data), - payload = { - "raw": data, - } - ) diff --git a/cognee/modules/users/memory/is_existing_memory.py b/cognee/modules/users/memory/is_existing_memory.py deleted file mode 100644 index 76b7fe34b..000000000 --- a/cognee/modules/users/memory/is_existing_memory.py +++ /dev/null @@ -1,6 +0,0 @@ -from cognee.infrastructure.databases.relational.get_database import get_database - -async def is_existing_memory(memory_name: str): - memory = await (get_database().get_memory_by_name(memory_name)) - - return memory is not None diff --git a/cognee/modules/users/memory/register_memory_for_user.py b/cognee/modules/users/memory/register_memory_for_user.py deleted file mode 100644 index e8bb29de7..000000000 --- a/cognee/modules/users/memory/register_memory_for_user.py +++ /dev/null @@ -1,4 +0,0 @@ -from cognee.infrastructure.databases.relational.get_database import get_database - -def register_memory_for_user(user_id: str, memory_name: str): - return get_database().add_memory(user_id, memory_name) diff --git a/cognee/modules/users/methods/__init__.py b/cognee/modules/users/methods/__init__.py new file mode 100644 index 000000000..bb962a227 --- /dev/null +++ b/cognee/modules/users/methods/__init__.py @@ -0,0 +1,3 @@ +from .create_user import create_user +from .get_default_user import get_default_user +from .create_default_user import create_default_user diff --git a/cognee/modules/users/methods/create_default_user.py b/cognee/modules/users/methods/create_default_user.py new file mode 100644 index 000000000..673756ed1 --- /dev/null +++ b/cognee/modules/users/methods/create_default_user.py @@ -0,0 +1,24 @@ +import hashlib +# from cognee.infrastructure.databases.relational import get_relational_engine +from .create_user import create_user + +async def create_default_user(): + default_user_email = "default_user@example.com" + default_user_password = "default_password" + + user = 
await create_user( + email = default_user_email, + password = await hash_password(default_user_password), + is_superuser = True, + is_active = True, + is_verified = True, + ) + + # db_engine = get_relational_engine() + # async with db_engine.get_async_session() as session: + # await session.refresh(user) + + return user + +async def hash_password(password: str) -> str: + return hashlib.sha256(password.encode()).hexdigest() diff --git a/cognee/modules/users/methods/create_user.py b/cognee/modules/users/methods/create_user.py new file mode 100644 index 000000000..f2886caae --- /dev/null +++ b/cognee/modules/users/methods/create_user.py @@ -0,0 +1,33 @@ +from fastapi_users.exceptions import UserAlreadyExists +from cognee.infrastructure.databases.relational import get_relational_engine +from ..get_user_manager import get_user_manager_context +from ..get_user_db import get_user_db_context +from ..models.User import UserCreate + +async def create_user( + email: str, + password: str, + is_superuser: bool = False, + is_active: bool = True, + is_verified: bool = False, +): + try: + relational_engine = get_relational_engine() + + async with relational_engine.get_async_session() as session: + async with get_user_db_context(session) as user_db: + async with get_user_manager_context(user_db) as user_manager: + user = await user_manager.create( + UserCreate( + email = email, + password = password, + is_superuser = is_superuser, + is_active = is_active, + is_verified = is_verified, + ) + ) + return user + print(f"User created: {user.email}") + except UserAlreadyExists as error: + print(f"User {email} already exists") + raise error diff --git a/cognee/modules/users/methods/get_default_user.py b/cognee/modules/users/methods/get_default_user.py new file mode 100644 index 000000000..011c06ec4 --- /dev/null +++ b/cognee/modules/users/methods/get_default_user.py @@ -0,0 +1,8 @@ +from cognee.modules.users.models import User +from cognee.infrastructure.databases.relational import get_relational_engine + +async def get_default_user() -> User: + db_engine = get_relational_engine() + + async with db_engine.get_async_session() as session: + return session.query(User).filter(User.email == "default_user@example.com").first() diff --git a/cognee/modules/users/models/ACL.py b/cognee/modules/users/models/ACL.py new file mode 100644 index 000000000..2bcffcc92 --- /dev/null +++ b/cognee/modules/users/models/ACL.py @@ -0,0 +1,25 @@ +from uuid import uuid4 +from datetime import datetime, timezone +from sqlalchemy.orm import relationship, Mapped +from sqlalchemy import Column, ForeignKey, DateTime, UUID +from cognee.infrastructure.databases.relational import Base +from .ACLResources import ACLResources + +class ACL(Base): + __tablename__ = "acls" + + id = Column(UUID(as_uuid = True), primary_key = True, default = uuid4) + + created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc)) + updated_at = Column(DateTime(timezone = True), onupdate = lambda: datetime.now(timezone.utc)) + + principal_id = Column(UUID(as_uuid = True), ForeignKey("principals.id")) + permission_id = Column(UUID(as_uuid = True), ForeignKey("permissions.id")) + + principal = relationship("Principal") + permission = relationship("Permission") + resources: Mapped[list["Resource"]] = relationship( + "Resource", + secondary = ACLResources.__tablename__, + back_populates = "acls", + ) diff --git a/cognee/modules/users/models/ACLResources.py b/cognee/modules/users/models/ACLResources.py new file mode 100644 index 
000000000..73d0a22fc --- /dev/null +++ b/cognee/modules/users/models/ACLResources.py @@ -0,0 +1,11 @@ +from datetime import datetime, timezone +from sqlalchemy import Column, ForeignKey, UUID, DateTime +from cognee.infrastructure.databases.relational import Base + +class ACLResources(Base): + __tablename__ = "acl_resources" + + created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc)) + + acl_id = Column(UUID(as_uuid = True), ForeignKey("acls.id"), primary_key = True) + resource_id = Column(UUID(as_uuid = True), ForeignKey("resources.id"), primary_key = True) diff --git a/cognee/modules/users/models/Group.py b/cognee/modules/users/models/Group.py new file mode 100644 index 000000000..19b93443d --- /dev/null +++ b/cognee/modules/users/models/Group.py @@ -0,0 +1,21 @@ +from sqlalchemy.orm import relationship, Mapped +from sqlalchemy import Column, String, ForeignKey, UUID +from .Principal import Principal +from .UserGroup import UserGroup + +class Group(Principal): + __tablename__ = "groups" + + id = Column(UUID(as_uuid = True), ForeignKey("principals.id"), primary_key = True) + + name = Column(String, unique = True, nullable = False, index = True) + + users: Mapped[list["User"]] = relationship( + "User", + secondary = UserGroup.__tablename__, + back_populates = "groups", + ) + + __mapper_args__ = { + "polymorphic_identity": "group", + } diff --git a/cognee/modules/users/models/Permission.py b/cognee/modules/users/models/Permission.py new file mode 100644 index 000000000..41ed8b135 --- /dev/null +++ b/cognee/modules/users/models/Permission.py @@ -0,0 +1,23 @@ +from uuid import uuid4 +from datetime import datetime, timezone +# from sqlalchemy.orm import relationship, Mapped +from sqlalchemy import Column, DateTime, UUID, String +from cognee.infrastructure.databases.relational import Base + +class Permission(Base): + __tablename__ = "permissions" + + id = Column(UUID, primary_key = True, index = True, default = uuid4) + + created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc)) + updated_at = Column(DateTime(timezone = True), onupdate = lambda: datetime.now(timezone.utc)) + + name = Column(String, unique = True, nullable = False, index = True) + + # acls = relationship("ACL", back_populates = "permission") + + # groups: Mapped[list["Group"]] = relationship( + # "Group", + # secondary = "group_permissions", + # back_populates = "permissions", + # ) diff --git a/cognee/modules/users/models/Principal.py b/cognee/modules/users/models/Principal.py new file mode 100644 index 000000000..b5f14a428 --- /dev/null +++ b/cognee/modules/users/models/Principal.py @@ -0,0 +1,19 @@ +from uuid import uuid4 +from datetime import datetime, timezone +from sqlalchemy import Column, String, DateTime, UUID +from cognee.infrastructure.databases.relational import Base + +class Principal(Base): + __tablename__ = "principals" + + id = Column(UUID(as_uuid = True), primary_key = True, index = True, default = uuid4) + + created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc)) + updated_at = Column(DateTime(timezone = True), onupdate = lambda: datetime.now(timezone.utc)) + + type = Column(String, nullable = False) + + __mapper_args__ = { + "polymorphic_identity": "principal", + "polymorphic_on": "type", + } diff --git a/cognee/modules/users/models/Resource.py b/cognee/modules/users/models/Resource.py new file mode 100644 index 000000000..bc9e14023 --- /dev/null +++ b/cognee/modules/users/models/Resource.py @@ -0,0 
+1,18 @@ +from uuid import uuid4 +from datetime import datetime, timezone +from sqlalchemy.orm import relationship +from sqlalchemy import Column, DateTime, UUID +from cognee.infrastructure.databases.relational import Base +from .ACLResources import ACLResources + +class Resource(Base): + __tablename__ = "resources" + + id = Column(UUID(as_uuid = True), primary_key = True, default = uuid4) + + created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc)) + updated_at = Column(DateTime(timezone = True), onupdate = lambda: datetime.now(timezone.utc)) + + resource_id = Column(UUID(as_uuid = True), nullable = False) + + acls = relationship("ACL", secondary = ACLResources.__tablename__, back_populates = "resources") diff --git a/cognee/modules/users/models/User.py b/cognee/modules/users/models/User.py new file mode 100644 index 000000000..fba04e6a0 --- /dev/null +++ b/cognee/modules/users/models/User.py @@ -0,0 +1,33 @@ +from uuid import UUID as uuid_UUID +from sqlalchemy import ForeignKey, UUID, Column +from sqlalchemy.orm import relationship, Mapped +from fastapi_users.db import SQLAlchemyBaseUserTableUUID +from .Principal import Principal +from .UserGroup import UserGroup + +class User(SQLAlchemyBaseUserTableUUID, Principal): + __tablename__ = "users" + + id = Column(UUID(as_uuid = True), ForeignKey("principals.id"), primary_key = True) + + groups: Mapped[list["Group"]] = relationship( + secondary = UserGroup.__tablename__, + back_populates = "users", + ) + + __mapper_args__ = { + "polymorphic_identity": "user", + } + + +# Keep these schemas in sync with User model +from fastapi_users import schemas + +class UserRead(schemas.BaseUser[uuid_UUID]): + pass + +class UserCreate(schemas.BaseUserCreate): + pass + +class UserUpdate(schemas.BaseUserUpdate): + pass diff --git a/cognee/modules/users/models/UserGroup.py b/cognee/modules/users/models/UserGroup.py new file mode 100644 index 000000000..0bb6cf657 --- /dev/null +++ b/cognee/modules/users/models/UserGroup.py @@ -0,0 +1,11 @@ +from datetime import datetime, timezone +from sqlalchemy import Column, ForeignKey, DateTime, UUID +from cognee.infrastructure.databases.relational import Base + +class UserGroup(Base): + __tablename__ = "user_groups" + + created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc)) + + user_id = Column(UUID(as_uuid = True), ForeignKey("users.id"), primary_key = True) + group_id = Column(UUID(as_uuid = True), ForeignKey("groups.id"), primary_key = True) diff --git a/cognee/modules/users/models/__init__.py b/cognee/modules/users/models/__init__.py new file mode 100644 index 000000000..7dc1bf8ca --- /dev/null +++ b/cognee/modules/users/models/__init__.py @@ -0,0 +1,5 @@ +from .User import User +from .Group import Group +from .Resource import Resource +from .Permission import Permission +from .ACL import ACL diff --git a/cognee/modules/users/permissions/methods/__init__.py b/cognee/modules/users/permissions/methods/__init__.py new file mode 100644 index 000000000..bc4c475dc --- /dev/null +++ b/cognee/modules/users/permissions/methods/__init__.py @@ -0,0 +1,3 @@ +from .check_permissions_on_documents import check_permissions_on_documents +from .give_permission_on_document import give_permission_on_document +from .get_document_ids_for_user import get_document_ids_for_user diff --git a/cognee/modules/users/permissions/methods/check_permissions_on_documents.py b/cognee/modules/users/permissions/methods/check_permissions_on_documents.py new file mode 100644 index 
000000000..4bc6f82c7 --- /dev/null +++ b/cognee/modules/users/permissions/methods/check_permissions_on_documents.py @@ -0,0 +1,32 @@ +import logging +from cognee.infrastructure.databases.relational import get_relational_engine +from ...models.User import User +from ...models.ACL import ACL + +logger = logging.getLogger(__name__) + +async def check_permissions_on_documents( + user: User, + permission_type: str, + document_ids: list[str], +): + try: + relational_engine = get_relational_engine() + + async with relational_engine.get_async_session() as session: + user_group_ids = [group.id for group in user.groups] + + acls = session.query(ACL) \ + .filter(ACL.principal_id.in_([user.id, *user_group_ids])) \ + .filter(ACL.permission.name == permission_type) \ + .all() + + resource_ids = [resource.resource_id for resource in acl.resources for acl in acls] + + has_permissions = all([document_id in resource_ids for document_id in document_ids]) + + if not has_permissions: + raise Exception(f"User {user.username} does not have {permission_type} permission on documents") + except Exception as error: + logger.error("Error checking permissions on documents: %s", str(error)) + raise error diff --git a/cognee/modules/users/permissions/methods/get_document_ids_for_user.py b/cognee/modules/users/permissions/methods/get_document_ids_for_user.py new file mode 100644 index 000000000..31f031d75 --- /dev/null +++ b/cognee/modules/users/permissions/methods/get_document_ids_for_user.py @@ -0,0 +1,24 @@ +from uuid import UUID +from sqlalchemy import select +from cognee.infrastructure.databases.relational import get_relational_engine +from ...models import ACL + +async def get_document_ids_for_user(user_id: UUID) -> list[str]: + db_engine = get_relational_engine() + + async with db_engine.get_async_session() as session: + async with session.begin(): + result = await session.execute( + select(ACL.resources.resource_id) \ + .join(ACL.resources) \ + .filter_by( + ACL.principal_id == user_id, + ACL.permission.name == "read", + ) + ) + document_ids = [row[0] for row in result.fetchall()] + return document_ids + + + + diff --git a/cognee/modules/users/permissions/methods/give_permission_on_document.py b/cognee/modules/users/permissions/methods/give_permission_on_document.py new file mode 100644 index 000000000..f4dabfd6b --- /dev/null +++ b/cognee/modules/users/permissions/methods/give_permission_on_document.py @@ -0,0 +1,38 @@ +from sqlalchemy.future import select +from cognee.infrastructure.databases.relational import get_relational_engine +from ...models import User, ACL, Resource, Permission + +async def give_permission_on_document( + user: User, + document_id: str, + permission_name: str, +): + db_engine = get_relational_engine() + + document_resource = Resource(resource_id = document_id) + + async with db_engine.get_async_session() as session: + permission = (await session.execute(select(Permission).filter(Permission.name == permission_name))).first() + + if permission is None: + permission = Permission(name = permission_name) + + acl = ACL(principal_id = user.id) + acl.permission = permission + acl.resources.append(document_resource) + + session.add(acl) + + await session.commit() + + + # if user.is_superuser: + # permission = "all_permissions" # Example permission, change as needed + + # acl_entry = ACL( + # document_id=document_id, + # user_id=user.id, + # permission=permission + # ) + # session.add(acl_entry) + # await session.commit() \ No newline at end of file diff --git a/docker-compose.yml 
b/docker-compose.yml index 9dfd54cee..2ef05170a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -60,6 +60,7 @@ services: - 3001:3000 networks: - cognee-network + postgres: image: postgres:latest container_name: postgres diff --git a/poetry.lock b/poetry.lock index d952b574a..b2e39ed3c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "aiofiles" @@ -1259,13 +1259,13 @@ files = [ [[package]] name = "dlt" -version = "0.5.1" +version = "0.5.2" description = "dlt is an open-source python-first scalable data loading library that does not require any backend to run." optional = false python-versions = "<3.13,>=3.8.1" files = [ - {file = "dlt-0.5.1-py3-none-any.whl", hash = "sha256:19d7920816fadd049a1a92c4ecc4e740bf1cd5f6484fce535715f8c214a835ce"}, - {file = "dlt-0.5.1.tar.gz", hash = "sha256:dfa11e498feec3aca0022541850b6fa7dd3fddce4c2e6fef195530638269fec4"}, + {file = "dlt-0.5.2-py3-none-any.whl", hash = "sha256:f4ebf5eb0fbdaca5a12df9bd95c259bccb33989e93fe649ecefb6cf3f2506d30"}, + {file = "dlt-0.5.2.tar.gz", hash = "sha256:6254c56421765fb1b1b81a7c68fa0221709b67654488c31595b7f98254327ac2"}, ] [package.dependencies] @@ -1282,6 +1282,8 @@ orjson = {version = ">=3.6.7,<3.9.11 || >3.9.11,<3.9.12 || >3.9.12,<3.9.13 || >3 packaging = ">=21.1" pathvalidate = ">=2.5.2" pendulum = ">=2.1.2" +psycopg2-binary = {version = ">=2.9.1", optional = true, markers = "extra == \"postgres\" or extra == \"redshift\""} +psycopg2cffi = {version = ">=2.9.0", optional = true, markers = "platform_python_implementation == \"PyPy\" and (extra == \"postgres\" or extra == \"redshift\")"} pytz = ">=2022.6" PyYAML = ">=5.4.1" requests = ">=2.26.0" @@ -4819,6 +4821,101 @@ files = [ [package.extras] test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"] +[[package]] +name = "psycopg2-binary" +version = "2.9.9" +description = "psycopg2 - Python-PostgreSQL Database Adapter" +optional = false +python-versions = ">=3.7" +files = [ + {file = "psycopg2-binary-2.9.9.tar.gz", hash = "sha256:7f01846810177d829c7692f1f5ada8096762d9172af1b1a28d4ab5b77c923c1c"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c2470da5418b76232f02a2fcd2229537bb2d5a7096674ce61859c3229f2eb202"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c6af2a6d4b7ee9615cbb162b0738f6e1fd1f5c3eda7e5da17861eacf4c717ea7"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:75723c3c0fbbf34350b46a3199eb50638ab22a0228f93fb472ef4d9becc2382b"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:83791a65b51ad6ee6cf0845634859d69a038ea9b03d7b26e703f94c7e93dbcf9"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0ef4854e82c09e84cc63084a9e4ccd6d9b154f1dbdd283efb92ecd0b5e2b8c84"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed1184ab8f113e8d660ce49a56390ca181f2981066acc27cf637d5c1e10ce46e"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d2997c458c690ec2bc6b0b7ecbafd02b029b7b4283078d3b32a852a7ce3ddd98"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-musllinux_1_1_i686.whl", hash = 
"sha256:b58b4710c7f4161b5e9dcbe73bb7c62d65670a87df7bcce9e1faaad43e715245"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:0c009475ee389757e6e34611d75f6e4f05f0cf5ebb76c6037508318e1a1e0d7e"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8dbf6d1bc73f1d04ec1734bae3b4fb0ee3cb2a493d35ede9badbeb901fb40f6f"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-win32.whl", hash = "sha256:3f78fd71c4f43a13d342be74ebbc0666fe1f555b8837eb113cb7416856c79682"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-win_amd64.whl", hash = "sha256:876801744b0dee379e4e3c38b76fc89f88834bb15bf92ee07d94acd06ec890a0"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ee825e70b1a209475622f7f7b776785bd68f34af6e7a46e2e42f27b659b5bc26"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1ea665f8ce695bcc37a90ee52de7a7980be5161375d42a0b6c6abedbf0d81f0f"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:143072318f793f53819048fdfe30c321890af0c3ec7cb1dfc9cc87aa88241de2"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c332c8d69fb64979ebf76613c66b985414927a40f8defa16cf1bc028b7b0a7b0"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f7fc5a5acafb7d6ccca13bfa8c90f8c51f13d8fb87d95656d3950f0158d3ce53"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:977646e05232579d2e7b9c59e21dbe5261f403a88417f6a6512e70d3f8a046be"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b6356793b84728d9d50ead16ab43c187673831e9d4019013f1402c41b1db9b27"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:bc7bb56d04601d443f24094e9e31ae6deec9ccb23581f75343feebaf30423359"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:77853062a2c45be16fd6b8d6de2a99278ee1d985a7bd8b103e97e41c034006d2"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:78151aa3ec21dccd5cdef6c74c3e73386dcdfaf19bced944169697d7ac7482fc"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-win32.whl", hash = "sha256:dc4926288b2a3e9fd7b50dc6a1909a13bbdadfc67d93f3374d984e56f885579d"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:b76bedd166805480ab069612119ea636f5ab8f8771e640ae103e05a4aae3e417"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:8532fd6e6e2dc57bcb3bc90b079c60de896d2128c5d9d6f24a63875a95a088cf"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b0605eaed3eb239e87df0d5e3c6489daae3f7388d455d0c0b4df899519c6a38d"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f8544b092a29a6ddd72f3556a9fcf249ec412e10ad28be6a0c0d948924f2212"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2d423c8d8a3c82d08fe8af900ad5b613ce3632a1249fd6a223941d0735fce493"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e5afae772c00980525f6d6ecf7cbca55676296b580c0e6abb407f15f3706996"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:6e6f98446430fdf41bd36d4faa6cb409f5140c1c2cf58ce0bbdaf16af7d3f119"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c77e3d1862452565875eb31bdb45ac62502feabbd53429fdc39a1cc341d681ba"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:cb16c65dcb648d0a43a2521f2f0a2300f40639f6f8c1ecbc662141e4e3e1ee07"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:911dda9c487075abd54e644ccdf5e5c16773470a6a5d3826fda76699410066fb"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:57fede879f08d23c85140a360c6a77709113efd1c993923c59fde17aa27599fe"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-win32.whl", hash = "sha256:64cf30263844fa208851ebb13b0732ce674d8ec6a0c86a4e160495d299ba3c93"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:81ff62668af011f9a48787564ab7eded4e9fb17a4a6a74af5ffa6a457400d2ab"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2293b001e319ab0d869d660a704942c9e2cce19745262a8aba2115ef41a0a42a"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03ef7df18daf2c4c07e2695e8cfd5ee7f748a1d54d802330985a78d2a5a6dca9"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a602ea5aff39bb9fac6308e9c9d82b9a35c2bf288e184a816002c9fae930b77"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8359bf4791968c5a78c56103702000105501adb557f3cf772b2c207284273984"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:275ff571376626195ab95a746e6a04c7df8ea34638b99fc11160de91f2fef503"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:f9b5571d33660d5009a8b3c25dc1db560206e2d2f89d3df1cb32d72c0d117d52"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:420f9bbf47a02616e8554e825208cb947969451978dceb77f95ad09c37791dae"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:4154ad09dac630a0f13f37b583eae260c6aa885d67dfbccb5b02c33f31a6d420"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a148c5d507bb9b4f2030a2025c545fccb0e1ef317393eaba42e7eabd28eb6041"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-win32.whl", hash = "sha256:68fc1f1ba168724771e38bee37d940d2865cb0f562380a1fb1ffb428b75cb692"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-win_amd64.whl", hash = "sha256:281309265596e388ef483250db3640e5f414168c5a67e9c665cafce9492eda2f"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:60989127da422b74a04345096c10d416c2b41bd7bf2a380eb541059e4e999980"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:246b123cc54bb5361588acc54218c8c9fb73068bf227a4a531d8ed56fa3ca7d6"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:34eccd14566f8fe14b2b95bb13b11572f7c7d5c36da61caf414d23b91fcc5d94"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18d0ef97766055fec15b5de2c06dd8e7654705ce3e5e5eed3b6651a1d2a9a152"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:d3f82c171b4ccd83bbaf35aa05e44e690113bd4f3b7b6cc54d2219b132f3ae55"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ead20f7913a9c1e894aebe47cccf9dc834e1618b7aa96155d2091a626e59c972"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ca49a8119c6cbd77375ae303b0cfd8c11f011abbbd64601167ecca18a87e7cdd"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:323ba25b92454adb36fa425dc5cf6f8f19f78948cbad2e7bc6cdf7b0d7982e59"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:1236ed0952fbd919c100bc839eaa4a39ebc397ed1c08a97fc45fee2a595aa1b3"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:729177eaf0aefca0994ce4cffe96ad3c75e377c7b6f4efa59ebf003b6d398716"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-win32.whl", hash = "sha256:804d99b24ad523a1fe18cc707bf741670332f7c7412e9d49cb5eab67e886b9b5"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-win_amd64.whl", hash = "sha256:a6cdcc3ede532f4a4b96000b6362099591ab4a3e913d70bcbac2b56c872446f7"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:72dffbd8b4194858d0941062a9766f8297e8868e1dd07a7b36212aaa90f49472"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:30dcc86377618a4c8f3b72418df92e77be4254d8f89f14b8e8f57d6d43603c0f"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:31a34c508c003a4347d389a9e6fcc2307cc2150eb516462a7a17512130de109e"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:15208be1c50b99203fe88d15695f22a5bed95ab3f84354c494bcb1d08557df67"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1873aade94b74715be2246321c8650cabf5a0d098a95bab81145ffffa4c13876"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a58c98a7e9c021f357348867f537017057c2ed7f77337fd914d0bedb35dace7"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4686818798f9194d03c9129a4d9a702d9e113a89cb03bffe08c6cf799e053291"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ebdc36bea43063116f0486869652cb2ed7032dbc59fbcb4445c4862b5c1ecf7f"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:ca08decd2697fdea0aea364b370b1249d47336aec935f87b8bbfd7da5b2ee9c1"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ac05fb791acf5e1a3e39402641827780fe44d27e72567a000412c648a85ba860"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-win32.whl", hash = "sha256:9dba73be7305b399924709b91682299794887cbbd88e38226ed9f6712eabee90"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-win_amd64.whl", hash = "sha256:f7ae5d65ccfbebdfa761585228eb4d0df3a8b15cfb53bd953e713e09fbb12957"}, +] + +[[package]] +name = "psycopg2cffi" +version = "2.9.0" +description = ".. 
image:: https://travis-ci.org/chtd/psycopg2cffi.svg?branch=master" +optional = false +python-versions = "*" +files = [ + {file = "psycopg2cffi-2.9.0.tar.gz", hash = "sha256:7e272edcd837de3a1d12b62185eb85c45a19feda9e62fa1b120c54f9e8d35c52"}, +] + +[package.dependencies] +cffi = ">=1.0" +six = "*" + [[package]] name = "ptyprocess" version = "0.7.0" @@ -5430,6 +5527,7 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -6451,6 +6549,14 @@ files = [ {file = "SQLAlchemy-2.0.21-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:b69f1f754d92eb1cc6b50938359dead36b96a1dcf11a8670bff65fd9b21a4b09"}, {file = "SQLAlchemy-2.0.21-cp311-cp311-win32.whl", hash = "sha256:af520a730d523eab77d754f5cf44cc7dd7ad2d54907adeb3233177eeb22f271b"}, {file = "SQLAlchemy-2.0.21-cp311-cp311-win_amd64.whl", hash = "sha256:141675dae56522126986fa4ca713739d00ed3a6f08f3c2eb92c39c6dfec463ce"}, + {file = "SQLAlchemy-2.0.21-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:56628ca27aa17b5890391ded4e385bf0480209726f198799b7e980c6bd473bd7"}, + {file = "SQLAlchemy-2.0.21-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:db726be58837fe5ac39859e0fa40baafe54c6d54c02aba1d47d25536170b690f"}, + {file = "SQLAlchemy-2.0.21-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e7421c1bfdbb7214313919472307be650bd45c4dc2fcb317d64d078993de045b"}, + {file = "SQLAlchemy-2.0.21-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:632784f7a6f12cfa0e84bf2a5003b07660addccf5563c132cd23b7cc1d7371a9"}, + {file = "SQLAlchemy-2.0.21-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:f6f7276cf26145a888f2182a98f204541b519d9ea358a65d82095d9c9e22f917"}, + {file = "SQLAlchemy-2.0.21-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2a1f7ffac934bc0ea717fa1596f938483fb8c402233f9b26679b4f7b38d6ab6e"}, + {file = "SQLAlchemy-2.0.21-cp312-cp312-win32.whl", hash = "sha256:bfece2f7cec502ec5f759bbc09ce711445372deeac3628f6fa1c16b7fb45b682"}, + {file = "SQLAlchemy-2.0.21-cp312-cp312-win_amd64.whl", hash = "sha256:526b869a0f4f000d8d8ee3409d0becca30ae73f494cbb48801da0129601f72c6"}, {file = "SQLAlchemy-2.0.21-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:7614f1eab4336df7dd6bee05bc974f2b02c38d3d0c78060c5faa4cd1ca2af3b8"}, {file = "SQLAlchemy-2.0.21-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d59cb9e20d79686aa473e0302e4a82882d7118744d30bb1dfb62d3c47141b3ec"}, {file = "SQLAlchemy-2.0.21-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:a95aa0672e3065d43c8aa80080cdd5cc40fe92dc873749e6c1cf23914c4b83af"}, @@ -7547,4 +7653,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.9.0,<3.12" -content-hash = "003bc600a702d9e31cbe73414f5db06c38bec29faa448fede74256efce786be2" +content-hash = "e8f056b2287bd8749be814fb94a8636170fa7ef781e38c0d7666b505e429b269" diff --git a/pyproject.toml b/pyproject.toml index 092328993..a808728b1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,7 @@ greenlet = "^3.0.3" ruff = "^0.2.2" filetype = "^1.2.0" nltk = "^3.8.1" -dlt = "0.5.1" +dlt = {extras = ["postgres"], version = "^0.5.2"} duckdb = {version = "^0.10.0", extras = ["dlt"]} overrides = "^7.7.0" aiofiles = "^23.2.1"