From 1df12c125924f0333249c93802ba761a4bc4999a Mon Sep 17 00:00:00 2001
From: hajdul88 <52442977+hajdul88@users.noreply.github.com>
Date: Fri, 15 Nov 2024 14:47:13 +0100
Subject: [PATCH 1/6] fix: Fixes processing false Class keyword issue

---
 cognee/shared/SourceCodeGraph.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/cognee/shared/SourceCodeGraph.py b/cognee/shared/SourceCodeGraph.py
index 60f425e32..0fc8f9487 100644
--- a/cognee/shared/SourceCodeGraph.py
+++ b/cognee/shared/SourceCodeGraph.py
@@ -28,7 +28,7 @@ class Class(DataPoint):
     description: str
     constructor_parameters: List[Variable]
     extended_from_class: Optional["Class"] = None
-    has_methods: list["Function"]
+    has_methods: List["Function"]
 
     _metadata = {
         "index_fields": ["name"]
@@ -89,7 +89,8 @@ class SourceCodeGraph(DataPoint):
         Operator,
         Expression,
     ]]
-
 Class.model_rebuild()
 ClassInstance.model_rebuild()
 Expression.model_rebuild()
+FunctionCall.model_rebuild()
+SourceCodeGraph.model_rebuild()

From d90f5fe7c12313a8cddaebdc05f0dd42c7d2d2e5 Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Fri, 15 Nov 2024 15:05:46 +0100
Subject: [PATCH 2/6] feat: Add proxy for analytics

Added proxy usage with vercel hosting for telemetry and analytics

Feature COG-597
---
 cognee/shared/utils.py | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/cognee/shared/utils.py b/cognee/shared/utils.py
index 42a95b88b..d05782ea3 100644
--- a/cognee/shared/utils.py
+++ b/cognee/shared/utils.py
@@ -1,5 +1,6 @@
 """ This module contains utility functions for the cognee. """
 import os
+import requests
 from datetime import datetime, timezone
 import graphistry
 import networkx as nx
@@ -8,7 +9,6 @@ import pandas as pd
 import matplotlib.pyplot as plt
 import tiktoken
 import nltk
-from posthog import Posthog
 
 from cognee.base_config import get_base_config
 from cognee.infrastructure.databases.graph import get_graph_engine
@@ -16,6 +16,9 @@ from cognee.infrastructure.databases.graph import get_graph_engine
 from uuid import uuid4
 import pathlib
 
+# Analytics Proxy Url, currently hosted by Vercel
+vercel_url = "https://proxyanalytics.vercel.app"
+
 def get_anonymous_id():
     """Creates or reads a anonymous user id"""
     home_dir = str(pathlib.Path(pathlib.Path(__file__).parent.parent.parent.resolve()))
@@ -40,25 +43,23 @@ def send_telemetry(event_name: str, user_id, additional_properties: dict = {}):
     if env in ["test", "dev"]:
         return
 
-    posthog = Posthog(
-        project_api_key = "phc_UB1YVere1KtJg1MFxAo6ABfpkwN3OxCvGNDkMTjvH0",
-        host = "https://eu.i.posthog.com"
-    )
-
     current_time = datetime.now(timezone.utc)
-    properties = {
-        "time": current_time.strftime("%m/%d/%Y"),
-        "user_id": user_id,
-        **additional_properties,
+    payload = {
+        "anonymous_id": str(get_anonymous_id()),
+        "event_name": event_name,
+        "user_properties": {
+            "user_id": str(user_id),
+        },
+        "properties": {
+            "time": current_time.strftime("%m/%d/%Y"),
+            **additional_properties
+        },
     }
 
-    # Needed to forward properties to PostHog along with id
-    posthog.identify(get_anonymous_id(), properties)
+    response = requests.post(vercel_url, json=payload)
 
-    try:
-        posthog.capture(get_anonymous_id(), event_name, properties)
-    except Exception as e:
-        print("ERROR sending telemetric data to Posthog.
See exception: %s", e) + if response.status_code != 200: + print(f"Error sending telemetry through proxy: {response.status_code}") def num_tokens_from_string(string: str, encoding_name: str) -> int: """Returns the number of tokens in a text string.""" From 2703215dec6a292f6f38e38b7bf561c3287411b4 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 15 Nov 2024 15:20:41 +0100 Subject: [PATCH 3/6] refactor: Add user_id to event properties Adding user_id to event properties allows tracking of which user started the event Refactor COG-597 --- cognee/shared/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cognee/shared/utils.py b/cognee/shared/utils.py index d05782ea3..a1792a2ed 100644 --- a/cognee/shared/utils.py +++ b/cognee/shared/utils.py @@ -52,6 +52,7 @@ def send_telemetry(event_name: str, user_id, additional_properties: dict = {}): }, "properties": { "time": current_time.strftime("%m/%d/%Y"), + "user_id": str(user_id), **additional_properties }, } From a63490b9162128aab36618df4329af7a8978213f Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Sat, 16 Nov 2024 14:01:28 +0100 Subject: [PATCH 4/6] Bump release version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5f0347ee0..f23958978 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "cognee" -version = "0.1.18" +version = "0.1.19" description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning." authors = ["Vasilije Markovic", "Boris Arzentar"] readme = "README.md" From d30adb53f38634e4feb1ff5e0bee4e1b8c92b24d Mon Sep 17 00:00:00 2001 From: Igor Ilic <30923996+dexters1@users.noreply.github.com> Date: Sun, 17 Nov 2024 11:47:08 +0100 Subject: [PATCH 5/6] Cog 337 llama index support (#186) * feat: Add support for LlamaIndex Document type Added support for LlamaIndex Document type Feature #COG-337 * docs: Add Jupyer Notebook for cognee with llama index document type Added jupyter notebook which demonstrates cognee with LlamaIndex document type usage Docs #COG-337 * feat: Add metadata migration from LlamaIndex document type Allow usage of metadata from LlamaIndex documents Feature #COG-337 * refactor: Change llama index migration function name Change name of llama index function Refactor #COG-337 * chore: Add llama index core dependency Downgrade needed on tenacity and instructor modules to support llama index Chore #COG-337 * Feature: Add ingest_data_with_metadata task Added task that will have access to metadata if data is provided from different data ingestion tools Feature #COG-337 * docs: Add description on why specific type checking is done Explained why specific type checking is used instead of isinstance, as isinstace returns True for child classes as well Docs #COG-337 * fix: Add missing parameter to function call Added missing parameter to function call Fix #COG-337 * refactor: Move storing of data from async to sync function Moved data storing from async to sync Refactor #COG-337 * refactor: Pretend ingest_data was changes instead of having two tasks Refactor so ingest_data file was modified instead of having two ingest tasks Refactor #COG-337 * refactor: Use old name for data ingestion with metadata Merged new and old data ingestion tasks into one Refactor #COG-337 * refactor: Return ingest_data and save_data_to_storage Tasks Returned ingest_data and save_data_to_storage tasks Refactor #COG-337 * refactor: Return 
previous ingestion Tasks to add function Returned previous ignestion tasks to add function Refactor #COG-337 * fix: Remove dict and use string for search query Remove dictionary and use string for query in notebook and simple example Fix COG-337 * refactor: Add changes request in pull request Added the following changes that were requested in pull request: Added synchronize label, Made uniform syntax in if statement in workflow, fixed instructor dependency, added llama-index to be optional Refactor COG-337 * fix: Resolve issue with llama-index being mandatory Resolve issue with llama-index being mandatory to run cognee Fix COG-337 * fix: Add install of llama-index to notebook Removed additional references to llama-index from core cognee lib. Added llama-index-core install from notebook Fix COG-337 --------- --- .DS_Store | Bin 6148 -> 0 bytes .../test_cognee_llama_index_notebook.yml | 63 +++++ cognee/.DS_Store | Bin 6148 -> 0 bytes cognee/api/v1/add/add_v2.py | 2 +- .../methods/check_permission_on_documents.py | 2 +- cognee/tasks/ingestion/__init__.py | 2 + cognee/tasks/ingestion/ingest_data.py | 2 +- .../ingestion/ingest_data_with_metadata.py | 92 +++++++ .../ingestion/save_data_item_to_storage.py | 20 ++ ...save_data_item_with_metadata_to_storage.py | 28 +++ .../tasks/ingestion/save_data_to_storage.py | 18 +- cognee/tasks/ingestion/transform_data.py | 18 ++ examples/python/simple_example.py | 3 +- notebooks/cognee_llama_index.ipynb | 229 ++++++++++++++++++ poetry.lock | 138 +++++++++-- pyproject.toml | 8 +- 16 files changed, 588 insertions(+), 37 deletions(-) delete mode 100644 .DS_Store create mode 100644 .github/workflows/test_cognee_llama_index_notebook.yml delete mode 100644 cognee/.DS_Store create mode 100644 cognee/tasks/ingestion/ingest_data_with_metadata.py create mode 100644 cognee/tasks/ingestion/save_data_item_to_storage.py create mode 100644 cognee/tasks/ingestion/save_data_item_with_metadata_to_storage.py create mode 100644 cognee/tasks/ingestion/transform_data.py create mode 100644 notebooks/cognee_llama_index.ipynb diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index c53aac86a09d129220b9f904fda3e88a43eb0059..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKISv9b477m)L_?wu z8Wo@dRDcRl0V?oC1+usf$6q{?M^OPP@E;1;{ZQbBHL(r!s{@0#0KfslZkT&70W1~( z*2Fdt5ts%Q7*x#`LxYZZ$-J7_1_oU;n-9%5Yj!B=Z^!w?(?x3_M=C%CUKQxaa$@y= z3IEXlzmmA30#x9y6wtw9wV30RvbJ_U&T4Ieui=(+hnr#U6bxRDfnJWWuyQ>0q{u5a Y$9_$01D%ez(}DaMFkNU=;MWQ~01mYkvj6}9 diff --git a/.github/workflows/test_cognee_llama_index_notebook.yml b/.github/workflows/test_cognee_llama_index_notebook.yml new file mode 100644 index 000000000..c46d0de0d --- /dev/null +++ b/.github/workflows/test_cognee_llama_index_notebook.yml @@ -0,0 +1,63 @@ +name: test | llama index notebook + +on: + workflow_dispatch: + pull_request: + branches: + - main + types: [labeled, synchronize] + + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +env: + RUNTIME__LOG_LEVEL: ERROR + +jobs: + get_docs_changes: + name: docs changes + uses: ./.github/workflows/get_docs_changes.yml + + run_notebook_test: + name: test + needs: get_docs_changes + if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' && github.event.label.name == 'run-checks' + runs-on: ubuntu-latest + defaults: + run: + shell: bash + steps: + - name: Check out + uses: actions/checkout@master + + - name: Setup Python + uses: actions/setup-python@v5 + with: + 
python-version: '3.11.x' + + - name: Install Poetry + uses: snok/install-poetry@v1.3.2 + with: + virtualenvs-create: true + virtualenvs-in-project: true + installer-parallel: true + + - name: Install dependencies + run: | + poetry install --no-interaction --all-extras --no-root + poetry add jupyter --no-interaction + + - name: Execute Jupyter Notebook + env: + ENV: 'dev' + LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} + GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }} + GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }} + run: | + poetry run jupyter nbconvert \ + --to notebook \ + --execute notebooks/cognee_llama_index.ipynb \ + --output executed_notebook.ipynb \ + --ExecutePreprocessor.timeout=1200 \ No newline at end of file diff --git a/cognee/.DS_Store b/cognee/.DS_Store deleted file mode 100644 index 51227d71a94e52683c4d00d01ac912a7dfa75f3b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKI|>3p3{CuiU}I@HSMUad=n3`$7K)81_^Y?_TprDrPoXS!S|~4&yqQeiEc=Sh zMnrUeSOjyq1*=D6P-_S;VFyM%Fva+Zyp5TE&WLZbpy zfC^9nDnJE3tw46z(fHFB^FAs-1%6%u`#u!7VNGlU{nLTqBLHxKv>VnwO8|={fHkoV zL$0gG>bW2DSK<@<*e5h_zG?{ceokWPC@W?4D@!4jkV*c7e!sMHO^~d U8|ZZ8oet#BfayY`0^e5P0W;M1& diff --git a/cognee/api/v1/add/add_v2.py b/cognee/api/v1/add/add_v2.py index 4d43dd652..9d6e33012 100644 --- a/cognee/api/v1/add/add_v2.py +++ b/cognee/api/v1/add/add_v2.py @@ -21,4 +21,4 @@ async def add(data: Union[BinaryIO, list[BinaryIO], str, list[str]], dataset_nam pipeline = run_tasks(tasks, data, "add_pipeline") async for result in pipeline: - print(result) + print(result) \ No newline at end of file diff --git a/cognee/modules/users/permissions/methods/check_permission_on_documents.py b/cognee/modules/users/permissions/methods/check_permission_on_documents.py index e3f5d171e..c8c283e4a 100644 --- a/cognee/modules/users/permissions/methods/check_permission_on_documents.py +++ b/cognee/modules/users/permissions/methods/check_permission_on_documents.py @@ -33,4 +33,4 @@ async def check_permission_on_documents(user: User, permission_type: str, docume has_permissions = all(document_id in resource_ids for document_id in document_ids) if not has_permissions: - raise PermissionDeniedException(f"User {user.username} does not have {permission_type} permission on documents") + raise PermissionDeniedException(f"User {user.email} does not have {permission_type} permission on documents") diff --git a/cognee/tasks/ingestion/__init__.py b/cognee/tasks/ingestion/__init__.py index cc36e4ebe..56cab2756 100644 --- a/cognee/tasks/ingestion/__init__.py +++ b/cognee/tasks/ingestion/__init__.py @@ -1,2 +1,4 @@ from .ingest_data import ingest_data from .save_data_to_storage import save_data_to_storage +from .save_data_item_to_storage import save_data_item_to_storage +from .save_data_item_with_metadata_to_storage import save_data_item_with_metadata_to_storage diff --git a/cognee/tasks/ingestion/ingest_data.py b/cognee/tasks/ingestion/ingest_data.py index 2c0eba4e8..cb4b54598 100644 --- a/cognee/tasks/ingestion/ingest_data.py +++ b/cognee/tasks/ingestion/ingest_data.py @@ -3,7 +3,7 @@ import cognee.modules.ingestion as ingestion from cognee.shared.utils import send_telemetry from cognee.modules.users.models import User -from cognee.infrastructure.databases.relational import get_relational_config, get_relational_engine +from cognee.infrastructure.databases.relational import get_relational_engine from cognee.modules.data.methods import create_dataset from cognee.modules.users.permissions.methods import 
give_permission_on_document from .get_dlt_destination import get_dlt_destination diff --git a/cognee/tasks/ingestion/ingest_data_with_metadata.py b/cognee/tasks/ingestion/ingest_data_with_metadata.py new file mode 100644 index 000000000..e5a50c13b --- /dev/null +++ b/cognee/tasks/ingestion/ingest_data_with_metadata.py @@ -0,0 +1,92 @@ +import dlt +import cognee.modules.ingestion as ingestion +from typing import Any +from cognee.shared.utils import send_telemetry +from cognee.modules.users.models import User +from cognee.infrastructure.databases.relational import get_relational_engine +from cognee.modules.data.methods import create_dataset +from cognee.modules.users.permissions.methods import give_permission_on_document +from .get_dlt_destination import get_dlt_destination +from .save_data_item_with_metadata_to_storage import save_data_item_with_metadata_to_storage + +async def ingest_data_with_metadata(data: Any, dataset_name: str, user: User): + destination = get_dlt_destination() + + pipeline = dlt.pipeline( + pipeline_name = "file_load_from_filesystem", + destination = destination, + ) + + @dlt.resource(standalone = True, merge_key = "id") + async def data_resources(data: Any, user: User): + if not isinstance(data, list): + # Convert data to a list as we work with lists further down. + data = [data] + + # Process data + for data_item in data: + + file_path = save_data_item_with_metadata_to_storage(data_item, dataset_name) + + # Ingest data and add metadata + with open(file_path.replace("file://", ""), mode = "rb") as file: + classified_data = ingestion.classify(file) + + data_id = ingestion.identify(classified_data) + + file_metadata = classified_data.get_metadata() + + from sqlalchemy import select + from cognee.modules.data.models import Data + + db_engine = get_relational_engine() + + async with db_engine.get_async_session() as session: + dataset = await create_dataset(dataset_name, user.id, session) + + data_point = (await session.execute( + select(Data).filter(Data.id == data_id) + )).scalar_one_or_none() + + if data_point is not None: + data_point.name = file_metadata["name"] + data_point.raw_data_location = file_metadata["file_path"] + data_point.extension = file_metadata["extension"] + data_point.mime_type = file_metadata["mime_type"] + + await session.merge(data_point) + await session.commit() + else: + data_point = Data( + id = data_id, + name = file_metadata["name"], + raw_data_location = file_metadata["file_path"], + extension = file_metadata["extension"], + mime_type = file_metadata["mime_type"], + ) + + dataset.data.append(data_point) + await session.commit() + + yield { + "id": data_id, + "name": file_metadata["name"], + "file_path": file_metadata["file_path"], + "extension": file_metadata["extension"], + "mime_type": file_metadata["mime_type"], + } + + await give_permission_on_document(user, data_id, "read") + await give_permission_on_document(user, data_id, "write") + + + send_telemetry("cognee.add EXECUTION STARTED", user_id = user.id) + run_info = pipeline.run( + data_resources(data, user), + table_name = "file_metadata", + dataset_name = dataset_name, + write_disposition = "merge", + ) + send_telemetry("cognee.add EXECUTION COMPLETED", user_id = user.id) + + return run_info diff --git a/cognee/tasks/ingestion/save_data_item_to_storage.py b/cognee/tasks/ingestion/save_data_item_to_storage.py new file mode 100644 index 000000000..4782f271f --- /dev/null +++ b/cognee/tasks/ingestion/save_data_item_to_storage.py @@ -0,0 +1,20 @@ +from typing import Union, BinaryIO +from 
cognee.modules.ingestion import save_data_to_file + +def save_data_item_to_storage(data_item: Union[BinaryIO, str], dataset_name: str) -> str: + + # data is a file object coming from upload. + if hasattr(data_item, "file"): + file_path = save_data_to_file(data_item.file, dataset_name, filename=data_item.filename) + + elif isinstance(data_item, str): + # data is a file path + if data_item.startswith("file://") or data_item.startswith("/"): + file_path = data_item.replace("file://", "") + # data is text + else: + file_path = save_data_to_file(data_item, dataset_name) + else: + raise ValueError(f"Data type not supported: {type(data_item)}") + + return file_path \ No newline at end of file diff --git a/cognee/tasks/ingestion/save_data_item_with_metadata_to_storage.py b/cognee/tasks/ingestion/save_data_item_with_metadata_to_storage.py new file mode 100644 index 000000000..ec29edb89 --- /dev/null +++ b/cognee/tasks/ingestion/save_data_item_with_metadata_to_storage.py @@ -0,0 +1,28 @@ +from typing import Union, BinaryIO, Any +from cognee.modules.ingestion import save_data_to_file + +def save_data_item_with_metadata_to_storage(data_item: Union[BinaryIO, str, Any], dataset_name: str) -> str: + # Dynamic import is used because the llama_index module is optional. + # For the same reason Any is accepted as a data item + from llama_index.core import Document + from .transform_data import get_data_from_llama_index + + # Check if data is of type Document or any of it's subclasses + if isinstance(data_item, Document): + file_path = get_data_from_llama_index(data_item, dataset_name) + + # data is a file object coming from upload. + elif hasattr(data_item, "file"): + file_path = save_data_to_file(data_item.file, dataset_name, filename=data_item.filename) + + elif isinstance(data_item, str): + # data is a file path + if data_item.startswith("file://") or data_item.startswith("/"): + file_path = data_item.replace("file://", "") + # data is text + else: + file_path = save_data_to_file(data_item, dataset_name) + else: + raise ValueError(f"Data type not supported: {type(data_item)}") + + return file_path \ No newline at end of file diff --git a/cognee/tasks/ingestion/save_data_to_storage.py b/cognee/tasks/ingestion/save_data_to_storage.py index f646db52e..85eb81582 100644 --- a/cognee/tasks/ingestion/save_data_to_storage.py +++ b/cognee/tasks/ingestion/save_data_to_storage.py @@ -1,5 +1,5 @@ from typing import Union, BinaryIO -from cognee.modules.ingestion import save_data_to_file +from cognee.tasks.ingestion.save_data_item_to_storage import save_data_item_to_storage def save_data_to_storage(data: Union[BinaryIO, str], dataset_name) -> list[str]: if not isinstance(data, list): @@ -9,19 +9,7 @@ def save_data_to_storage(data: Union[BinaryIO, str], dataset_name) -> list[str]: file_paths = [] for data_item in data: - # data is a file object coming from upload. 
- if hasattr(data_item, "file"): - file_path = save_data_to_file(data_item.file, dataset_name, filename = data_item.filename) - file_paths.append(file_path) - - if isinstance(data_item, str): - # data is a file path - if data_item.startswith("file://") or data_item.startswith("/"): - file_paths.append(data_item.replace("file://", "")) - - # data is text - else: - file_path = save_data_to_file(data_item, dataset_name) - file_paths.append(file_path) + file_path = save_data_item_to_storage(data_item, dataset_name) + file_paths.append(file_path) return file_paths diff --git a/cognee/tasks/ingestion/transform_data.py b/cognee/tasks/ingestion/transform_data.py new file mode 100644 index 000000000..c2ea86c47 --- /dev/null +++ b/cognee/tasks/ingestion/transform_data.py @@ -0,0 +1,18 @@ +from llama_index.core import Document +from llama_index.core.schema import ImageDocument +from cognee.modules.ingestion import save_data_to_file +from typing import Union + +def get_data_from_llama_index(data_point: Union[Document, ImageDocument], dataset_name: str) -> str: + # Specific type checking is used to ensure it's not a child class from Document + if type(data_point) == Document: + file_path = data_point.metadata.get("file_path") + if file_path is None: + file_path = save_data_to_file(data_point.text, dataset_name) + return file_path + return file_path + elif type(data_point) == ImageDocument: + if data_point.image_path is None: + file_path = save_data_to_file(data_point.text, dataset_name) + return file_path + return data_point.image_path \ No newline at end of file diff --git a/examples/python/simple_example.py b/examples/python/simple_example.py index 9b64142ed..4e0e61834 100644 --- a/examples/python/simple_example.py +++ b/examples/python/simple_example.py @@ -27,8 +27,7 @@ async def main(): # Query cognee for insights on the added text search_results = await cognee.search( - SearchType.INSIGHTS, - {'query': 'Tell me about NLP'} + SearchType.INSIGHTS, query='Tell me about NLP' ) # Display search results diff --git a/notebooks/cognee_llama_index.ipynb b/notebooks/cognee_llama_index.ipynb new file mode 100644 index 000000000..15e17163d --- /dev/null +++ b/notebooks/cognee_llama_index.ipynb @@ -0,0 +1,229 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Cognee GraphRAG with LlamaIndex Documents" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install llama-index-core" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load Data\n", + "\n", + "We will use a sample news article dataset retrieved from Diffbot, which Tomaz has conveniently made available on GitHub for easy access.\n", + "\n", + "The dataset contains 2,500 samples; for ease of experimentation, we will use 5 of these samples, which include the `title` and `text` of news articles." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from llama_index.core import Document\n", + "\n", + "news = pd.read_csv(\n", + " \"https://raw.githubusercontent.com/tomasonjo/blog-datasets/main/news_articles.csv\"\n", + ")[:5]\n", + "\n", + "news.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prepare documents as required by LlamaIndex" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "documents = [\n", + " Document(text=f\"{row['title']}: {row['text']}\")\n", + " for i, row in news.iterrows()\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set environment variables" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "# Setting environment variables\n", + "if \"GRAPHISTRY_USERNAME\" not in os.environ: \n", + " os.environ[\"GRAPHISTRY_USERNAME\"] = \"\"\n", + "\n", + "if \"GRAPHISTRY_PASSWORD\" not in os.environ: \n", + " os.environ[\"GRAPHISTRY_PASSWORD\"] = \"\"\n", + "\n", + "if \"LLM_API_KEY\" not in os.environ:\n", + " os.environ[\"LLM_API_KEY\"] = \"\"\n", + "\n", + "# \"neo4j\" or \"networkx\"\n", + "os.environ[\"GRAPH_DATABASE_PROVIDER\"]=\"networkx\" \n", + "# Not needed if using networkx\n", + "#GRAPH_DATABASE_URL=\"\"\n", + "#GRAPH_DATABASE_USERNAME=\"\"\n", + "#GRAPH_DATABASE_PASSWORD=\"\"\n", + "\n", + "# \"qdrant\", \"weaviate\" or \"lancedb\"\n", + "os.environ[\"VECTOR_DB_PROVIDER\"]=\"lancedb\" \n", + "# Not needed if using \"lancedb\"\n", + "# os.environ[\"VECTOR_DB_URL\"]=\"\"\n", + "# os.environ[\"VECTOR_DB_KEY\"]=\"\"\n", + "\n", + "# Database provider\n", + "os.environ[\"DB_PROVIDER\"]=\"sqlite\" # or \"postgres\"\n", + "\n", + "# Database name\n", + "os.environ[\"DB_NAME\"]=\"cognee_db\"\n", + "\n", + "# Postgres specific parameters (Only if Postgres is run)\n", + "# os.environ[\"DB_HOST\"]=\"127.0.0.1\"\n", + "# os.environ[\"DB_PORT\"]=\"5432\"\n", + "# os.environ[\"DB_USERNAME\"]=\"cognee\"\n", + "# os.environ[\"DB_PASSWORD\"]=\"cognee\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run Cognee with LlamaIndex Documents" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Union, BinaryIO\n", + "\n", + "from cognee.infrastructure.databases.vector.pgvector import create_db_and_tables as create_pgvector_db_and_tables\n", + "from cognee.infrastructure.databases.relational import create_db_and_tables as create_relational_db_and_tables\n", + "from cognee.infrastructure.databases.graph import get_graph_engine\n", + "from cognee.shared.utils import render_graph\n", + "from cognee.modules.users.models import User\n", + "from cognee.modules.users.methods import get_default_user\n", + "from cognee.tasks.ingestion.ingest_data_with_metadata import ingest_data_with_metadata\n", + "import cognee\n", + "\n", + "# Create a clean slate for cognee -- reset data and system state\n", + "await cognee.prune.prune_data()\n", + "await cognee.prune.prune_system(metadata=True)\n", + "\n", + "# Add the LlamaIndex documents, and make it available for cognify\n", + "async def add(data: Union[BinaryIO, list[BinaryIO], str, list[str]], dataset_name: str = \"main_dataset\", user: User = None):\n", + " await create_relational_db_and_tables()\n", + " await 
create_pgvector_db_and_tables()\n", + "\n", + " if user is None:\n", + " user = await get_default_user()\n", + "\n", + " await ingest_data_with_metadata(data, dataset_name, user)\n", + "\n", + "await add(documents)\n", + "\n", + "# Use LLMs and cognee to create knowledge graph\n", + "await cognee.cognify()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Query Cognee for summaries related to data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from cognee import SearchType\n", + "\n", + "# Query cognee for summaries\n", + "search_results = await cognee.search(\n", + " SearchType.SUMMARIES, query=\"What are the main news discussed in the document?\"\n", + ")\n", + "# Display search results\n", + "print(\"\\n Summary of main news discussed:\\n\")\n", + "print(search_results[0][\"text\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Render Knowledge Graph generated from provided data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import graphistry\n", + "\n", + "# Get graph\n", + "graphistry.login(username=os.getenv(\"GRAPHISTRY_USERNAME\"), password=os.getenv(\"GRAPHISTRY_PASSWORD\"))\n", + "graph_engine = await get_graph_engine()\n", + "\n", + "graph_url = await render_graph(graph_engine.graph)\n", + "print(graph_url)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/poetry.lock b/poetry.lock index 81cf27717..93eb4a4d1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1125,6 +1125,21 @@ files = [ docs = ["ipython", "matplotlib", "numpydoc", "sphinx"] tests = ["pytest", "pytest-cov", "pytest-xdist"] +[[package]] +name = "dataclasses-json" +version = "0.6.7" +description = "Easily serialize dataclasses to and from JSON." +optional = true +python-versions = "<4.0,>=3.7" +files = [ + {file = "dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a"}, + {file = "dataclasses_json-0.6.7.tar.gz", hash = "sha256:b6b3e528266ea45b9535223bc53ca645f5208833c29229e847b3f26a1cc55fc0"}, +] + +[package.dependencies] +marshmallow = ">=3.18.0,<4.0.0" +typing-inspect = ">=0.4.0,<1" + [[package]] name = "datasets" version = "3.1.0" @@ -1220,6 +1235,23 @@ files = [ {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"}, ] +[[package]] +name = "deprecated" +version = "1.2.14" +description = "Python @deprecated decorator to deprecate old python classes, functions or methods." 
+optional = true +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "Deprecated-1.2.14-py2.py3-none-any.whl", hash = "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c"}, + {file = "Deprecated-1.2.14.tar.gz", hash = "sha256:e5323eb936458dccc2582dc6f9c322c852a775a27065ff2b0c4970b9d53d01b3"}, +] + +[package.dependencies] +wrapt = ">=1.10,<2" + +[package.extras] +dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "sphinx (<2)", "tox"] + [[package]] name = "deprecation" version = "2.1.0" @@ -1275,6 +1307,17 @@ files = [ graph = ["objgraph (>=1.7.2)"] profile = ["gprof2dot (>=2022.7.29)"] +[[package]] +name = "dirtyjson" +version = "1.0.8" +description = "JSON decoder for Python that can extract data from the muck" +optional = true +python-versions = "*" +files = [ + {file = "dirtyjson-1.0.8-py3-none-any.whl", hash = "sha256:125e27248435a58acace26d5c2c4c11a1c0de0a9c5124c5a94ba78e517d74f53"}, + {file = "dirtyjson-1.0.8.tar.gz", hash = "sha256:90ca4a18f3ff30ce849d100dcf4a003953c79d3a2348ef056f1d9c22231a25fd"}, +] + [[package]] name = "distro" version = "1.9.0" @@ -2396,37 +2439,35 @@ files = [ [[package]] name = "instructor" -version = "1.6.3" +version = "1.5.2" description = "structured outputs for llm" optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "instructor-1.6.3-py3-none-any.whl", hash = "sha256:a8f973fea621c0188009b65a3429a526c24aeb249fc24100b605ea496e92d622"}, - {file = "instructor-1.6.3.tar.gz", hash = "sha256:399cd90e30b5bc7cbd47acd7399c9c4e84926a96c20c8b5d00c5a04b41ed41ab"}, + {file = "instructor-1.5.2-py3-none-any.whl", hash = "sha256:da25abbf1ab792fb094992f1d9ce593e26fe458cb1f9a8e7ebf9d68f3f2c757a"}, + {file = "instructor-1.5.2.tar.gz", hash = "sha256:fdd5ccbca21b4c558a24e9ba12c84afd907e65153a39d035f47c25800011a977"}, ] [package.dependencies] aiohttp = ">=3.9.1,<4.0.0" docstring-parser = ">=0.16,<0.17" -jinja2 = ">=3.1.4,<4.0.0" jiter = ">=0.5.0,<0.6.0" -openai = ">=1.52.0,<2.0.0" +openai = ">=1.45.0,<2.0.0" pydantic = ">=2.8.0,<3.0.0" pydantic-core = ">=2.18.0,<3.0.0" rich = ">=13.7.0,<14.0.0" -tenacity = ">=9.0.0,<10.0.0" +tenacity = ">=8.4.1,<9.0.0" typer = ">=0.9.0,<1.0.0" [package.extras] -anthropic = ["anthropic (>=0.36.2,<0.37.0)", "xmltodict (>=0.13.0,<0.14.0)"] +anthropic = ["anthropic (>=0.34.0,<0.35.0)", "xmltodict (>=0.13.0,<0.14.0)"] cerebras-cloud-sdk = ["cerebras_cloud_sdk (>=1.5.0,<2.0.0)"] cohere = ["cohere (>=5.1.8,<6.0.0)"] -fireworks-ai = ["fireworks-ai (>=0.15.4,<0.16.0)"] google-generativeai = ["google-generativeai (>=0.8.2,<0.9.0)"] groq = ["groq (>=0.4.2,<0.5.0)"] litellm = ["litellm (>=1.35.31,<2.0.0)"] mistralai = ["mistralai (>=1.0.3,<2.0.0)"] -test-docs = ["anthropic (>=0.36.2,<0.37.0)", "cohere (>=5.1.8,<6.0.0)", "diskcache (>=5.6.3,<6.0.0)", "fastapi (>=0.109.2,<0.110.0)", "groq (>=0.4.2,<0.5.0)", "litellm (>=1.35.31,<2.0.0)", "mistralai (>=1.0.3,<2.0.0)", "pandas (>=2.2.0,<3.0.0)", "pydantic_extra_types (>=2.6.0,<3.0.0)", "redis (>=5.0.1,<6.0.0)", "tabulate (>=0.9.0,<0.10.0)"] +test-docs = ["anthropic (>=0.34.0,<0.35.0)", "cohere (>=5.1.8,<6.0.0)", "diskcache (>=5.6.3,<6.0.0)", "fastapi (>=0.109.2,<0.110.0)", "groq (>=0.4.2,<0.5.0)", "litellm (>=1.35.31,<2.0.0)", "mistralai (>=1.0.3,<2.0.0)", "pandas (>=2.2.0,<3.0.0)", "pydantic_extra_types (>=2.6.0,<3.0.0)", "redis (>=5.0.1,<6.0.0)", "tabulate (>=0.9.0,<0.10.0)"] vertexai = ["google-cloud-aiplatform (>=1.53.0,<2.0.0)", "jsonref (>=1.1.0,<2.0.0)"] [[package]] @@ -3246,6 +3287,40 @@ tokenizers = "*" extra-proxy = 
["azure-identity (>=1.15.0,<2.0.0)", "azure-keyvault-secrets (>=4.8.0,<5.0.0)", "google-cloud-kms (>=2.21.3,<3.0.0)", "prisma (==0.11.0)", "resend (>=0.8.0,<0.9.0)"] proxy = ["PyJWT (>=2.8.0,<3.0.0)", "apscheduler (>=3.10.4,<4.0.0)", "backoff", "cryptography (>=42.0.5,<43.0.0)", "fastapi (>=0.111.0,<0.112.0)", "fastapi-sso (>=0.10.0,<0.11.0)", "gunicorn (>=22.0.0,<23.0.0)", "orjson (>=3.9.7,<4.0.0)", "pynacl (>=1.5.0,<2.0.0)", "python-multipart (>=0.0.9,<0.0.10)", "pyyaml (>=6.0.1,<7.0.0)", "rq", "uvicorn (>=0.22.0,<0.23.0)"] +[[package]] +name = "llama-index-core" +version = "0.11.22" +description = "Interface between LLMs and your data" +optional = true +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_core-0.11.22-py3-none-any.whl", hash = "sha256:5c59d95dec9bb0727f25b03de89392c69076b2e4aaa6acbd8773de1f07502e9e"}, + {file = "llama_index_core-0.11.22.tar.gz", hash = "sha256:ddc30b9c873495de40ad8278d0c894ba09f32f6aa7fc638012b1b22b74c32553"}, +] + +[package.dependencies] +aiohttp = ">=3.8.6,<4.0.0" +dataclasses-json = "*" +deprecated = ">=1.2.9.3" +dirtyjson = ">=1.0.8,<2.0.0" +fsspec = ">=2023.5.0" +httpx = "*" +nest-asyncio = ">=1.5.8,<2.0.0" +networkx = ">=3.0" +nltk = ">3.8.1" +numpy = "<2.0.0" +pillow = ">=9.0.0" +pydantic = ">=2.7.0,<3.0.0" +PyYAML = ">=6.0.1" +requests = ">=2.31.0" +SQLAlchemy = {version = ">=1.4.49", extras = ["asyncio"]} +tenacity = ">=8.2.0,<8.4.0 || >8.4.0,<9.0.0" +tiktoken = ">=0.3.3" +tqdm = ">=4.66.1,<5.0.0" +typing-extensions = ">=4.5.0" +typing-inspect = ">=0.8.0" +wrapt = "*" + [[package]] name = "makefun" version = "1.15.6" @@ -3388,6 +3463,25 @@ files = [ {file = "markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0"}, ] +[[package]] +name = "marshmallow" +version = "3.23.1" +description = "A lightweight library for converting complex datatypes to and from native Python datatypes." 
+optional = true +python-versions = ">=3.9" +files = [ + {file = "marshmallow-3.23.1-py3-none-any.whl", hash = "sha256:fece2eb2c941180ea1b7fcbd4a83c51bfdd50093fdd3ad2585ee5e1df2508491"}, + {file = "marshmallow-3.23.1.tar.gz", hash = "sha256:3a8dfda6edd8dcdbf216c0ede1d1e78d230a6dc9c5a088f58c4083b974a0d468"}, +] + +[package.dependencies] +packaging = ">=17.0" + +[package.extras] +dev = ["marshmallow[tests]", "pre-commit (>=3.5,<5.0)", "tox"] +docs = ["alabaster (==1.0.0)", "autodocsumm (==0.2.14)", "sphinx (==8.1.3)", "sphinx-issues (==5.0.0)", "sphinx-version-warning (==1.1.2)"] +tests = ["pytest", "simplejson"] + [[package]] name = "matplotlib" version = "3.9.2" @@ -4885,7 +4979,7 @@ test = ["pytest", "pytest-xdist", "setuptools"] name = "psycopg2" version = "2.9.10" description = "psycopg2 - Python-PostgreSQL Database Adapter" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "psycopg2-2.9.10-cp310-cp310-win32.whl", hash = "sha256:5df2b672140f95adb453af93a7d669d7a7bf0a56bcd26f1502329166f4a61716"}, @@ -6551,13 +6645,13 @@ full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.7 [[package]] name = "tenacity" -version = "9.0.0" +version = "8.5.0" description = "Retry code until it succeeds" optional = false python-versions = ">=3.8" files = [ - {file = "tenacity-9.0.0-py3-none-any.whl", hash = "sha256:93de0c98785b27fcf659856aa9f54bfbd399e29969b0621bc7f762bd441b4539"}, - {file = "tenacity-9.0.0.tar.gz", hash = "sha256:807f37ca97d62aa361264d497b0e31e92b8027044942bfa756160d908320d73b"}, + {file = "tenacity-8.5.0-py3-none-any.whl", hash = "sha256:b594c2a5945830c267ce6b79a166228323ed52718f30302c1359836112346687"}, + {file = "tenacity-8.5.0.tar.gz", hash = "sha256:8bc6c0c8a09b31e6cad13c47afbed1a567518250a9a171418582ed8d9c20ca78"}, ] [package.extras] @@ -6946,6 +7040,21 @@ files = [ {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, ] +[[package]] +name = "typing-inspect" +version = "0.9.0" +description = "Runtime inspection utilities for typing module." 
+optional = true +python-versions = "*" +files = [ + {file = "typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f"}, + {file = "typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78"}, +] + +[package.dependencies] +mypy-extensions = ">=0.3.0" +typing-extensions = ">=3.7.4" + [[package]] name = "tzdata" version = "2024.2" @@ -7513,6 +7622,7 @@ type = ["pytest-mypy"] [extras] cli = [] filesystem = ["botocore"] +llama-index = ["llama-index-core"] neo4j = ["neo4j"] notebook = [] postgres = ["asyncpg", "pgvector", "psycopg2"] @@ -7522,4 +7632,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.9.0,<3.12" -content-hash = "57a154a7bbdd990e0fbe2313fa24c412dad98e47b9cd05e41bf378a3f597713f" +content-hash = "f5874af8364839dd2a362b6b3209c4aae108f30dcc27be43d0d07f7b28160eda" diff --git a/pyproject.toml b/pyproject.toml index f23958978..0bab3f615 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,7 @@ boto3 = "^1.26.125" botocore="^1.35.54" gunicorn = "^20.1.0" sqlalchemy = "2.0.35" -instructor = "1.6.3" +instructor = "1.5.2" networkx = "^3.2.1" aiosqlite = "^0.20.0" pandas = "2.0.3" @@ -45,7 +45,7 @@ dlt = {extras = ["sqlalchemy"], version = "^1.3.0"} aiofiles = "^23.2.1" qdrant-client = "^1.9.0" graphistry = "^0.33.5" -tenacity = "^9.0.0" +tenacity = "^8.4.1" weaviate-client = "4.6.7" scikit-learn = "^1.5.0" pypdf = "^4.1.0" @@ -68,7 +68,8 @@ fastapi-users = {version = "*", extras = ["sqlalchemy"]} alembic = "^1.13.3" asyncpg = "^0.29.0" pgvector = "^0.3.5" -psycopg2 = "^2.9.10" +psycopg2 = {version = "^2.9.10", optional = true} +llama-index-core = {version = "^0.11.22", optional = true} [tool.poetry.extras] filesystem = ["s3fs", "botocore"] @@ -78,6 +79,7 @@ qdrant = ["qdrant-client"] neo4j = ["neo4j"] postgres = ["psycopg2", "pgvector", "asyncpg"] notebook = ["ipykernel", "overrides", "ipywidgets", "jupyterlab", "jupyterlab_widgets", "jupyterlab-server", "jupyterlab-git"] +llama-index = ["llama-index-core"] [tool.poetry.group.dev.dependencies] From d8b6eeded5a21b3d63d7c992c08607d61ea366d4 Mon Sep 17 00:00:00 2001 From: Boris Date: Sun, 17 Nov 2024 11:59:10 +0100 Subject: [PATCH 6/6] feat: log search queries and results (#166) * feat: log search queries and results * fix: address coderabbit review comments * fix: parse UUID when logging search results * fix: remove custom UUID type and use DB agnostic UUID from sqlalchemy * Add new cognee_db --------- Co-authored-by: Leon Luithlen --- .../src/modules/chat/getHistory.ts | 8 ++++ .../src/ui/Partials/SearchView/SearchView.tsx | 15 +++++- cognee/__init__.py | 2 +- cognee/api/v1/search/__init__.py | 1 + cognee/api/v1/search/get_search_history.py | 9 ++++ .../v1/search/routers/get_search_router.py | 31 ++++++++++-- cognee/api/v1/search/search_v2.py | 17 +++++-- .../databases/relational/__init__.py | 3 -- .../databases/relational/data_types/UUID.py | 45 ------------------ .../pipeline/models/Operation.py | 4 +- cognee/modules/data/models/Data.py | 4 +- cognee/modules/data/models/Dataset.py | 4 +- cognee/modules/data/models/DatasetData.py | 4 +- cognee/modules/pipelines/models/Pipeline.py | 5 +- .../modules/pipelines/models/PipelineRun.py | 4 +- .../modules/pipelines/models/PipelineTask.py | 4 +- cognee/modules/search/models/Query.py | 16 +++++++ cognee/modules/search/models/Result.py | 16 +++++++ cognee/modules/search/operations/__init__.py | 3 ++ 
.../modules/search/operations/get_history.py | 31 ++++++++++++ .../modules/search/operations/get_queries.py | 17 +++++++ .../modules/search/operations/get_results.py | 17 +++++++ cognee/modules/search/operations/log_query.py | 19 ++++++++ .../modules/search/operations/log_result.py | 15 ++++++ cognee/modules/users/models/ACL.py | 4 +- cognee/modules/users/models/ACLResources.py | 4 +- cognee/modules/users/models/Group.py | 3 +- cognee/modules/users/models/Permission.py | 4 +- cognee/modules/users/models/Principal.py | 4 +- cognee/modules/users/models/Resource.py | 4 +- cognee/modules/users/models/User.py | 5 +- cognee/modules/users/models/UserGroup.py | 4 +- .../integration/run_toy_tasks/data/cognee_db | Bin 139264 -> 159744 bytes cognee/tests/test_library.py | 9 ++-- cognee/tests/test_neo4j.py | 9 ++-- cognee/tests/test_pgvector.py | 9 ++-- cognee/tests/test_qdrant.py | 9 ++-- cognee/tests/test_weaviate.py | 9 ++-- notebooks/cognee_demo.ipynb | 6 +-- 39 files changed, 267 insertions(+), 110 deletions(-) create mode 100644 cognee-frontend/src/modules/chat/getHistory.ts create mode 100644 cognee/api/v1/search/get_search_history.py delete mode 100644 cognee/infrastructure/databases/relational/data_types/UUID.py create mode 100644 cognee/modules/search/models/Query.py create mode 100644 cognee/modules/search/models/Result.py create mode 100644 cognee/modules/search/operations/__init__.py create mode 100644 cognee/modules/search/operations/get_history.py create mode 100644 cognee/modules/search/operations/get_queries.py create mode 100644 cognee/modules/search/operations/get_results.py create mode 100644 cognee/modules/search/operations/log_query.py create mode 100644 cognee/modules/search/operations/log_result.py diff --git a/cognee-frontend/src/modules/chat/getHistory.ts b/cognee-frontend/src/modules/chat/getHistory.ts new file mode 100644 index 000000000..dce914da7 --- /dev/null +++ b/cognee-frontend/src/modules/chat/getHistory.ts @@ -0,0 +1,8 @@ +import { fetch } from '@/utils'; + +export default function getHistory() { + return fetch( + '/v1/search', + ) + .then((response) => response.json()); +} diff --git a/cognee-frontend/src/ui/Partials/SearchView/SearchView.tsx b/cognee-frontend/src/ui/Partials/SearchView/SearchView.tsx index b20beb5b9..b4fa07777 100644 --- a/cognee-frontend/src/ui/Partials/SearchView/SearchView.tsx +++ b/cognee-frontend/src/ui/Partials/SearchView/SearchView.tsx @@ -1,9 +1,12 @@ +'use client'; + import { v4 } from 'uuid'; import classNames from 'classnames'; -import { useCallback, useState } from 'react'; +import { useCallback, useEffect, useState } from 'react'; import { CTAButton, Stack, Text, DropdownSelect, TextArea, useBoolean } from 'ohmy-ui'; import { fetch } from '@/utils'; import styles from './SearchView.module.css'; +import getHistory from '@/modules/chat/getHistory'; interface Message { id: string; @@ -52,6 +55,14 @@ export default function SearchView() { }, 300); }, []); + useEffect(() => { + getHistory() + .then((history) => { + setMessages(history); + scrollToBottom(); + }); + }, [scrollToBottom]); + const handleSearchSubmit = useCallback((event: React.FormEvent) => { event.preventDefault(); @@ -78,7 +89,7 @@ export default function SearchView() { 'Content-Type': 'application/json', }, body: JSON.stringify({ - query: inputValue, + query: inputValue.trim(), searchType: searchTypeValue, }), }) diff --git a/cognee/__init__.py b/cognee/__init__.py index aca7f5d4f..e89ef1dc3 100644 --- a/cognee/__init__.py +++ b/cognee/__init__.py @@ -2,7 +2,7 @@ from 
.api.v1.config.config import config
 from .api.v1.add import add
 from .api.v1.cognify import cognify
 from .api.v1.datasets.datasets import datasets
-from .api.v1.search import search, SearchType
+from .api.v1.search import search, SearchType, get_search_history
 from .api.v1.prune import prune
 
 # Pipelines
diff --git a/cognee/api/v1/search/__init__.py b/cognee/api/v1/search/__init__.py
index f01dcd63e..91cf35c88 100644
--- a/cognee/api/v1/search/__init__.py
+++ b/cognee/api/v1/search/__init__.py
@@ -1 +1,2 @@
 from .search_v2 import search, SearchType
+from .get_search_history import get_search_history
diff --git a/cognee/api/v1/search/get_search_history.py b/cognee/api/v1/search/get_search_history.py
new file mode 100644
index 000000000..fada67c85
--- /dev/null
+++ b/cognee/api/v1/search/get_search_history.py
@@ -0,0 +1,9 @@
+from cognee.modules.search.operations import get_history
+from cognee.modules.users.methods import get_default_user
+from cognee.modules.users.models import User
+
+async def get_search_history(user: User = None) -> list:
+    if not user:
+        user = await get_default_user()
+
+    return await get_history(user.id)
diff --git a/cognee/api/v1/search/routers/get_search_router.py b/cognee/api/v1/search/routers/get_search_router.py
index 5df49635f..893067c20 100644
--- a/cognee/api/v1/search/routers/get_search_router.py
+++ b/cognee/api/v1/search/routers/get_search_router.py
@@ -1,8 +1,11 @@
-from cognee.api.v1.search import SearchType
-from fastapi.responses import JSONResponse
-from cognee.modules.users.models import User
+from uuid import UUID
+from datetime import datetime
 from fastapi import Depends, APIRouter
-from cognee.api.DTO import InDTO
+from fastapi.responses import JSONResponse
+from cognee.api.v1.search import SearchType
+from cognee.api.DTO import InDTO, OutDTO
+from cognee.modules.users.models import User
+from cognee.modules.search.operations import get_history
 from cognee.modules.users.methods import get_authenticated_user
 
 
@@ -13,6 +16,24 @@ class SearchPayloadDTO(InDTO):
 def get_search_router() -> APIRouter:
     router = APIRouter()
 
+    class SearchHistoryItem(OutDTO):
+        id: UUID
+        text: str
+        user: str
+        created_at: datetime
+
+    @router.get("/", response_model = list[SearchHistoryItem])
+    async def get_search_history(user: User = Depends(get_authenticated_user)):
+        try:
+            history = await get_history(user.id)
+
+            return history
+        except Exception as error:
+            return JSONResponse(
+                status_code = 500,
+                content = {"error": str(error)}
+            )
+
     @router.post("/", response_model = list)
     async def search(payload: SearchPayloadDTO, user: User = Depends(get_authenticated_user)):
         """ This endpoint is responsible for searching for nodes in the graph."""
@@ -28,4 +49,4 @@ def get_search_router() -> APIRouter:
                 content = {"error": str(error)}
             )
 
-    return router
\ No newline at end of file
+    return router
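Taken together, these changes expose search history both as a Python API (cognee.get_search_history) and as a GET /v1/search route. A minimal usage sketch of the Python side, assuming a configured cognee install where data has already been added and cognified:

import asyncio

import cognee

async def main():
    # Run a search so there is something in the history log
    # (assumes cognee.add / cognee.cognify have already run).
    await cognee.search(cognee.SearchType.CHUNKS, query_text = "example")

    # Read back the per-user history; get_search_history falls back
    # to the default user when no user is passed (see above).
    history = await cognee.get_search_history()

    for item in history:
        print(item)

asyncio.run(main())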
diff --git a/cognee/api/v1/search/search_v2.py b/cognee/api/v1/search/search_v2.py
index a82f14210..c1bc0ee4d 100644
--- a/cognee/api/v1/search/search_v2.py
+++ b/cognee/api/v1/search/search_v2.py
@@ -1,6 +1,9 @@
+import json
 from uuid import UUID
 from enum import Enum
 from typing import Callable, Dict
+from cognee.modules.search.operations import log_query, log_result
+from cognee.modules.storage.utils import JSONEncoder
 from cognee.shared.utils import send_telemetry
 from cognee.modules.users.models import User
 from cognee.modules.users.methods import get_default_user
@@ -14,15 +17,17 @@ class SearchType(Enum):
     INSIGHTS = "INSIGHTS"
     CHUNKS = "CHUNKS"
 
-async def search(search_type: SearchType, query: str, user: User = None) -> list:
+async def search(query_type: SearchType, query_text: str, user: User = None) -> list:
     if user is None:
         user = await get_default_user()
 
     if user is None:
         raise PermissionError("No user found in the system. Please create a user.")
 
+    query = await log_query(query_text, str(query_type), user.id)
+
     own_document_ids = await get_document_ids_for_user(user.id)
-    search_results = await specific_search(search_type, query, user)
+    search_results = await specific_search(query_type, query_text, user)
 
     filtered_search_results = []
 
@@ -33,19 +38,21 @@
         if document_id is None or document_id in own_document_ids:
             filtered_search_results.append(search_result)
 
+    await log_result(query.id, json.dumps(filtered_search_results, cls = JSONEncoder), user.id)
+
     return filtered_search_results
 
-async def specific_search(search_type: SearchType, query: str, user) -> list:
+async def specific_search(query_type: SearchType, query: str, user) -> list:
     search_tasks: Dict[SearchType, Callable] = {
         SearchType.SUMMARIES: query_summaries,
         SearchType.INSIGHTS: query_graph_connections,
         SearchType.CHUNKS: query_chunks,
     }
 
-    search_task = search_tasks.get(search_type)
+    search_task = search_tasks.get(query_type)
 
     if search_task is None:
-        raise ValueError(f"Unsupported search type: {search_type}")
+        raise ValueError(f"Unsupported search type: {query_type}")
 
     send_telemetry("cognee.search EXECUTION STARTED", user.id)
 
diff --git a/cognee/infrastructure/databases/relational/__init__.py b/cognee/infrastructure/databases/relational/__init__.py
index 1ef847903..09a4d669f 100644
--- a/cognee/infrastructure/databases/relational/__init__.py
+++ b/cognee/infrastructure/databases/relational/__init__.py
@@ -2,6 +2,3 @@ from .ModelBase import Base
 from .config import get_relational_config
 from .create_db_and_tables import create_db_and_tables
 from .get_relational_engine import get_relational_engine
-
-# Global data types
-from .data_types.UUID import UUID
diff --git a/cognee/infrastructure/databases/relational/data_types/UUID.py b/cognee/infrastructure/databases/relational/data_types/UUID.py
deleted file mode 100644
index 722204b37..000000000
--- a/cognee/infrastructure/databases/relational/data_types/UUID.py
+++ /dev/null
@@ -1,45 +0,0 @@
-import uuid
-
-from sqlalchemy.types import TypeDecorator, BINARY
-from sqlalchemy.dialects.postgresql import UUID as psqlUUID
-
-class UUID(TypeDecorator):
-    """Platform-independent GUID type.
-
-    Uses Postgresql's UUID type, otherwise uses
-    BINARY(16), to store UUID.
- - """ - impl = BINARY - - def load_dialect_impl(self, dialect): - if dialect.name == 'postgresql': - return dialect.type_descriptor(psqlUUID()) - else: - return dialect.type_descriptor(BINARY(16)) - - def process_bind_param(self, value, dialect): - if value is None: - return value - else: - if not isinstance(value, uuid.UUID): - if isinstance(value, bytes): - value = uuid.UUID(bytes = value) - elif isinstance(value, int): - value = uuid.UUID(int = value) - elif isinstance(value, str): - value = uuid.UUID(value) - if dialect.name == 'postgresql': - return str(value) - else: - return value.bytes - - def process_result_value(self, value, dialect): - if value is None: - return value - if dialect.name == 'postgresql': - if isinstance(value, uuid.UUID): - return value - return uuid.UUID(value) - else: - return uuid.UUID(bytes = value) diff --git a/cognee/infrastructure/pipeline/models/Operation.py b/cognee/infrastructure/pipeline/models/Operation.py index 1834c1a36..62eb74c44 100644 --- a/cognee/infrastructure/pipeline/models/Operation.py +++ b/cognee/infrastructure/pipeline/models/Operation.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import datetime, timezone from sqlalchemy.orm import Mapped, MappedColumn from sqlalchemy import Column, DateTime, ForeignKey, Enum, JSON from cognee.infrastructure.databases.relational import Base, UUID @@ -24,4 +24,4 @@ class Operation(Base): data_id = Column(UUID, ForeignKey("data.id")) meta_data: Mapped[dict] = MappedColumn(type_ = JSON) - created_at = Column(DateTime, default = datetime.utcnow) + created_at = Column(DateTime, default = datetime.now(timezone.utc)) diff --git a/cognee/modules/data/models/Data.py b/cognee/modules/data/models/Data.py index 064521539..2e9745600 100644 --- a/cognee/modules/data/models/Data.py +++ b/cognee/modules/data/models/Data.py @@ -2,8 +2,8 @@ from uuid import uuid4 from typing import List from datetime import datetime, timezone from sqlalchemy.orm import relationship, Mapped -from sqlalchemy import Column, String, DateTime -from cognee.infrastructure.databases.relational import Base, UUID +from sqlalchemy import Column, String, DateTime, UUID +from cognee.infrastructure.databases.relational import Base from .DatasetData import DatasetData class Data(Base): diff --git a/cognee/modules/data/models/Dataset.py b/cognee/modules/data/models/Dataset.py index 5cf5d2351..f7078b8f1 100644 --- a/cognee/modules/data/models/Dataset.py +++ b/cognee/modules/data/models/Dataset.py @@ -2,8 +2,8 @@ from uuid import uuid4 from typing import List from datetime import datetime, timezone from sqlalchemy.orm import relationship, Mapped -from sqlalchemy import Column, Text, DateTime -from cognee.infrastructure.databases.relational import Base, UUID +from sqlalchemy import Column, Text, DateTime, UUID +from cognee.infrastructure.databases.relational import Base from .DatasetData import DatasetData class Dataset(Base): diff --git a/cognee/modules/data/models/DatasetData.py b/cognee/modules/data/models/DatasetData.py index ed9d3c64c..a35c120eb 100644 --- a/cognee/modules/data/models/DatasetData.py +++ b/cognee/modules/data/models/DatasetData.py @@ -1,6 +1,6 @@ from datetime import datetime, timezone -from sqlalchemy import Column, DateTime, ForeignKey -from cognee.infrastructure.databases.relational import Base, UUID +from sqlalchemy import Column, DateTime, ForeignKey, UUID +from cognee.infrastructure.databases.relational import Base class DatasetData(Base): __tablename__ = "dataset_data" diff --git 
a/cognee/modules/pipelines/models/Pipeline.py b/cognee/modules/pipelines/models/Pipeline.py index e9cad5945..f4d20bb0f 100644 --- a/cognee/modules/pipelines/models/Pipeline.py +++ b/cognee/modules/pipelines/models/Pipeline.py @@ -1,9 +1,10 @@ from uuid import uuid4 from datetime import datetime, timezone -from sqlalchemy import Column, DateTime, String, Text +from sqlalchemy import Column, DateTime, String, Text, UUID from sqlalchemy.orm import relationship, Mapped -from cognee.infrastructure.databases.relational import Base, UUID +from cognee.infrastructure.databases.relational import Base from .PipelineTask import PipelineTask +from .Task import Task class Pipeline(Base): __tablename__ = "pipelines" diff --git a/cognee/modules/pipelines/models/PipelineRun.py b/cognee/modules/pipelines/models/PipelineRun.py index 5d5969b2c..ab3498efe 100644 --- a/cognee/modules/pipelines/models/PipelineRun.py +++ b/cognee/modules/pipelines/models/PipelineRun.py @@ -1,8 +1,8 @@ import enum from uuid import uuid4 from datetime import datetime, timezone -from sqlalchemy import Column, DateTime, JSON, Enum -from cognee.infrastructure.databases.relational import Base, UUID +from sqlalchemy import Column, DateTime, JSON, Enum, UUID +from cognee.infrastructure.databases.relational import Base class PipelineRunStatus(enum.Enum): DATASET_PROCESSING_STARTED = "DATASET_PROCESSING_STARTED" diff --git a/cognee/modules/pipelines/models/PipelineTask.py b/cognee/modules/pipelines/models/PipelineTask.py index acbf44e5d..c6c7eb5e9 100644 --- a/cognee/modules/pipelines/models/PipelineTask.py +++ b/cognee/modules/pipelines/models/PipelineTask.py @@ -1,6 +1,6 @@ from datetime import datetime, timezone -from sqlalchemy import Column, DateTime, ForeignKey -from cognee.infrastructure.databases.relational import Base, UUID +from sqlalchemy import Column, DateTime, ForeignKey, UUID +from cognee.infrastructure.databases.relational import Base class PipelineTask(Base): __tablename__ = "pipeline_task" diff --git a/cognee/modules/search/models/Query.py b/cognee/modules/search/models/Query.py new file mode 100644 index 000000000..182196333 --- /dev/null +++ b/cognee/modules/search/models/Query.py @@ -0,0 +1,16 @@ +from uuid import uuid4 +from datetime import datetime, timezone +from sqlalchemy import Column, DateTime, String, UUID +from cognee.infrastructure.databases.relational import Base + +class Query(Base): + __tablename__ = "queries" + + id = Column(UUID, primary_key = True, default = uuid4) + + text = Column(String) + query_type = Column(String) + user_id = Column(UUID) + + created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc)) + updated_at = Column(DateTime(timezone = True), onupdate = lambda: datetime.now(timezone.utc)) diff --git a/cognee/modules/search/models/Result.py b/cognee/modules/search/models/Result.py new file mode 100644 index 000000000..acda59ddb --- /dev/null +++ b/cognee/modules/search/models/Result.py @@ -0,0 +1,16 @@ +from datetime import datetime, timezone +from uuid import uuid4 +from sqlalchemy import Column, DateTime, Text, UUID +from cognee.infrastructure.databases.relational import Base + +class Result(Base): + __tablename__ = "results" + + id = Column(UUID, primary_key = True, default = uuid4) + + value = Column(Text) + query_id = Column(UUID) + user_id = Column(UUID, index = True) + + created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc)) + updated_at = Column(DateTime(timezone = True), onupdate = lambda: 
diff --git a/cognee/modules/search/operations/__init__.py b/cognee/modules/search/operations/__init__.py
new file mode 100644
index 000000000..41d2a4e4a
--- /dev/null
+++ b/cognee/modules/search/operations/__init__.py
@@ -0,0 +1,3 @@
+from .log_query import log_query
+from .log_result import log_result
+from .get_history import get_history
diff --git a/cognee/modules/search/operations/get_history.py b/cognee/modules/search/operations/get_history.py
new file mode 100644
index 000000000..831c4acc2
--- /dev/null
+++ b/cognee/modules/search/operations/get_history.py
@@ -0,0 +1,31 @@
+from uuid import UUID
+from sqlalchemy import literal, select
+from cognee.infrastructure.databases.relational import get_relational_engine
+from ..models.Query import Query
+from ..models.Result import Result
+
+async def get_history(user_id: UUID, limit: int = 10) -> list[Result]:
+    db_engine = get_relational_engine()
+
+    queries_query = select(
+        Query.id,
+        Query.text.label("text"),
+        Query.created_at,
+        literal("user").label("user")
+    ) \
+        .filter(Query.user_id == user_id)
+
+    results_query = select(
+        Result.id,
+        Result.value.label("text"),
+        Result.created_at,
+        literal("system").label("user")
+    ) \
+        .filter(Result.user_id == user_id)
+
+    history_query = queries_query.union(results_query).order_by("created_at").limit(limit)
+
+    async with db_engine.get_async_session() as session:
+        history = (await session.execute(history_query)).all()
+
+    return history
diff --git a/cognee/modules/search/operations/get_queries.py b/cognee/modules/search/operations/get_queries.py
new file mode 100644
index 000000000..ded10a8e5
--- /dev/null
+++ b/cognee/modules/search/operations/get_queries.py
@@ -0,0 +1,17 @@
+from uuid import UUID
+from sqlalchemy import select
+from cognee.infrastructure.databases.relational import get_relational_engine
+from ..models.Query import Query
+
+async def get_queries(user_id: UUID, limit: int) -> list[Query]:
+    db_engine = get_relational_engine()
+
+    async with db_engine.get_async_session() as session:
+        queries = (await session.scalars(
+            select(Query)
+            .filter(Query.user_id == user_id)
+            .order_by(Query.created_at.desc())
+            .limit(limit)
+        )).all()
+
+    return queries
diff --git a/cognee/modules/search/operations/get_results.py b/cognee/modules/search/operations/get_results.py
new file mode 100644
index 000000000..7f90a3f0f
--- /dev/null
+++ b/cognee/modules/search/operations/get_results.py
@@ -0,0 +1,17 @@
+from uuid import UUID
+from sqlalchemy import select
+from cognee.infrastructure.databases.relational import get_relational_engine
+from ..models.Result import Result
+
+async def get_results(user_id: UUID, limit: int = 10) -> list[Result]:
+    db_engine = get_relational_engine()
+
+    async with db_engine.get_async_session() as session:
+        results = (await session.scalars(
+            select(Result)
+            .filter(Result.user_id == user_id)
+            .order_by(Result.created_at.desc())
+            .limit(limit)
+        )).all()
+
+    return results
diff --git a/cognee/modules/search/operations/log_query.py b/cognee/modules/search/operations/log_query.py
new file mode 100644
index 000000000..02ed3f157
--- /dev/null
+++ b/cognee/modules/search/operations/log_query.py
@@ -0,0 +1,19 @@
+from uuid import UUID
+from cognee.infrastructure.databases.relational import get_relational_engine
+from ..models.Query import Query
+
+async def log_query(query_text: str, query_type: str, user_id: UUID) -> Query:
+    db_engine = get_relational_engine()
+
+    async with db_engine.get_async_session() as session:
+        query = Query(
+            text = query_text,
+            query_type = query_type,
+            user_id = user_id,
+        )
+
+        session.add(query)
+
+        await session.commit()
+
+    return query
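Since get_history above gives both halves of the union the same column set (id, text, created_at, user), its rows read like a chat transcript once ordered by created_at. A hedged sketch of a caller; the formatting helper is illustrative, not an API from this patch:

    from uuid import UUID

    from cognee.modules.search.operations import get_history

    async def print_transcript(user_id: UUID) -> None:
        # Each row carries (id, text, created_at, user), where "user" holds
        # the literal "user" or "system" tag attached inside get_history.
        for row in await get_history(user_id, limit = 20):
            print(f"{row.created_at:%Y-%m-%d %H:%M} {row.user}: {row.text}")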
diff --git a/cognee/modules/search/operations/log_result.py b/cognee/modules/search/operations/log_result.py
new file mode 100644
index 000000000..b81e0b447
--- /dev/null
+++ b/cognee/modules/search/operations/log_result.py
@@ -0,0 +1,15 @@
+from uuid import UUID
+from cognee.infrastructure.databases.relational import get_relational_engine
+from ..models.Result import Result
+
+async def log_result(query_id: UUID, result: str, user_id: UUID):
+    db_engine = get_relational_engine()
+
+    async with db_engine.get_async_session() as session:
+        session.add(Result(
+            value = result,
+            query_id = query_id,
+            user_id = user_id,
+        ))
+
+        await session.commit()
diff --git a/cognee/modules/users/models/ACL.py b/cognee/modules/users/models/ACL.py
index b01fe6010..f54d24224 100644
--- a/cognee/modules/users/models/ACL.py
+++ b/cognee/modules/users/models/ACL.py
@@ -1,8 +1,8 @@
 from uuid import uuid4
 from datetime import datetime, timezone
 from sqlalchemy.orm import relationship, Mapped
-from sqlalchemy import Column, ForeignKey, DateTime
-from cognee.infrastructure.databases.relational import Base, UUID
+from sqlalchemy import Column, ForeignKey, DateTime, UUID
+from cognee.infrastructure.databases.relational import Base
 from .ACLResources import ACLResources

 class ACL(Base):
diff --git a/cognee/modules/users/models/ACLResources.py b/cognee/modules/users/models/ACLResources.py
index 268d4a75a..464fed2e8 100644
--- a/cognee/modules/users/models/ACLResources.py
+++ b/cognee/modules/users/models/ACLResources.py
@@ -1,6 +1,6 @@
 from datetime import datetime, timezone
-from sqlalchemy import Column, ForeignKey, DateTime
-from cognee.infrastructure.databases.relational import Base, UUID
+from sqlalchemy import Column, ForeignKey, DateTime, UUID
+from cognee.infrastructure.databases.relational import Base

 class ACLResources(Base):
     __tablename__ = "acl_resources"
diff --git a/cognee/modules/users/models/Group.py b/cognee/modules/users/models/Group.py
index d86dbee90..793decb35 100644
--- a/cognee/modules/users/models/Group.py
+++ b/cognee/modules/users/models/Group.py
@@ -1,6 +1,5 @@
 from sqlalchemy.orm import relationship, Mapped
-from sqlalchemy import Column, String, ForeignKey
-from cognee.infrastructure.databases.relational import UUID
+from sqlalchemy import Column, String, ForeignKey, UUID
 from .Principal import Principal
 from .UserGroup import UserGroup

diff --git a/cognee/modules/users/models/Permission.py b/cognee/modules/users/models/Permission.py
index 84b1a3077..3b1709371 100644
--- a/cognee/modules/users/models/Permission.py
+++ b/cognee/modules/users/models/Permission.py
@@ -1,8 +1,8 @@
 from uuid import uuid4
 from datetime import datetime, timezone
 # from sqlalchemy.orm import relationship
-from sqlalchemy import Column, DateTime, String
-from cognee.infrastructure.databases.relational import Base, UUID
+from sqlalchemy import Column, DateTime, String, UUID
+from cognee.infrastructure.databases.relational import Base

 class Permission(Base):
     __tablename__ = "permissions"
diff --git a/cognee/modules/users/models/Principal.py b/cognee/modules/users/models/Principal.py
index 4ef91ffac..dc6e51302 100644
--- a/cognee/modules/users/models/Principal.py
+++ b/cognee/modules/users/models/Principal.py
@@ -1,7 +1,7 @@
 from uuid import uuid4
 from datetime import datetime, timezone
-from sqlalchemy import Column, String, DateTime
-from cognee.infrastructure.databases.relational import Base, UUID
+from sqlalchemy import Column, String, DateTime, UUID
+from cognee.infrastructure.databases.relational import Base

 class Principal(Base):
     __tablename__ = "principals"
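The remaining model hunks in this commit repeat one mechanical migration: each file stops importing the project's hand-rolled UUID type (the TypeDecorator deleted at the top of this diff) and takes UUID from sqlalchemy instead, relying on SQLAlchemy's own UUID column type, a top-level export as of SQLAlchemy 2.0. A minimal sketch of the resulting pattern, with a hypothetical Tag model:

    from uuid import uuid4

    from sqlalchemy import Column, ForeignKey, UUID
    from sqlalchemy.orm import declarative_base

    Base = declarative_base()

    class Tag(Base):  # hypothetical model, for illustration only
        __tablename__ = "tags"

        # sqlalchemy.UUID now performs the bind/result conversion that the
        # removed TypeDecorator implemented by hand for each dialect.
        id = Column(UUID, primary_key = True, default = uuid4)
        user_id = Column(UUID, ForeignKey("users.id"))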
diff --git a/cognee/modules/users/models/Resource.py b/cognee/modules/users/models/Resource.py
index 0eca509a2..563f96272 100644
--- a/cognee/modules/users/models/Resource.py
+++ b/cognee/modules/users/models/Resource.py
@@ -1,8 +1,8 @@
 from uuid import uuid4
 from datetime import datetime, timezone
 from sqlalchemy.orm import relationship
-from sqlalchemy import Column, DateTime
-from cognee.infrastructure.databases.relational import Base, UUID
+from sqlalchemy import Column, DateTime, UUID
+from cognee.infrastructure.databases.relational import Base
 from .ACLResources import ACLResources

 class Resource(Base):
diff --git a/cognee/modules/users/models/User.py b/cognee/modules/users/models/User.py
index 96f78b12f..3536ac948 100644
--- a/cognee/modules/users/models/User.py
+++ b/cognee/modules/users/models/User.py
@@ -1,10 +1,10 @@
 from uuid import UUID as uuid_UUID
-from sqlalchemy import ForeignKey, Column
+from sqlalchemy import ForeignKey, Column, UUID
 from sqlalchemy.orm import relationship, Mapped
 from fastapi_users.db import SQLAlchemyBaseUserTableUUID
-from cognee.infrastructure.databases.relational import UUID
 from .Principal import Principal
 from .UserGroup import UserGroup
+from .Group import Group

 class User(SQLAlchemyBaseUserTableUUID, Principal):
     __tablename__ = "users"
@@ -25,7 +25,6 @@ class User(SQLAlchemyBaseUserTableUUID, Principal):
 from fastapi_users import schemas

 class UserRead(schemas.BaseUser[uuid_UUID]):
-    # groups: list[uuid_UUID] # Add groups attribute
     pass

 class UserCreate(schemas.BaseUserCreate):
diff --git a/cognee/modules/users/models/UserGroup.py b/cognee/modules/users/models/UserGroup.py
index a2dfa8bc1..5a85c9d3c 100644
--- a/cognee/modules/users/models/UserGroup.py
+++ b/cognee/modules/users/models/UserGroup.py
@@ -1,6 +1,6 @@
 from datetime import datetime, timezone
-from sqlalchemy import Column, ForeignKey, DateTime
-from cognee.infrastructure.databases.relational import Base, UUID
+from sqlalchemy import Column, ForeignKey, DateTime, UUID
+from cognee.infrastructure.databases.relational import Base

 class UserGroup(Base):
     __tablename__ = "user_groups"
diff --git a/cognee/tests/integration/run_toy_tasks/data/cognee_db b/cognee/tests/integration/run_toy_tasks/data/cognee_db
index 60455ad298370fbce1f08c2ea7554de6a85116b8..912adf2955f51b156d0affe520e4b360c5554031 100644
GIT binary patch
literal 159744
[base85-encoded binary delta omitted]

literal 139264
[base85-encoded binary delta omitted]