From 57ae93b3cd0c6061a5192dfaa5dee35eb48b0520 Mon Sep 17 00:00:00 2001
From: Vasilije <8619304+Vasilije1990@users.noreply.github.com>
Date: Tue, 17 Dec 2024 18:53:28 +0100
Subject: [PATCH 1/9] Update get_cognify_router.py

---
 .../v1/cognify/routers/get_cognify_router.py | 22 ++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/cognee/api/v1/cognify/routers/get_cognify_router.py b/cognee/api/v1/cognify/routers/get_cognify_router.py
index a56dbd7a5..1a53a7338 100644
--- a/cognee/api/v1/cognify/routers/get_cognify_router.py
+++ b/cognee/api/v1/cognify/routers/get_cognify_router.py
@@ -1,6 +1,6 @@
 from fastapi import APIRouter
-from typing import List, Optional
-from pydantic import BaseModel
+from typing import List, Optional, Any
+from pydantic import BaseModel, create_model
 from cognee.modules.users.models import User
 from fastapi.responses import JSONResponse
 from cognee.modules.users.methods import get_authenticated_user
@@ -9,7 +9,14 @@ from fastapi import Depends

 class CognifyPayloadDTO(BaseModel):
     datasets: List[str]
-    graph_model: Optional[BaseModel] = None
+    graph_model: Optional[Any] = None
+
+
+def json_to_pydantic_model(name: str, json_schema: dict) -> BaseModel:
+    """
+    Create a Pydantic model on the fly from JSON.
+    """
+    return create_model(name, **{k: (type(v), ...) for k, v in json_schema.items()})

 def get_cognify_router() -> APIRouter:
     router = APIRouter()
@@ -18,6 +25,15 @@ def get_cognify_router() -> APIRouter:
     async def cognify(payload: CognifyPayloadDTO, user: User = Depends(get_authenticated_user)):
         """ This endpoint is responsible for the cognitive processing of the content."""
         from cognee.api.v1.cognify.cognify_v2 import cognify as cognee_cognify
+        try:
+            # Dynamic conversion of `graph_model` to Pydantic
+            if payload.graph_model:
+                graph_model_schema = payload.graph_model
+                GraphModelDynamic = json_to_pydantic_model("GraphModelDynamic", graph_model_schema)
+                graph_model_instance = GraphModelDynamic(**graph_model_schema)
+                print(graph_model_instance)
+            else:
+                graph_model_instance = None
         try:
             await cognee_cognify(payload.datasets, user, payload.graph_model)
         except Exception as error:

From a42ab3e215f898c83ded7f546a96d7dce1127713 Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Wed, 18 Dec 2024 10:58:48 +0100
Subject: [PATCH 2/9] fix: Resolve syntax issue with cognify router

Resolve syntax issue with cognify router

Fix
---
 cognee/api/v1/cognify/routers/get_cognify_router.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/cognee/api/v1/cognify/routers/get_cognify_router.py b/cognee/api/v1/cognify/routers/get_cognify_router.py
index 1a53a7338..739bd1657 100644
--- a/cognee/api/v1/cognify/routers/get_cognify_router.py
+++ b/cognee/api/v1/cognify/routers/get_cognify_router.py
@@ -6,6 +6,8 @@ from fastapi.responses import JSONResponse
 from cognee.modules.users.methods import get_authenticated_user
 from fastapi import Depends

+from cognee.shared.data_models import KnowledgeGraph
+

 class CognifyPayloadDTO(BaseModel):
     datasets: List[str]
@@ -32,10 +34,10 @@ def get_cognify_router() -> APIRouter:
                 GraphModelDynamic = json_to_pydantic_model("GraphModelDynamic", graph_model_schema)
                 graph_model_instance = GraphModelDynamic(**graph_model_schema)
                 print(graph_model_instance)
-            else:
-                graph_model_instance = None
-        try:
-            await cognee_cognify(payload.datasets, user, payload.graph_model)
+            else:
+                graph_model_instance = KnowledgeGraph
+
+            await cognee_cognify(payload.datasets, user, graph_model_instance)
         except Exception as error:
             return JSONResponse(
                 status_code=409,
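For context on the approach in PATCH 1/9 and 2/9: pydantic's create_model builds a model class at runtime from keyword arguments of the form field_name=(type, default), and passing Ellipsis as the default marks the field as required. Below is a minimal, self-contained sketch of the idea; the sample payload and names are illustrative rather than taken from the repository, and inferring each field's type with type(value) only covers flat JSON, so nested objects or lists would need recursive handling.

from pydantic import BaseModel, create_model


def json_to_pydantic_model(name: str, json_schema: dict) -> type[BaseModel]:
    # Each key becomes a required field whose type is the runtime type of its sample value.
    return create_model(name, **{key: (type(value), ...) for key, value in json_schema.items()})


graph_model_schema = {"name": "example graph", "node_count": 3}  # hypothetical request body
GraphModelDynamic = json_to_pydantic_model("GraphModelDynamic", graph_model_schema)
print(GraphModelDynamic(**graph_model_schema))  # name='example graph' node_count=3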
From 7be7265c93fc32ba2b0e0ceab6d78e4072b82d7f Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Wed, 18 Dec 2024 13:19:02 +0100
Subject: [PATCH 3/9] feat: Add ruff pre-commit hook for linting and formatting

Added formatting and linting on pre-commit hook

Feature COG-650
---
 .pre-commit-config.yaml | 20 ++++++++++
 poetry.lock             | 87 ++++++++++++++++++++++++++++++++++++++++-
 pyproject.toml          |  1 +
 3 files changed, 107 insertions(+), 1 deletion(-)
 create mode 100644 .pre-commit-config.yaml

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 000000000..c862cb36f
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,20 @@
+# See https://pre-commit.com for more information
+# See https://pre-commit.com/hooks.html for more hooks
+repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v3.2.0
+  hooks:
+  - id: trailing-whitespace
+  - id: end-of-file-fixer
+  - id: check-yaml
+  - id: check-added-large-files
+- repo: https://github.com/astral-sh/ruff-pre-commit
+  # Ruff version.
+  rev: v0.8.3
+  hooks:
+    # Run the linter.
+    - id: ruff
+      types_or: [ python, pyi ]
+    # Run the formatter.
+    - id: ruff-format
+      types_or: [ python, pyi ]
\ No newline at end of file
diff --git a/poetry.lock b/poetry.lock
index d2c0a8533..3aee967b2 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -726,6 +726,17 @@ files = [
 [package.dependencies]
 pycparser = "*"

+[[package]]
+name = "cfgv"
+version = "3.4.0"
+description = "Validate configuration and produce human readable error messages."
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"},
+    {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"},
+]
+
 [[package]]
 name = "chardet"
 version = "5.2.0"
@@ -1379,6 +1390,17 @@ files = [
     {file = "dirtyjson-1.0.8.tar.gz", hash = "sha256:90ca4a18f3ff30ce849d100dcf4a003953c79d3a2348ef056f1d9c22231a25fd"},
 ]

+[[package]]
+name = "distlib"
+version = "0.3.9"
+description = "Distribution utilities"
+optional = false
+python-versions = "*"
+files = [
+    {file = "distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87"},
+    {file = "distlib-0.3.9.tar.gz", hash = "sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403"},
+]
+
 [[package]]
 name = "distro"
 version = "1.9.0"
@@ -2527,6 +2549,20 @@ files = [
     {file = "hyperframe-6.0.1.tar.gz", hash = "sha256:ae510046231dc8e9ecb1a6586f63d2347bf4c8905914aa84ba585ae85f28a914"},
 ]

+[[package]]
+name = "identify"
+version = "2.6.3"
+description = "File identification library for Python"
+optional = false
+python-versions = ">=3.9"
+files = [
+    {file = "identify-2.6.3-py2.py3-none-any.whl", hash = "sha256:9edba65473324c2ea9684b1f944fe3191db3345e50b6d04571d10ed164f8d7bd"},
+    {file = "identify-2.6.3.tar.gz", hash = "sha256:62f5dae9b5fef52c84cc188514e9ea4f3f636b1d8799ab5ebc475471f9e47a02"},
+]
+
+[package.extras]
+license = ["ukkonen"]
+
 [[package]]
 name = "idna"
 version = "3.10"
@@ -4535,6 +4571,17 @@ plot = ["matplotlib"]
 tgrep = ["pyparsing"]
 twitter = ["twython"]

+[[package]]
+name = "nodeenv"
+version = "1.9.1"
+description = "Node.js virtual environment builder"
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
+files = [
+    {file = "nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9"},
+    {file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"},
+]
+
 [[package]]
 name = "notebook"
 version = "7.3.1"
@@ -5348,6 +5395,24 @@ dev = ["black", "flake8", "flake8-print", "isort", "pre-commit"]
 sentry = ["django", "sentry-sdk"]
 test = ["coverage", "django", "flake8", "freezegun (==0.3.15)", "mock (>=2.0.0)", "pylint", "pytest", "pytest-timeout"]

+[[package]]
+name = "pre-commit"
+version = "4.0.1"
+description = "A framework for managing and maintaining multi-language pre-commit hooks."
+optional = false
+python-versions = ">=3.9"
+files = [
+    {file = "pre_commit-4.0.1-py2.py3-none-any.whl", hash = "sha256:efde913840816312445dc98787724647c65473daefe420785f885e8ed9a06878"},
+    {file = "pre_commit-4.0.1.tar.gz", hash = "sha256:80905ac375958c0444c65e9cebebd948b3cdb518f335a091a670a89d652139d2"},
+]
+
+[package.dependencies]
+cfgv = ">=2.0.0"
+identify = ">=1.0.0"
+nodeenv = ">=0.11.1"
+pyyaml = ">=5.1"
+virtualenv = ">=20.10.0"
+
 [[package]]
 name = "prometheus-client"
 version = "0.21.1"
@@ -8375,6 +8440,26 @@ files = [
 [package.extras]
 crypto-eth-addresses = ["eth-hash[pycryptodome] (>=0.7.0)"]

+[[package]]
+name = "virtualenv"
+version = "20.28.0"
+description = "Virtual Python Environment builder"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "virtualenv-20.28.0-py3-none-any.whl", hash = "sha256:23eae1b4516ecd610481eda647f3a7c09aea295055337331bb4e6892ecce47b0"},
+    {file = "virtualenv-20.28.0.tar.gz", hash = "sha256:2c9c3262bb8e7b87ea801d715fae4495e6032450c71d2309be9550e7364049aa"},
+]
+
+[package.dependencies]
+distlib = ">=0.3.7,<1"
+filelock = ">=3.12.2,<4"
+platformdirs = ">=3.9.1,<5"
+
+[package.extras]
+docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"]
+test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"]
+
 [[package]]
 name = "watchdog"
 version = "6.0.0"
@@ -8877,4 +8962,4 @@ weaviate = ["weaviate-client"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.9.0,<3.12"
-content-hash = "af91e3dcf6a8927ed938fe3f78172a5f1e0c0f9c8fbcbc76767b0e0d84645c9e"
+content-hash = "7b339ad192fdc1bf97bbc0797b9657f0245b00627f811009a545e4a3380368bb"
diff --git a/pyproject.toml b/pyproject.toml
index 79f5a6855..cbee3e056 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -74,6 +74,7 @@ deepeval = {version = "^2.0.1", optional = true}
 transformers = "^4.46.3"
 pymilvus = {version = "^2.5.0", optional = true}
 unstructured = { extras = ["csv", "doc", "docx", "epub", "md", "odt", "org", "ppt", "pptx", "rst", "rtf", "tsv", "xlsx"], version = "^0.16.10", optional = true }
+pre-commit = "^4.0.1"
From 875dd1055e375d65dee1d22b9e8cd90ce677f3d9 Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Wed, 18 Dec 2024 13:31:19 +0100
Subject: [PATCH 4/9] chore: Update ruff lint options in pyproject file

Update ruff lint options in pyproject file

Chore
---
 .pre-commit-config.yaml | 2 +-
 pyproject.toml          | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index c862cb36f..42f12ea51 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -17,4 +17,4 @@ repos:
       types_or: [ python, pyi ]
     # Run the formatter.
     - id: ruff-format
-      types_or: [ python, pyi ]
\ No newline at end of file
+      types_or: [ python, pyi ]
diff --git a/pyproject.toml b/pyproject.toml
index cbee3e056..0d5845d78 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -115,11 +115,10 @@ mkdocstrings = {extras = ["python"], version = "^0.26.2"}

 [tool.ruff] # https://beta.ruff.rs/docs/
 line-length = 100
+
+[tool.ruff.lint]
 ignore = ["F401"]
-ignore-init-module-imports = true

 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
-
-

From 856f228981a46ad931347c08a16db91886fcf95e Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Wed, 18 Dec 2024 14:04:49 +0100
Subject: [PATCH 5/9] test: Add ruff linter github action

Added linting check with ruff in github actions

Test COG-650
---
 .github/workflows/ruff_lint.yaml | 9 +++++++++
 1 file changed, 9 insertions(+)
 create mode 100644 .github/workflows/ruff_lint.yaml

diff --git a/.github/workflows/ruff_lint.yaml b/.github/workflows/ruff_lint.yaml
new file mode 100644
index 000000000..43cfef95b
--- /dev/null
+++ b/.github/workflows/ruff_lint.yaml
@@ -0,0 +1,9 @@
+name: Ruff
+on: [ push, pull_request ]
+
+jobs:
+  ruff:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: astral-sh/ruff-action@v2

From f660127d2d25e0265cac1e865c8c962865551fe3 Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Wed, 18 Dec 2024 14:24:14 +0100
Subject: [PATCH 6/9] refactor: Remove changes to cognify router

Remove changes to cognify router

Refactor COG-650
---
 .../v1/cognify/routers/get_cognify_router.py | 26 +++----------------
 1 file changed, 4 insertions(+), 22 deletions(-)

diff --git a/cognee/api/v1/cognify/routers/get_cognify_router.py b/cognee/api/v1/cognify/routers/get_cognify_router.py
index 739bd1657..257ac994f 100644
--- a/cognee/api/v1/cognify/routers/get_cognify_router.py
+++ b/cognee/api/v1/cognify/routers/get_cognify_router.py
@@ -1,24 +1,15 @@
 from fastapi import APIRouter
-from typing import List, Optional, Any
-from pydantic import BaseModel, create_model
+from typing import List, Optional
+from pydantic import BaseModel
 from cognee.modules.users.models import User
 from fastapi.responses import JSONResponse
 from cognee.modules.users.methods import get_authenticated_user
 from fastapi import Depends
-
 from cognee.shared.data_models import KnowledgeGraph
-

 class CognifyPayloadDTO(BaseModel):
     datasets: List[str]
-    graph_model: Optional[Any] = None
-
-
-def json_to_pydantic_model(name: str, json_schema: dict) -> BaseModel:
-    """
-    Create a Pydantic model on the fly from JSON.
-    """
-    return create_model(name, **{k: (type(v), ...) for k, v in json_schema.items()})
+    graph_model: Optional[BaseModel] = KnowledgeGraph

 def get_cognify_router() -> APIRouter:
     router = APIRouter()
@@ -28,16 +19,7 @@ def get_cognify_router() -> APIRouter:
         """ This endpoint is responsible for the cognitive processing of the content."""
         from cognee.api.v1.cognify.cognify_v2 import cognify as cognee_cognify
         try:
-            # Dynamic conversion of `graph_model` to Pydantic
-            if payload.graph_model:
-                graph_model_schema = payload.graph_model
-                GraphModelDynamic = json_to_pydantic_model("GraphModelDynamic", graph_model_schema)
-                graph_model_instance = GraphModelDynamic(**graph_model_schema)
-                print(graph_model_instance)
-            else:
-                graph_model_instance = KnowledgeGraph
-
-            await cognee_cognify(payload.datasets, user, graph_model_instance)
+            await cognee_cognify(payload.datasets, user, payload.graph_model)
         except Exception as error:
             return JSONResponse(
                 status_code=409,
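For context on where PATCH 6/9 leaves the router: the DTO default for graph_model is the KnowledgeGraph class itself rather than an instance, so a request that omits graph_model passes the model class through to cognee_cognify as the graph schema. A self-contained sketch of that pattern follows; the KnowledgeGraph stand-in fields here are illustrative, not the actual cognee.shared.data_models definition.

from typing import List, Optional

from pydantic import BaseModel


class KnowledgeGraph(BaseModel):
    # Stand-in for cognee.shared.data_models.KnowledgeGraph; the real model differs.
    nodes: List[str] = []
    edges: List[str] = []


class CognifyPayloadDTO(BaseModel):
    datasets: List[str]
    graph_model: Optional[BaseModel] = KnowledgeGraph  # default is the class, not an instance


payload = CognifyPayloadDTO(datasets=["main_dataset"])  # hypothetical request body
print(payload.graph_model)  # <class '__main__.KnowledgeGraph'>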
From 8aff9962dd3f458fde899e97cf25cc02df3811f0 Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Wed, 18 Dec 2024 14:42:19 +0100
Subject: [PATCH 7/9] test: Add test for ruff format for cognee code

Test if code is formatted for cognee

Test COG-650
---
 .github/workflows/ruff_format.yaml | 11 +++++++++++
 1 file changed, 11 insertions(+)
 create mode 100644 .github/workflows/ruff_format.yaml

diff --git a/.github/workflows/ruff_format.yaml b/.github/workflows/ruff_format.yaml
new file mode 100644
index 000000000..e036c3ac8
--- /dev/null
+++ b/.github/workflows/ruff_format.yaml
@@ -0,0 +1,11 @@
+name: Ruff
+on: [ push, pull_request ]
+
+jobs:
+  ruff:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: astral-sh/ruff-action@v2
+        with:
+          args: "format --check"

From 7a9f7b07f6465a37063365a94383d16a4cfe5ec9 Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Wed, 18 Dec 2024 14:43:31 +0100
Subject: [PATCH 8/9] refactor: Rename ruff gh actions

Rename ruff gh actions to be more understandable

Refactor COG-650
---
 .github/workflows/ruff_format.yaml | 2 +-
 .github/workflows/ruff_lint.yaml   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ruff_format.yaml b/.github/workflows/ruff_format.yaml
index e036c3ac8..899dfff22 100644
--- a/.github/workflows/ruff_format.yaml
+++ b/.github/workflows/ruff_format.yaml
@@ -1,4 +1,4 @@
-name: Ruff
+name: ruff format
 on: [ push, pull_request ]

 jobs:
diff --git a/.github/workflows/ruff_lint.yaml b/.github/workflows/ruff_lint.yaml
index 43cfef95b..cf09ab53d 100644
--- a/.github/workflows/ruff_lint.yaml
+++ b/.github/workflows/ruff_lint.yaml
@@ -1,4 +1,4 @@
-name: Ruff
+name: ruff lint
 on: [ push, pull_request ]

 jobs:

From 8ec639a12a97b684ee665ef06955f2486b53eb48 Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Wed, 18 Dec 2024 15:01:13 +0100
Subject: [PATCH 9/9] chore: Remove checking of ruff lint and format on push

Remove checking of ruff lint and format on push

Chore COG-650
---
 .github/workflows/ruff_format.yaml | 2 +-
 .github/workflows/ruff_lint.yaml   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ruff_format.yaml b/.github/workflows/ruff_format.yaml
index 899dfff22..959b7fc4b 100644
--- a/.github/workflows/ruff_format.yaml
+++ b/.github/workflows/ruff_format.yaml
@@ -1,5 +1,5 @@
 name: ruff format
-on: [ push, pull_request ]
+on: [ pull_request ]

 jobs:
   ruff:
diff --git a/.github/workflows/ruff_lint.yaml b/.github/workflows/ruff_lint.yaml
index cf09ab53d..214e8ec6d 100644
--- a/.github/workflows/ruff_lint.yaml
+++ b/.github/workflows/ruff_lint.yaml
@@ -1,5 +1,5 @@
 name: ruff lint
-on: [ push, pull_request ]
+on: [ pull_request ]

 jobs:
   ruff: