From cdc992750a80d8ac32e4b2fa60931c1f07635f95 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 24 Jan 2025 18:12:16 +0100 Subject: [PATCH 1/2] test: Add github action to test code graph --- .github/workflows/reusable_python_example.yml | 6 ++++- .github/workflows/test_code_graph_example.yml | 22 +++++++++++++++++++ .../repo_processor/get_non_code_files.py | 1 - poetry.lock | 9 ++++---- pyproject.toml | 3 +++ 5 files changed, 35 insertions(+), 6 deletions(-) create mode 100644 .github/workflows/test_code_graph_example.yml diff --git a/.github/workflows/reusable_python_example.yml b/.github/workflows/reusable_python_example.yml index 4aa4aaba6..d1f7ee16b 100644 --- a/.github/workflows/reusable_python_example.yml +++ b/.github/workflows/reusable_python_example.yml @@ -7,6 +7,10 @@ on: description: "Location of example script to run" required: true type: string + arguments: + description: "Arguments for example script" + required: false + type: string secrets: GRAPHISTRY_USERNAME: required: true @@ -53,4 +57,4 @@ jobs: LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }} GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }} - run: poetry run python ${{ inputs.example-location }} + run: poetry run python ${{ inputs.example-location }} ${{ inputs.arguments }} diff --git a/.github/workflows/test_code_graph_example.yml b/.github/workflows/test_code_graph_example.yml new file mode 100644 index 000000000..a1f8d4e2c --- /dev/null +++ b/.github/workflows/test_code_graph_example.yml @@ -0,0 +1,22 @@ +name: test | code graph example + +on: + workflow_dispatch: + pull_request: + types: [labeled, synchronize] + + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + run_simple_example_test: + uses: ./.github/workflows/reusable_python_example.yml + with: + example-location: ./examples/python/code_graph_example.py + arguments: "--repo_path ./evals" + 
secrets: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }} + GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }} diff --git a/cognee/tasks/repo_processor/get_non_code_files.py b/cognee/tasks/repo_processor/get_non_code_files.py index 12f32e841..add991239 100644 --- a/cognee/tasks/repo_processor/get_non_code_files.py +++ b/cognee/tasks/repo_processor/get_non_code_files.py @@ -8,7 +8,6 @@ from cognee.modules.data.methods import get_datasets from cognee.modules.data.methods.get_dataset_data import get_dataset_data from cognee.modules.data.methods.get_datasets_by_name import get_datasets_by_name from cognee.modules.data.models import Data -from cognee.modules.data.operations.write_metadata import write_metadata from cognee.modules.ingestion.data_types import BinaryData from cognee.modules.users.methods import get_default_user from cognee.shared.CodeGraphEntities import Repository diff --git a/poetry.lock b/poetry.lock index 5c39dade0..fa17d3d53 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand. 
[[package]] name = "aiofiles" @@ -4998,8 +4998,8 @@ files = [ [package.dependencies] numpy = [ {version = ">=1.22.4", markers = "python_version < \"3.11\""}, - {version = ">=1.23.2", markers = "python_version == \"3.11\""}, {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, + {version = ">=1.23.2", markers = "python_version == \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -5927,8 +5927,8 @@ astroid = ">=3.3.8,<=3.4.0-dev0" colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} dill = [ {version = ">=0.2", markers = "python_version < \"3.11\""}, - {version = ">=0.3.6", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, {version = ">=0.3.7", markers = "python_version >= \"3.12\""}, + {version = ">=0.3.6", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, ] isort = ">=4.2.5,<5.13.0 || >5.13.0,<6" mccabe = ">=0.6,<0.8" @@ -8796,6 +8796,7 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", type = ["pytest-mypy"] [extras] +codegraph = ["jedi", "parso"] deepeval = ["deepeval"] docs = ["unstructured"] falkordb = ["falkordb"] @@ -8814,4 +8815,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.10.0,<3.13" -content-hash = "d40b127fc83e2f623276d7f001e726490a4ccad195350e8ff0b10c7e3b53775a" +content-hash = "9b5d0162e4fdaaded920a2c8b448e07ec794c55914c1d6e18c6ab9b48c42df2d" diff --git a/pyproject.toml b/pyproject.toml index 14528777c..731203c95 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,6 +77,8 @@ pre-commit = "^4.0.1" httpx = "0.27.0" bokeh="^3.6.2" nltk = "3.9.1" +parso = {version = "^0.8.4", optional = true} +jedi = {version = "^0.19.2", optional = true} [tool.poetry.extras] @@ -94,6 +96,7 @@ falkordb = ["falkordb"] groq = ["groq"] milvus = ["pymilvus"] docs = ["unstructured"] +codegraph = ["parso", "jedi"] [tool.poetry.group.dev.dependencies] pytest = "^7.4.0" From ae016d4460630d5e8d32a3fd0e0c86bb31095155 
Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 24 Jan 2025 18:18:27 +0100 Subject: [PATCH 2/2] chore: Change to run profiler on HEAD instead of base branch --- .github/workflows/profiling.yaml | 40 ++++++++++++++++---------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/.github/workflows/profiling.yaml b/.github/workflows/profiling.yaml index 2408a8f70..de4bb179d 100644 --- a/.github/workflows/profiling.yaml +++ b/.github/workflows/profiling.yaml @@ -68,32 +68,32 @@ jobs: echo "HEAD_SHA=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV # Run profiler on the base branch - - name: Run profiler on base branch - env: - BASE_SHA: ${{ env.BASE_SHA }} - run: | - echo "Profiling the base branch for code_graph_pipeline.py" - echo "Checking out base SHA: $BASE_SHA" - git checkout $BASE_SHA - echo "This is the working directory: $PWD" - # Ensure the script is executable - chmod +x cognee/api/v1/cognify/code_graph_pipeline.py - # Run Scalene - poetry run pyinstrument --renderer json -o base_results.json cognee/api/v1/cognify/code_graph_pipeline.py - - # Run profiler on head branch -# - name: Run profiler on head branch +# - name: Run profiler on base branch # env: -# HEAD_SHA: ${{ env.HEAD_SHA }} +# BASE_SHA: ${{ env.BASE_SHA }} # run: | -# echo "Profiling the head branch for code_graph_pipeline.py" -# echo "Checking out head SHA: $HEAD_SHA" -# git checkout $HEAD_SHA +# echo "Profiling the base branch for code_graph_pipeline.py" +# echo "Checking out base SHA: $BASE_SHA" +# git checkout $BASE_SHA # echo "This is the working directory: $PWD" # # Ensure the script is executable # chmod +x cognee/api/v1/cognify/code_graph_pipeline.py # # Run Scalene -# poetry run pyinstrument --renderer json -o head_results.json cognee/api/v1/cognify/code_graph_pipeline.py +# poetry run pyinstrument --renderer json -o base_results.json cognee/api/v1/cognify/code_graph_pipeline.py + + # Run profiler on head branch + - name: Run profiler on head branch + env: + 
HEAD_SHA: ${{ env.HEAD_SHA }} + run: | + echo "Profiling the head branch for code_graph_pipeline.py" + echo "Checking out head SHA: $HEAD_SHA" + git checkout $HEAD_SHA + echo "This is the working directory: $PWD" + # Ensure the script is executable + chmod +x cognee/api/v1/cognify/code_graph_pipeline.py + # Run pyinstrument + poetry run pyinstrument --renderer json -o head_results.json cognee/api/v1/cognify/code_graph_pipeline.py # # Compare profiling results # - name: Compare profiling results