From 1989296b019d7b79f0be44756fb4404483194501 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 9 Jan 2025 12:17:42 +0100 Subject: [PATCH 1/4] fix: Resolve profiler issue with partial and recursive logger imports Resolve issue for profiler with partial and recursive logger imports --- cognee/tasks/repo_processor/__init__.py | 2 +- cognee/tasks/repo_processor/expand_dependency_graph.py | 4 +++- cognee/tasks/repo_processor/extract_code_parts.py | 4 +++- cognee/tasks/repo_processor/get_local_dependencies.py | 4 +++- cognee/tasks/repo_processor/get_source_code_chunks.py | 2 +- cognee/tasks/repo_processor/top_down_repo_parse.py | 4 +++- 6 files changed, 14 insertions(+), 6 deletions(-) diff --git a/cognee/tasks/repo_processor/__init__.py b/cognee/tasks/repo_processor/__init__.py index 6dc032547..b20351685 100644 --- a/cognee/tasks/repo_processor/__init__.py +++ b/cognee/tasks/repo_processor/__init__.py @@ -4,4 +4,4 @@ from .get_non_code_files import get_data_list_for_user, get_non_py_files from .get_repo_file_dependencies import get_repo_file_dependencies import logging -logger = logging.getLogger("task:repo_processor") +logger = logging.getLogger(__name__) diff --git a/cognee/tasks/repo_processor/expand_dependency_graph.py b/cognee/tasks/repo_processor/expand_dependency_graph.py index de26fe8d4..cc957742b 100644 --- a/cognee/tasks/repo_processor/expand_dependency_graph.py +++ b/cognee/tasks/repo_processor/expand_dependency_graph.py @@ -5,7 +5,9 @@ from uuid import NAMESPACE_OID, uuid5 from cognee.infrastructure.engine import DataPoint from cognee.shared.CodeGraphEntities import CodeFile, CodePart from cognee.tasks.repo_processor.extract_code_parts import extract_code_parts -from cognee.tasks.repo_processor import logger +import logging + +logger = logging.getLogger(__name__) def _add_code_parts_nodes_and_edges(code_file: CodeFile, part_type, code_parts) -> None: diff --git a/cognee/tasks/repo_processor/extract_code_parts.py b/cognee/tasks/repo_processor/extract_code_parts.py index 76cfef538..f25146232 100644 --- a/cognee/tasks/repo_processor/extract_code_parts.py +++ b/cognee/tasks/repo_processor/extract_code_parts.py @@ -1,7 +1,9 @@ from typing import Dict, List import parso -from cognee.tasks.repo_processor import logger +import logging + +logger = logging.getLogger(__name__) def _extract_parts_from_module(module, parts_dict: Dict[str, List[str]]) -> Dict[str, List[str]]: diff --git a/cognee/tasks/repo_processor/get_local_dependencies.py b/cognee/tasks/repo_processor/get_local_dependencies.py index b443829c9..b0ac2829f 100644 --- a/cognee/tasks/repo_processor/get_local_dependencies.py +++ b/cognee/tasks/repo_processor/get_local_dependencies.py @@ -10,7 +10,9 @@ import jedi import parso from parso.tree import BaseNode -from cognee.tasks.repo_processor import logger +import logging + +logger = logging.getLogger(__name__) @contextmanager diff --git a/cognee/tasks/repo_processor/get_source_code_chunks.py b/cognee/tasks/repo_processor/get_source_code_chunks.py index 5e14e11ac..980a86539 100644 --- a/cognee/tasks/repo_processor/get_source_code_chunks.py +++ b/cognee/tasks/repo_processor/get_source_code_chunks.py @@ -8,7 +8,7 @@ import tiktoken from cognee.infrastructure.engine import DataPoint from cognee.shared.CodeGraphEntities import CodeFile, CodePart, SourceCodeChunk -logger = logging.getLogger("task:get_source_code_chunks") +logger = logging.getLogger(__name__) def _count_tokens(tokenizer: tiktoken.Encoding, source_code: str) -> int: diff --git a/cognee/tasks/repo_processor/top_down_repo_parse.py b/cognee/tasks/repo_processor/top_down_repo_parse.py index aed971920..87b7b8c95 100644 --- a/cognee/tasks/repo_processor/top_down_repo_parse.py +++ b/cognee/tasks/repo_processor/top_down_repo_parse.py @@ -4,7 +4,9 @@ import jedi import parso from tqdm import tqdm -from . import logger +import logging + +logger = logging.getLogger(__name__) _NODE_TYPE_MAP = { "funcdef": "func_def", From b733590724d6a4ec2df1cac38e04d3a440cec508 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 9 Jan 2025 12:26:14 +0100 Subject: [PATCH 2/4] fix: Remove logger from __init__.py file --- cognee/tasks/repo_processor/__init__.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/cognee/tasks/repo_processor/__init__.py b/cognee/tasks/repo_processor/__init__.py index b20351685..8f0df23d8 100644 --- a/cognee/tasks/repo_processor/__init__.py +++ b/cognee/tasks/repo_processor/__init__.py @@ -2,6 +2,3 @@ from .enrich_dependency_graph import enrich_dependency_graph from .expand_dependency_graph import expand_dependency_graph from .get_non_code_files import get_data_list_for_user, get_non_py_files from .get_repo_file_dependencies import get_repo_file_dependencies -import logging - -logger = logging.getLogger(__name__) From ccf758ed7b8a8be68949503900509d9a422c26fc Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 9 Jan 2025 12:32:30 +0100 Subject: [PATCH 3/4] test: Test profiling on HEAD branch --- .github/workflows/profiling.yaml | 42 ++++++++++++++++---------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/.github/workflows/profiling.yaml b/.github/workflows/profiling.yaml index 0ecbc960c..de4bb179d 100644 --- a/.github/workflows/profiling.yaml +++ b/.github/workflows/profiling.yaml @@ -68,33 +68,33 @@ jobs: echo "HEAD_SHA=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV # Run profiler on the base branch - - name: Run profiler on base branch - env: - BASE_SHA: ${{ env.BASE_SHA }} - run: | - echo "Profiling the base branch for code_graph_pipeline.py" - echo "Checking out base SHA: $BASE_SHA" - git checkout $BASE_SHA - echo "This is the working directory: $PWD" - # Ensure the script is executable - chmod +x cognee/api/v1/cognify/code_graph_pipeline.py - # Run Scalene - poetry run pyinstrument --renderer json -o base_results.json cognee/api/v1/cognify/code_graph_pipeline.py - - # Run profiler on head branch -# - name: Run profiler on head branch +# - name: Run profiler on base branch # env: -# HEAD_SHA: ${{ env.HEAD_SHA }} +# BASE_SHA: ${{ env.BASE_SHA }} # run: | -# echo "Profiling the head branch for code_graph_pipeline.py" -# echo "Checking out head SHA: $HEAD_SHA" -# git checkout $HEAD_SHA +# echo "Profiling the base branch for code_graph_pipeline.py" +# echo "Checking out base SHA: $BASE_SHA" +# git checkout $BASE_SHA # echo "This is the working directory: $PWD" # # Ensure the script is executable # chmod +x cognee/api/v1/cognify/code_graph_pipeline.py # # Run Scalene -# poetry run pyinstrument --renderer json -o head_results.json cognee/api/v1/cognify/code_graph_pipeline.py -# +# poetry run pyinstrument --renderer json -o base_results.json cognee/api/v1/cognify/code_graph_pipeline.py + + # Run profiler on head branch + - name: Run profiler on head branch + env: + HEAD_SHA: ${{ env.HEAD_SHA }} + run: | + echo "Profiling the head branch for code_graph_pipeline.py" + echo "Checking out head SHA: $HEAD_SHA" + git checkout $HEAD_SHA + echo "This is the working directory: $PWD" + # Ensure the script is executable + chmod +x cognee/api/v1/cognify/code_graph_pipeline.py + # Run Scalene + poetry run pyinstrument --renderer json -o head_results.json cognee/api/v1/cognify/code_graph_pipeline.py + # # Compare profiling results # - name: Compare profiling results # run: | From 2ae66c2c2e5f0bb08a1a1e98706d6dc850a54d11 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 9 Jan 2025 12:38:42 +0100 Subject: [PATCH 4/4] test: Return profiler to base branch --- .github/workflows/profiling.yaml | 40 ++++++++++++++++---------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/.github/workflows/profiling.yaml b/.github/workflows/profiling.yaml index de4bb179d..2408a8f70 100644 --- a/.github/workflows/profiling.yaml +++ b/.github/workflows/profiling.yaml @@ -68,32 +68,32 @@ jobs: echo "HEAD_SHA=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV # Run profiler on the base branch -# - name: Run profiler on base branch -# env: -# BASE_SHA: ${{ env.BASE_SHA }} -# run: | -# echo "Profiling the base branch for code_graph_pipeline.py" -# echo "Checking out base SHA: $BASE_SHA" -# git checkout $BASE_SHA -# echo "This is the working directory: $PWD" -# # Ensure the script is executable -# chmod +x cognee/api/v1/cognify/code_graph_pipeline.py -# # Run Scalene -# poetry run pyinstrument --renderer json -o base_results.json cognee/api/v1/cognify/code_graph_pipeline.py - - # Run profiler on head branch - - name: Run profiler on head branch + - name: Run profiler on base branch env: - HEAD_SHA: ${{ env.HEAD_SHA }} + BASE_SHA: ${{ env.BASE_SHA }} run: | - echo "Profiling the head branch for code_graph_pipeline.py" - echo "Checking out head SHA: $HEAD_SHA" - git checkout $HEAD_SHA + echo "Profiling the base branch for code_graph_pipeline.py" + echo "Checking out base SHA: $BASE_SHA" + git checkout $BASE_SHA echo "This is the working directory: $PWD" # Ensure the script is executable chmod +x cognee/api/v1/cognify/code_graph_pipeline.py # Run Scalene - poetry run pyinstrument --renderer json -o head_results.json cognee/api/v1/cognify/code_graph_pipeline.py + poetry run pyinstrument --renderer json -o base_results.json cognee/api/v1/cognify/code_graph_pipeline.py + + # Run profiler on head branch +# - name: Run profiler on head branch +# env: +# HEAD_SHA: ${{ env.HEAD_SHA }} +# run: | +# echo "Profiling the head branch for code_graph_pipeline.py" +# echo "Checking out head SHA: $HEAD_SHA" +# git checkout $HEAD_SHA +# echo "This is the working directory: $PWD" +# # Ensure the script is executable +# chmod +x cognee/api/v1/cognify/code_graph_pipeline.py +# # Run Scalene +# poetry run pyinstrument --renderer json -o head_results.json cognee/api/v1/cognify/code_graph_pipeline.py # # Compare profiling results # - name: Compare profiling results