Merge branch 'dev' into dependabot/pip/fastapi-0.115.6

commit cea1c7a4b7
Vasilije, 2024-12-12 13:11:17 +01:00 (committed by GitHub)
32 changed files with 161 additions and 92 deletions

@@ -3,7 +3,7 @@ name: publish dev | Docker image
on:
  push:
    branches:
      - main
      - dev
      - feature/*
    paths-ignore:
      - '**.md'
@@ -28,13 +28,8 @@ jobs:
      - name: Set environment variable for stage
        id: set-env
        run: |
-          if [[ "${{ github.ref }}" == "refs/heads/main" ]]; then
-            echo "STAGE=prd" >> $GITHUB_ENV
-            echo "::set-output name=stage::prd"
-          else
-            echo "STAGE=dev" >> $GITHUB_ENV
-            echo "::set-output name=stage::dev"
-          fi
+          echo "STAGE=dev" >> $GITHUB_ENV
+          echo "::set-output name=stage::dev"
      - name: Use output
        run: echo "The stage is ${{ steps.set-env.outputs.stage }}"

@@ -4,9 +4,11 @@ on:
  push:
    branches:
      - main
+      - dev
  pull_request:
    branches:
      - main
+      - dev
jobs:
  docker-compose-test:

.github/workflows/profiling.yaml (new file, 140 lines)

@@ -0,0 +1,140 @@
name: test | profiling
on:
  workflow_dispatch:
  pull_request:
    types: [labeled, synchronize]
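# Profiling is opt-in: it runs on manual dispatch, or when a PR is labeled or updated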
jobs:
  profiler:
    runs-on: ubuntu-latest
    steps:
      # Checkout the code from the repository with full history
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0 # Fetch all history so we can checkout any commit
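      # Gate the remaining steps: only org admins and maintainers may run the profiler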
      - name: Check if the sender is a maintainer
        id: check_permissions
        uses: actions/github-script@v6
        with:
          script: |
            const sender = context.payload.sender.login;
            const { data: membership } = await github.rest.orgs.getMembershipForUser({
              org: context.repo.owner,
              username: sender,
            }).catch(() => ({ data: { role: null } }));
            return membership.role;
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      - name: Set is_maintainer variable
        run: |
          echo "is_maintainer=${{ steps.check_permissions.outputs.result == 'admin' || steps.check_permissions.outputs.result == 'maintainer' }}" >> $GITHUB_ENV
      - name: Stop if not a maintainer
        if: env.is_maintainer != 'true'
        run: |
          echo "User ${{ github.event.sender.login }} is not a maintainer. Exiting."
          exit 0 # Ends this step successfully; later steps still run unless they also check is_maintainer
      # Set up Python environment
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10'
      - name: Install Poetry
        uses: snok/install-poetry@v1.3.2
        with:
          virtualenvs-create: true
          virtualenvs-in-project: true
          installer-parallel: true
      - name: Install dependencies
        run: |
          poetry install --no-interaction --all-extras
          poetry run pip install pyinstrument
      # Set environment variables for SHAs
      - name: Set environment variables
        run: |
          echo "BASE_SHA=${{ github.event.pull_request.base.sha }}" >> $GITHUB_ENV
          echo "HEAD_SHA=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV
      # Run profiler on the base branch
      - name: Run profiler on base branch
        env:
          BASE_SHA: ${{ env.BASE_SHA }}
        run: |
          echo "Profiling the base branch for code_graph_pipeline.py"
          echo "Checking out base SHA: $BASE_SHA"
          git checkout $BASE_SHA
          echo "This is the working directory: $PWD"
          # Ensure the script is executable
          chmod +x cognee/api/v1/cognify/code_graph_pipeline.py
          # Run pyinstrument and save JSON results
          poetry run pyinstrument --renderer json -o base_results.json cognee/api/v1/cognify/code_graph_pipeline.py
      # Run profiler on the head branch
      - name: Run profiler on head branch
        env:
          HEAD_SHA: ${{ env.HEAD_SHA }}
        run: |
          echo "Profiling the head branch for code_graph_pipeline.py"
          echo "Checking out head SHA: $HEAD_SHA"
          git checkout $HEAD_SHA
          echo "This is the working directory: $PWD"
          # Ensure the script is executable
          chmod +x cognee/api/v1/cognify/code_graph_pipeline.py
          # Run pyinstrument and save JSON results
          poetry run pyinstrument --renderer json -o head_results.json cognee/api/v1/cognify/code_graph_pipeline.py
      # Compare profiling results
      - name: Compare profiling results
        run: |
          python -c '
          import json
          try:
              with open("base_results.json") as f:
                  base = json.load(f)
              with open("head_results.json") as f:
                  head = json.load(f)
              # pyinstrument JSON output reports wall-clock "duration" and "cpu_time" in seconds
              duration_diff = head.get("duration", 0) - base.get("duration", 0)
              cpu_diff = head.get("cpu_time", 0) - base.get("cpu_time", 0)
              results = [
                  f"Duration difference: {duration_diff:+.3f} s",
                  f"CPU time difference: {cpu_diff:+.3f} s"
              ]
              with open("profiling_diff.txt", "w") as f:
                  f.write("\n".join(results) + "\n")
              print("\n".join(results))  # Print results to terminal
          except Exception as e:
              error_message = f"Error comparing profiling results: {e}"
              with open("profiling_diff.txt", "w") as f:
                  f.write(error_message + "\n")
              print(error_message)  # Print error to terminal
          '
      - name: Upload profiling diff artifact
        uses: actions/upload-artifact@v3
        with:
          name: profiling-diff
          path: profiling_diff.txt
      # Post results to the pull request
      # - name: Post profiling results to PR
      #   uses: actions/github-script@v6
      #   with:
      #     script: |
      #       const fs = require('fs');
      #       const diff = fs.readFileSync('profiling_diff.txt', 'utf-8');
      #       github.rest.issues.createComment({
      #         issue_number: context.issue.number,
      #         owner: context.repo.owner,
      #         repo: context.repo.repo,
      #         body: `### Profiling Results for code_graph_pipeline.py\n\`\`\`\n${diff || 'No differences found.'}\n\`\`\``
      #       });

@@ -2,25 +2,16 @@
name: lint | code & tests
on:
-  pull_request:
-    branches:
-      - main
-      - devel
  workflow_dispatch:
+  pull_request:
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
-  get_docs_changes:
-    name: docs changes
-    uses: ./.github/workflows/get_docs_changes.yml
  run_lint:
    name: lint
-    needs: get_docs_changes
-    if: needs.get_docs_changes.outputs.changes_outside_docs == 'true'
    strategy:
      fail-fast: true
      matrix:

@@ -19,14 +19,9 @@ env:
  RUNTIME__LOG_LEVEL: ERROR
jobs:
-  get_docs_changes:
-    name: docs changes
-    uses: ./.github/workflows/get_docs_changes.yml
  run_notebook_test:
    name: test
-    needs: get_docs_changes
-    if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' && ${{ github.event.label.name == 'run-checks' }}
    runs-on: ubuntu-latest
    defaults:
      run:

@@ -19,14 +19,9 @@ env:
  RUNTIME__LOG_LEVEL: ERROR
jobs:
-  get_docs_changes:
-    name: docs changes
-    uses: ./.github/workflows/get_docs_changes.yml
  run_notebook_test:
    name: test
-    needs: get_docs_changes
-    if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' && ${{ github.event.label.name == 'run-checks' }}
    runs-on: ubuntu-latest
    defaults:
      run:

@@ -3,8 +3,6 @@ name: test | deduplication
on:
  workflow_dispatch:
  pull_request:
-    branches:
-      - main
    types: [labeled, synchronize]
@@ -16,14 +14,8 @@ env:
  RUNTIME__LOG_LEVEL: ERROR
jobs:
-  get_docs_changes:
-    name: docs changes
-    uses: ./.github/workflows/get_docs_changes.yml
  run_deduplication_test:
    name: test
-    needs: get_docs_changes
-    if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' && ${{ github.event.label.name == 'run-checks' }}
    runs-on: ubuntu-latest
    defaults:
      run:

@@ -14,14 +14,9 @@ env:
  ENV: 'dev'
jobs:
-  get_docs_changes:
-    name: docs changes
-    uses: ./.github/workflows/get_docs_changes.yml
  run_milvus:
    name: test
-    needs: get_docs_changes
-    if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' && ${{ github.event.label.name == 'run-checks' }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false

@@ -13,14 +13,8 @@ env:
  RUNTIME__LOG_LEVEL: ERROR
jobs:
-  get_docs_changes:
-    name: docs changes
-    uses: ./.github/workflows/get_docs_changes.yml
  run_neo4j_integration_test:
    name: test
-    needs: get_docs_changes
-    if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' && ${{ github.event.label.name == 'run-checks' }}
    runs-on: ubuntu-latest
    defaults:

@@ -14,14 +14,9 @@ env:
  RUNTIME__LOG_LEVEL: ERROR
jobs:
-  get_docs_changes:
-    name: docs changes
-    uses: ./.github/workflows/get_docs_changes.yml
  run_pgvector_integration_test:
    name: test
-    needs: get_docs_changes
-    if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' && ${{ github.event.label.name == 'run-checks' }}
    runs-on: ubuntu-latest
    defaults:
      run:

@@ -14,14 +14,9 @@ env:
  ENV: 'dev'
jobs:
-  get_docs_changes:
-    name: docs changes
-    uses: ./.github/workflows/get_docs_changes.yml
  run_common:
    name: test
-    needs: get_docs_changes
-    if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' && ${{ github.event.label.name == 'run-checks' }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false

@@ -14,14 +14,9 @@ env:
  ENV: 'dev'
jobs:
-  get_docs_changes:
-    name: docs changes
-    uses: ./.github/workflows/get_docs_changes.yml
  run_common:
    name: test
-    needs: get_docs_changes
-    if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' && ${{ github.event.label.name == 'run-checks' }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false

@@ -14,14 +14,9 @@ env:
  ENV: 'dev'
jobs:
-  get_docs_changes:
-    name: docs changes
-    uses: ./.github/workflows/get_docs_changes.yml
  run_common:
    name: test
-    needs: get_docs_changes
-    if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' && ${{ github.event.label.name == 'run-checks' }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false

@@ -14,14 +14,9 @@ env:
  RUNTIME__LOG_LEVEL: ERROR
jobs:
-  get_docs_changes:
-    name: docs changes
-    uses: ./.github/workflows/get_docs_changes.yml
  run_qdrant_integration_test:
    name: test
-    needs: get_docs_changes
-    if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' && ${{ github.event.label.name == 'run-checks' }}
    runs-on: ubuntu-latest
    defaults:

@@ -14,14 +14,9 @@ env:
  RUNTIME__LOG_LEVEL: ERROR
jobs:
-  get_docs_changes:
-    name: docs changes
-    uses: ./.github/workflows/get_docs_changes.yml
  run_weaviate_integration_test:
    name: test
-    needs: get_docs_changes
-    if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' && ${{ github.event.label.name == 'run-checks' }}
    runs-on: ubuntu-latest
    defaults:

@@ -53,6 +53,6 @@ async def retrieve_existing_edges(
    existing_edges_map = {}
    for edge in existing_edges:
-        existing_edges_map[edge[0] + edge[1] + edge[2]] = True
+        existing_edges_map[str(edge[0]) + str(edge[1]) + edge[2]] = True
    return existing_edges_map

@@ -46,7 +46,7 @@
    "import git\n",
    "\n",
    "notebook_path = path.abspath(\"\")\n",
-    "repo_clone_location = path.join(notebook_path, \"data/graphrag\")\n",
+    "repo_clone_location = path.join(notebook_path, \".data/graphrag\")\n",
    "\n",
    "LocalStorage.remove_all(repo_clone_location)\n",
    "\n",
@@ -86,7 +86,7 @@
    "from cognee.modules.pipelines import run_tasks\n",
    "\n",
    "notebook_path = os.path.abspath(\"\")\n",
-    "repo_clone_location = os.path.join(notebook_path, \"data/graphrag\")\n",
+    "repo_clone_location = os.path.join(notebook_path, \".data/graphrag\")\n",
    "\n",
    "pipeline = run_tasks(tasks, repo_clone_location, \"code_graph_pipeline\")\n",
    "\n",

poetry.lock (generated; 22 lines changed)

@@ -3106,13 +3106,13 @@ test = ["jupyter-server (>=2.0.0)", "pytest (>=7.0)", "pytest-jupyter[server] (>
[[package]]
name = "jupyterlab"
version = "4.2.6"
version = "4.3.3"
description = "JupyterLab computational environment"
optional = true
python-versions = ">=3.8"
files = [
{file = "jupyterlab-4.2.6-py3-none-any.whl", hash = "sha256:78dd42cae5b460f377624b03966a8730e3b0692102ddf5933a2a3730c1bc0a20"},
{file = "jupyterlab-4.2.6.tar.gz", hash = "sha256:625f3ac19da91f9706baf66df25723b2f1307c1159fc7293035b066786d62a4a"},
{file = "jupyterlab-4.3.3-py3-none-any.whl", hash = "sha256:32a8fd30677e734ffcc3916a4758b9dab21b02015b668c60eb36f84357b7d4b1"},
{file = "jupyterlab-4.3.3.tar.gz", hash = "sha256:76fa39e548fdac94dc1204af5956c556f54c785f70ee26aa47ea08eda4d5bbcd"},
]
[package.dependencies]
@@ -3127,15 +3127,15 @@ jupyter-server = ">=2.4.0,<3"
jupyterlab-server = ">=2.27.1,<3"
notebook-shim = ">=0.2"
packaging = "*"
=40.1.0">
-setuptools = ">=40.1.0"
+setuptools = ">=40.8.0"
tomli = {version = ">=1.2.2", markers = "python_version < \"3.11\""}
tornado = ">=6.2.0"
traitlets = "*"
[package.extras]
-dev = ["build", "bump2version", "coverage", "hatch", "pre-commit", "pytest-cov", "ruff (==0.3.5)"]
-docs = ["jsx-lexer", "myst-parser", "pydata-sphinx-theme (>=0.13.0)", "pytest", "pytest-check-links", "pytest-jupyter", "sphinx (>=1.8,<7.3.0)", "sphinx-copybutton"]
-docs-screenshots = ["altair (==5.3.0)", "ipython (==8.16.1)", "ipywidgets (==8.1.2)", "jupyterlab-geojson (==3.4.0)", "jupyterlab-language-pack-zh-cn (==4.1.post2)", "matplotlib (==3.8.3)", "nbconvert (>=7.0.0)", "pandas (==2.2.1)", "scipy (==1.12.0)", "vega-datasets (==0.9.0)"]
+dev = ["build", "bump2version", "coverage", "hatch", "pre-commit", "pytest-cov", "ruff (==0.6.9)"]
+docs = ["jsx-lexer", "myst-parser", "pydata-sphinx-theme (>=0.13.0)", "pytest", "pytest-check-links", "pytest-jupyter", "sphinx (>=1.8,<8.1.0)", "sphinx-copybutton"]
+docs-screenshots = ["altair (==5.4.1)", "ipython (==8.16.1)", "ipywidgets (==8.1.5)", "jupyterlab-geojson (==3.4.0)", "jupyterlab-language-pack-zh-cn (==4.2.post3)", "matplotlib (==3.9.2)", "nbconvert (>=7.0.0)", "pandas (==2.2.3)", "scipy (==1.14.1)", "vega-datasets (==0.9.0)"]
test = ["coverage", "pytest (>=7.0)", "pytest-check-links (>=0.7)", "pytest-console-scripts", "pytest-cov", "pytest-jupyter (>=0.5.3)", "pytest-timeout", "pytest-tornasync", "requests", "requests-cache", "virtualenv"]
upgrade-extension = ["copier (>=9,<10)", "jinja2-time (<0.3)", "pydantic (<3.0)", "pyyaml-include (<3.0)", "tomli-w (<2.0)"]
@@ -4532,18 +4532,18 @@ twitter = ["twython"]
[[package]]
name = "notebook"
version = "7.2.2"
version = "7.3.1"
description = "Jupyter Notebook - A web-based notebook environment for interactive computing"
optional = true
python-versions = ">=3.8"
files = [
{file = "notebook-7.2.2-py3-none-any.whl", hash = "sha256:c89264081f671bc02eec0ed470a627ed791b9156cad9285226b31611d3e9fe1c"},
{file = "notebook-7.2.2.tar.gz", hash = "sha256:2ef07d4220421623ad3fe88118d687bc0450055570cdd160814a59cf3a1c516e"},
{file = "notebook-7.3.1-py3-none-any.whl", hash = "sha256:212e1486b2230fe22279043f33c7db5cf9a01d29feb063a85cb139747b7c9483"},
{file = "notebook-7.3.1.tar.gz", hash = "sha256:84381c2a82d867517fd25b86e986dae1fe113a70b98f03edff9b94e499fec8fa"},
]
[package.dependencies]
jupyter-server = ">=2.4.0,<3"
=4.2">
-jupyterlab = ">=4.2.0,<4.3"
+jupyterlab = ">=4.3.2,<4.4"
jupyterlab-server = ">=2.27.1,<3"
notebook-shim = ">=0.2,<0.3"
tornado = ">=6.2.0"

@@ -1,6 +1,6 @@
[tool.poetry]
name = "cognee"
-version = "0.1.19"
+version = "0.1.20"
description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning."
authors = ["Vasilije Markovic", "Boris Arzentar"]
readme = "README.md"