feat: codegraph improvements and new CODE search [COG-1351] (#581)

<!-- .github/pull_request_template.md -->

## Description
<!-- Provide a clear description of the changes in this PR -->

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->

## Summary by CodeRabbit

- **New Features**
	- Introduced an automated deployment workflow to build and push container images.
	- Updated dependency management to include additional database support.
- **Refactor**
	- Enhanced asynchronous operations and logging in the server for improved performance.
	- Optimized extraction and retrieval processes for code-related data.
- **Chores**
	- Streamlined build configurations and startup scripts for greater reliability.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Co-authored-by: hajdul88 <52442977+hajdul88@users.noreply.github.com>
Co-authored-by: Igor Ilic <igorilic03@gmail.com>
This commit is contained in:
Boris 2025-02-26 20:15:02 +01:00 committed by GitHub
parent f6ced4122a
commit 711ae8e675
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 532 additions and 121 deletions

48
.github/workflows/dockerhub-mcp.yml vendored Normal file
View file

@ -0,0 +1,48 @@
# Workflow: builds a multi-platform Docker image and pushes it to Docker Hub
# under the image name cognee/cognee-mcp.
name: build | Build and Push Cognee MCP Docker Image to dockerhub
# Runs on every push to the main branch.
on:
push:
branches:
- main
jobs:
docker-build-and-push:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
# Authenticate with the credentials stored as repository secrets.
- name: Log in to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
# Compute image tags and labels: the branch name, the commit SHA prefixed
# with the branch name, and `latest` when building the default branch.
- name: Extract metadata
id: meta
uses: docker/metadata-action@v5
with:
images: cognee/cognee-mcp
tags: |
type=ref,event=branch
type=sha,prefix={{branch}}-
type=raw,value=latest,enable={{is_default_branch}}
- name: Build and push
id: build
uses: docker/build-push-action@v5
with:
# NOTE(review): the build context is the repository root and no `file:`
# input is given, so the action is expected to look for a Dockerfile at the
# root of the context — confirm this selects the Dockerfile intended for
# the cognee-mcp image.
context: .
platforms: linux/amd64,linux/arm64
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
# Build cache is read from and written back to the registry under the
# `buildcache` tag of the same image.
cache-from: type=registry,ref=cognee/cognee-mcp:buildcache
cache-to: type=registry,ref=cognee/cognee-mcp:buildcache,mode=max
# Print the image digest reported by the build step.
- name: Image digest
run: echo ${{ steps.build.outputs.digest }}

View file

@ -3,7 +3,6 @@ name: build | Build and Push Docker Image to dockerhub
on:
push:
branches:
- dev
- main
jobs:
@ -34,6 +33,7 @@ jobs:
type=raw,value=latest,enable={{is_default_branch}}
- name: Build and push
id: build
uses: docker/build-push-action@v5
with:
context: .

49
cognee-mcp/Dockerfile Normal file
View file

@ -0,0 +1,49 @@
# Two-stage build: the first stage (named `uv`) installs the project and its
# dependencies into /app/.venv; the second stage copies that environment into
# a plain Python image and runs the `cognee` executable.

# Use a Python image with uv pre-installed
FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS uv
# Set build argument
ARG DEBUG
# Set environment variable based on the build argument
ENV DEBUG=${DEBUG}
# NOTE(review): pip setting; the installs visible in this file all go through
# uv, so confirm this variable is still needed.
ENV PIP_NO_CACHE_DIR=true
# NOTE(review): appends a Poetry bin directory to PATH, but nothing in this
# file installs Poetry — possibly a leftover; confirm before relying on it.
ENV PATH="${PATH}:/root/.poetry/bin"
WORKDIR /app
# Enable bytecode compilation
ENV UV_COMPILE_BYTECODE=1
# Copy from the cache instead of linking since it's a mounted volume
ENV UV_LINK_MODE=copy
# Install the project's dependencies using the lockfile and settings
# (uv.lock and pyproject.toml are bind-mounted from the build context rather
# than copied into the image; the project itself is not installed yet)
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,source=uv.lock,target=uv.lock \
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
uv sync --frozen --no-install-project --no-dev --no-editable
# Then, add the rest of the project source code and install it
# Installing separately from its dependencies allows optimal layer caching
ADD . /app
RUN --mount=type=cache,target=/root/.cache/uv \
uv sync --frozen --no-dev --no-editable
# Disabled: system packages for building PostgreSQL client libraries.
# RUN apt-get update && apt-get install
# RUN apt-get install -y \
# gcc \
# libpq-dev
# Final runtime stage: starts from a plain Python image, so only the paths
# copied below carry over from the `uv` stage.
FROM python:3.12-slim-bookworm
WORKDIR /app
COPY --from=uv /root/.local /root/.local
# NOTE(review): --chown=app:app names a user/group `app` that this file never
# creates — confirm it resolves in the base image.
COPY --from=uv --chown=app:app /app/.venv /app/.venv
# Place executables in the environment at the front of the path
ENV PATH="/app/.venv/bin:$PATH"
# Start the `cognee` executable, resolved through the PATH set above —
# presumably the console script installed into /app/.venv/bin; verify against
# the project's [project.scripts] entry.
ENTRYPOINT ["cognee"]

View file

@ -6,7 +6,7 @@ readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"cognee[codegraph]",
"cognee[codegraph,postgres,neo4j]",
"mcp==1.2.1",
]
@ -21,5 +21,10 @@ build-backend = "hatchling.build"
[tool.hatch.build.targets.wheel]
packages = ["src"]
[dependency-groups]
dev = [
"debugpy>=1.8.12",
]
[project.scripts]
cognee = "src:main"

View file

@ -1,3 +1,4 @@
import asyncio
import json
import os
import cognee
@ -92,7 +93,7 @@ async def call_tools(name: str, arguments: dict) -> list[types.TextContent]:
with open(os.devnull, "w") as fnull:
with redirect_stdout(fnull), redirect_stderr(fnull):
if name == "cognify":
cognify(
await cognify(
text=arguments["text"],
graph_model_file=arguments.get("graph_model_file", None),
graph_model_name=arguments.get("graph_model_name", None),
@ -161,6 +162,8 @@ async def main():
try:
from mcp.server.stdio import stdio_server
logger.info("Starting Cognee MCP server...")
async with stdio_server() as (read_stream, write_stream):
await mcp.run(
read_stream=read_stream,
@ -249,6 +252,4 @@ def load_class(model_file, model_name):
if __name__ == "__main__":
# Initialize and run the server
import asyncio
asyncio.run(main())

137
cognee-mcp/uv.lock generated
View file

@ -225,6 +225,49 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/a7/fa/e01228c2938de91d47b307831c62ab9e4001e747789d0b05baf779a6488c/async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028", size = 5721 },
]
[[package]]
name = "asyncpg"
version = "0.30.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "async-timeout", marker = "python_full_version < '3.11'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/2f/4c/7c991e080e106d854809030d8584e15b2e996e26f16aee6d757e387bc17d/asyncpg-0.30.0.tar.gz", hash = "sha256:c551e9928ab6707602f44811817f82ba3c446e018bfe1d3abecc8ba5f3eac851", size = 957746 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/bb/07/1650a8c30e3a5c625478fa8aafd89a8dd7d85999bf7169b16f54973ebf2c/asyncpg-0.30.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bfb4dd5ae0699bad2b233672c8fc5ccbd9ad24b89afded02341786887e37927e", size = 673143 },
{ url = "https://files.pythonhosted.org/packages/a0/9a/568ff9b590d0954553c56806766914c149609b828c426c5118d4869111d3/asyncpg-0.30.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dc1f62c792752a49f88b7e6f774c26077091b44caceb1983509edc18a2222ec0", size = 645035 },
{ url = "https://files.pythonhosted.org/packages/de/11/6f2fa6c902f341ca10403743701ea952bca896fc5b07cc1f4705d2bb0593/asyncpg-0.30.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3152fef2e265c9c24eec4ee3d22b4f4d2703d30614b0b6753e9ed4115c8a146f", size = 2912384 },
{ url = "https://files.pythonhosted.org/packages/83/83/44bd393919c504ffe4a82d0aed8ea0e55eb1571a1dea6a4922b723f0a03b/asyncpg-0.30.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c7255812ac85099a0e1ffb81b10dc477b9973345793776b128a23e60148dd1af", size = 2947526 },
{ url = "https://files.pythonhosted.org/packages/08/85/e23dd3a2b55536eb0ded80c457b0693352262dc70426ef4d4a6fc994fa51/asyncpg-0.30.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:578445f09f45d1ad7abddbff2a3c7f7c291738fdae0abffbeb737d3fc3ab8b75", size = 2895390 },
{ url = "https://files.pythonhosted.org/packages/9b/26/fa96c8f4877d47dc6c1864fef5500b446522365da3d3d0ee89a5cce71a3f/asyncpg-0.30.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c42f6bb65a277ce4d93f3fba46b91a265631c8df7250592dd4f11f8b0152150f", size = 3015630 },
{ url = "https://files.pythonhosted.org/packages/34/00/814514eb9287614188a5179a8b6e588a3611ca47d41937af0f3a844b1b4b/asyncpg-0.30.0-cp310-cp310-win32.whl", hash = "sha256:aa403147d3e07a267ada2ae34dfc9324e67ccc4cdca35261c8c22792ba2b10cf", size = 568760 },
{ url = "https://files.pythonhosted.org/packages/f0/28/869a7a279400f8b06dd237266fdd7220bc5f7c975348fea5d1e6909588e9/asyncpg-0.30.0-cp310-cp310-win_amd64.whl", hash = "sha256:fb622c94db4e13137c4c7f98834185049cc50ee01d8f657ef898b6407c7b9c50", size = 625764 },
{ url = "https://files.pythonhosted.org/packages/4c/0e/f5d708add0d0b97446c402db7e8dd4c4183c13edaabe8a8500b411e7b495/asyncpg-0.30.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5e0511ad3dec5f6b4f7a9e063591d407eee66b88c14e2ea636f187da1dcfff6a", size = 674506 },
{ url = "https://files.pythonhosted.org/packages/6a/a0/67ec9a75cb24a1d99f97b8437c8d56da40e6f6bd23b04e2f4ea5d5ad82ac/asyncpg-0.30.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:915aeb9f79316b43c3207363af12d0e6fd10776641a7de8a01212afd95bdf0ed", size = 645922 },
{ url = "https://files.pythonhosted.org/packages/5c/d9/a7584f24174bd86ff1053b14bb841f9e714380c672f61c906eb01d8ec433/asyncpg-0.30.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c198a00cce9506fcd0bf219a799f38ac7a237745e1d27f0e1f66d3707c84a5a", size = 3079565 },
{ url = "https://files.pythonhosted.org/packages/a0/d7/a4c0f9660e333114bdb04d1a9ac70db690dd4ae003f34f691139a5cbdae3/asyncpg-0.30.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3326e6d7381799e9735ca2ec9fd7be4d5fef5dcbc3cb555d8a463d8460607956", size = 3109962 },
{ url = "https://files.pythonhosted.org/packages/3c/21/199fd16b5a981b1575923cbb5d9cf916fdc936b377e0423099f209e7e73d/asyncpg-0.30.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:51da377487e249e35bd0859661f6ee2b81db11ad1f4fc036194bc9cb2ead5056", size = 3064791 },
{ url = "https://files.pythonhosted.org/packages/77/52/0004809b3427534a0c9139c08c87b515f1c77a8376a50ae29f001e53962f/asyncpg-0.30.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bc6d84136f9c4d24d358f3b02be4b6ba358abd09f80737d1ac7c444f36108454", size = 3188696 },
{ url = "https://files.pythonhosted.org/packages/52/cb/fbad941cd466117be58b774a3f1cc9ecc659af625f028b163b1e646a55fe/asyncpg-0.30.0-cp311-cp311-win32.whl", hash = "sha256:574156480df14f64c2d76450a3f3aaaf26105869cad3865041156b38459e935d", size = 567358 },
{ url = "https://files.pythonhosted.org/packages/3c/0a/0a32307cf166d50e1ad120d9b81a33a948a1a5463ebfa5a96cc5606c0863/asyncpg-0.30.0-cp311-cp311-win_amd64.whl", hash = "sha256:3356637f0bd830407b5597317b3cb3571387ae52ddc3bca6233682be88bbbc1f", size = 629375 },
{ url = "https://files.pythonhosted.org/packages/4b/64/9d3e887bb7b01535fdbc45fbd5f0a8447539833b97ee69ecdbb7a79d0cb4/asyncpg-0.30.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c902a60b52e506d38d7e80e0dd5399f657220f24635fee368117b8b5fce1142e", size = 673162 },
{ url = "https://files.pythonhosted.org/packages/6e/eb/8b236663f06984f212a087b3e849731f917ab80f84450e943900e8ca4052/asyncpg-0.30.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:aca1548e43bbb9f0f627a04666fedaca23db0a31a84136ad1f868cb15deb6e3a", size = 637025 },
{ url = "https://files.pythonhosted.org/packages/cc/57/2dc240bb263d58786cfaa60920779af6e8d32da63ab9ffc09f8312bd7a14/asyncpg-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c2a2ef565400234a633da0eafdce27e843836256d40705d83ab7ec42074efb3", size = 3496243 },
{ url = "https://files.pythonhosted.org/packages/f4/40/0ae9d061d278b10713ea9021ef6b703ec44698fe32178715a501ac696c6b/asyncpg-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1292b84ee06ac8a2ad8e51c7475aa309245874b61333d97411aab835c4a2f737", size = 3575059 },
{ url = "https://files.pythonhosted.org/packages/c3/75/d6b895a35a2c6506952247640178e5f768eeb28b2e20299b6a6f1d743ba0/asyncpg-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0f5712350388d0cd0615caec629ad53c81e506b1abaaf8d14c93f54b35e3595a", size = 3473596 },
{ url = "https://files.pythonhosted.org/packages/c8/e7/3693392d3e168ab0aebb2d361431375bd22ffc7b4a586a0fc060d519fae7/asyncpg-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:db9891e2d76e6f425746c5d2da01921e9a16b5a71a1c905b13f30e12a257c4af", size = 3641632 },
{ url = "https://files.pythonhosted.org/packages/32/ea/15670cea95745bba3f0352341db55f506a820b21c619ee66b7d12ea7867d/asyncpg-0.30.0-cp312-cp312-win32.whl", hash = "sha256:68d71a1be3d83d0570049cd1654a9bdfe506e794ecc98ad0873304a9f35e411e", size = 560186 },
{ url = "https://files.pythonhosted.org/packages/7e/6b/fe1fad5cee79ca5f5c27aed7bd95baee529c1bf8a387435c8ba4fe53d5c1/asyncpg-0.30.0-cp312-cp312-win_amd64.whl", hash = "sha256:9a0292c6af5c500523949155ec17b7fe01a00ace33b68a476d6b5059f9630305", size = 621064 },
{ url = "https://files.pythonhosted.org/packages/3a/22/e20602e1218dc07692acf70d5b902be820168d6282e69ef0d3cb920dc36f/asyncpg-0.30.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:05b185ebb8083c8568ea8a40e896d5f7af4b8554b64d7719c0eaa1eb5a5c3a70", size = 670373 },
{ url = "https://files.pythonhosted.org/packages/3d/b3/0cf269a9d647852a95c06eb00b815d0b95a4eb4b55aa2d6ba680971733b9/asyncpg-0.30.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c47806b1a8cbb0a0db896f4cd34d89942effe353a5035c62734ab13b9f938da3", size = 634745 },
{ url = "https://files.pythonhosted.org/packages/8e/6d/a4f31bf358ce8491d2a31bfe0d7bcf25269e80481e49de4d8616c4295a34/asyncpg-0.30.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b6fde867a74e8c76c71e2f64f80c64c0f3163e687f1763cfaf21633ec24ec33", size = 3512103 },
{ url = "https://files.pythonhosted.org/packages/96/19/139227a6e67f407b9c386cb594d9628c6c78c9024f26df87c912fabd4368/asyncpg-0.30.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46973045b567972128a27d40001124fbc821c87a6cade040cfcd4fa8a30bcdc4", size = 3592471 },
{ url = "https://files.pythonhosted.org/packages/67/e4/ab3ca38f628f53f0fd28d3ff20edff1c975dd1cb22482e0061916b4b9a74/asyncpg-0.30.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9110df111cabc2ed81aad2f35394a00cadf4f2e0635603db6ebbd0fc896f46a4", size = 3496253 },
{ url = "https://files.pythonhosted.org/packages/ef/5f/0bf65511d4eeac3a1f41c54034a492515a707c6edbc642174ae79034d3ba/asyncpg-0.30.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:04ff0785ae7eed6cc138e73fc67b8e51d54ee7a3ce9b63666ce55a0bf095f7ba", size = 3662720 },
{ url = "https://files.pythonhosted.org/packages/e7/31/1513d5a6412b98052c3ed9158d783b1e09d0910f51fbe0e05f56cc370bc4/asyncpg-0.30.0-cp313-cp313-win32.whl", hash = "sha256:ae374585f51c2b444510cdf3595b97ece4f233fde739aa14b50e0d64e8a7a590", size = 560404 },
{ url = "https://files.pythonhosted.org/packages/c8/a4/cec76b3389c4c5ff66301cd100fe88c318563ec8a520e0b2e792b5b84972/asyncpg-0.30.0-cp313-cp313-win_amd64.whl", hash = "sha256:f59b430b8e27557c3fb9869222559f7417ced18688375825f8f12302c34e915e", size = 621623 },
]
[[package]]
name = "attrs"
version = "25.1.0"
@ -529,22 +572,38 @@ codegraph = [
{ name = "tree-sitter" },
{ name = "tree-sitter-python" },
]
neo4j = [
{ name = "neo4j" },
]
postgres = [
{ name = "asyncpg" },
{ name = "pgvector" },
{ name = "psycopg2" },
]
[[package]]
name = "cognee-mcp"
version = "0.1.0"
source = { editable = "." }
dependencies = [
{ name = "cognee", extra = ["codegraph"] },
{ name = "cognee", extra = ["codegraph", "neo4j", "postgres"] },
{ name = "mcp" },
]
[package.dev-dependencies]
dev = [
{ name = "debugpy" },
]
[package.metadata]
requires-dist = [
{ name = "cognee", extras = ["codegraph"] },
{ name = "cognee", extras = ["codegraph", "postgres", "neo4j"] },
{ name = "mcp", specifier = "==1.2.1" },
]
[package.metadata.requires-dev]
dev = [{ name = "debugpy", specifier = ">=1.8.12" }]
[[package]]
name = "colorama"
version = "0.4.6"
@ -705,6 +764,31 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/ed/a5/33cf000137545a08b0a3a6ea76c8ccbd87917f78bb5d737f9f56f3b11ef6/datasets-3.1.0-py3-none-any.whl", hash = "sha256:dc8808a6d17838fe05e13b39aa7ac3ea0fd0806ed7004eaf4d4eb2c2a356bc61", size = 480554 },
]
[[package]]
name = "debugpy"
version = "1.8.12"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/68/25/c74e337134edf55c4dfc9af579eccb45af2393c40960e2795a94351e8140/debugpy-1.8.12.tar.gz", hash = "sha256:646530b04f45c830ceae8e491ca1c9320a2d2f0efea3141487c82130aba70dce", size = 1641122 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/56/19/dd58334c0a1ec07babf80bf29fb8daf1a7ca4c1a3bbe61548e40616ac087/debugpy-1.8.12-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:a2ba7ffe58efeae5b8fad1165357edfe01464f9aef25e814e891ec690e7dd82a", size = 2076091 },
{ url = "https://files.pythonhosted.org/packages/4c/37/bde1737da15f9617d11ab7b8d5267165f1b7dae116b2585a6643e89e1fa2/debugpy-1.8.12-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cbbd4149c4fc5e7d508ece083e78c17442ee13b0e69bfa6bd63003e486770f45", size = 3560717 },
{ url = "https://files.pythonhosted.org/packages/d9/ca/bc67f5a36a7de072908bc9e1156c0f0b272a9a2224cf21540ab1ffd71a1f/debugpy-1.8.12-cp310-cp310-win32.whl", hash = "sha256:b202f591204023b3ce62ff9a47baa555dc00bb092219abf5caf0e3718ac20e7c", size = 5180672 },
{ url = "https://files.pythonhosted.org/packages/c1/b9/e899c0a80dfa674dbc992f36f2b1453cd1ee879143cdb455bc04fce999da/debugpy-1.8.12-cp310-cp310-win_amd64.whl", hash = "sha256:9649eced17a98ce816756ce50433b2dd85dfa7bc92ceb60579d68c053f98dff9", size = 5212702 },
{ url = "https://files.pythonhosted.org/packages/af/9f/5b8af282253615296264d4ef62d14a8686f0dcdebb31a669374e22fff0a4/debugpy-1.8.12-cp311-cp311-macosx_14_0_universal2.whl", hash = "sha256:36f4829839ef0afdfdd208bb54f4c3d0eea86106d719811681a8627ae2e53dd5", size = 2174643 },
{ url = "https://files.pythonhosted.org/packages/ef/31/f9274dcd3b0f9f7d1e60373c3fa4696a585c55acb30729d313bb9d3bcbd1/debugpy-1.8.12-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a28ed481d530e3138553be60991d2d61103ce6da254e51547b79549675f539b7", size = 3133457 },
{ url = "https://files.pythonhosted.org/packages/ab/ca/6ee59e9892e424477e0c76e3798046f1fd1288040b927319c7a7b0baa484/debugpy-1.8.12-cp311-cp311-win32.whl", hash = "sha256:4ad9a94d8f5c9b954e0e3b137cc64ef3f579d0df3c3698fe9c3734ee397e4abb", size = 5106220 },
{ url = "https://files.pythonhosted.org/packages/d5/1a/8ab508ab05ede8a4eae3b139bbc06ea3ca6234f9e8c02713a044f253be5e/debugpy-1.8.12-cp311-cp311-win_amd64.whl", hash = "sha256:4703575b78dd697b294f8c65588dc86874ed787b7348c65da70cfc885efdf1e1", size = 5130481 },
{ url = "https://files.pythonhosted.org/packages/ba/e6/0f876ecfe5831ebe4762b19214364753c8bc2b357d28c5d739a1e88325c7/debugpy-1.8.12-cp312-cp312-macosx_14_0_universal2.whl", hash = "sha256:7e94b643b19e8feb5215fa508aee531387494bf668b2eca27fa769ea11d9f498", size = 2500846 },
{ url = "https://files.pythonhosted.org/packages/19/64/33f41653a701f3cd2cbff8b41ebaad59885b3428b5afd0d93d16012ecf17/debugpy-1.8.12-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:086b32e233e89a2740c1615c2f775c34ae951508b28b308681dbbb87bba97d06", size = 4222181 },
{ url = "https://files.pythonhosted.org/packages/32/a6/02646cfe50bfacc9b71321c47dc19a46e35f4e0aceea227b6d205e900e34/debugpy-1.8.12-cp312-cp312-win32.whl", hash = "sha256:2ae5df899732a6051b49ea2632a9ea67f929604fd2b036613a9f12bc3163b92d", size = 5227017 },
{ url = "https://files.pythonhosted.org/packages/da/a6/10056431b5c47103474312cf4a2ec1001f73e0b63b1216706d5fef2531eb/debugpy-1.8.12-cp312-cp312-win_amd64.whl", hash = "sha256:39dfbb6fa09f12fae32639e3286112fc35ae976114f1f3d37375f3130a820969", size = 5267555 },
{ url = "https://files.pythonhosted.org/packages/cf/4d/7c3896619a8791effd5d8c31f0834471fc8f8fb3047ec4f5fc69dd1393dd/debugpy-1.8.12-cp313-cp313-macosx_14_0_universal2.whl", hash = "sha256:696d8ae4dff4cbd06bf6b10d671e088b66669f110c7c4e18a44c43cf75ce966f", size = 2485246 },
{ url = "https://files.pythonhosted.org/packages/99/46/bc6dcfd7eb8cc969a5716d858e32485eb40c72c6a8dc88d1e3a4d5e95813/debugpy-1.8.12-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:898fba72b81a654e74412a67c7e0a81e89723cfe2a3ea6fcd3feaa3395138ca9", size = 4218616 },
{ url = "https://files.pythonhosted.org/packages/03/dd/d7fcdf0381a9b8094da1f6a1c9f19fed493a4f8576a2682349b3a8b20ec7/debugpy-1.8.12-cp313-cp313-win32.whl", hash = "sha256:22a11c493c70413a01ed03f01c3c3a2fc4478fc6ee186e340487b2edcd6f4180", size = 5226540 },
{ url = "https://files.pythonhosted.org/packages/25/bd/ecb98f5b5fc7ea0bfbb3c355bc1dd57c198a28780beadd1e19915bf7b4d9/debugpy-1.8.12-cp313-cp313-win_amd64.whl", hash = "sha256:fdb3c6d342825ea10b90e43d7f20f01535a72b3a1997850c0c3cefa5c27a4a2c", size = 5267134 },
{ url = "https://files.pythonhosted.org/packages/38/c4/5120ad36405c3008f451f94b8f92ef1805b1e516f6ff870f331ccb3c4cc0/debugpy-1.8.12-py2.py3-none-any.whl", hash = "sha256:274b6a2040349b5c9864e475284bce5bb062e63dce368a394b8cc865ae3b00c6", size = 5229490 },
]
[[package]]
name = "deprecation"
version = "2.1.0"
@ -1999,6 +2083,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/da/d9/f7f9379981e39b8c2511c9e0326d212accacb82f12fbfdc1aa2ce2a7b2b6/multiprocess-0.70.16-py39-none-any.whl", hash = "sha256:a0bafd3ae1b732eac64be2e72038231c1ba97724b60b09400d68f229fcc2fbf3", size = 133351 },
]
[[package]]
name = "neo4j"
version = "5.28.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pytz" },
]
sdist = { url = "https://files.pythonhosted.org/packages/4b/20/733dac16f7cedc80b23093415822c9763302519cba0e7c8bcdb5c01fc512/neo4j-5.28.1.tar.gz", hash = "sha256:ae8e37a1d895099062c75bc359b2cce62099baac7be768d0eba7180c1298e214", size = 231094 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/6a/57/94225fe5e9dabdc0ff60c88cbfcedf11277f4b34e7ab1373d3e62dbdd207/neo4j-5.28.1-py3-none-any.whl", hash = "sha256:6755ef9e5f4e14b403aef1138fb6315b120631a0075c138b5ddb2a06b87b09fd", size = 312258 },
]
[[package]]
name = "nest-asyncio"
version = "1.6.0"
@ -2342,6 +2438,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/7a/8a/166625d30f927e800e99f3f6556d8b3f4ad952c62d6a774844d73542b84b/pendulum-3.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:314c4038dc5e6a52991570f50edb2f08c339debdf8cea68ac355b32c4174e820", size = 293657 },
]
[[package]]
name = "pgvector"
version = "0.3.6"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "numpy" },
]
sdist = { url = "https://files.pythonhosted.org/packages/7d/d8/fd6009cee3e03214667df488cdcf9609461d729968da94e4f95d6359d304/pgvector-0.3.6.tar.gz", hash = "sha256:31d01690e6ea26cea8a633cde5f0f55f5b246d9c8292d68efdef8c22ec994ade", size = 25421 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/fb/81/f457d6d361e04d061bef413749a6e1ab04d98cfeec6d8abcfe40184750f3/pgvector-0.3.6-py3-none-any.whl", hash = "sha256:f6c269b3c110ccb7496bac87202148ed18f34b390a0189c783e351062400a75a", size = 24880 },
]
[[package]]
name = "pillow"
version = "10.4.0"
@ -2531,6 +2639,21 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/fd/b2/ab07b09e0f6d143dfb839693aa05765257bceaa13d03bf1a696b78323e7a/protobuf-5.29.3-py3-none-any.whl", hash = "sha256:0a18ed4a24198528f2333802eb075e59dea9d679ab7a6c5efb017a59004d849f", size = 172550 },
]
[[package]]
name = "psycopg2"
version = "2.9.10"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/62/51/2007ea29e605957a17ac6357115d0c1a1b60c8c984951c19419b3474cdfd/psycopg2-2.9.10.tar.gz", hash = "sha256:12ec0b40b0273f95296233e8750441339298e6a572f7039da5b260e3c8b60e11", size = 385672 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/0a/a9/146b6bdc0d33539a359f5e134ee6dda9173fb8121c5b96af33fa299e50c4/psycopg2-2.9.10-cp310-cp310-win32.whl", hash = "sha256:5df2b672140f95adb453af93a7d669d7a7bf0a56bcd26f1502329166f4a61716", size = 1024527 },
{ url = "https://files.pythonhosted.org/packages/47/50/c509e56f725fd2572b59b69bd964edaf064deebf1c896b2452f6b46fdfb3/psycopg2-2.9.10-cp310-cp310-win_amd64.whl", hash = "sha256:c6f7b8561225f9e711a9c47087388a97fdc948211c10a4bccbf0ba68ab7b3b5a", size = 1163735 },
{ url = "https://files.pythonhosted.org/packages/20/a2/c51ca3e667c34e7852157b665e3d49418e68182081060231d514dd823225/psycopg2-2.9.10-cp311-cp311-win32.whl", hash = "sha256:47c4f9875125344f4c2b870e41b6aad585901318068acd01de93f3677a6522c2", size = 1024538 },
{ url = "https://files.pythonhosted.org/packages/33/39/5a9a229bb5414abeb86e33b8fc8143ab0aecce5a7f698a53e31367d30caa/psycopg2-2.9.10-cp311-cp311-win_amd64.whl", hash = "sha256:0435034157049f6846e95103bd8f5a668788dd913a7c30162ca9503fdf542cb4", size = 1163736 },
{ url = "https://files.pythonhosted.org/packages/3d/16/4623fad6076448df21c1a870c93a9774ad8a7b4dd1660223b59082dd8fec/psycopg2-2.9.10-cp312-cp312-win32.whl", hash = "sha256:65a63d7ab0e067e2cdb3cf266de39663203d38d6a8ed97f5ca0cb315c73fe067", size = 1025113 },
{ url = "https://files.pythonhosted.org/packages/66/de/baed128ae0fc07460d9399d82e631ea31a1f171c0c4ae18f9808ac6759e3/psycopg2-2.9.10-cp312-cp312-win_amd64.whl", hash = "sha256:4a579d6243da40a7b3182e0430493dbd55950c493d8c68f4eec0b302f6bbf20e", size = 1163951 },
{ url = "https://files.pythonhosted.org/packages/ae/49/a6cfc94a9c483b1fa401fbcb23aca7892f60c7269c5ffa2ac408364f80dc/psycopg2-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:91fd603a2155da8d0cfcdbf8ab24a2d54bca72795b90d2a3ed2b6da8d979dee2", size = 2569060 },
]
[[package]]
name = "pwdlib"
version = "0.2.1"
@ -2584,6 +2707,16 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/bd/67/16d48e7f02b285b39028aa47f847b3a279c903bc5cd49c8012ea90255317/py_rust_stemmers-0.1.3-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:fbb9f7933239a57d1d9c0fcdfbe0c5283a081e9e64ddc48ed878783be3d52b2b", size = 567278 },
{ url = "https://files.pythonhosted.org/packages/ad/1c/cb8cc9680f8aa04f96cb5c814887b3bb8d23a2e9abf460ef861ae16bfe50/py_rust_stemmers-0.1.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:921803a6f8259f10bf348ac0e32a767c28ab587c9ad5c3b1ee593a4bbbe98d39", size = 488907 },
{ url = "https://files.pythonhosted.org/packages/cd/29/88217de06239e3e526fa6286a11e3662d94acb0be4216c1310301a252dab/py_rust_stemmers-0.1.3-cp312-none-win_amd64.whl", hash = "sha256:576206b540575e81bb84a0f620b7a8529f5e89b0b2ec7d4487f3183789dd5cfd", size = 208980 },
{ url = "https://files.pythonhosted.org/packages/74/62/ab1492a3d6b7c724443f1f964ff986c5d64a5d97a880dcc4de9475815c00/py_rust_stemmers-0.1.3-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:ab7b6cc01df4013bd2e766ea4c367922bff4612dd36ec4a8aa8125cb384c5dac", size = 286055 },
{ url = "https://files.pythonhosted.org/packages/f5/dd/35ec95708df96831382df12184ef51b2a3f4db7c5fbed4d0d88e9a83ea49/py_rust_stemmers-0.1.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d39a18641cfa6ff6678ea538d64926c1612eb6ddce9a90a61694f383743c0257", size = 271966 },
{ url = "https://files.pythonhosted.org/packages/1d/3e/676726ab5fdd9d47ea6c8f0bbceebffec7a5d3837c71fc869ecce68faa2b/py_rust_stemmers-0.1.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9ca50cef25d31e6ea200791f28976ee9500ef61fc91101343877b3d38fe3207a", size = 310515 },
{ url = "https://files.pythonhosted.org/packages/d2/d6/1722299d74959267d6d77fdfde7fc13aeacd0845265694fa65f358ed4a68/py_rust_stemmers-0.1.3-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a5d1a885830c5d94d36f74c0a2017225401f10e64f011e37e7b171ea84c17eb8", size = 315183 },
{ url = "https://files.pythonhosted.org/packages/bf/5c/279d420618d6dea0b00d40805e08418146c8af3c53db74345abf77f32551/py_rust_stemmers-0.1.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5bb25a58552c058530d69d119fc310dfa27e585dd7a4be6b8f739bd209c29164", size = 324424 },
{ url = "https://files.pythonhosted.org/packages/7f/85/4e6e62c94c3cad7f2ef861300fb277c8b9cc89b1bcc2aeeb0c67db20a83a/py_rust_stemmers-0.1.3-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:8016d3e7c43b1a93ac06e9c4d68f77c4f8d6beec6984b4e86438406a0b589d48", size = 324779 },
{ url = "https://files.pythonhosted.org/packages/c9/b9/1079fd911d82f0ca1a34d613c5849ea33dcf373d3a0f18355a0f784420f4/py_rust_stemmers-0.1.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:846a16e43d8e12d3178d608f82dcbddc0fd03c4478cde9adc377de58a769b825", size = 487976 },
{ url = "https://files.pythonhosted.org/packages/80/17/5c52ad2b7cc3dbeb50aa1485372442989cb4e753e6c40476b174f38cb117/py_rust_stemmers-0.1.3-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:9931ef64c9f2ace96f533092f5161a97bbf867ec5f1a9cb139838a6cf52da4c4", size = 575572 },
{ url = "https://files.pythonhosted.org/packages/9d/13/b019d8c0e8006702d0845b6bc7f9f0d100051a7936bc35d7f982117852f7/py_rust_stemmers-0.1.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:aa1ee56ae903f126598f237b45f316b2704ec29a85ad1d27467bf6a5b27c71b9", size = 493259 },
{ url = "https://files.pythonhosted.org/packages/79/9f/36a004b3925dc4a61c44968c6be009ba03dd62827f8d1490cd91f9c3c506/py_rust_stemmers-0.1.3-cp313-none-win_amd64.whl", hash = "sha256:2837fc5a60eb0fa2cefc6e41f5fcfb9ff350cd3cdbed25d34a1bc36057d29397", size = 209418 },
{ url = "https://files.pythonhosted.org/packages/f1/45/e1ec9e76b4462e70fa42f6ac8be9f1bfe6565c1c260b9e5824e772157edf/py_rust_stemmers-0.1.3-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:59eacf7687738b20886a7c0ceeae999d501902b4e6234cf11eecd2f45f2c26bb", size = 288041 },
{ url = "https://files.pythonhosted.org/packages/4a/5b/eb594ca68715c23dd3b8f52dd700c10cbdd8133faaaf19886962c8f97c90/py_rust_stemmers-0.1.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:e39d5d273e13aec2f07a2c3ea0050b3bf3aaa7b6e9f6bef3d4e728ab49979ae8", size = 274089 },
{ url = "https://files.pythonhosted.org/packages/79/55/b62b14cdeb7268a818f21e4c8cfd543261c563dc9bd89ba7116293ce3008/py_rust_stemmers-0.1.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f95b25138431c4a457d684c49c6de5ff0c1852cf1cb3657e187ea63610fc7c21", size = 310373 },

View file

@ -2,6 +2,8 @@ import asyncio
import logging
from uuid import NAMESPACE_OID, uuid5
from cognee.api.v1.search.search_v2 import search
from cognee.api.v1.search import SearchType
from cognee.base_config import get_base_config
from cognee.modules.cognify.config import get_cognify_config
from cognee.modules.pipelines import run_tasks
@ -42,7 +44,7 @@ async def run_code_graph_pipeline(repo_path, include_docs=False):
cognee_config = get_cognify_config()
user = await get_default_user()
detailed_extraction = False
detailed_extraction = True
tasks = [
Task(get_repo_file_dependencies, detailed_extraction=detailed_extraction),
@ -50,7 +52,7 @@ async def run_code_graph_pipeline(repo_path, include_docs=False):
# Task(expand_dependency_graph, task_config={"batch_size": 50}),
# Task(get_source_code_chunks, task_config={"batch_size": 50}),
# Task(summarize_code, task_config={"batch_size": 50}),
Task(add_data_points, task_config={"batch_size": 100 if detailed_extraction else 500}),
Task(add_data_points, task_config={"batch_size": 500}),
]
if include_docs:
@ -84,9 +86,17 @@ async def run_code_graph_pipeline(repo_path, include_docs=False):
if __name__ == "__main__":
async def main():
async for data_points in run_code_graph_pipeline("REPO_PATH"):
async for data_points in run_code_graph_pipeline("YOUR_REPO_PATH"):
print(data_points)
await render_graph()
search_results = await search(
query_type=SearchType.CODE,
query_text="How is Relationship weight calculated?",
)
for file in search_results:
print(file.filename)
asyncio.run(main())

View file

@ -0,0 +1,22 @@
You are an expert at extracting file names and Python code from text.
Extract file names and corresponding code pieces from text while preserving formatting and structure.
### Instructions:
1. **Identify File Names:** Extract filenames from inline text, headers, or markdown formatting. An empty list of filenames is completely normal.
2. **Extract Code:** Extract code pieces that are in the text (do not add additional content) and maintain their indentation and formatting. An empty list of code pieces is completely normal.
3. **Ensure Accuracy:** Avoid extraneous text, merge related snippets, and support multiple programming languages.
4. **Keep content:** Do not add files or code pieces that are not in the text. Make sure that everything you extract as code is actually code and not part of a sentence.
5. **Ensure relevancy:** Make sure that each extracted code piece is meaningful Python code rather than just one or two lines; extract classes and functions in one piece.
Examples:
1.
query: 'I want to change the test1.py file and want to add a print statement at the end'
files: ['test1.py']
codepieces: ""
2.
query: "print('Hello World') doesn't work in the test2.py file. What changes do I have to make there?"
files: ["test2.py"]
codepieces: "print(\'Hello World\')"

View file

@ -1,42 +1,128 @@
from cognee.low_level import DataPoint
from cognee.modules.graph.utils.convert_node_to_data_point import get_all_subclasses
from .brute_force_triplet_search import brute_force_triplet_search
import asyncio
import aiofiles
from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph
from typing import List, Dict, Any
from pydantic import BaseModel
from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.infrastructure.databases.vector import get_vector_engine
from cognee.infrastructure.llm.get_llm_client import get_llm_client
from cognee.infrastructure.llm.prompts import read_query_prompt
class CodeQueryInfo(BaseModel):
    """Response model for information extraction from the query"""

    # File names the LLM found in the query; may legitimately be empty.
    filenames: List[str] = []
    # All code extracted from the query, as ONE string (not a list of snippets).
    sourcecode: str


async def _search_node_ids(vector_engine, collections, search_texts) -> list[str]:
    """Search every collection with every text and return the matching ids as strings."""
    node_ids = []
    for collection in collections:
        for search_text in search_texts:
            search_results = await vector_engine.search(collection, search_text, limit=3)
            node_ids.extend(str(result.id) for result in search_results)
    return node_ids


async def _read_source_file(file_path) -> str:
    """Read a file as UTF-8 text; best effort, returns "" when it cannot be read."""
    try:
        async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
            return await f.read()
    except Exception as e:
        # Deliberately best-effort: one unreadable file must not fail the whole retrieval.
        print(f"Error reading {file_path}: {e}")
        return ""


async def code_graph_retrieval(query: str) -> list[dict[str, Any]]:
    """Retrieve the source files that are relevant to a natural-language code query.

    Steps:
      1. Ask the LLM to extract file names and source code from ``query``.
      2. Run vector searches over the file-name and class/function collections,
         using the extracted pieces when both are present and the raw query otherwise.
      3. Fetch the graph connections of every hit and collect the ``file_path``
         values found on either end of each connection.
      4. Read those files from disk.

    Args:
        query: Non-empty query text.

    Returns:
        One dict per file with the keys ``name``, ``description`` (both the file
        path) and ``content`` (the file text, or "" if it could not be read),
        ordered by file path.

    Raises:
        ValueError: If ``query`` is empty or not a string.
        RuntimeError: If the database engines cannot be created or the LLM call fails.
    """
    if not query or not isinstance(query, str):
        raise ValueError("The query must be a non-empty string.")

    file_name_collections = ["CodeFile_name"]
    classes_and_functions_collections = [
        "ClassDefinition_source_code",
        "FunctionDefinition_source_code",
    ]

    try:
        vector_engine = get_vector_engine()
        graph_engine = await get_graph_engine()
    except Exception as e:
        raise RuntimeError("Database initialization error in code_graph_retriever, ") from e

    system_prompt = read_query_prompt("codegraph_retriever_system.txt")
    llm_client = get_llm_client()

    try:
        files_and_codeparts = await llm_client.acreate_structured_output(
            text_input=query,
            system_prompt=system_prompt,
            response_model=CodeQueryInfo,
        )
    except Exception as e:
        raise RuntimeError("Failed to retrieve structured output from LLM") from e

    if not files_and_codeparts.filenames or not files_and_codeparts.sourcecode:
        # The extraction is incomplete: fall back to searching with the raw query.
        filename_queries = [query]
        code_queries = [query]
    else:
        filename_queries = files_and_codeparts.filenames
        # ``sourcecode`` is a single string. Iterating over it would issue one
        # vector search per character, so it is searched as a whole instead.
        code_queries = [files_and_codeparts.sourcecode]

    file_ids = await _search_node_ids(vector_engine, file_name_collections, filename_queries)
    code_ids = await _search_node_ids(
        vector_engine, classes_and_functions_collections, code_queries
    )

    # The same node can be hit by several searches; query its connections only once.
    unique_node_ids = list(dict.fromkeys(code_ids + file_ids))
    relevant_triplets = await asyncio.gather(
        *[graph_engine.get_connections(node_id) for node_id in unique_node_ids]
    )

    paths = set()
    for sublist in relevant_triplets:
        for tpl in sublist:
            if isinstance(tpl, tuple) and len(tpl) >= 3:
                # Elements 0 and 2 are the two connected nodes; element 1 is skipped.
                for node in (tpl[0], tpl[2]):
                    if "file_path" in node and node["file_path"]:
                        paths.add(node["file_path"])

    # Sort so that the output order does not depend on set iteration order.
    file_paths = sorted(paths)
    file_contents = await asyncio.gather(
        *[_read_source_file(file_path) for file_path in file_paths]
    )

    return [
        {
            "name": file_path,
            "description": file_path,
            "content": content,
        }
        for file_path, content in zip(file_paths, file_contents)
    ]

View file

@ -8,11 +8,11 @@ class Repository(DataPoint):
class ImportStatement(DataPoint):
name: str
module: str
start_point: tuple
end_point: tuple
source_code: str
file_path: Optional[str] = None
metadata: dict = {"index_fields": ["name", "source_code"]}
class FunctionDefinition(DataPoint):
@ -21,7 +21,7 @@ class FunctionDefinition(DataPoint):
end_point: tuple
source_code: str
file_path: Optional[str] = None
metadata: dict = {"index_fields": ["name", "source_code"]}
metadata: dict = {"index_fields": ["source_code"]}
class ClassDefinition(DataPoint):
@ -30,17 +30,18 @@ class ClassDefinition(DataPoint):
end_point: tuple
source_code: str
file_path: Optional[str] = None
metadata: dict = {"index_fields": ["name", "source_code"]}
metadata: dict = {"index_fields": ["source_code"]}
class CodeFile(DataPoint):
name: str
file_path: str
source_code: Optional[str] = None
part_of: Optional[Repository] = None
depends_on: Optional[List["ImportStatement"]] = []
provides_function_definition: Optional[List["FunctionDefinition"]] = []
provides_class_definition: Optional[List["ClassDefinition"]] = []
metadata: dict = {"index_fields": ["source_code"]}
metadata: dict = {"index_fields": ["name"]}
class CodePart(DataPoint):

View file

@ -1,7 +1,9 @@
from typing import AsyncGenerator
import os
import importlib
from typing import AsyncGenerator, Optional
from uuid import NAMESPACE_OID, uuid5
import tree_sitter_python as tspython
from tree_sitter import Language, Node, Parser
from tree_sitter import Language, Node, Parser, Tree
import aiofiles
@ -21,6 +23,19 @@ PY_LANGUAGE = Language(tspython.language())
source_code_parser = Parser(PY_LANGUAGE)
class FileParser:
    """Parse source files with ``source_code_parser`` and cache the result per path."""

    def __init__(self):
        # Maps file path -> (source text, parsed tree).
        self.parsed_files = {}

    async def parse_file(self, file_path: str) -> tuple[str, Tree]:
        """Return ``(source_code, tree)`` for ``file_path``, parsing it on first use.

        Later calls with the same path return the cached tuple without
        re-reading the file.
        """
        if file_path not in self.parsed_files:
            source_code = await get_source_code(file_path)
            if source_code is None:
                # get_source_code has a ``return None`` path; bytes(None, "utf-8")
                # would raise TypeError, so such a file is treated as empty.
                source_code = ""
            source_code_tree = source_code_parser.parse(bytes(source_code, "utf-8"))
            self.parsed_files[file_path] = (source_code, source_code_tree)

        return self.parsed_files[file_path]
async def get_source_code(file_path: str):
try:
async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
@ -31,31 +46,63 @@ async def get_source_code(file_path: str):
return None
def resolve_module_path(module_name):
    """Find the file path of a module.

    Args:
        module_name: Absolute dotted module name, e.g. ``"package.module"``.

    Returns:
        The ``origin`` of the module spec (normally the path of its source
        file), or ``None`` when the name is empty, the module cannot be found,
        or its spec has no origin.
    """
    # Function-scope import: a bare ``import importlib`` does not guarantee that
    # the ``importlib.util`` submodule has been loaded.
    import importlib.util

    if not module_name:
        return None

    try:
        spec = importlib.util.find_spec(module_name)
    except (ImportError, ValueError):
        # ImportError (which includes ModuleNotFoundError) covers a missing parent
        # package and relative names; ValueError is raised when an already
        # imported module has no usable __spec__.
        return None

    if spec and spec.origin:
        return spec.origin
    return None
def find_function_location(
    module_path: str, function_name: str, parser: "FileParser"
) -> Optional[tuple[str, tuple]]:
    """Find the function definition in the module.

    Only top-level ``function_definition`` nodes of the parsed file are
    inspected.

    Args:
        module_path: Path of the module's source file.
        function_name: Name of the function to look for.
        parser: File parser whose ``parsed_files`` cache is read and updated.

    Returns:
        ``(module_path, start_point)`` of the first matching definition, where
        ``start_point`` is the node's (line, column) position, or ``None`` when
        the path is missing, the file cannot be read, or no function matches.
    """
    if not module_path or not os.path.exists(module_path):
        return None

    # FileParser.parse_file is a coroutine function and cannot be awaited from
    # this synchronous function, so the cache is consulted directly and the file
    # is read synchronously on a cache miss.
    cached = parser.parsed_files.get(module_path)
    if cached is None:
        try:
            with open(module_path, "r", encoding="utf-8") as source_file:
                source_code = source_file.read()
        except (OSError, UnicodeDecodeError):
            return None
        tree = source_code_parser.parse(bytes(source_code, "utf-8"))
        parser.parsed_files[module_path] = (source_code, tree)
    else:
        _, tree = cached

    for node in tree.root_node.children:
        if node.type != "function_definition":
            continue
        func_name_node = node.child_by_field_name("name")
        if func_name_node and func_name_node.text.decode() == function_name:
            return (module_path, node.start_point)  # (line, column)

    return None
async def get_local_script_dependencies(
repo_path: str, script_path: str, detailed_extraction: bool = False
) -> CodeFile:
source_code = await get_source_code(script_path)
code_file_parser = FileParser()
source_code, source_code_tree = await code_file_parser.parse_file(script_path)
relative_file_path = script_path[len(repo_path) + 1 :]
file_path_relative_to_repo = script_path[len(repo_path) + 1 :]
if not detailed_extraction:
code_file_node = CodeFile(
id=uuid5(NAMESPACE_OID, script_path),
name=file_path_relative_to_repo,
source_code=source_code,
file_path=relative_file_path,
file_path=script_path,
)
return code_file_node
code_file_node = CodeFile(
id=uuid5(NAMESPACE_OID, script_path),
name=file_path_relative_to_repo,
source_code=None,
file_path=relative_file_path,
file_path=script_path,
)
source_code_tree = source_code_parser.parse(bytes(source_code, "utf-8"))
async for part in extract_code_parts(source_code_tree.root_node):
part.file_path = relative_file_path
async for part in extract_code_parts(source_code_tree.root_node, script_path=script_path):
part.file_path = script_path
if isinstance(part, FunctionDefinition):
code_file_node.provides_function_definition.append(part)
@ -75,42 +122,81 @@ def find_node(nodes: list[Node], condition: callable) -> Node:
return None
def _split_import_statement(statement: str) -> tuple[Optional[str], Optional[str]]:
    """Return ``(module_name, first_imported_name)`` for an import statement's text.

    ``first_imported_name`` is ``None`` for plain ``import x`` statements.
    Punctuation that sticks to whitespace-separated tokens (``,``, ``(``, ``)``)
    is stripped. Only the first module / imported name of a statement is reported.
    """
    tokens = statement.split()
    if len(tokens) < 2:
        return None, None

    module_name = tokens[1].strip("(),") or None
    function_name = None
    if tokens[0] == "from" and "import" in tokens[2:]:
        imported = tokens[tokens.index("import", 2) + 1 :]
        # Skip tokens that are pure punctuation, e.g. the "(" of a multi-line import.
        function_name = next(
            (token.strip("(),") for token in imported if token.strip("(),")), None
        )
    return module_name, function_name


async def extract_code_parts(
    tree_root: Node, script_path: str, existing_nodes: Optional[dict] = None
) -> AsyncGenerator[DataPoint, None]:
    """Yield data points for the imports, functions and classes directly under ``tree_root``.

    Args:
        tree_root: Root node of a parsed file; only its direct children are visited.
        script_path: Path stored as ``file_path`` on every created data point.
        existing_nodes: Optional cache of already created data points, keyed by
            ``"import <name>"`` for imported names, by module name for modules, and
            by the raw node text of the identifier for functions and classes.
            A fresh cache is used for each call when omitted; pass a dict
            explicitly to share it between calls.

    Yields:
        ``ImportStatement``, ``FunctionDefinition`` and ``ClassDefinition``
        instances; a cached instance is yielded again when its key repeats.
    """
    # A ``{}`` default would be created once and shared by every call, leaking
    # nodes from previously processed files into later ones.
    if existing_nodes is None:
        existing_nodes = {}

    for child_node in tree_root.children:
        if child_node.type in ("import_statement", "import_from_statement"):
            module_name, function_name = _split_import_statement(
                child_node.text.decode("utf-8")
            )

            if function_name and module_name:
                import_key = "import " + function_name
                if import_key not in existing_nodes:
                    existing_nodes[import_key] = ImportStatement(
                        name=function_name,
                        module=module_name,
                        start_point=child_node.start_point,
                        end_point=child_node.end_point,
                        file_path=script_path,
                        source_code=child_node.text,
                    )
                yield existing_nodes[import_key]

            if module_name:
                if module_name not in existing_nodes:
                    existing_nodes[module_name] = ImportStatement(
                        name=module_name,
                        module=module_name,
                        start_point=child_node.start_point,
                        end_point=child_node.end_point,
                        file_path=script_path,
                        source_code=child_node.text,
                    )
                yield existing_nodes[module_name]

        if child_node.type == "function_definition":
            function_node = find_node(child_node.children, lambda node: node.type == "identifier")
            function_node_name = function_node.text

            if function_node_name not in existing_nodes:
                existing_nodes[function_node_name] = FunctionDefinition(
                    name=function_node_name,
                    start_point=child_node.start_point,
                    end_point=child_node.end_point,
                    file_path=script_path,
                    source_code=child_node.text,
                )
            yield existing_nodes[function_node_name]

        if child_node.type == "class_definition":
            class_name_node = find_node(child_node.children, lambda node: node.type == "identifier")
            class_name_node_name = class_name_node.text

            if class_name_node_name not in existing_nodes:
                existing_nodes[class_name_node_name] = ClassDefinition(
                    name=class_name_node_name,
                    start_point=child_node.start_point,
                    end_point=child_node.end_point,
                    file_path=script_path,
                    source_code=child_node.text,
                )
            yield existing_nodes[class_name_node_name]

View file

@ -20,7 +20,12 @@ async def get_source_code_files(repo_path):
os.path.join(root, file)
for root, _, files in os.walk(repo_path)
for file in files
if file.endswith(".py")
if (
file.endswith(".py")
and not file.startswith("test_")
and not file.endswith("_test")
and ".venv" not in file
)
)
source_code_files = set()
@ -74,7 +79,7 @@ async def get_repo_file_dependencies(
# with ProcessPoolExecutor(max_workers=12) as executor:
tasks = [
get_local_script_dependencies(repo_path, file_path, detailed_extraction)
for file_path in source_code_files[start_range:end_range]
for file_path in source_code_files[start_range : end_range + 1]
]
results: list[CodeFile] = await asyncio.gather(*tasks)

View file

@ -1,35 +0,0 @@
#!/bin/bash
# Startup script: optionally fetches secrets, then launches the API under Gunicorn
# (wrapped in debugpy when DEBUG is true).
# export ENVIRONMENT

echo "Debug mode: $DEBUG"
echo "Environment: $ENVIRONMENT"

# Secrets are fetched for every environment except "local".
if [ "$ENVIRONMENT" = "local" ]; then
    echo '"local" environment is active, skipping fetch_secret.py'
else
    echo "Running fetch_secret.py"
    if ! PYTHONPATH=. python cognee/fetch_secret.py; then
        echo "Error: fetch_secret.py failed"
        exit 1
    fi
fi

echo "Creating database..."
# The database setup step below is currently disabled.
#
#PYTHONPATH=. python cognee/setup_database.py
#if [ $? -ne 0 ]; then
#    echo "Error: setup_database.py failed"
#    exit 1
#fi

echo "Starting Gunicorn"

# Arguments shared by the debug and the normal start-up path.
gunicorn_args=(
    -w 3
    -k uvicorn.workers.UvicornWorker
    -t 30000
    --bind=0.0.0.0:8000
    --bind=0.0.0.0:443
    --log-level debug
    cognee.api.client:app
)

if [ "$DEBUG" = true ]; then
    echo "Waiting for the debugger to attach..."
    python -m debugpy --wait-for-client --listen 0.0.0.0:5678 -m gunicorn "${gunicorn_args[@]}"
else
    gunicorn "${gunicorn_args[@]}"
fi

View file

@ -31,7 +31,7 @@ sleep 2
# Modified Gunicorn startup with error handling
if [ "$ENVIRONMENT" = "dev" ]; then
if [ "$DEBUG" = true ]; then
if [ "$DEBUG" = "true" ]; then
echo "Waiting for the debugger to attach..."
exec python -m debugpy --wait-for-client --listen 0.0.0.0:5678 -m gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --log-level debug --reload cognee.api.client:app
else