feat: add MCP check status tool [COG-1784] (#793)


## Description
Added MCP tools (`cognify_status` and `codify_status`) to check the current status of the cognify and codify pipelines.
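The new tools are exposed through the same MCP server as `cognify` and `codify`, so any MCP client can poll them while a background run is in progress. Below is a minimal sketch using the Python MCP SDK (`mcp==1.5.0` is pinned in this PR); the `uv run cognee` launch command and the empty tool arguments are illustrative assumptions, not part of this change.

```python
# Sketch only: polls the new status tools from a Python MCP client.
# Assumes the cognee-mcp server can be started with `uv run cognee`
# (the [project.scripts] entry point) from the cognee-mcp directory.
import asyncio

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client

server_params = StdioServerParameters(command="uv", args=["run", "cognee"])


async def main():
    async with stdio_client(server_params) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            cognify = await session.call_tool("cognify_status", arguments={})
            codify = await session.call_tool("codify_status", arguments={})
            print(cognify, codify)


asyncio.run(main())
```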

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
Commit 966e337500 (parent 13bb244746), authored by Igor Ilic on 2025-05-13 12:09:14 -04:00, committed by GitHub.
12 changed files with 3770 additions and 8249 deletions


@@ -1,12 +1,14 @@
[project]
name = "cognee-mcp"
version = "0.2.3"
version = "0.3.0"
description = "A MCP server project"
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"cognee[postgres,codegraph,gemini,huggingface]==0.1.39",
# For local cognee repo usage remove comment bellow and add absolute path to cognee
#"cognee[postgres,codegraph,gemini,huggingface] @ file:/Users/<username>/Desktop/cognee",
"cognee[postgres,codegraph,gemini,huggingface]==0.1.40",
"fastmcp>=1.0",
"mcp==1.5.0",
"uv>=0.6.3",
@@ -28,5 +30,8 @@ dev = [
"debugpy>=1.8.12",
]
[tool.hatch.metadata]
allow-direct-references = true
[project.scripts]
cognee = "src:main"


@@ -9,6 +9,9 @@ import importlib.util
from contextlib import redirect_stdout
import mcp.types as types
from mcp.server import FastMCP
+from cognee.modules.pipelines.operations.get_pipeline_status import get_pipeline_status
+from cognee.modules.data.methods.get_unique_dataset_id import get_unique_dataset_id
+from cognee.modules.users.methods import get_default_user
from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline
from cognee.modules.search.types import SearchType
from cognee.shared.data_models import KnowledgeGraph
@@ -28,7 +31,6 @@ async def cognify(text: str, graph_model_file: str = None, graph_model_name: str
"""Build knowledge graph from the input text"""
# NOTE: MCP uses stdout to communicate, we must redirect all output
# going to stdout ( like the print function ) to stderr.
-# As cognify is an async background job the output had to be redirected again.
with redirect_stdout(sys.stderr):
logger.info("Cognify process starting.")
if graph_model_file and graph_model_name:
@@ -55,8 +57,8 @@ async def cognify(text: str, graph_model_file: str = None, graph_model_name: str
text = (
f"Background process launched due to MCP timeout limitations.\n"
f"Average completion time is around 4 minutes.\n"
f"For current cognify status you can check the log file at: {log_file}"
f"To check current cognify status use the cognify_status tool\n"
f"or check the log file at: {log_file}"
)
return [
@@ -72,7 +74,6 @@ async def codify(repo_path: str) -> list:
async def codify_task(repo_path: str):
# NOTE: MCP uses stdout to communicate, we must redirect all output
# going to stdout ( like the print function ) to stderr.
-# As codify is an async background job the output had to be redirected again.
with redirect_stdout(sys.stderr):
logger.info("Codify process starting.")
results = []
@@ -88,8 +89,8 @@ async def codify(repo_path: str) -> list:
text = (
f"Background process launched due to MCP timeout limitations.\n"
f"Average completion time is around 4 minutes.\n"
f"For current codify status you can check the log file at: {log_file}"
f"To check current codify status use the codify_status tool\n"
f"or you can check the log file at: {log_file}"
)
return [
@@ -138,6 +139,24 @@ async def prune():
return [types.TextContent(type="text", text="Pruned")]
+@mcp.tool()
+async def cognify_status():
+    """Get status of cognify pipeline"""
+    with redirect_stdout(sys.stderr):
+        user = await get_default_user()
+        status = await get_pipeline_status([await get_unique_dataset_id("main_dataset", user)])
+        return [types.TextContent(type="text", text=str(status))]
+@mcp.tool()
+async def codify_status():
+    """Get status of codify pipeline"""
+    with redirect_stdout(sys.stderr):
+        user = await get_default_user()
+        status = await get_pipeline_status([await get_unique_dataset_id("codebase", user)])
+        return [types.TextContent(type="text", text=str(status))]
def node_to_string(node):
node_data = ", ".join(
[f'{key}: "{value}"' for key, value in node.items() if key in ["id", "name"]]
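For reference, the lookup that both new tools wrap can be reproduced outside the MCP server with the same helpers imported above. A sketch, assuming `cognee==0.1.40` with its databases already initialized and the default user created:

```python
# Sketch: the status lookup behind cognify_status, run directly.
# Assumes cognee==0.1.40 and an initialized local cognee setup.
import asyncio

from cognee.modules.data.methods.get_unique_dataset_id import get_unique_dataset_id
from cognee.modules.pipelines.operations.get_pipeline_status import get_pipeline_status
from cognee.modules.users.methods import get_default_user


async def main():
    user = await get_default_user()
    # cognify writes to "main_dataset"; codify_status uses "codebase" instead.
    dataset_id = await get_unique_dataset_id("main_dataset", user)
    status = await get_pipeline_status([dataset_id])
    print(status)


asyncio.run(main())
```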

cognee-mcp/uv.lock (generated): 4513 changes. File diff suppressed because it is too large.


@@ -13,6 +13,7 @@ from cognee.modules.pipelines.tasks.task import Task
from cognee.modules.users.methods import get_default_user
from cognee.shared.data_models import KnowledgeGraph
from cognee.tasks.documents import classify_documents, extract_chunks_from_documents
+from cognee.modules.data.methods.get_unique_dataset_id import get_unique_dataset_id
from cognee.tasks.graph import extract_graph_from_data
from cognee.tasks.ingestion import ingest_data
from cognee.tasks.repo_processor import get_non_py_files, get_repo_file_dependencies
@@ -64,7 +65,7 @@ async def run_code_graph_pipeline(repo_path, include_docs=False):
),
]
-dataset_id = uuid5(NAMESPACE_OID, "codebase")
+dataset_id = await get_unique_dataset_id("codebase", user)
if include_docs:
non_code_pipeline_run = run_tasks(


@@ -7,6 +7,7 @@ from .get_datasets import get_datasets
from .get_datasets_by_name import get_datasets_by_name
from .get_dataset_data import get_dataset_data
from .get_data import get_data
+from .get_unique_dataset_id import get_unique_dataset_id
# Delete
from .delete_dataset import delete_dataset


@@ -4,8 +4,13 @@ from sqlalchemy import select
from sqlalchemy.orm import joinedload
from cognee.modules.data.models import Dataset
+from cognee.modules.data.methods.get_unique_dataset_id import get_unique_dataset_id
from cognee.modules.users.models import User
+async def create_dataset(dataset_name: str, user: User, session: AsyncSession) -> Dataset:
+    owner_id = user.id
-async def create_dataset(dataset_name: str, owner_id: UUID, session: AsyncSession) -> Dataset:
dataset = (
await session.scalars(
select(Dataset)
@@ -16,10 +21,9 @@ async def create_dataset(dataset_name: str, owner_id: UUID, session: AsyncSessio
).first()
if dataset is None:
-# Dataset id should be generated based on dataset_name and owner_id so multiple users can use the same dataset_name
-dataset = Dataset(
-id=uuid5(NAMESPACE_OID, f"{dataset_name}{str(owner_id)}"), name=dataset_name, data=[]
-)
+# Dataset id should be generated based on dataset_name and owner_id/user so multiple users can use the same dataset_name
+dataset_id = await get_unique_dataset_id(dataset_name=dataset_name, user=user)
+dataset = Dataset(id=dataset_id, name=dataset_name, data=[])
dataset.owner_id = owner_id
session.add(dataset)


@@ -0,0 +1,6 @@
+from uuid import UUID, uuid5, NAMESPACE_OID
+from cognee.modules.users.models import User
+async def get_unique_dataset_id(dataset_name: str, user: User) -> UUID:
+    return uuid5(NAMESPACE_OID, f"{dataset_name}{str(user.id)}")
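The helper derives a deterministic UUIDv5 from the dataset name plus the owning user's id, so callers like `codify_status` can recompute the id for "codebase" without storing it, while two users who pick the same dataset name still get distinct ids. A small sketch of that property (the user ids below are made-up example values):

```python
# Sketch of the id derivation used by get_unique_dataset_id.
# The user ids here are hypothetical example values.
from uuid import NAMESPACE_OID, UUID, uuid5

user_a = UUID("00000000-0000-0000-0000-000000000001")
user_b = UUID("00000000-0000-0000-0000-000000000002")

# The same (dataset_name, user) pair always maps to the same id ...
assert uuid5(NAMESPACE_OID, f"codebase{user_a}") == uuid5(NAMESPACE_OID, f"codebase{user_a}")
# ... while different users sharing a dataset name get different ids.
assert uuid5(NAMESPACE_OID, f"codebase{user_a}") != uuid5(NAMESPACE_OID, f"codebase{user_b}")
```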


@@ -5,6 +5,7 @@ from uuid import uuid5, NAMESPACE_OID
from cognee.modules.data.methods import get_datasets, get_datasets_by_name
from cognee.modules.data.methods.get_dataset_data import get_dataset_data
+from cognee.modules.data.methods.get_unique_dataset_id import get_unique_dataset_id
from cognee.modules.data.models import Data, Dataset
from cognee.modules.pipelines.operations.run_tasks import run_tasks
from cognee.modules.pipelines.models import PipelineRunStatus
@@ -93,7 +94,7 @@ async def run_pipeline(
elif isinstance(dataset, str):
check_dataset_name(dataset)
# Generate id based on unique dataset_id formula
-dataset_id = uuid5(NAMESPACE_OID, f"{dataset}{str(user.id)}")
+dataset_id = await get_unique_dataset_id(dataset_name=dataset, user=user)
if not data:
data: list[Data] = await get_dataset_data(dataset_id=dataset_id)


@@ -104,7 +104,7 @@ async def ingest_data(
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
-dataset = await create_dataset(dataset_name, user.id, session)
+dataset = await create_dataset(dataset_name, user, session)
# Check to see if data should be updated
data_point = (

poetry.lock (generated): 7 changes.

@@ -8085,10 +8085,9 @@ tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"]
name = "pylance"
version = "0.22.0"
description = "python wrapper for Lance columnar format"
-optional = true
+optional = false
python-versions = ">=3.9"
groups = ["main"]
markers = "extra == \"dev\""
files = [
{file = "pylance-0.22.0-cp39-abi3-macosx_10_15_x86_64.whl", hash = "sha256:2c0bb6bf7320e500f0f5948e5b23e4d70d9c84bba15a2db2e877be9637c4dc0c"},
{file = "pylance-0.22.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:341a8cbac762c1f446a05a1513dab1b7930f433a8331b08b0b89a975f3864f6a"},
@@ -11963,7 +11962,7 @@ api = ["gunicorn", "uvicorn"]
chromadb = ["chromadb", "pypika"]
codegraph = ["fastembed", "transformers", "tree-sitter", "tree-sitter-python"]
deepeval = ["deepeval"]
dev = ["coverage", "debugpy", "deptry", "gitpython", "mkdocs-material", "mkdocs-minify-plugin", "mkdocstrings", "mypy", "notebook", "pylance", "pylint", "pytest", "pytest-asyncio", "pytest-cov", "ruff", "tweepy"]
dev = ["coverage", "debugpy", "deptry", "gitpython", "mkdocs-material", "mkdocs-minify-plugin", "mkdocstrings", "mypy", "notebook", "pylint", "pytest", "pytest-asyncio", "pytest-cov", "ruff", "tweepy"]
docs = ["unstructured"]
evals = ["gdown", "plotly"]
falkordb = ["falkordb"]
@@ -11988,4 +11987,4 @@ weaviate = ["weaviate-client"]
[metadata]
lock-version = "2.1"
python-versions = ">=3.10,<=3.13"
content-hash = "94d75c451c7580381771508ddcf6cee599b196d97c36b8b5bb5a3b64146f3224"
content-hash = "9d2a3beda2b6c329e69319e51c28c90d2806d8a257cd971e990600e99c1d96bd"


@@ -57,6 +57,7 @@ dependencies = [
"sentry-sdk[fastapi]>=2.9.0,<3",
"structlog>=25.2.0,<26",
"onnxruntime<=1.21.1",
"pylance==0.22.0",
]
[project.optional-dependencies]
@@ -122,7 +123,6 @@ dev = [
"ruff>=0.9.2,<1.0.0",
"tweepy==4.14.0",
"gitpython>=3.1.43,<4",
"pylance==0.22.0",
"mkdocs-material>=9.5.42,<10",
"mkdocs-minify-plugin>=0.8.0,<0.9",
"mkdocstrings[python]>=0.26.2,<0.27",

uv.lock (generated): 7428 changes. File diff suppressed because it is too large.