feat: add MCP check status tool [COG-1784] (#793)


## Description
Added MCP tools (`cognify_status` and `codify_status`) to check the current status of the cognify and codify pipelines.
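The new tools are exposed through the same MCP server as `cognify` and `codify`, so any MCP client can poll them while a background run is in progress. Below is a minimal sketch using the Python MCP SDK (`mcp==1.5.0` is pinned in this PR); the `uv run cognee` launch command and the empty tool arguments are illustrative assumptions, not part of this change.

```python
# Sketch only: polls the new status tools from a Python MCP client.
# Assumes the cognee-mcp server can be started with `uv run cognee`
# (the [project.scripts] entry point) from the cognee-mcp directory.
import asyncio

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client

server_params = StdioServerParameters(command="uv", args=["run", "cognee"])


async def main():
    async with stdio_client(server_params) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            cognify = await session.call_tool("cognify_status", arguments={})
            codify = await session.call_tool("codify_status", arguments={})
            print(cognify, codify)


asyncio.run(main())
```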

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
Commit 966e337500 (parent 13bb244746), authored by Igor Ilic on 2025-05-13 12:09:14 -04:00, committed by GitHub.
12 changed files with 3770 additions and 8249 deletions


@@ -1,12 +1,14 @@
[project]
name = "cognee-mcp"
version = "0.2.3"
version = "0.3.0"
description = "A MCP server project"
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"cognee[postgres,codegraph,gemini,huggingface]==0.1.39",
# For local cognee repo usage remove comment bellow and add absolute path to cognee
#"cognee[postgres,codegraph,gemini,huggingface] @ file:/Users/<username>/Desktop/cognee",
"cognee[postgres,codegraph,gemini,huggingface]==0.1.40",
"fastmcp>=1.0",
"mcp==1.5.0",
"uv>=0.6.3",
@@ -28,5 +30,8 @@ dev = [
"debugpy>=1.8.12",
]
[tool.hatch.metadata]
allow-direct-references = true
[project.scripts]
cognee = "src:main"


@@ -9,6 +9,9 @@ import importlib.util
from contextlib import redirect_stdout
import mcp.types as types
from mcp.server import FastMCP
+from cognee.modules.pipelines.operations.get_pipeline_status import get_pipeline_status
+from cognee.modules.data.methods.get_unique_dataset_id import get_unique_dataset_id
+from cognee.modules.users.methods import get_default_user
from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline
from cognee.modules.search.types import SearchType
from cognee.shared.data_models import KnowledgeGraph
@@ -28,7 +31,6 @@ async def cognify(text: str, graph_model_file: str = None, graph_model_name: str
"""Build knowledge graph from the input text"""
# NOTE: MCP uses stdout to communicate, we must redirect all output
# going to stdout ( like the print function ) to stderr.
-# As cognify is an async background job the output had to be redirected again.
with redirect_stdout(sys.stderr):
logger.info("Cognify process starting.")
if graph_model_file and graph_model_name:
@@ -55,8 +57,8 @@ async def cognify(text: str, graph_model_file: str = None, graph_model_name: str
text = (
f"Background process launched due to MCP timeout limitations.\n"
f"Average completion time is around 4 minutes.\n"
f"For current cognify status you can check the log file at: {log_file}"
f"To check current cognify status use the cognify_status tool\n"
f"or check the log file at: {log_file}"
)
return [
@@ -72,7 +74,6 @@ async def codify(repo_path: str) -> list:
async def codify_task(repo_path: str):
# NOTE: MCP uses stdout to communicate, we must redirect all output
# going to stdout ( like the print function ) to stderr.
-# As codify is an async background job the output had to be redirected again.
with redirect_stdout(sys.stderr):
logger.info("Codify process starting.")
results = []
@@ -88,8 +89,8 @@ async def codify(repo_path: str) -> list:
text = (
f"Background process launched due to MCP timeout limitations.\n"
f"Average completion time is around 4 minutes.\n"
f"For current codify status you can check the log file at: {log_file}"
f"To check current codify status use the codify_status tool\n"
f"or you can check the log file at: {log_file}"
)
return [
@@ -138,6 +139,24 @@ async def prune():
return [types.TextContent(type="text", text="Pruned")]
+@mcp.tool()
+async def cognify_status():
+    """Get status of cognify pipeline"""
+    with redirect_stdout(sys.stderr):
+        user = await get_default_user()
+        status = await get_pipeline_status([await get_unique_dataset_id("main_dataset", user)])
+        return [types.TextContent(type="text", text=str(status))]
+@mcp.tool()
+async def codify_status():
+    """Get status of codify pipeline"""
+    with redirect_stdout(sys.stderr):
+        user = await get_default_user()
+        status = await get_pipeline_status([await get_unique_dataset_id("codebase", user)])
+        return [types.TextContent(type="text", text=str(status))]
def node_to_string(node):
node_data = ", ".join(
[f'{key}: "{value}"' for key, value in node.items() if key in ["id", "name"]]
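For reference, the lookup that both new tools wrap can be reproduced outside the MCP server with the same helpers imported above. A sketch, assuming `cognee==0.1.40` with its databases already initialized and the default user created:

```python
# Sketch: the status lookup behind cognify_status, run directly.
# Assumes cognee==0.1.40 and an initialized local cognee setup.
import asyncio

from cognee.modules.data.methods.get_unique_dataset_id import get_unique_dataset_id
from cognee.modules.pipelines.operations.get_pipeline_status import get_pipeline_status
from cognee.modules.users.methods import get_default_user


async def main():
    user = await get_default_user()
    # cognify writes to "main_dataset"; codify_status uses "codebase" instead.
    dataset_id = await get_unique_dataset_id("main_dataset", user)
    status = await get_pipeline_status([dataset_id])
    print(status)


asyncio.run(main())
```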

cognee-mcp/uv.lock (generated): 4513 changes. File diff suppressed because it is too large.


@@ -13,6 +13,7 @@ from cognee.modules.pipelines.tasks.task import Task
from cognee.modules.users.methods import get_default_user
from cognee.shared.data_models import KnowledgeGraph
from cognee.tasks.documents import classify_documents, extract_chunks_from_documents
+from cognee.modules.data.methods.get_unique_dataset_id import get_unique_dataset_id
from cognee.tasks.graph import extract_graph_from_data
from cognee.tasks.ingestion import ingest_data
from cognee.tasks.repo_processor import get_non_py_files, get_repo_file_dependencies
@@ -64,7 +65,7 @@ async def run_code_graph_pipeline(repo_path, include_docs=False):
),
]
-dataset_id = uuid5(NAMESPACE_OID, "codebase")
+dataset_id = await get_unique_dataset_id("codebase", user)
if include_docs:
non_code_pipeline_run = run_tasks(


@@ -7,6 +7,7 @@ from .get_datasets import get_datasets
from .get_datasets_by_name import get_datasets_by_name
from .get_dataset_data import get_dataset_data
from .get_data import get_data
+from .get_unique_dataset_id import get_unique_dataset_id
# Delete
from .delete_dataset import delete_dataset


@@ -4,8 +4,13 @@ from sqlalchemy import select
from sqlalchemy.orm import joinedload
from cognee.modules.data.models import Dataset
+from cognee.modules.data.methods.get_unique_dataset_id import get_unique_dataset_id
from cognee.modules.users.models import User
+async def create_dataset(dataset_name: str, user: User, session: AsyncSession) -> Dataset:
+    owner_id = user.id
-async def create_dataset(dataset_name: str, owner_id: UUID, session: AsyncSession) -> Dataset:
dataset = (
await session.scalars(
select(Dataset)
@@ -16,10 +21,9 @@ async def create_dataset(dataset_name: str, owner_id: UUID, session: AsyncSessio
).first()
if dataset is None:
-# Dataset id should be generated based on dataset_name and owner_id so multiple users can use the same dataset_name
-dataset = Dataset(
-id=uuid5(NAMESPACE_OID, f"{dataset_name}{str(owner_id)}"), name=dataset_name, data=[]
-)
+# Dataset id should be generated based on dataset_name and owner_id/user so multiple users can use the same dataset_name
+dataset_id = await get_unique_dataset_id(dataset_name=dataset_name, user=user)
+dataset = Dataset(id=dataset_id, name=dataset_name, data=[])
dataset.owner_id = owner_id
session.add(dataset)


@@ -0,0 +1,6 @@
+from uuid import UUID, uuid5, NAMESPACE_OID
+from cognee.modules.users.models import User
+async def get_unique_dataset_id(dataset_name: str, user: User) -> UUID:
+    return uuid5(NAMESPACE_OID, f"{dataset_name}{str(user.id)}")
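The helper derives a deterministic UUIDv5 from the dataset name plus the owning user's id, so callers like `codify_status` can recompute the id for "codebase" without storing it, while two users who pick the same dataset name still get distinct ids. A small sketch of that property (the user ids below are made-up example values):

```python
# Sketch of the id derivation used by get_unique_dataset_id.
# The user ids here are hypothetical example values.
from uuid import NAMESPACE_OID, UUID, uuid5

user_a = UUID("00000000-0000-0000-0000-000000000001")
user_b = UUID("00000000-0000-0000-0000-000000000002")

# The same (dataset_name, user) pair always maps to the same id ...
assert uuid5(NAMESPACE_OID, f"codebase{user_a}") == uuid5(NAMESPACE_OID, f"codebase{user_a}")
# ... while different users sharing a dataset name get different ids.
assert uuid5(NAMESPACE_OID, f"codebase{user_a}") != uuid5(NAMESPACE_OID, f"codebase{user_b}")
```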


@@ -5,6 +5,7 @@ from uuid import uuid5, NAMESPACE_OID
from cognee.modules.data.methods import get_datasets, get_datasets_by_name
from cognee.modules.data.methods.get_dataset_data import get_dataset_data
+from cognee.modules.data.methods.get_unique_dataset_id import get_unique_dataset_id
from cognee.modules.data.models import Data, Dataset
from cognee.modules.pipelines.operations.run_tasks import run_tasks
from cognee.modules.pipelines.models import PipelineRunStatus
@@ -93,7 +94,7 @@ async def run_pipeline(
elif isinstance(dataset, str):
check_dataset_name(dataset)
# Generate id based on unique dataset_id formula
-dataset_id = uuid5(NAMESPACE_OID, f"{dataset}{str(user.id)}")
+dataset_id = await get_unique_dataset_id(dataset_name=dataset, user=user)
if not data:
data: list[Data] = await get_dataset_data(dataset_id=dataset_id)


@@ -104,7 +104,7 @@ async def ingest_data(
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
-dataset = await create_dataset(dataset_name, user.id, session)
+dataset = await create_dataset(dataset_name, user, session)
# Check to see if data should be updated
data_point = (

poetry.lock (generated): 7 changes.

@@ -8085,10 +8085,9 @@ tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"]
name = "pylance"
version = "0.22.0"
description = "python wrapper for Lance columnar format"
-optional = true
+optional = false
python-versions = ">=3.9"
groups = ["main"]
markers = "extra == \"dev\""
files = [
{file = "pylance-0.22.0-cp39-abi3-macosx_10_15_x86_64.whl", hash = "sha256:2c0bb6bf7320e500f0f5948e5b23e4d70d9c84bba15a2db2e877be9637c4dc0c"},
{file = "pylance-0.22.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:341a8cbac762c1f446a05a1513dab1b7930f433a8331b08b0b89a975f3864f6a"},
@@ -11963,7 +11962,7 @@ api = ["gunicorn", "uvicorn"]
chromadb = ["chromadb", "pypika"]
codegraph = ["fastembed", "transformers", "tree-sitter", "tree-sitter-python"]
deepeval = ["deepeval"]
dev = ["coverage", "debugpy", "deptry", "gitpython", "mkdocs-material", "mkdocs-minify-plugin", "mkdocstrings", "mypy", "notebook", "pylance", "pylint", "pytest", "pytest-asyncio", "pytest-cov", "ruff", "tweepy"]
dev = ["coverage", "debugpy", "deptry", "gitpython", "mkdocs-material", "mkdocs-minify-plugin", "mkdocstrings", "mypy", "notebook", "pylint", "pytest", "pytest-asyncio", "pytest-cov", "ruff", "tweepy"]
docs = ["unstructured"]
evals = ["gdown", "plotly"]
falkordb = ["falkordb"]
@@ -11988,4 +11987,4 @@ weaviate = ["weaviate-client"]
[metadata]
lock-version = "2.1"
python-versions = ">=3.10,<=3.13"
content-hash = "94d75c451c7580381771508ddcf6cee599b196d97c36b8b5bb5a3b64146f3224"
content-hash = "9d2a3beda2b6c329e69319e51c28c90d2806d8a257cd971e990600e99c1d96bd"


@@ -57,6 +57,7 @@ dependencies = [
"sentry-sdk[fastapi]>=2.9.0,<3",
"structlog>=25.2.0,<26",
"onnxruntime<=1.21.1",
"pylance==0.22.0",
]
[project.optional-dependencies]
@@ -122,7 +123,6 @@ dev = [
"ruff>=0.9.2,<1.0.0",
"tweepy==4.14.0",
"gitpython>=3.1.43,<4",
"pylance==0.22.0",
"mkdocs-material>=9.5.42,<10",
"mkdocs-minify-plugin>=0.8.0,<0.9",
"mkdocstrings[python]>=0.26.2,<0.27",

uv.lock (generated): 7428 changes. File diff suppressed because it is too large.