Compare commits
9 commits
main
...
refactor/r
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f2b86b3508 | ||
|
|
ba9377f036 | ||
|
|
14164ac2ec | ||
|
|
27cfe8e323 | ||
|
|
661084aa57 | ||
|
|
9adb54a761 | ||
|
|
5e2b3c2e59 | ||
|
|
f467dc9b04 | ||
|
|
6996cdb887 |
17 changed files with 767 additions and 1136 deletions
|
|
@ -40,3 +40,11 @@ allow-direct-references = true
|
|||
[project.scripts]
|
||||
cognee = "src:main"
|
||||
cognee-mcp = "src:main_mcp"
|
||||
|
||||
[tool.pyright]
|
||||
typeCheckingMode = "basic"
|
||||
reportMissingImports = "error"
|
||||
reportUndefinedVariable = "error"
|
||||
reportMissingModuleSource = "error"
|
||||
reportUnusedImport = "warning"
|
||||
reportUnusedVariable = "warning"
|
||||
|
|
|
|||
|
|
@ -1,52 +0,0 @@
|
|||
from datetime import timedelta

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client


# Server parameters for the stdio transport: launch the cognee MCP server
# through `uv run` in the current directory.
server_params = StdioServerParameters(
    command="uv",  # Executable
    args=["--directory", ".", "run", "cognee"],  # Optional command line arguments
    env=None,  # Optional environment variables
)

# NOTE(review): this sample text is never sent to the server below —
# presumably it was meant to be the payload of the "cognify" call;
# confirm the tool's expected argument name before wiring it in.
text = """
Artificial intelligence, or AI, is technology that enables computers
and machines to simulate human intelligence and problem-solving
capabilities.
On its own or combined with other technologies (e.g., sensors,
geolocation, robotics) AI can perform tasks that would otherwise
require human intelligence or intervention. Digital assistants, GPS
guidance, autonomous vehicles, and generative AI tools (like Open
AI's Chat GPT) are just a few examples of AI in the daily news and
our daily lives.
As a field of computer science, artificial intelligence encompasses
(and is often mentioned together with) machine learning and deep
learning. These disciplines involve the development of AI
algorithms, modeled after the decision-making processes of the human
brain, that can ‘learn’ from available data and make increasingly
more accurate classifications or predictions over time.
"""


async def run():
    """Exercise the cognee MCP server end to end over stdio.

    Connects, lists the available tools, then runs prune -> cognify ->
    search and prints the final search result.
    """
    async with stdio_client(server_params) as (read, write):
        # Three-minute read timeout: cognify can be slow.
        async with ClientSession(read, write, timedelta(minutes=3)) as session:
            await session.initialize()

            toolResult = await session.list_tools()

            toolResult = await session.call_tool("prune", arguments={})

            toolResult = await session.call_tool("cognify", arguments={})

            toolResult = await session.call_tool(
                "search", arguments={"search_type": "GRAPH_COMPLETION"}
            )

            # BUG FIX: at this point `toolResult` holds the response of the
            # "search" call above, not the earlier "cognify" call — the old
            # label "Cognify result" was misleading.
            print(f"Search result: {toolResult.content}")


if __name__ == "__main__":
    import asyncio

    asyncio.run(run())
|
||||
3
cognee-mcp/src/clients/__init__.py
Normal file
3
cognee-mcp/src/clients/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
# Package facade: expose CogneeClient as the package's single public name,
# so callers can write `from src.clients import CogneeClient`.
from .cognee_client import CogneeClient

__all__ = ["CogneeClient"]
|
||||
|
|
@ -17,6 +17,7 @@ import json
|
|||
logger = get_logger()
|
||||
|
||||
|
||||
# TODO(daulet) COG-3311: I'm exploring OpenAPI json client generation for backend
|
||||
class CogneeClient:
|
||||
"""
|
||||
Unified client for interacting with Cognee via direct calls or HTTP API.
|
||||
|
|
@ -1,120 +0,0 @@
|
|||
from uuid import NAMESPACE_OID, uuid5
|
||||
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
from cognee.infrastructure.databases.vector import get_vector_engine
|
||||
|
||||
from cognee.low_level import DataPoint
|
||||
from cognee.infrastructure.llm import LLMGateway
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from cognee.modules.engine.models import NodeSet
|
||||
from cognee.tasks.storage import add_data_points, index_graph_edges
|
||||
from typing import Optional, List, Any
|
||||
from pydantic import Field
|
||||
|
||||
logger = get_logger("coding_rule_association")
|
||||
|
||||
|
||||
class Rule(DataPoint):
    """A single developer rule extracted from text.

    The rule body is stored in ``text``; ``belongs_to_set`` links the rule
    to the NodeSet it was filed under.
    """

    text: str = Field(..., description="The coding rule associated with the conversation")
    belongs_to_set: Optional[NodeSet] = None
    # BUG FIX: index_fields must name an actual attribute of this model.
    # The rule body lives in `text` — there is no `rule` field — so the
    # previous value ["rule"] pointed indexing at a nonexistent field.
    metadata: dict = {"index_fields": ["text"]}
|
||||
|
||||
|
||||
class RuleSet(DataPoint):
    """Collection of parsed rules."""

    # Structured-output target for the LLM: the model must return one
    # ``Rule`` per guideline it finds in the input conversation.
    rules: List[Rule] = Field(
        ...,
        description="List of developer rules extracted from the input text. Each rule represents a coding best practice or guideline.",
    )
|
||||
|
||||
|
||||
async def get_existing_rules(rules_nodeset_name: str) -> str:
    """Return the rules already stored under *rules_nodeset_name*.

    Fetches the NodeSet subgraph from the graph engine and formats every
    node payload that carries a ``text`` entry as a "- <rule>" bullet,
    one per line.
    """
    graph_engine = await get_graph_engine()
    nodes_data, _ = await graph_engine.get_nodeset_subgraph(
        node_type=NodeSet, node_name=[rules_nodeset_name]
    )

    bullet_lines = []
    for entry in nodes_data:
        # Only well-formed (node_id, properties) pairs are considered.
        if not (isinstance(entry, tuple) and len(entry) == 2):
            continue
        payload = entry[1]
        if isinstance(payload, dict) and "text" in payload:
            bullet_lines.append(f"- {payload['text']}")

    return "\n".join(bullet_lines)
|
||||
|
||||
|
||||
async def get_origin_edges(data: str, rules: List[Rule]) -> list[Any]:
    """Build edges linking each rule back to the chunk it originated from.

    Looks up the best-matching document chunk for *data* in the vector
    store and, for every rule that has an ``id``, emits a
    ``rule_associated_from`` edge tuple pointing from the rule to that
    origin chunk. Returns an empty list when no origin or no rules exist.
    """
    vector_engine = get_vector_engine()

    origin_chunk = await vector_engine.search("DocumentChunk_text", data, limit=1)

    # The search result shape is defensive territory: tolerate empty or
    # malformed results and fall back to "no origin".
    try:
        origin_id = origin_chunk[0].id
    except (AttributeError, KeyError, TypeError, IndexError):
        origin_id = None

    relationships = []

    # Guard clause: nothing to associate without both an origin and rules.
    if not (origin_id and isinstance(rules, (list, tuple)) and len(rules) > 0):
        logger.info("No valid origin_id or rules provided.")
        return relationships

    rel_name = "rule_associated_from"
    for rule in rules:
        try:
            rule_id = getattr(rule, "id", None)
            if rule_id is None:
                continue
            edge_properties = {
                "relationship_name": rel_name,
                "source_node_id": rule_id,
                "target_node_id": origin_id,
                "ontology_valid": False,
            }
            relationships.append((rule_id, origin_id, rel_name, edge_properties))
        except Exception as e:
            # Best-effort: a single bad rule must not abort the batch.
            logger.info(f"Warning: Skipping invalid rule due to error: {e}")

    return relationships
|
||||
|
||||
|
||||
async def add_rule_associations(data: str, rules_nodeset_name: str):
    """Extract coding rules from *data* via the LLM and persist them.

    Renders user/system prompts (including the rules already stored under
    *rules_nodeset_name* so the LLM can avoid duplicates), asks the LLM for
    a structured ``RuleSet``, attaches every new rule to the named NodeSet,
    then saves the rules and their origin edges.

    Parameters
    ----------
    data : str
        The conversation/text to mine for rules.
    rules_nodeset_name : str
        Name of the NodeSet grouping the rules.
    """
    graph_engine = await get_graph_engine()
    existing_rules = await get_existing_rules(rules_nodeset_name=rules_nodeset_name)

    user_context = {"chat": data, "rules": existing_rules}

    user_prompt = LLMGateway.render_prompt(
        "coding_rule_association_agent_user.txt", context=user_context
    )
    system_prompt = LLMGateway.render_prompt("coding_rule_association_agent_system.txt", context={})

    # Structured output: the LLM response is parsed directly into RuleSet.
    rule_list = await LLMGateway.acreate_structured_output(
        text_input=user_prompt, system_prompt=system_prompt, response_model=RuleSet
    )

    # Deterministic NodeSet id: uuid5 over the set name, so repeated runs
    # reuse the same NodeSet node instead of creating duplicates.
    rules_nodeset = NodeSet(
        id=uuid5(NAMESPACE_OID, name=rules_nodeset_name), name=rules_nodeset_name
    )
    for rule in rule_list.rules:
        rule.belongs_to_set = rules_nodeset

    # Edges must be computed before the rules are persisted, but added
    # after — add_data_points creates the rule nodes the edges point at.
    edges_to_save = await get_origin_edges(data=data, rules=rule_list.rules)

    await add_data_points(data_points=rule_list.rules)

    if len(edges_to_save) > 0:
        await graph_engine.add_edges(edges_to_save)
        await index_graph_edges(edges_to_save)
|
||||
File diff suppressed because it is too large
Load diff
3
cognee-mcp/src/shared/__init__.py
Normal file
3
cognee-mcp/src/shared/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
# Re-export the context module so tools can write `from src.shared import context`.
from . import context

__all__ = ["context"]
|
||||
11
cognee-mcp/src/shared/context.py
Normal file
11
cognee-mcp/src/shared/context.py
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
from typing import Optional

from src.clients.cognee_client import CogneeClient


# Process-wide CogneeClient shared by the MCP tool modules. Populated once
# via set_cognee_client() (presumably at server startup — confirm against
# the server entry point) and read by tools as ``context.cognee_client``.
cognee_client: Optional["CogneeClient"] = None


def set_cognee_client(client: "CogneeClient") -> None:
    """Set the global cognee client instance."""
    global cognee_client
    cognee_client = client
|
||||
17
cognee-mcp/src/tools/__init__.py
Normal file
17
cognee-mcp/src/tools/__init__.py
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
"""Cognee MCP Tools - All tools for interacting with the Cognee knowledge graph."""

from .cognify import cognify
from .search import search
from .list_data import list_data
from .delete import delete
from .prune import prune
from .cognify_status import cognify_status

# Public API: one coroutine per MCP tool, re-exported for the server.
__all__ = [
    "cognify",
    "search",
    "list_data",
    "delete",
    "prune",
    "cognify_status",
]
|
||||
178
cognee-mcp/src/tools/cognify.py
Normal file
178
cognee-mcp/src/tools/cognify.py
Normal file
|
|
@ -0,0 +1,178 @@
|
|||
"""Tool for transforming data into a structured knowledge graph."""
|
||||
|
||||
import sys
|
||||
import asyncio
|
||||
from contextlib import redirect_stdout
|
||||
import mcp.types as types
|
||||
from cognee.shared.logging_utils import get_logger, get_log_file_location
|
||||
|
||||
from src.shared import context
|
||||
from .utils import load_class
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
async def cognify(
    data: str, graph_model_file: str = None, graph_model_name: str = None, custom_prompt: str = None
) -> list:
    """
    Transform ingested data into a structured knowledge graph.

    Adds *data* and runs the cognify pipeline (entity extraction,
    relationship detection, graph construction) as a background task —
    MCP responses must return quickly, so the actual processing continues
    after this call returns — and immediately reports where to check
    progress.

    Parameters
    ----------
    data : str
        Data to process: natural language, a file location, or any
        text-based information to add to the knowledge graph.
    graph_model_file : str, optional
        Path to a file declaring a custom graph model class, loaded via
        importlib. Ignored (with a warning) in API mode. Default None uses
        Cognee's built-in KnowledgeGraph model.
    graph_model_name : str, optional
        Name of the class inside ``graph_model_file`` to instantiate.
        Required when ``graph_model_file`` is given.
    custom_prompt : str, optional
        Prompt overriding the default prompts for entity/relationship
        extraction.

    Returns
    -------
    list
        A single TextContent item describing the background launch and how
        to check its status (cognify_status tool, or the log file).

    Notes
    -----
    - Requires LLM_API_KEY plus reachable vector and graph databases.
    - Processing time depends on input size; poll with cognify_status.
    """

    async def cognify_task(
        data: str,
        graph_model_file: str = None,
        graph_model_name: str = None,
        custom_prompt: str = None,
    ) -> str:
        """Build knowledge graph from the input text"""
        # NOTE: MCP uses stdout to communicate, we must redirect all output
        # going to stdout ( like the print function ) to stderr.
        with redirect_stdout(sys.stderr):
            logger.info("Cognify process starting.")

            graph_model = None
            if graph_model_file and graph_model_name:
                if context.cognee_client.use_api:
                    logger.warning("Custom graph models are not supported in API mode, ignoring.")
                else:
                    # FIX: dropped an unused `KnowledgeGraph` import that was
                    # here — the custom class comes solely from load_class().
                    graph_model = load_class(graph_model_file, graph_model_name)

            await context.cognee_client.add(data)

            try:
                await context.cognee_client.cognify(
                    custom_prompt=custom_prompt, graph_model=graph_model
                )
                logger.info("Cognify process finished.")
            except Exception as e:
                logger.error("Cognify process failed.")
                # FIX: chain the original exception so the root cause
                # survives in the traceback (`raise ... from e`).
                raise ValueError(f"Failed to cognify: {str(e)}") from e

    # Fire-and-forget: the MCP response below returns immediately.
    asyncio.create_task(
        cognify_task(
            data=data,
            graph_model_file=graph_model_file,
            graph_model_name=graph_model_name,
            custom_prompt=custom_prompt,
        )
    )

    log_file = get_log_file_location()
    text = (
        f"Background process launched due to MCP timeout limitations.\n"
        f"To check current cognify status use the cognify_status tool\n"
        f"or check the log file at: {log_file}"
    )

    return [
        types.TextContent(
            type="text",
            text=text,
        )
    ]
|
||||
51
cognee-mcp/src/tools/cognify_status.py
Normal file
51
cognee-mcp/src/tools/cognify_status.py
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
"""Tool for getting the status of the cognify pipeline."""
|
||||
|
||||
import sys
|
||||
from contextlib import redirect_stdout
|
||||
import mcp.types as types
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
|
||||
from src.shared import context
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
async def cognify_status():
    """
    Report the current status of the cognify pipeline.

    Queries the "cognify_pipeline" status for the "main_dataset" of the
    default user and returns it as readable text, covering active and
    recently completed jobs.

    Returns
    -------
    list
        One TextContent item carrying the status string, or an error
        message when the lookup fails.

    Notes
    -----
    - Not available in API mode.
    """
    with redirect_stdout(sys.stderr):
        try:
            from cognee.modules.data.methods.get_unique_dataset_id import get_unique_dataset_id
            from cognee.modules.users.methods import get_default_user

            default_user = await get_default_user()
            dataset_ids = [await get_unique_dataset_id("main_dataset", default_user)]
            pipeline_status = await context.cognee_client.get_pipeline_status(
                dataset_ids, "cognify_pipeline"
            )
            status_text = str(pipeline_status)
        except NotImplementedError:
            error_msg = "❌ Pipeline status is not available in API mode"
            logger.error(error_msg)
            return [types.TextContent(type="text", text=error_msg)]
        except Exception as e:
            error_msg = f"❌ Failed to get cognify status: {str(e)}"
            logger.error(error_msg)
            return [types.TextContent(type="text", text=error_msg)]
        else:
            return [types.TextContent(type="text", text=status_text)]
|
||||
90
cognee-mcp/src/tools/delete.py
Normal file
90
cognee-mcp/src/tools/delete.py
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
"""Tool for deleting specific data from a dataset."""
|
||||
|
||||
import sys
|
||||
import json
|
||||
from uuid import UUID
|
||||
from contextlib import redirect_stdout
|
||||
import mcp.types as types
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from cognee.modules.storage.utils import JSONEncoder
|
||||
|
||||
from src.shared import context
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
async def delete(data_id: str, dataset_id: str, mode: str = "soft") -> list:
    """
    Delete one data item from a dataset in the Cognee knowledge graph.

    The dataset itself is kept; only the addressed data item is removed.

    Parameters
    ----------
    data_id : str
        UUID string of the data item to remove.
    dataset_id : str
        UUID string of the dataset containing that item.
    mode : str, optional
        "soft" (default) removes the data but keeps related entities that
        might be shared; "hard" also removes degree-one entity nodes that
        become orphaned by the deletion.

    Returns
    -------
    list
        One TextContent item with the deletion outcome, or an error message.

    Notes
    -----
    - Irreversible: the specified data is permanently removed.
    - For wiping everything, use the prune tool instead.
    """

    with redirect_stdout(sys.stderr):
        try:
            logger.info(
                f"Starting delete operation for data_id: {data_id}, dataset_id: {dataset_id}, mode: {mode}"
            )

            # UUID() raises ValueError on malformed input; reported below.
            result = await context.cognee_client.delete(
                data_id=UUID(data_id), dataset_id=UUID(dataset_id), mode=mode
            )

            logger.info(f"Delete operation completed successfully: {result}")

            # Serialize the client's result for the MCP response.
            payload = json.dumps(result, indent=2, cls=JSONEncoder)
            message = f"✅ Delete operation completed successfully!\n\n{payload}"
            return [types.TextContent(type="text", text=message)]

        except ValueError as e:
            # Covers malformed UUID strings.
            error_msg = f"❌ Invalid UUID format: {str(e)}"
            logger.error(error_msg)
            return [types.TextContent(type="text", text=error_msg)]

        except Exception as e:
            # DocumentNotFoundError, DatasetNotFoundError, transport errors…
            error_msg = f"❌ Delete operation failed: {str(e)}"
            logger.error(f"Delete operation error: {str(e)}")
            return [types.TextContent(type="text", text=error_msg)]
|
||||
137
cognee-mcp/src/tools/list_data.py
Normal file
137
cognee-mcp/src/tools/list_data.py
Normal file
|
|
@ -0,0 +1,137 @@
|
|||
"""Tool for listing datasets and their data items."""
|
||||
|
||||
import sys
|
||||
from uuid import UUID
|
||||
from contextlib import redirect_stdout
|
||||
import mcp.types as types
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
|
||||
from src.shared import context
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
async def list_data(dataset_id: str = None) -> list:
    """
    List all datasets and their data items with IDs for deletion operations.

    This function helps users identify data IDs and dataset IDs that can be used
    with the delete tool. It provides a comprehensive view of available data.

    Parameters
    ----------
    dataset_id : str, optional
        If provided, only list data items from this specific dataset.
        If None, lists all datasets and their data items.
        Should be a valid UUID string.

    Returns
    -------
    list
        A list containing a single TextContent object with formatted information
        about datasets and data items, including their IDs for deletion.

    Notes
    -----
    - Use this tool to identify data_id and dataset_id values for the delete tool
    - UUIDs are displayed in a format ready for use with other tools
    - Per-dataset detail requires direct mode; dataset listing works in both modes
    """

    with redirect_stdout(sys.stderr):
        try:
            output_lines = []

            if dataset_id:
                # Detailed data listing for specific dataset is only available in direct mode
                if context.cognee_client.use_api:
                    return [
                        types.TextContent(
                            type="text",
                            text="❌ Detailed data listing for specific datasets is not available in API mode.\nPlease use the API directly or use direct mode.",
                        )
                    ]

                # Deferred import: these cognee modules only exist in direct mode.
                from cognee.modules.users.methods import get_default_user
                from cognee.modules.data.methods import get_dataset, get_dataset_data

                logger.info(f"Listing data for dataset: {dataset_id}")
                # UUID() raises ValueError on malformed input, caught below.
                dataset_uuid = UUID(dataset_id)
                user = await get_default_user()

                dataset = await get_dataset(user.id, dataset_uuid)

                if not dataset:
                    return [
                        types.TextContent(type="text", text=f"❌ Dataset not found: {dataset_id}")
                    ]

                # Get data items in the dataset
                data_items = await get_dataset_data(dataset.id)

                output_lines.append(f"📁 Dataset: {dataset.name}")
                output_lines.append(f"   ID: {dataset.id}")
                output_lines.append(f"   Created: {dataset.created_at}")
                output_lines.append(f"   Data items: {len(data_items)}")
                output_lines.append("")

                if data_items:
                    for i, data_item in enumerate(data_items, 1):
                        output_lines.append(f"   📄 Data item #{i}:")
                        output_lines.append(f"      Data ID: {data_item.id}")
                        output_lines.append(f"      Name: {data_item.name or 'Unnamed'}")
                        output_lines.append(f"      Created: {data_item.created_at}")
                        output_lines.append("")
                else:
                    output_lines.append("   (No data items in this dataset)")

            else:
                # List all datasets - works in both modes
                logger.info("Listing all datasets")
                datasets = await context.cognee_client.list_datasets()

                if not datasets:
                    return [
                        types.TextContent(
                            type="text",
                            text="📂 No datasets found.\nUse the cognify tool to create your first dataset!",
                        )
                    ]

                output_lines.append("📂 Available Datasets:")
                output_lines.append("=" * 50)
                output_lines.append("")

                for i, dataset in enumerate(datasets, 1):
                    # In API mode, dataset is a dict; in direct mode, it's formatted as dict
                    if isinstance(dataset, dict):
                        output_lines.append(f"{i}. 📁 {dataset.get('name', 'Unnamed')}")
                        output_lines.append(f"   Dataset ID: {dataset.get('id')}")
                        output_lines.append(f"   Created: {dataset.get('created_at', 'N/A')}")
                    else:
                        output_lines.append(f"{i}. 📁 {dataset.name}")
                        output_lines.append(f"   Dataset ID: {dataset.id}")
                        output_lines.append(f"   Created: {dataset.created_at}")
                    output_lines.append("")

                # Usage hints for the direct-mode-only follow-up tools.
                if not context.cognee_client.use_api:
                    output_lines.append("💡 To see data items in a specific dataset, use:")
                    output_lines.append('   list_data(dataset_id="your-dataset-id-here")')
                    output_lines.append("")
                    output_lines.append("🗑️ To delete specific data, use:")
                    output_lines.append('   delete(data_id="data-id", dataset_id="dataset-id")')

            result_text = "\n".join(output_lines)
            logger.info("List data operation completed successfully")

            return [types.TextContent(type="text", text=result_text)]

        except ValueError as e:
            error_msg = f"❌ Invalid UUID format: {str(e)}"
            logger.error(error_msg)
            return [types.TextContent(type="text", text=error_msg)]

        except Exception as e:
            error_msg = f"❌ Failed to list data: {str(e)}"
            logger.error(f"List data error: {str(e)}")
            return [types.TextContent(type="text", text=error_msg)]
|
||||
45
cognee-mcp/src/tools/prune.py
Normal file
45
cognee-mcp/src/tools/prune.py
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
"""Tool for resetting the Cognee knowledge graph."""
|
||||
|
||||
import sys
|
||||
from contextlib import redirect_stdout
|
||||
import mcp.types as types
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
|
||||
from src.shared import context
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
async def prune():
    """
    Reset the Cognee knowledge graph by removing all stored information.

    Wipes both the data layer (prune_data) and the system layer
    (prune_system with metadata), leaving an empty knowledge base.

    Returns
    -------
    list
        One TextContent item confirming the prune, or an error message.

    Notes
    -----
    - Irreversible: all memory data is permanently deleted.
    - Not available in API mode.
    """
    with redirect_stdout(sys.stderr):
        try:
            client = context.cognee_client
            await client.prune_data()
            await client.prune_system(metadata=True)
        except NotImplementedError:
            error_msg = "❌ Prune operation is not available in API mode"
            logger.error(error_msg)
            return [types.TextContent(type="text", text=error_msg)]
        except Exception as e:
            error_msg = f"❌ Prune operation failed: {str(e)}"
            logger.error(error_msg)
            return [types.TextContent(type="text", text=error_msg)]
        return [types.TextContent(type="text", text="Pruned")]
|
||||
166
cognee-mcp/src/tools/search.py
Normal file
166
cognee-mcp/src/tools/search.py
Normal file
|
|
@ -0,0 +1,166 @@
|
|||
"""Tool for searching and querying the knowledge graph."""
|
||||
|
||||
import sys
|
||||
import json
|
||||
from contextlib import redirect_stdout
|
||||
import mcp.types as types
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from cognee.modules.storage.utils import JSONEncoder
|
||||
|
||||
from src.shared import context
|
||||
from .utils import retrieved_edges_to_string
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
async def search(search_query: str, search_type: str) -> list:
    """
    Search and query the knowledge graph for insights, information, and connections.

    This is the final step in the Cognee workflow: it retrieves information from the
    previously built knowledge graph. It supports multiple search modes optimized for
    different use cases, from raw chunk retrieval to LLM-backed graph reasoning.

    Prerequisites
    -------------
    - LLM_API_KEY set for LLM-backed search types (GRAPH_COMPLETION, RAG_COMPLETION).
    - Data added via ``cognee.add()`` and processed via ``cognee.cognify()``.
    - The vector / graph database providers must match those used during cognify.

    Parameters
    ----------
    search_query : str
        The question or query in natural language, e.g.
        "What are the main themes in this research?".
    search_type : str
        The search mode to use (case-insensitive; converted to uppercase):
        - "GRAPH_COMPLETION": LLM answer using full graph context (recommended).
        - "RAG_COMPLETION": LLM answer using document chunks without graph structure.
        - "CODE": code-related knowledge returned as JSON.
        - "CHUNKS": raw matching text chunks.
        - "SUMMARIES": pre-generated hierarchical summaries.
        - "CYPHER": direct graph database queries.
        - "FEELING_LUCKY": automatically selects the best search type.

    Returns
    -------
    list
        A list containing a single TextContent object with the search results.
        The text format depends on ``search_type``: conversational strings for
        the *_COMPLETION modes, JSON for CODE, stringified lists otherwise.

    Notes
    -----
    - Different search types produce different output formats.
    - This function converts Cognee's internal result format to MCP's output format.
    """

    async def search_task(search_query: str, search_type: str) -> str:
        """Run the search and normalize the result to a single string."""
        # NOTE: MCP uses stdout to communicate; we must redirect all output
        # going to stdout (like the print function) to stderr.
        with redirect_stdout(sys.stderr):
            search_results = await context.cognee_client.search(
                query_text=search_query, query_type=search_type
            )

            # Normalize once instead of calling .upper() in every branch.
            normalized_type = search_type.upper()

            # Handle different result formats based on API vs direct mode.
            if context.cognee_client.use_api:
                # API mode returns JSON-serialized results.
                if isinstance(search_results, str):
                    return search_results
                elif isinstance(search_results, list):
                    if (
                        normalized_type in ["GRAPH_COMPLETION", "RAG_COMPLETION"]
                        and len(search_results) > 0
                    ):
                        return str(search_results[0])
                    return str(search_results)
                else:
                    return json.dumps(search_results, cls=JSONEncoder)
            else:
                # Direct mode processing.
                if normalized_type == "CODE":
                    return json.dumps(search_results, cls=JSONEncoder)
                elif normalized_type in ("GRAPH_COMPLETION", "RAG_COMPLETION"):
                    # Guard against empty results before indexing: the API-mode
                    # branch above checks emptiness, and without this guard an
                    # empty result list raised IndexError here.
                    if search_results:
                        return str(search_results[0])
                    return str(search_results)
                elif normalized_type == "CHUNKS":
                    return str(search_results)
                elif normalized_type == "INSIGHTS":
                    # Triplet results are rendered as human-readable edge lines.
                    return retrieved_edges_to_string(search_results)
                else:
                    return str(search_results)

    search_results = await search_task(search_query, search_type)
    return [types.TextContent(type="text", text=search_results)]
|
||||
39
cognee-mcp/src/tools/utils.py
Normal file
39
cognee-mcp/src/tools/utils.py
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
"""
|
||||
Utility functions for cognee tools.
|
||||
"""
|
||||
|
||||
import os
|
||||
import importlib.util
|
||||
|
||||
|
||||
def node_to_string(node):
    """Render a node dict as a compact ``Node(...)`` string.

    Only the "id" and "name" entries are included, in the dict's own
    iteration order; every other key is ignored.
    """
    parts = []
    for field, value in node.items():
        if field in ("id", "name"):
            parts.append(f'{field}: "{value}"')
    return "Node(" + ", ".join(parts) + ")"
|
||||
|
||||
|
||||
def retrieved_edges_to_string(search_results):
    """Format graph triplets as human-readable edge descriptions, one per line.

    Each item of *search_results* is a (source_node, edge, target_node) triplet;
    the edge dict must carry a "relationship_name" entry.
    """
    return "\n".join(
        f'{node_to_string(source)} {edge["relationship_name"]} {node_to_string(target)}'
        for source, edge, target in search_results
    )
|
||||
|
||||
|
||||
def load_class(model_file, model_name):
    """Import and return the attribute *model_name* from the Python file *model_file*.

    The file is loaded under the fixed module name "graph_model". Raises
    ValueError when no import spec can be built for the file, and ImportError
    when the spec has no loader.
    """
    absolute_path = os.path.abspath(model_file)

    spec = importlib.util.spec_from_file_location("graph_model", absolute_path)
    if spec is None:
        raise ValueError(f"Could not load specification for module from file: {absolute_path}")
    if spec.loader is None:
        raise ImportError(f"Spec loader is None for module file: {absolute_path}")

    loaded_module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(loaded_module)
    return getattr(loaded_module, model_name)
|
||||
Loading…
Add table
Reference in a new issue