diff --git a/cognee-mcp/src/server.py b/cognee-mcp/src/server.py index ce6dad88a..96068d23f 100755 --- a/cognee-mcp/src/server.py +++ b/cognee-mcp/src/server.py @@ -1,3 +1,14 @@ +""" +MCP Registry Server + +A personal registry for discovering and remembering MCP servers. + +Available Tools: +- remember_mcp_server: Store MCP server information +- find_mcp_server: Search for servers by requirements +- list_mcp_servers: View all stored servers +- clear_registry: Clear the registry +""" import json import os import sys @@ -24,25 +35,15 @@ except ImportError: from cognee_client import CogneeClient -try: - from cognee.tasks.codingagents.coding_rule_associations import ( - add_rule_associations, - get_existing_rules, - ) -except ModuleNotFoundError: - from .codingagents.coding_rule_associations import ( - add_rule_associations, - get_existing_rules, - ) - - -mcp = FastMCP("Cognee") +mcp = FastMCP("MCP-Registry") logger = get_logger() cognee_client: Optional[CogneeClient] = None + + async def run_sse_with_cors(): """Custom SSE transport with CORS middleware.""" sse_app = mcp.sse_app() @@ -91,941 +92,306 @@ async def health_check(request): @mcp.tool() -async def cognee_add_developer_rules( - base_path: str = ".", graph_model_file: str = None, graph_model_name: str = None +async def remember_mcp_server( + server_name: str, + description: str, + capabilities: str, + url: str = None, + command: str = None, + args: str = None, + installation: str = None, + repository_url: str = None, + documentation_url: str = None, + tags: str = None, ) -> list: """ - Ingest core developer rule files into Cognee's memory layer. + Store information about an MCP server for future discovery. - This function loads a predefined set of developer-related configuration, - rule, and documentation files from the base repository and assigns them - to the special 'developer_rules' node set in Cognee. It ensures these - foundational files are always part of the structured memory graph. 
+ Use this when you learn about an MCP server and want to remember its details, + capabilities, and connection information for later retrieval. Parameters ---------- - base_path : str - Root path to resolve relative file paths. Defaults to current directory. + server_name : str + The name of the MCP server (e.g., "filesystem", "brave-search", "puppeteer") - graph_model_file : str, optional - Optional path to a custom schema file for knowledge graph generation. + description : str + A comprehensive description of what the MCP server does, its main features, + and what problems it solves. Be detailed to improve search accuracy. - graph_model_name : str, optional - Optional class name to use from the graph_model_file schema. + capabilities : str + What the server can do. List specific capabilities, use cases, and features. + Examples: "file operations, directory listing, search files" + or "web search, real-time information, news retrieval" + + url : str, optional + Server URL for HTTP/SSE connections (e.g., "http://localhost:8124/sse") + + command : str, optional + Command to run for stdio-based servers (e.g., "python", "npx") + + args : str, optional + Command arguments as a comma-separated string (e.g., "src/server.py, --transport, stdio") + + installation : str, optional + How to install and configure the server (commands, config examples, etc.) + + repository_url : str, optional + GitHub or source code repository URL + + documentation_url : str, optional + Link to documentation or README + + tags : str, optional + Comma-separated tags for categorization (e.g., "filesystem, tools, dev-tools") Returns ------- list - A message indicating how many rule files were scheduled for ingestion, - and how to check their processing status. + A TextContent object confirming the server was stored successfully. - Notes - ----- - - Each file is processed asynchronously in the background. - - Files are attached to the 'developer_rules' node set. 
- - Missing files are skipped with a logged warning. + Examples + -------- + ```python + await remember_mcp_server( + server_name="filesystem", + description="Provides comprehensive file system operations including reading, writing, editing files and directories", + capabilities="read files, write files, search files, list directories, create directories, move files", + installation="npx -y @modelcontextprotocol/server-filesystem /path/to/allowed/directory", + repository_url="https://github.com/modelcontextprotocol/servers", + tags="filesystem, tools, files" + ) + ``` """ - developer_rule_paths = [ - ".cursorrules", - ".cursor/rules", - ".same/todos.md", - ".windsurfrules", - ".clinerules", - "CLAUDE.md", - ".sourcegraph/memory.md", - "AGENT.md", - "AGENTS.md", - ] - - async def cognify_task(file_path: str) -> None: + async def store_mcp_server() -> None: with redirect_stdout(sys.stderr): - logger.info(f"Starting cognify for: {file_path}") + logger.info(f"Storing MCP server: {server_name}") + + # Create structured content about the MCP server + server_content = f""" +# MCP Server: {server_name} + +## Description +{description} + +## Capabilities +{capabilities} + +## Connection +URL: {url or 'Not provided'} +Command: {command or 'Not provided'} +Args: {args or 'Not provided'} + +## Installation +{installation or 'Not provided'} + +## Repository +{repository_url or 'Not provided'} + +## Documentation +{documentation_url or 'Not provided'} + +## Tags +{tags or 'Not provided'} +""" + try: - await cognee_client.add(file_path, node_set=["developer_rules"]) + # Add to knowledge graph with special node set + await cognee_client.add(server_content, node_set=["mcp_servers", server_name]) - model = None - if graph_model_file and graph_model_name: - if cognee_client.use_api: - logger.warning( - "Custom graph models are not supported in API mode, ignoring." 
- ) - else: - from cognee.shared.data_models import KnowledgeGraph + # Process into knowledge graph + await cognee_client.cognify() - model = load_class(graph_model_file, graph_model_name) - - await cognee_client.cognify(graph_model=model) - logger.info(f"Cognify finished for: {file_path}") + logger.info(f"Successfully stored MCP server: {server_name}") except Exception as e: - logger.error(f"Cognify failed for {file_path}: {str(e)}") - raise ValueError(f"Failed to cognify: {str(e)}") + logger.error(f"Failed to store MCP server {server_name}: {str(e)}") + raise ValueError(f"Failed to store MCP server: {str(e)}") + + # Run as background task + asyncio.create_task(store_mcp_server()) - tasks = [] - for rel_path in developer_rule_paths: - abs_path = os.path.join(base_path, rel_path) - if os.path.isfile(abs_path): - tasks.append(asyncio.create_task(cognify_task(abs_path))) - else: - logger.warning(f"Skipped missing developer rule file: {abs_path}") log_file = get_log_file_location() return [ types.TextContent( type="text", text=( - f"Started cognify for {len(tasks)} developer rule files in background.\n" - f"All are added to the `developer_rules` node set.\n" - f"Use `cognify_status` or check logs at {log_file} to monitor progress." + f"✅ Started storing MCP server '{server_name}' in background.\n" + f"Check logs at {log_file} for completion status.\n" + f"Use 'find_mcp_server' to search for it once processing is complete." ), ) ] @mcp.tool() -async def cognify( - data: str, graph_model_file: str = None, graph_model_name: str = None, custom_prompt: str = None -) -> list: +async def find_mcp_server(requirements: str, max_results: int = 5) -> list: """ - Transform ingested data into a structured knowledge graph. + Search for MCP servers that match your requirements. - This is the core processing step in Cognee that converts raw text and documents - into an intelligent knowledge graph. 
It analyzes content, extracts entities and - relationships, and creates semantic connections for enhanced search and reasoning. - - Prerequisites: - - **LLM_API_KEY**: Must be configured (required for entity extraction and graph generation) - - **Data Added**: Must have data previously added via `cognee.add()` - - **Vector Database**: Must be accessible for embeddings storage - - **Graph Database**: Must be accessible for relationship storage - - Input Requirements: - - **Content Types**: Works with any text-extractable content including: - * Natural language documents - * Structured data (CSV, JSON) - * Code repositories - * Academic papers and technical documentation - * Mixed multimedia content (with text extraction) - - Processing Pipeline: - 1. **Document Classification**: Identifies document types and structures - 2. **Permission Validation**: Ensures user has processing rights - 3. **Text Chunking**: Breaks content into semantically meaningful segments - 4. **Entity Extraction**: Identifies key concepts, people, places, organizations - 5. **Relationship Detection**: Discovers connections between entities - 6. **Graph Construction**: Builds semantic knowledge graph with embeddings - 7. **Content Summarization**: Creates hierarchical summaries for navigation + Searches through stored MCP servers and returns the ones that best match your needs + based on their capabilities and descriptions. Parameters ---------- - data : str - The data to be processed and transformed into structured knowledge. - This can include natural language, file location, or any text-based information - that should become part of the agent's memory. - - graph_model_file : str, optional - Path to a custom schema file that defines the structure of the generated knowledge graph. - If provided, this file will be loaded using importlib to create a custom graph model. - Default is None, which uses Cognee's built-in KnowledgeGraph model. 
- - graph_model_name : str, optional - Name of the class within the graph_model_file to instantiate as the graph model. - Required if graph_model_file is specified. - Default is None, which uses the default KnowledgeGraph class. - - custom_prompt : str, optional - Custom prompt string to use for entity extraction and graph generation. - If provided, this prompt will be used instead of the default prompts for - knowledge graph extraction. The prompt should guide the LLM on how to - extract entities and relationships from the text content. - - Returns - ------- - list - A list containing a single TextContent object with information about the - background task launch and how to check its status. - - Next Steps: - After successful cognify processing, use search functions to query the knowledge: - - ```python - import cognee - from cognee import SearchType - - # Process your data into knowledge graph - await cognee.cognify() - - # Query for insights using different search types: - - # 1. Natural language completion with graph context - insights = await cognee.search( - "What are the main themes?", - query_type=SearchType.GRAPH_COMPLETION - ) - - # 2. Get entity relationships and connections - relationships = await cognee.search( - "connections between concepts", - query_type=SearchType.GRAPH_COMPLETION - ) - - # 3. 
Find relevant document chunks - chunks = await cognee.search( - "specific topic", - query_type=SearchType.CHUNKS - ) - ``` - - Environment Variables: - Required: - - LLM_API_KEY: API key for your LLM provider - - Optional: - - LLM_PROVIDER, LLM_MODEL, VECTOR_DB_PROVIDER, GRAPH_DATABASE_PROVIDER - - LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False) - - LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60) - - Notes - ----- - - The function launches a background task and returns immediately - - The actual cognify process may take significant time depending on text length - - Use the cognify_status tool to check the progress of the operation - - """ - - async def cognify_task( - data: str, - graph_model_file: str = None, - graph_model_name: str = None, - custom_prompt: str = None, - ) -> str: - """Build knowledge graph from the input text""" - # NOTE: MCP uses stdout to communicate, we must redirect all output - # going to stdout ( like the print function ) to stderr. 
- with redirect_stdout(sys.stderr): - logger.info("Cognify process starting.") - - graph_model = None - if graph_model_file and graph_model_name: - if cognee_client.use_api: - logger.warning("Custom graph models are not supported in API mode, ignoring.") - else: - from cognee.shared.data_models import KnowledgeGraph - - graph_model = load_class(graph_model_file, graph_model_name) - - await cognee_client.add(data) - - try: - await cognee_client.cognify(custom_prompt=custom_prompt, graph_model=graph_model) - logger.info("Cognify process finished.") - except Exception as e: - logger.error("Cognify process failed.") - raise ValueError(f"Failed to cognify: {str(e)}") - - asyncio.create_task( - cognify_task( - data=data, - graph_model_file=graph_model_file, - graph_model_name=graph_model_name, - custom_prompt=custom_prompt, - ) - ) - - log_file = get_log_file_location() - text = ( - f"Background process launched due to MCP timeout limitations.\n" - f"To check current cognify status use the cognify_status tool\n" - f"or check the log file at: {log_file}" - ) - - return [ - types.TextContent( - type="text", - text=text, - ) - ] - - -@mcp.tool( - name="save_interaction", description="Logs user-agent interactions and query-answer pairs" -) -async def save_interaction(data: str) -> list: - """ - Transform and save a user-agent interaction into structured knowledge. - - Parameters - ---------- - data : str - The input string containing user queries and corresponding agent answers. - - Returns - ------- - list - A list containing a single TextContent object with information about the background task launch. 
- """ - - async def save_user_agent_interaction(data: str) -> None: - """Build knowledge graph from the interaction data""" - with redirect_stdout(sys.stderr): - logger.info("Save interaction process starting.") - - await cognee_client.add(data, node_set=["user_agent_interaction"]) - - try: - await cognee_client.cognify() - logger.info("Save interaction process finished.") - - # Rule associations only work in direct mode - if not cognee_client.use_api: - logger.info("Generating associated rules from interaction data.") - await add_rule_associations(data=data, rules_nodeset_name="coding_agent_rules") - logger.info("Associated rules generated from interaction data.") - else: - logger.warning("Rule associations are not available in API mode, skipping.") - - except Exception as e: - logger.error("Save interaction process failed.") - raise ValueError(f"Failed to Save interaction: {str(e)}") - - asyncio.create_task( - save_user_agent_interaction( - data=data, - ) - ) - - log_file = get_log_file_location() - text = ( - f"Background process launched to process the user-agent interaction.\n" - f"To check the current status, use the cognify_status tool or check the log file at: {log_file}" - ) - - return [ - types.TextContent( - type="text", - text=text, - ) - ] - - -@mcp.tool() -async def codify(repo_path: str) -> list: - """ - Analyze and generate a code-specific knowledge graph from a software repository. - - This function launches a background task that processes the provided repository - and builds a code knowledge graph. The function returns immediately while - the processing continues in the background due to MCP timeout constraints. - - Parameters - ---------- - repo_path : str - Path to the code repository to analyze. This can be a local file path or a - relative path to a repository. The path should point to the root of the - repository or a specific directory within it. 
- - Returns - ------- - list - A list containing a single TextContent object with information about the - background task launch and how to check its status. - - Notes - ----- - - The function launches a background task and returns immediately - - The code graph generation may take significant time for larger repositories - - Use the codify_status tool to check the progress of the operation - - Process results are logged to the standard Cognee log file - - All stdout is redirected to stderr to maintain MCP communication integrity - """ - - if cognee_client.use_api: - error_msg = "❌ Codify operation is not available in API mode. Please use direct mode for code graph pipeline." - logger.error(error_msg) - return [types.TextContent(type="text", text=error_msg)] - - async def codify_task(repo_path: str): - # NOTE: MCP uses stdout to communicate, we must redirect all output - # going to stdout ( like the print function ) to stderr. - with redirect_stdout(sys.stderr): - logger.info("Codify process starting.") - from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline - - results = [] - async for result in run_code_graph_pipeline(repo_path, False): - results.append(result) - logger.info(result) - if all(results): - logger.info("Codify process finished succesfully.") - else: - logger.info("Codify process failed.") - - asyncio.create_task(codify_task(repo_path)) - - log_file = get_log_file_location() - text = ( - f"Background process launched due to MCP timeout limitations.\n" - f"To check current codify status use the codify_status tool\n" - f"or you can check the log file at: {log_file}" - ) - - return [ - types.TextContent( - type="text", - text=text, - ) - ] - - -@mcp.tool() -async def search(search_query: str, search_type: str) -> list: - """ - Search and query the knowledge graph for insights, information, and connections. - - This is the final step in the Cognee workflow that retrieves information from the - processed knowledge graph. 
It supports multiple search modes optimized for different - use cases - from simple fact retrieval to complex reasoning and code analysis. - - Search Prerequisites: - - **LLM_API_KEY**: Required for GRAPH_COMPLETION and RAG_COMPLETION search types - - **Data Added**: Must have data previously added via `cognee.add()` - - **Knowledge Graph Built**: Must have processed data via `cognee.cognify()` - - **Vector Database**: Must be accessible for semantic search functionality - - Search Types & Use Cases: - - **GRAPH_COMPLETION** (Recommended): - Natural language Q&A using full graph context and LLM reasoning. - Best for: Complex questions, analysis, summaries, insights. - Returns: Conversational AI responses with graph-backed context. - - **RAG_COMPLETION**: - Traditional RAG using document chunks without graph structure. - Best for: Direct document retrieval, specific fact-finding. - Returns: LLM responses based on relevant text chunks. - - **CHUNKS**: - Raw text segments that match the query semantically. - Best for: Finding specific passages, citations, exact content. - Returns: Ranked list of relevant text chunks with metadata. - - **SUMMARIES**: - Pre-generated hierarchical summaries of content. - Best for: Quick overviews, document abstracts, topic summaries. - Returns: Multi-level summaries from detailed to high-level. - - **CODE**: - Code-specific search with syntax and semantic understanding. - Best for: Finding functions, classes, implementation patterns. - Returns: Structured code information with context and relationships. - - **CYPHER**: - Direct graph database queries using Cypher syntax. - Best for: Advanced users, specific graph traversals, debugging. - Returns: Raw graph query results. - - **FEELING_LUCKY**: - Intelligently selects and runs the most appropriate search type. - Best for: General-purpose queries or when you're unsure which search type is best. - Returns: The results from the automatically selected search type. 
- - Parameters - ---------- - search_query : str - Your question or search query in natural language. + requirements : str + Describe what you need the MCP server to do. Be specific about your use case. Examples: - - "What are the main themes in this research?" - - "How do these concepts relate to each other?" - - "Find information about machine learning algorithms" - - "What functions handle user authentication?" + - "I need to read and write files" + - "I want to search the web for real-time information" + - "I need to control a browser and take screenshots" + - "I want to execute code in a sandbox" - search_type : str - The type of search to perform. Valid options include: - - "GRAPH_COMPLETION": Returns an LLM response based on the search query and Cognee's memory - - "RAG_COMPLETION": Returns an LLM response based on the search query and standard RAG data - - "CODE": Returns code-related knowledge in JSON format - - "CHUNKS": Returns raw text chunks from the knowledge graph - - "SUMMARIES": Returns pre-generated hierarchical summaries - - "CYPHER": Direct graph database queries - - "FEELING_LUCKY": Automatically selects best search type - - The search_type is case-insensitive and will be converted to uppercase. + max_results : int, optional + Maximum number of MCP servers to return (default: 5) Returns ------- list - A list containing a single TextContent object with the search results. 
- The format of the result depends on the search_type: - - **GRAPH_COMPLETION/RAG_COMPLETION**: Conversational AI response strings - - **CHUNKS**: Relevant text passages with source metadata - - **SUMMARIES**: Hierarchical summaries from general to specific - - **CODE**: Structured code information with context - - **FEELING_LUCKY**: Results in format of automatically selected search type - - **CYPHER**: Raw graph query results + A TextContent object with detailed information about matching MCP servers, + including their names, descriptions, capabilities, and installation instructions. - Performance & Optimization: - - **GRAPH_COMPLETION**: Slower but most intelligent, uses LLM + graph context - - **RAG_COMPLETION**: Medium speed, uses LLM + document chunks (no graph traversal) - - **CHUNKS**: Fastest, pure vector similarity search without LLM - - **SUMMARIES**: Fast, returns pre-computed summaries - - **CODE**: Medium speed, specialized for code understanding - - **FEELING_LUCKY**: Variable speed, uses LLM + search type selection intelligently - - Environment Variables: - Required for LLM-based search types (GRAPH_COMPLETION, RAG_COMPLETION): - - LLM_API_KEY: API key for your LLM provider - - Optional: - - LLM_PROVIDER, LLM_MODEL: Configure LLM for search responses - - VECTOR_DB_PROVIDER: Must match what was used during cognify - - GRAPH_DATABASE_PROVIDER: Must match what was used during cognify - - Notes - ----- - - Different search types produce different output formats - - The function handles the conversion between Cognee's internal result format and MCP's output format + Examples + -------- + ```python + # Find a server for file operations + await find_mcp_server("I need to read and modify files in my project") + # Find a server for web search + await find_mcp_server("I want to search the internet for current information") + ``` """ - async def search_task(search_query: str, search_type: str) -> str: - """Search the knowledge graph""" - # NOTE: MCP uses 
stdout to communicate, we must redirect all output - # going to stdout ( like the print function ) to stderr. - with redirect_stdout(sys.stderr): + with redirect_stdout(sys.stderr): + try: + logger.info(f"Searching for MCP servers matching: {requirements}") + + # Search using GRAPH_COMPLETION for intelligent matching search_results = await cognee_client.search( - query_text=search_query, query_type=search_type + query_text=f"Find MCP servers that can: {requirements}. Include their capabilities, installation instructions, and documentation.", + query_type="GRAPH_COMPLETION", ) - # Handle different result formats based on API vs direct mode + # Format the results if cognee_client.use_api: - # API mode returns JSON-serialized results if isinstance(search_results, str): - return search_results - elif isinstance(search_results, list): - if ( - search_type.upper() in ["GRAPH_COMPLETION", "RAG_COMPLETION"] - and len(search_results) > 0 - ): - return str(search_results[0]) - return str(search_results) + result_text = search_results + elif isinstance(search_results, list) and len(search_results) > 0: + result_text = str(search_results[0]) else: - return json.dumps(search_results, cls=JSONEncoder) + result_text = json.dumps(search_results, cls=JSONEncoder) else: - # Direct mode processing - if search_type.upper() == "CODE": - return json.dumps(search_results, cls=JSONEncoder) - elif ( - search_type.upper() == "GRAPH_COMPLETION" - or search_type.upper() == "RAG_COMPLETION" - ): - return str(search_results[0]) - elif search_type.upper() == "CHUNKS": - return str(search_results) - elif search_type.upper() == "INSIGHTS": - results = retrieved_edges_to_string(search_results) - return results + if isinstance(search_results, list) and len(search_results) > 0: + result_text = str(search_results[0]) else: - return str(search_results) + result_text = str(search_results) - search_results = await search_task(search_query, search_type) - return [types.TextContent(type="text", 
text=search_results)] - - -@mcp.tool() -async def get_developer_rules() -> list: - """ - Retrieve all developer rules that were generated based on previous interactions. - - This tool queries the Cognee knowledge graph and returns a list of developer - rules. - - Parameters - ---------- - None - - Returns - ------- - list - A list containing a single TextContent object with the retrieved developer rules. - The format is plain text containing the developer rules in bulletpoints. - - Notes - ----- - - The specific logic for fetching rules is handled internally. - - This tool does not accept any parameters and is intended for simple rule inspection use cases. - """ - - async def fetch_rules_from_cognee() -> str: - """Collect all developer rules from Cognee""" - with redirect_stdout(sys.stderr): - if cognee_client.use_api: - logger.warning("Developer rules retrieval is not available in API mode") - return "Developer rules retrieval is not available in API mode" - - developer_rules = await get_existing_rules(rules_nodeset_name="coding_agent_rules") - return developer_rules - - rules_text = await fetch_rules_from_cognee() - - return [types.TextContent(type="text", text=rules_text)] - - -@mcp.tool() -async def list_data(dataset_id: str = None) -> list: - """ - List all datasets and their data items with IDs for deletion operations. - - This function helps users identify data IDs and dataset IDs that can be used - with the delete tool. It provides a comprehensive view of available data. - - Parameters - ---------- - dataset_id : str, optional - If provided, only list data items from this specific dataset. - If None, lists all datasets and their data items. - Should be a valid UUID string. - - Returns - ------- - list - A list containing a single TextContent object with formatted information - about datasets and data items, including their IDs for deletion. 
- - Notes - ----- - - Use this tool to identify data_id and dataset_id values for the delete tool - - The output includes both dataset information and individual data items - - UUIDs are displayed in a format ready for use with other tools - """ - from uuid import UUID - - with redirect_stdout(sys.stderr): - try: - output_lines = [] - - if dataset_id: - # Detailed data listing for specific dataset is only available in direct mode - if cognee_client.use_api: - return [ - types.TextContent( - type="text", - text="❌ Detailed data listing for specific datasets is not available in API mode.\nPlease use the API directly or use direct mode.", - ) - ] - - from cognee.modules.users.methods import get_default_user - from cognee.modules.data.methods import get_dataset, get_dataset_data - - logger.info(f"Listing data for dataset: {dataset_id}") - dataset_uuid = UUID(dataset_id) - user = await get_default_user() - - dataset = await get_dataset(user.id, dataset_uuid) - - if not dataset: - return [ - types.TextContent(type="text", text=f"❌ Dataset not found: {dataset_id}") - ] - - # Get data items in the dataset - data_items = await get_dataset_data(dataset.id) - - output_lines.append(f"📁 Dataset: {dataset.name}") - output_lines.append(f" ID: {dataset.id}") - output_lines.append(f" Created: {dataset.created_at}") - output_lines.append(f" Data items: {len(data_items)}") - output_lines.append("") - - if data_items: - for i, data_item in enumerate(data_items, 1): - output_lines.append(f" 📄 Data item #{i}:") - output_lines.append(f" Data ID: {data_item.id}") - output_lines.append(f" Name: {data_item.name or 'Unnamed'}") - output_lines.append(f" Created: {data_item.created_at}") - output_lines.append("") - else: - output_lines.append(" (No data items in this dataset)") - - else: - # List all datasets - works in both modes - logger.info("Listing all datasets") - datasets = await cognee_client.list_datasets() - - if not datasets: - return [ - types.TextContent( - type="text", - text="📂 
No datasets found.\nUse the cognify tool to create your first dataset!", - ) - ] - - output_lines.append("📂 Available Datasets:") - output_lines.append("=" * 50) - output_lines.append("") - - for i, dataset in enumerate(datasets, 1): - # In API mode, dataset is a dict; in direct mode, it's formatted as dict - if isinstance(dataset, dict): - output_lines.append(f"{i}. 📁 {dataset.get('name', 'Unnamed')}") - output_lines.append(f" Dataset ID: {dataset.get('id')}") - output_lines.append(f" Created: {dataset.get('created_at', 'N/A')}") - else: - output_lines.append(f"{i}. 📁 {dataset.name}") - output_lines.append(f" Dataset ID: {dataset.id}") - output_lines.append(f" Created: {dataset.created_at}") - output_lines.append("") - - if not cognee_client.use_api: - output_lines.append("💡 To see data items in a specific dataset, use:") - output_lines.append(' list_data(dataset_id="your-dataset-id-here")') - output_lines.append("") - output_lines.append("🗑️ To delete specific data, use:") - output_lines.append(' delete(data_id="data-id", dataset_id="dataset-id")') - - result_text = "\n".join(output_lines) - logger.info("List data operation completed successfully") - - return [types.TextContent(type="text", text=result_text)] - - except ValueError as e: - error_msg = f"❌ Invalid UUID format: {str(e)}" - logger.error(error_msg) - return [types.TextContent(type="text", text=error_msg)] - - except Exception as e: - error_msg = f"❌ Failed to list data: {str(e)}" - logger.error(f"List data error: {str(e)}") - return [types.TextContent(type="text", text=error_msg)] - - -@mcp.tool() -async def delete(data_id: str, dataset_id: str, mode: str = "soft") -> list: - """ - Delete specific data from a dataset in the Cognee knowledge graph. - - This function removes a specific data item from a dataset while keeping the - dataset itself intact. It supports both soft and hard deletion modes. 
- - Parameters - ---------- - data_id : str - The UUID of the data item to delete from the knowledge graph. - This should be a valid UUID string identifying the specific data item. - - dataset_id : str - The UUID of the dataset containing the data to be deleted. - This should be a valid UUID string identifying the dataset. - - mode : str, optional - The deletion mode to use. Options are: - - "soft" (default): Removes the data but keeps related entities that might be shared - - "hard": Also removes degree-one entity nodes that become orphaned after deletion - Default is "soft" for safer deletion that preserves shared knowledge. - - Returns - ------- - list - A list containing a single TextContent object with the deletion results, - including status, deleted node counts, and confirmation details. - - Notes - ----- - - This operation cannot be undone. The specified data will be permanently removed. - - Hard mode may remove additional entity nodes that become orphaned - - The function provides detailed feedback about what was deleted - - Use this for targeted deletion instead of the prune tool which removes everything - """ - from uuid import UUID - - with redirect_stdout(sys.stderr): - try: - logger.info( - f"Starting delete operation for data_id: {data_id}, dataset_id: {dataset_id}, mode: {mode}" - ) - - # Convert string UUIDs to UUID objects - data_uuid = UUID(data_id) - dataset_uuid = UUID(dataset_id) - - # Call the cognee delete function via client - result = await cognee_client.delete( - data_id=data_uuid, dataset_id=dataset_uuid, mode=mode - ) - - logger.info(f"Delete operation completed successfully: {result}") - - # Format the result for MCP response - formatted_result = json.dumps(result, indent=2, cls=JSONEncoder) + logger.info("MCP server search completed") return [ types.TextContent( type="text", - text=f"✅ Delete operation completed successfully!\n\n{formatted_result}", + text=f"🔍 MCP Servers matching your requirements:\n\n{result_text}", ) ] - except 
@mcp.tool()
async def list_mcp_servers() -> list:
    """
    List all MCP servers stored in your personal registry.

    Queries the knowledge graph for every remembered server and returns its
    name, description, capabilities, connection details (URL or command/args),
    installation instructions, and documentation links, so the information can
    be fed straight into an MCP client configuration.

    Returns
    -------
    list
        A single TextContent entry describing every server in the registry.
    """
    with redirect_stdout(sys.stderr):
        try:
            logger.info("Listing all MCP servers")

            # One graph-completion query pulls back the full registry summary.
            search_results = await cognee_client.search(
                query_text="List all MCP servers with their names, descriptions, capabilities, connection information (URL, command, args), installation instructions, and documentation links",
                query_type="GRAPH_COMPLETION",
            )

            # Normalize the result into plain text. A non-empty list is handled
            # the same way in both transport modes; the fallbacks differ.
            if isinstance(search_results, list) and search_results:
                result_text = str(search_results[0])
            elif cognee_client.use_api:
                result_text = (
                    search_results
                    if isinstance(search_results, str)
                    else json.dumps(search_results, cls=JSONEncoder)
                )
            else:
                result_text = str(search_results)

            output_text = f"📋 MCP Servers in Registry:\n\n{result_text}\n\n"
            output_text += "💡 Use the connection information above (URL or command/args) to configure your MCP client."

            logger.info("MCP server listing completed")

            return [
                types.TextContent(
                    type="text",
                    text=output_text,
                )
            ]

        except Exception as e:
            error_msg = f"❌ Failed to list MCP servers: {str(e)}"
            logger.error(error_msg)
            return [types.TextContent(type="text", text=error_msg)]


@mcp.tool()
async def clear_registry() -> list:
    """
    Clear all stored MCP server information from the registry.

    Permanently removes every remembered server by pruning both the data layer
    and the system metadata layer of the underlying knowledge graph. This
    cannot be undone.

    Returns
    -------
    list
        A single TextContent entry confirming the registry was cleared, or an
        error message if pruning failed or is unavailable.
    """
    with redirect_stdout(sys.stderr):
        try:
            # Prune stored content first, then the system metadata layer.
            await cognee_client.prune_data()
            await cognee_client.prune_system(metadata=True)
            logger.info("MCP server registry cleared")
            confirmation = (
                "✅ MCP server registry has been cleared. All stored servers removed."
            )
            return [types.TextContent(type="text", text=confirmation)]
        except NotImplementedError:
            # The prune endpoints only exist when running in-process, not via API.
            error_msg = "❌ Clear operation is not available in API mode"
            logger.error(error_msg)
            return [types.TextContent(type="text", text=error_msg)]
        except Exception as e:
            error_msg = f"❌ Failed to clear registry: {str(e)}"
            logger.error(error_msg)
            return [types.TextContent(type="text", text=error_msg)]
- - Notes - ----- - - The function retrieves pipeline status specifically for the "cognify_pipeline" on the "main_dataset" - - Status information includes job progress, execution time, and completion status - - The status is returned in string format for easy reading - - This operation is not available in API mode - """ - with redirect_stdout(sys.stderr): - try: - from cognee.modules.data.methods.get_unique_dataset_id import get_unique_dataset_id - from cognee.modules.users.methods import get_default_user - - user = await get_default_user() - status = await cognee_client.get_pipeline_status( - [await get_unique_dataset_id("main_dataset", user)], "cognify_pipeline" - ) - return [types.TextContent(type="text", text=str(status))] - except NotImplementedError: - error_msg = "❌ Pipeline status is not available in API mode" - logger.error(error_msg) - return [types.TextContent(type="text", text=error_msg)] - except Exception as e: - error_msg = f"❌ Failed to get cognify status: {str(e)}" - logger.error(error_msg) - return [types.TextContent(type="text", text=error_msg)] - - -@mcp.tool() -async def codify_status(): - """ - Get the current status of the codify pipeline. - - This function retrieves information about current and recently completed codify operations - in the codebase dataset. It provides details on progress, success/failure status, and statistics - about the processed code repositories. - - Returns - ------- - list - A list containing a single TextContent object with the status information as a string. - The status includes information about active and completed jobs for the cognify_code_pipeline. 
- - Notes - ----- - - The function retrieves pipeline status specifically for the "cognify_code_pipeline" on the "codebase" dataset - - Status information includes job progress, execution time, and completion status - - The status is returned in string format for easy reading - - This operation is not available in API mode - """ - with redirect_stdout(sys.stderr): - try: - from cognee.modules.data.methods.get_unique_dataset_id import get_unique_dataset_id - from cognee.modules.users.methods import get_default_user - - user = await get_default_user() - status = await cognee_client.get_pipeline_status( - [await get_unique_dataset_id("codebase", user)], "cognify_code_pipeline" - ) - return [types.TextContent(type="text", text=str(status))] - except NotImplementedError: - error_msg = "❌ Pipeline status is not available in API mode" - logger.error(error_msg) - return [types.TextContent(type="text", text=error_msg)] - except Exception as e: - error_msg = f"❌ Failed to get codify status: {str(e)}" - logger.error(error_msg) - return [types.TextContent(type="text", text=error_msg)] - - -def node_to_string(node): - node_data = ", ".join( - [f'{key}: "{value}"' for key, value in node.items() if key in ["id", "name"]] - ) - - return f"Node({node_data})" - - -def retrieved_edges_to_string(search_results): - edge_strings = [] - for triplet in search_results: - node1, edge, node2 = triplet - relationship_type = edge["relationship_name"] - edge_str = f"{node_to_string(node1)} {relationship_type} {node_to_string(node2)}" - edge_strings.append(edge_str) - - return "\n".join(edge_strings) - - -def load_class(model_file, model_name): - model_file = os.path.abspath(model_file) - spec = importlib.util.spec_from_file_location("graph_model", model_file) - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) - - model_class = getattr(module, model_name) - - return model_class - - async def main(): global cognee_client @@ -1121,7 +487,7 @@ async def main(): elif 
args.api_url: logger.info("Skipping database migrations (using API mode)") - logger.info(f"Starting MCP server with transport: {args.transport}") + logger.info(f"Starting MCP Registry server with transport: {args.transport}") if args.transport == "stdio": await mcp.run_stdio_async() elif args.transport == "sse": @@ -1140,5 +506,5 @@ if __name__ == "__main__": try: asyncio.run(main()) except Exception as e: - logger.error(f"Error initializing Cognee MCP server: {str(e)}") + logger.error(f"Error initializing MCP Registry server: {str(e)}") raise