diff --git a/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py b/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py new file mode 100644 index 000000000..5e0e99dba --- /dev/null +++ b/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py @@ -0,0 +1,33 @@ +"""Add pipeline run status + +Revision ID: 1d0bb7fede17 +Revises: 482cd6517ce4 +Create Date: 2025-05-19 10:58:15.993314 +""" + +from typing import Sequence, Union + +from alembic import op + +from cognee.infrastructure.databases.relational.get_relational_engine import get_relational_engine +from cognee.modules.pipelines.models.PipelineRun import PipelineRun, PipelineRunStatus + + +# revision identifiers, used by Alembic. +revision: str = "1d0bb7fede17" +down_revision: Union[str, None] = "482cd6517ce4" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = "482cd6517ce4" + + +def upgrade() -> None: + db_engine = get_relational_engine() + + if db_engine.engine.dialect.name == "postgresql": + op.execute( + "ALTER TYPE pipelinerunstatus ADD VALUE IF NOT EXISTS 'DATASET_PROCESSING_INITIATED'" + ) + + +def downgrade() -> None: + pass diff --git a/alembic/versions/482cd6517ce4_add_default_user.py b/alembic/versions/482cd6517ce4_add_default_user.py index 92429e1e4..078dce7fd 100644 --- a/alembic/versions/482cd6517ce4_add_default_user.py +++ b/alembic/versions/482cd6517ce4_add_default_user.py @@ -21,7 +21,10 @@ depends_on: Union[str, Sequence[str], None] = "8057ae7329c2" def upgrade() -> None: - await_only(create_default_user()) + try: + await_only(create_default_user()) + except Exception: + pass def downgrade() -> None: diff --git a/cognee-mcp/src/server.py b/cognee-mcp/src/server.py index f8c65bc7c..fffbaf223 100755 --- a/cognee-mcp/src/server.py +++ b/cognee-mcp/src/server.py @@ -24,9 +24,127 @@ log_file = get_log_file_location() @mcp.tool() -async def cognify(text: str, graph_model_file: str = None, graph_model_name: str = None) -> list: +async def 
cognee_add_developer_rules( + base_path: str = ".", graph_model_file: str = None, graph_model_name: str = None +) -> list: + """ + Ingest core developer rule files into Cognee's memory layer. + + This function loads a predefined set of developer-related configuration, + rule, and documentation files from the base repository and assigns them + to the special 'developer_rules' node set in Cognee. It ensures these + foundational files are always part of the structured memory graph. + + Parameters + ---------- + base_path : str + Root path to resolve relative file paths. Defaults to current directory. + + graph_model_file : str, optional + Optional path to a custom schema file for knowledge graph generation. + + graph_model_name : str, optional + Optional class name to use from the graph_model_file schema. + + Returns + ------- + list + A message indicating how many rule files were scheduled for ingestion, + and how to check their processing status. + + Notes + ----- + - Each file is processed asynchronously in the background. + - Files are attached to the 'developer_rules' node set. + - Missing files are skipped with a logged warning. 
+ """ + + developer_rule_paths = [ + ".cursorrules", + ".cursor/rules", + ".same/todos.md", + ".windsurfrules", + ".clinerules", + "CLAUDE.md", + ".sourcegraph/memory.md", + "AGENT.md", + "AGENTS.md", + ] + + async def cognify_task(file_path: str) -> None: + with redirect_stdout(sys.stderr): + logger.info(f"Starting cognify for: {file_path}") + try: + await cognee.add(file_path, nodeset="developer_rules") + model = KnowledgeGraph + if graph_model_file and graph_model_name: + model = load_class(graph_model_file, graph_model_name) + await cognee.cognify(graph_model=model) + logger.info(f"Cognify finished for: {file_path}") + except Exception as e: + logger.error(f"Cognify failed for {file_path}: {str(e)}") + + tasks = [] + for rel_path in developer_rule_paths: + abs_path = os.path.join(base_path, rel_path) + if os.path.isfile(abs_path): + tasks.append(asyncio.create_task(cognify_task(abs_path))) + else: + logger.warning(f"Skipped missing developer rule file: {abs_path}") + + return [ + types.TextContent( + type="text", + text=( + f"Started cognify for {len(tasks)} developer rule files in background.\n" + f"All are added to the `developer_rules` node set.\n" + f"Use `cognify_status` or check logs at {log_file} to monitor progress." + ), + ) + ] + + +@mcp.tool() +async def cognify(data: str, graph_model_file: str = None, graph_model_name: str = None) -> list: + """ + Transform data into a structured knowledge graph in Cognee's memory layer. + + This function launches a background task that processes the provided text/file location and + generates a knowledge graph representation. The function returns immediately while + the processing continues in the background due to MCP timeout constraints. + + Parameters + ---------- + data : str + The data to be processed and transformed into structured knowledge. + This can include natural language, file location, or any text-based information + that should become part of the agent's memory. 
+ + graph_model_file : str, optional + Path to a custom schema file that defines the structure of the generated knowledge graph. + If provided, this file will be loaded using importlib to create a custom graph model. + Default is None, which uses Cognee's built-in KnowledgeGraph model. + + graph_model_name : str, optional + Name of the class within the graph_model_file to instantiate as the graph model. + Required if graph_model_file is specified. + Default is None, which uses the default KnowledgeGraph class. + + Returns + ------- + list + A list containing a single TextContent object with information about the + background task launch and how to check its status. + + Notes + ----- + - The function launches a background task and returns immediately + - The actual cognify process may take significant time depending on text length + - Use the cognify_status tool to check the progress of the operation + """ + async def cognify_task( - text: str, graph_model_file: str = None, graph_model_name: str = None + data: str, graph_model_file: str = None, graph_model_name: str = None ) -> str: """Build knowledge graph from the input text""" # NOTE: MCP uses stdout to communicate, we must redirect all output @@ -38,7 +156,7 @@ async def cognify(text: str, graph_model_file: str = None, graph_model_name: str else: graph_model = KnowledgeGraph - await cognee.add(text) + await cognee.add(data) try: await cognee.cognify(graph_model=graph_model) @@ -49,7 +167,7 @@ async def cognify(text: str, graph_model_file: str = None, graph_model_name: str asyncio.create_task( cognify_task( - text=text, + data=data, graph_model_file=graph_model_file, graph_model_name=graph_model_name, ) @@ -71,6 +189,35 @@ async def cognify(text: str, graph_model_file: str = None, graph_model_name: str @mcp.tool() async def codify(repo_path: str) -> list: + """ + Analyze and generate a code-specific knowledge graph from a software repository. 
Path to the code repository to analyze. This can be an absolute or + relative path to the repository. The path should point to the root of the + repository or a specific directory within it.
It is typically used during development or when you need to start fresh with a new + knowledge base.
All memory data will be permanently deleted. + - The function prunes both data content (using prune_data) and system metadata (using prune_system) + """ with redirect_stdout(sys.stderr): await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) @@ -141,7 +345,25 @@ async def prune(): @mcp.tool() async def cognify_status(): - """Get status of cognify pipeline""" + """ + Get the current status of the cognify pipeline. + + This function retrieves information about current and recently completed cognify operations + in the main_dataset. It provides details on progress, success/failure status, and statistics + about the processed data. + + Returns + ------- + list + A list containing a single TextContent object with the status information as a string. + The status includes information about active and completed jobs for the cognify_pipeline. + + Notes + ----- + - The function retrieves pipeline status specifically for the "cognify_pipeline" on the "main_dataset" + - Status information includes job progress, execution time, and completion status + - The status is returned in string format for easy reading + """ with redirect_stdout(sys.stderr): user = await get_default_user() status = await get_pipeline_status( @@ -152,7 +374,25 @@ async def cognify_status(): @mcp.tool() async def codify_status(): - """Get status of codify pipeline""" + """ + Get the current status of the codify pipeline. + + This function retrieves information about current and recently completed codify operations + in the codebase dataset. It provides details on progress, success/failure status, and statistics + about the processed code repositories. + + Returns + ------- + list + A list containing a single TextContent object with the status information as a string. + The status includes information about active and completed jobs for the cognify_code_pipeline. 
+ + Notes + ----- + - The function retrieves pipeline status specifically for the "cognify_code_pipeline" on the "codebase" dataset + - Status information includes job progress, execution time, and completion status + - The status is returned in string format for easy reading + """ with redirect_stdout(sys.stderr): user = await get_default_user() status = await get_pipeline_status(