resolve conflicts

2025-05-19 13:25:38 +02:00 · 2025-05-19 13:25:38 +02:00 · f842f68b9e
commit f842f68b9e
parent a5b28983bd a874988db6
3 changed files with 284 additions and 8 deletions
--- a/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py
+++ b/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py
@ -0,0 +1,33 @@
+"""Add pipeline run status
+
+Revision ID: 1d0bb7fede17
+Revises: 482cd6517ce4
+Create Date: 2025-05-19 10:58:15.993314
+"""
+
+from typing import Sequence, Union
+
+from alembic import op
+
+from cognee.infrastructure.databases.relational.get_relational_engine import get_relational_engine
+from cognee.modules.pipelines.models.PipelineRun import PipelineRun, PipelineRunStatus
+
+
+# revision identifiers, used by Alembic.
+revision: str = "1d0bb7fede17"
+down_revision: Union[str, None] = "482cd6517ce4"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = "482cd6517ce4"
+
+
+def upgrade() -> None:
+    db_engine = get_relational_engine()
+
+    if db_engine.engine.dialect.name == "postgresql":
+        op.execute(
+            "ALTER TYPE pipelinerunstatus ADD VALUE IF NOT EXISTS 'DATASET_PROCESSING_INITIATED'"
+        )
+
+
+def downgrade() -> None:
+    pass
--- a/alembic/versions/482cd6517ce4_add_default_user.py
+++ b/alembic/versions/482cd6517ce4_add_default_user.py
@ -21,7 +21,10 @@ depends_on: Union[str, Sequence[str], None] = "8057ae7329c2"


 def upgrade() -> None:
-    await_only(create_default_user())
+    try:
+        await_only(create_default_user())
+    except Exception:
+        pass


 def downgrade() -> None:
--- a/cognee-mcp/src/server.py
+++ b/cognee-mcp/src/server.py
@ -24,9 +24,127 @@ log_file = get_log_file_location()


@mcp.tool()
-async def cognify(text: str, graph_model_file: str = None, graph_model_name: str = None) -> list:
+async def cognee_add_developer_rules(
+    base_path: str = ".", graph_model_file: str = None, graph_model_name: str = None
+) -> list:
+    """
+    Ingest core developer rule files into Cognee's memory layer.
+
+    This function loads a predefined set of developer-related configuration,
+    rule, and documentation files from the base repository and assigns them
+    to the special 'developer_rules' node set in Cognee. It ensures these
+    foundational files are always part of the structured memory graph.
+
+    Parameters
+    ----------
+    base_path : str
+        Root path to resolve relative file paths. Defaults to current directory.
+
+    graph_model_file : str, optional
+        Optional path to a custom schema file for knowledge graph generation.
+
+    graph_model_name : str, optional
+        Optional class name to use from the graph_model_file schema.
+
+    Returns
+    -------
+    list
+        A message indicating how many rule files were scheduled for ingestion,
+        and how to check their processing status.
+
+    Notes
+    -----
+    - Each file is processed asynchronously in the background.
+    - Files are attached to the 'developer_rules' node set.
+    - Missing files are skipped with a logged warning.
+    """
+
+    developer_rule_paths = [
+        ".cursorrules",
+        ".cursor/rules",
+        ".same/todos.md",
+        ".windsurfrules",
+        ".clinerules",
+        "CLAUDE.md",
+        ".sourcegraph/memory.md",
+        "AGENT.md",
+        "AGENTS.md",
+    ]
+
+    async def cognify_task(file_path: str) -> None:
+        with redirect_stdout(sys.stderr):
+            logger.info(f"Starting cognify for: {file_path}")
+            try:
+                await cognee.add(file_path, nodeset="developer_rules")
+                model = KnowledgeGraph
+                if graph_model_file and graph_model_name:
+                    model = load_class(graph_model_file, graph_model_name)
+                await cognee.cognify(graph_model=model)
+                logger.info(f"Cognify finished for: {file_path}")
+            except Exception as e:
+                logger.error(f"Cognify failed for {file_path}: {str(e)}")
+
+    tasks = []
+    for rel_path in developer_rule_paths:
+        abs_path = os.path.join(base_path, rel_path)
+        if os.path.isfile(abs_path):
+            tasks.append(asyncio.create_task(cognify_task(abs_path)))
+        else:
+            logger.warning(f"Skipped missing developer rule file: {abs_path}")
+
+    return [
+        types.TextContent(
+            type="text",
+            text=(
+                f"Started cognify for {len(tasks)} developer rule files in background.\n"
+                f"All are added to the `developer_rules` node set.\n"
+                f"Use `cognify_status` or check logs at {log_file} to monitor progress."
+            ),
+        )
+    ]
+
+
+@mcp.tool()
+async def cognify(data: str, graph_model_file: str = None, graph_model_name: str = None) -> list:
+    """
+    Transform data into a structured knowledge graph in Cognee's memory layer.
+
+    This function launches a background task that processes the provided text/file location and
+    generates a knowledge graph representation. The function returns immediately while
+    the processing continues in the background due to MCP timeout constraints.
+
+    Parameters
+    ----------
+    data : str
+        The data to be processed and transformed into structured knowledge.
+        This can include natural language, file location, or any text-based information
+        that should become part of the agent's memory.
+
+    graph_model_file : str, optional
+        Path to a custom schema file that defines the structure of the generated knowledge graph.
+        If provided, this file will be loaded using importlib to create a custom graph model.
+        Default is None, which uses Cognee's built-in KnowledgeGraph model.
+
+    graph_model_name : str, optional
+        Name of the class within the graph_model_file to instantiate as the graph model.
+        Required if graph_model_file is specified.
+        Default is None, which uses the default KnowledgeGraph class.
+
+    Returns
+    -------
+    list
+        A list containing a single TextContent object with information about the
+        background task launch and how to check its status.
+
+    Notes
+    -----
+    - The function launches a background task and returns immediately
+    - The actual cognify process may take significant time depending on text length
+    - Use the cognify_status tool to check the progress of the operation
+    """
+
    async def cognify_task(
-        text: str, graph_model_file: str = None, graph_model_name: str = None
+        data: str, graph_model_file: str = None, graph_model_name: str = None
    ) -> str:
        """Build knowledge graph from the input text"""
        # NOTE: MCP uses stdout to communicate, we must redirect all output
@ -38,7 +156,7 @@ async def cognify(text: str, graph_model_file: str = None, graph_model_name: str
            else:
                graph_model = KnowledgeGraph

-            await cognee.add(text)
+            await cognee.add(data)

            try:
                await cognee.cognify(graph_model=graph_model)
@ -49,7 +167,7 @@ async def cognify(text: str, graph_model_file: str = None, graph_model_name: str

    asyncio.create_task(
        cognify_task(
-            text=text,
+            data=data,
            graph_model_file=graph_model_file,
            graph_model_name=graph_model_name,
        )
@ -71,6 +189,35 @@ async def cognify(text: str, graph_model_file: str = None, graph_model_name: str

@mcp.tool()
 async def codify(repo_path: str) -> list:
+    """
+    Analyze and generate a code-specific knowledge graph from a software repository.
+
+    This function launches a background task that processes the provided repository
+    and builds a code knowledge graph. The function returns immediately while
+    the processing continues in the background due to MCP timeout constraints.
+
+    Parameters
+    ----------
+    repo_path : str
+        Path to the code repository to analyze. This can be a local file path or a
+        relative path to a repository. The path should point to the root of the
+        repository or a specific directory within it.
+
+    Returns
+    -------
+    list
+        A list containing a single TextContent object with information about the
+        background task launch and how to check its status.
+
+    Notes
+    -----
+    - The function launches a background task and returns immediately
+    - The code graph generation may take significant time for larger repositories
+    - Use the codify_status tool to check the progress of the operation
+    - Process results are logged to the standard Cognee log file
+    - All stdout is redirected to stderr to maintain MCP communication integrity
+    """
+
    async def codify_task(repo_path: str):
        # NOTE: MCP uses stdout to communicate, we must redirect all output
        #       going to stdout ( like the print function ) to stderr.
@ -103,6 +250,46 @@ async def codify(repo_path: str) -> list:

@mcp.tool()
 async def search(search_query: str, search_type: str) -> list:
+    """
+    Search the Cognee knowledge graph for information relevant to the query.
+
+    This function executes a search against the Cognee knowledge graph using the
+    specified query and search type. It returns formatted results based on the
+    search type selected.
+
+    Parameters
+    ----------
+    search_query : str
+        The search query in natural language. This can be a question, instruction, or
+        any text that expresses what information is needed from the knowledge graph.
+
+    search_type : str
+        The type of search to perform. Valid options include:
+        - "GRAPH_COMPLETION": Returns an LLM response based on the search query and Cognee's memory
+        - "RAG_COMPLETION": Returns an LLM response based on the search query and standard RAG data
+        - "CODE": Returns code-related knowledge in JSON format
+        - "CHUNKS": Returns raw text chunks from the knowledge graph
+        - "INSIGHTS": Returns relationships between nodes in readable format
+
+        The search_type is case-insensitive and will be converted to uppercase.
+
+    Returns
+    -------
+    list
+        A list containing a single TextContent object with the search results.
+        The format of the result depends on the search_type:
+        - For CODE: JSON-formatted search results
+        - For GRAPH_COMPLETION/RAG_COMPLETION: A single text completion
+        - For CHUNKS: String representation of the raw chunks
+        - For INSIGHTS: Formatted string showing node relationships
+        - For other types: String representation of the search results
+
+    Notes
+    -----
+    - Different search types produce different output formats
+    - The function handles the conversion between Cognee's internal result format and MCP's output format
+    """
+
    async def search_task(search_query: str, search_type: str) -> str:
        """Search the knowledge graph"""
        # NOTE: MCP uses stdout to communicate, we must redirect all output
@ -132,7 +319,24 @@ async def search(search_query: str, search_type: str) -> list:

@mcp.tool()
 async def prune():
-    """Reset the knowledge graph"""
+    """
+    Reset the Cognee knowledge graph by removing all stored information.
+
+    This function performs a complete reset of both the data layer and system layer
+    of the Cognee knowledge graph, removing all nodes, edges, and associated metadata.
+    It is typically used during development or when needing to start fresh with a new
+    knowledge base.
+
+    Returns
+    -------
+    list
+        A list containing a single TextContent object with confirmation of the prune operation.
+
+    Notes
+    -----
+    - This operation cannot be undone. All memory data will be permanently deleted.
+    - The function prunes both data content (using prune_data) and system metadata (using prune_system)
+    """
    with redirect_stdout(sys.stderr):
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
@ -141,7 +345,25 @@ async def prune():

@mcp.tool()
 async def cognify_status():
-    """Get status of cognify pipeline"""
+    """
+    Get the current status of the cognify pipeline.
+
+    This function retrieves information about current and recently completed cognify operations
+    in the main_dataset. It provides details on progress, success/failure status, and statistics
+    about the processed data.
+
+    Returns
+    -------
+    list
+        A list containing a single TextContent object with the status information as a string.
+        The status includes information about active and completed jobs for the cognify_pipeline.
+
+    Notes
+    -----
+    - The function retrieves pipeline status specifically for the "cognify_pipeline" on the "main_dataset"
+    - Status information includes job progress, execution time, and completion status
+    - The status is returned in string format for easy reading
+    """
    with redirect_stdout(sys.stderr):
        user = await get_default_user()
        status = await get_pipeline_status(
@ -152,7 +374,25 @@ async def cognify_status():

@mcp.tool()
 async def codify_status():
-    """Get status of codify pipeline"""
+    """
+    Get the current status of the codify pipeline.
+
+    This function retrieves information about current and recently completed codify operations
+    in the codebase dataset. It provides details on progress, success/failure status, and statistics
+    about the processed code repositories.
+
+    Returns
+    -------
+    list
+        A list containing a single TextContent object with the status information as a string.
+        The status includes information about active and completed jobs for the cognify_code_pipeline.
+
+    Notes
+    -----
+    - The function retrieves pipeline status specifically for the "cognify_code_pipeline" on the "codebase" dataset
+    - Status information includes job progress, execution time, and completion status
+    - The status is returned in string format for easy reading
+    """
    with redirect_stdout(sys.stderr):
        user = await get_default_user()
        status = await get_pipeline_status(