# Cognee MCP fixes main (#1196)


## Description
Fix Cognee MCP issues: add a `--no-migration` flag, pass `--host`/`--port` explicitly in the entrypoint, pin cognee to 0.2.1, expand the `cognify` and `search` tool docstrings, and simplify the MCP service in docker-compose.

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
Igor Ilic, 2025-08-04 15:48:04 +02:00, committed via GitHub
parent 5fcc8b7813, commit b46833b476
6 changed files with 3334 additions and 3201 deletions

### cognee-mcp/Dockerfile

```diff
@@ -52,6 +52,7 @@ RUN apt-get update && apt-get install -y \
 WORKDIR /app
 # Copy the virtual environment from the uv stage
+COPY --from=uv /usr/local /usr/local
 COPY --from=uv /app /app
 RUN chmod +x /app/entrypoint.sh
```

### cognee-mcp/entrypoint.sh

```diff
@@ -48,27 +48,27 @@ if [ "$ENVIRONMENT" = "dev" ] || [ "$ENVIRONMENT" = "local" ]; then
     if [ "$DEBUG" = "true" ]; then
         echo "Waiting for the debugger to attach..."
         if [ "$TRANSPORT_MODE" = "sse" ]; then
-            exec python -m debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m cognee --transport sse
+            exec python -m debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m cognee --transport sse --host 0.0.0.0 --port $HTTP_PORT --no-migration
         elif [ "$TRANSPORT_MODE" = "http" ]; then
-            exec python -m debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m cognee --transport http --host 0.0.0.0 --port $HTTP_PORT
+            exec python -m debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m cognee --transport http --host 0.0.0.0 --port $HTTP_PORT --no-migration
         else
-            exec python -m debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m cognee --transport stdio
+            exec python -m debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m cognee --transport stdio --no-migration
         fi
     else
         if [ "$TRANSPORT_MODE" = "sse" ]; then
-            exec cognee --transport sse
+            exec cognee --transport sse --host 0.0.0.0 --port $HTTP_PORT --no-migration
         elif [ "$TRANSPORT_MODE" = "http" ]; then
-            exec cognee --transport http --host 0.0.0.0 --port $HTTP_PORT
+            exec cognee --transport http --host 0.0.0.0 --port $HTTP_PORT --no-migration
         else
-            exec cognee --transport stdio
+            exec cognee --transport stdio --no-migration
         fi
     fi
 else
     if [ "$TRANSPORT_MODE" = "sse" ]; then
-        exec cognee --transport sse
+        exec cognee --transport sse --host 0.0.0.0 --port $HTTP_PORT --no-migration
     elif [ "$TRANSPORT_MODE" = "http" ]; then
-        exec cognee --transport http --host 0.0.0.0 --port $HTTP_PORT
+        exec cognee --transport http --host 0.0.0.0 --port $HTTP_PORT --no-migration
     else
-        exec cognee --transport stdio
+        exec cognee --transport stdio --no-migration
     fi
 fi
```
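For context on the debug branch: `python -m debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT` holds the server until an IDE attaches. A minimal in-process sketch of the same behavior, using debugpy's documented `listen`/`wait_for_client` entry points (the port here is illustrative; the container reads it from `$DEBUG_PORT`):

```python
# Rough in-process equivalent of the entrypoint's debug branch (sketch).
import debugpy

debugpy.listen(("0.0.0.0", 5678))  # port is illustrative; the entrypoint uses $DEBUG_PORT
print("Waiting for the debugger to attach...")
debugpy.wait_for_client()  # blocks until an IDE attaches, like --wait-for-client
```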

### cognee-mcp/pyproject.toml

```diff
@@ -8,7 +8,7 @@ requires-python = ">=3.10"
 dependencies = [
     # For local cognee repo usage remove comment bellow and add absolute path to cognee. Then run `uv sync --reinstall` in the mcp folder on local cognee changes.
     # "cognee[postgres,codegraph,gemini,huggingface,docs,neo4j] @ file:/Users/vasilije/Projects/tiktok/cognee",
-    "cognee[postgres,codegraph,gemini,huggingface,docs,neo4j]>=0.2.0,<1.0.0",
+    "cognee[postgres,codegraph,gemini,huggingface,docs,neo4j]==0.2.1",
     "fastmcp>=2.10.0,<3.0.0",
     "mcp>=1.12.0,<2.0.0",
     "uv>=0.6.3,<1.0.0",
```

### cognee-mcp/src/server.py

````diff
@@ -123,11 +123,34 @@ async def cognee_add_developer_rules(
 @mcp.tool()
 async def cognify(data: str, graph_model_file: str = None, graph_model_name: str = None) -> list:
     """
-    Transform data into a structured knowledge graph in Cognee's memory layer.
-
-    This function launches a background task that processes the provided text/file location and
-    generates a knowledge graph representation. The function returns immediately while
-    the processing continues in the background due to MCP timeout constraints.
+    Transform ingested data into a structured knowledge graph.
+
+    This is the core processing step in Cognee that converts raw text and documents
+    into an intelligent knowledge graph. It analyzes content, extracts entities and
+    relationships, and creates semantic connections for enhanced search and reasoning.
+
+    Prerequisites:
+        - **LLM_API_KEY**: Must be configured (required for entity extraction and graph generation)
+        - **Data Added**: Must have data previously added via `cognee.add()`
+        - **Vector Database**: Must be accessible for embeddings storage
+        - **Graph Database**: Must be accessible for relationship storage
+
+    Input Requirements:
+        - **Content Types**: Works with any text-extractable content including:
+          * Natural language documents
+          * Structured data (CSV, JSON)
+          * Code repositories
+          * Academic papers and technical documentation
+          * Mixed multimedia content (with text extraction)
+
+    Processing Pipeline:
+        1. **Document Classification**: Identifies document types and structures
+        2. **Permission Validation**: Ensures user has processing rights
+        3. **Text Chunking**: Breaks content into semantically meaningful segments
+        4. **Entity Extraction**: Identifies key concepts, people, places, organizations
+        5. **Relationship Detection**: Discovers connections between entities
+        6. **Graph Construction**: Builds semantic knowledge graph with embeddings
+        7. **Content Summarization**: Creates hierarchical summaries for navigation
 
     Parameters
     ----------
@@ -152,11 +175,60 @@ async def cognify(data: str, graph_model_file: str = None, graph_model_name: str
         A list containing a single TextContent object with information about the
         background task launch and how to check its status.
 
+    Next Steps:
+        After successful cognify processing, use search functions to query the knowledge:
+
+        ```python
+        import cognee
+        from cognee import SearchType
+
+        # Process your data into knowledge graph
+        await cognee.cognify()
+
+        # Query for insights using different search types:
+
+        # 1. Natural language completion with graph context
+        insights = await cognee.search(
+            "What are the main themes?",
+            query_type=SearchType.GRAPH_COMPLETION
+        )
+
+        # 2. Get entity relationships and connections
+        relationships = await cognee.search(
+            "connections between concepts",
+            query_type=SearchType.INSIGHTS
+        )
+
+        # 3. Find relevant document chunks
+        chunks = await cognee.search(
+            "specific topic",
+            query_type=SearchType.CHUNKS
+        )
+        ```
+
+    Environment Variables:
+        Required:
+        - LLM_API_KEY: API key for your LLM provider
+
+        Optional:
+        - LLM_PROVIDER, LLM_MODEL, VECTOR_DB_PROVIDER, GRAPH_DATABASE_PROVIDER
+        - LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False)
+        - LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60)
+
     Notes
     -----
     - The function launches a background task and returns immediately
     - The actual cognify process may take significant time depending on text length
     - Use the cognify_status tool to check the progress of the operation
+
+    Raises
+    ------
+    InvalidValueError
+        If LLM_API_KEY is not set
+    ValueError
+        If chunks exceed max token limits (reduce chunk_size)
+    DatabaseNotCreatedError
+        If databases are not properly initialized
     """
 
     async def cognify_task(
````
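Because `cognify` returns immediately and keeps working in the background, a client is expected to launch it and then poll `cognify_status`. A minimal sketch of that flow with the `mcp` Python SDK over stdio; the sample text is illustrative, and `--no-migration` is the flag added in this PR:

```python
import asyncio

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client


async def main():
    # Spawn the Cognee MCP server as a stdio subprocess (assumes `cognee` is on PATH,
    # as in the entrypoint above).
    params = StdioServerParameters(
        command="cognee", args=["--transport", "stdio", "--no-migration"]
    )
    async with stdio_client(params) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            # Launches the background cognify task and returns right away.
            await session.call_tool("cognify", {"data": "Cognee turns documents into graphs."})
            # Poll the companion status tool to track pipeline progress.
            status = await session.call_tool("cognify_status", {})
            print(status.content)


asyncio.run(main())
```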
````diff
@@ -327,17 +399,69 @@ async def codify(repo_path: str) -> list:
 @mcp.tool()
 async def search(search_query: str, search_type: str) -> list:
     """
-    Search the Cognee knowledge graph for information relevant to the query.
-
-    This function executes a search against the Cognee knowledge graph using the
-    specified query and search type. It returns formatted results based on the
-    search type selected.
+    Search and query the knowledge graph for insights, information, and connections.
+
+    This is the final step in the Cognee workflow that retrieves information from the
+    processed knowledge graph. It supports multiple search modes optimized for different
+    use cases - from simple fact retrieval to complex reasoning and code analysis.
+
+    Search Prerequisites:
+        - **LLM_API_KEY**: Required for GRAPH_COMPLETION and RAG_COMPLETION search types
+        - **Data Added**: Must have data previously added via `cognee.add()`
+        - **Knowledge Graph Built**: Must have processed data via `cognee.cognify()`
+        - **Vector Database**: Must be accessible for semantic search functionality
+
+    Search Types & Use Cases:
+        **GRAPH_COMPLETION** (Recommended):
+            Natural language Q&A using full graph context and LLM reasoning.
+            Best for: Complex questions, analysis, summaries, insights.
+            Returns: Conversational AI responses with graph-backed context.
+
+        **RAG_COMPLETION**:
+            Traditional RAG using document chunks without graph structure.
+            Best for: Direct document retrieval, specific fact-finding.
+            Returns: LLM responses based on relevant text chunks.
+
+        **INSIGHTS**:
+            Structured entity relationships and semantic connections.
+            Best for: Understanding concept relationships, knowledge mapping.
+            Returns: Formatted relationship data and entity connections.
+
+        **CHUNKS**:
+            Raw text segments that match the query semantically.
+            Best for: Finding specific passages, citations, exact content.
+            Returns: Ranked list of relevant text chunks with metadata.
+
+        **SUMMARIES**:
+            Pre-generated hierarchical summaries of content.
+            Best for: Quick overviews, document abstracts, topic summaries.
+            Returns: Multi-level summaries from detailed to high-level.
+
+        **CODE**:
+            Code-specific search with syntax and semantic understanding.
+            Best for: Finding functions, classes, implementation patterns.
+            Returns: Structured code information with context and relationships.
+
+        **CYPHER**:
+            Direct graph database queries using Cypher syntax.
+            Best for: Advanced users, specific graph traversals, debugging.
+            Returns: Raw graph query results.
+
+        **FEELING_LUCKY**:
+            Intelligently selects and runs the most appropriate search type.
+            Best for: General-purpose queries or when you're unsure which search type is best.
+            Returns: The results from the automatically selected search type.
 
     Parameters
     ----------
     search_query : str
-        The search query in natural language. This can be a question, instruction, or
-        any text that expresses what information is needed from the knowledge graph.
+        Your question or search query in natural language.
+
+        Examples:
+        - "What are the main themes in this research?"
+        - "How do these concepts relate to each other?"
+        - "Find information about machine learning algorithms"
+        - "What functions handle user authentication?"
 
     search_type : str
         The type of search to perform. Valid options include:
@@ -346,6 +470,9 @@ async def search(search_query: str, search_type: str) -> list:
         - "CODE": Returns code-related knowledge in JSON format
         - "CHUNKS": Returns raw text chunks from the knowledge graph
         - "INSIGHTS": Returns relationships between nodes in readable format
+        - "SUMMARIES": Returns pre-generated hierarchical summaries
+        - "CYPHER": Direct graph database queries
+        - "FEELING_LUCKY": Automatically selects best search type
 
         The search_type is case-insensitive and will be converted to uppercase.
@@ -354,16 +481,45 @@ async def search(search_query: str, search_type: str) -> list:
     list
         A list containing a single TextContent object with the search results.
         The format of the result depends on the search_type:
-        - For CODE: JSON-formatted search results
-        - For GRAPH_COMPLETION/RAG_COMPLETION: A single text completion
-        - For CHUNKS: String representation of the raw chunks
-        - For INSIGHTS: Formatted string showing node relationships
-        - For other types: String representation of the search results
+        - **GRAPH_COMPLETION/RAG_COMPLETION**: Conversational AI response strings
+        - **INSIGHTS**: Formatted relationship descriptions and entity connections
+        - **CHUNKS**: Relevant text passages with source metadata
+        - **SUMMARIES**: Hierarchical summaries from general to specific
+        - **CODE**: Structured code information with context
+        - **FEELING_LUCKY**: Results in format of automatically selected search type
+        - **CYPHER**: Raw graph query results
+
+    Performance & Optimization:
+        - **GRAPH_COMPLETION**: Slower but most intelligent, uses LLM + graph context
+        - **RAG_COMPLETION**: Medium speed, uses LLM + document chunks (no graph traversal)
+        - **INSIGHTS**: Fast, returns structured relationships without LLM processing
+        - **CHUNKS**: Fastest, pure vector similarity search without LLM
+        - **SUMMARIES**: Fast, returns pre-computed summaries
+        - **CODE**: Medium speed, specialized for code understanding
+        - **FEELING_LUCKY**: Variable speed, uses LLM + search type selection intelligently
+
+    Environment Variables:
+        Required for LLM-based search types (GRAPH_COMPLETION, RAG_COMPLETION):
+        - LLM_API_KEY: API key for your LLM provider
+
+        Optional:
+        - LLM_PROVIDER, LLM_MODEL: Configure LLM for search responses
+        - VECTOR_DB_PROVIDER: Must match what was used during cognify
+        - GRAPH_DATABASE_PROVIDER: Must match what was used during cognify
 
     Notes
     -----
     - Different search types produce different output formats
     - The function handles the conversion between Cognee's internal result format and MCP's output format
+
+    Raises
+    ------
+    InvalidValueError
+        If LLM_API_KEY is not set (for LLM-based search types)
+    ValueError
+        If query_text is empty or search parameters are invalid
+    NoDataError
+        If no relevant data found for the search query
     """
 
     async def search_task(search_query: str, search_type: str) -> str:
````
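Once cognify has finished, the same client session can exercise the search modes described above. A hedged sketch (the query is illustrative, and `search_type` is case-insensitive per the docstring):

```python
async def query_graph(session):
    # `session` is an initialized mcp.ClientSession, as in the previous sketch.
    for search_type in ("GRAPH_COMPLETION", "INSIGHTS", "CHUNKS"):
        result = await session.call_tool(
            "search",
            {"search_query": "What are the main themes?", "search_type": search_type},
        )
        # Each call returns a list with a single TextContent object.
        print(search_type, "->", result.content[0].text)
```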
```diff
@@ -782,30 +938,41 @@ async def main():
         help="Log level for the HTTP server (default: info)",
     )
-    args = parser.parse_args()
-
-    # Run Alembic migrations from the main cognee directory where alembic.ini is located
-    print("Running database migrations...")
-    migration_result = subprocess.run(
-        ["python", "-m", "alembic", "upgrade", "head"],
-        capture_output=True,
-        text=True,
-        cwd=Path(__file__).resolve().parent.parent.parent,
-    )
-    if migration_result.returncode != 0:
-        migration_output = migration_result.stderr + migration_result.stdout
-        # Check for the expected UserAlreadyExists error (which is not critical)
-        if (
-            "UserAlreadyExists" in migration_output
-            or "User default_user@example.com already exists" in migration_output
-        ):
-            print("Warning: Default user already exists, continuing startup...")
-        else:
-            print(f"Migration failed with unexpected error: {migration_output}")
-            sys.exit(1)
-    print("Database migrations done.")
+    parser.add_argument(
+        "--no-migration",
+        default=False,
+        action="store_true",
+        help="Argument stops database migration from being attempted",
+    )
+
+    args = parser.parse_args()
+
+    mcp.settings.host = args.host
+    mcp.settings.port = args.port
+
+    if not args.no_migration:
+        # Run Alembic migrations from the main cognee directory where alembic.ini is located
+        logger.info("Running database migrations...")
+        migration_result = subprocess.run(
+            ["python", "-m", "alembic", "upgrade", "head"],
+            capture_output=True,
+            text=True,
+            cwd=Path(__file__).resolve().parent.parent.parent,
+        )
+        if migration_result.returncode != 0:
+            migration_output = migration_result.stderr + migration_result.stdout
+            # Check for the expected UserAlreadyExists error (which is not critical)
+            if (
+                "UserAlreadyExists" in migration_output
+                or "User default_user@example.com already exists" in migration_output
+            ):
+                logger.warning("Warning: Default user already exists, continuing startup...")
+            else:
+                logger.error(f"Migration failed with unexpected error: {migration_output}")
+                sys.exit(1)
+        logger.info("Database migrations done.")
 
     logger.info(f"Starting MCP server with transport: {args.transport}")
     if args.transport == "stdio":
```
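When the server starts with `--no-migration`, the Alembic upgrade still has to run once before first use, for example from the main cognee service. A sketch of running it by hand, mirroring the subprocess call above (the repository path is a placeholder):

```python
import subprocess

# Run from the directory that contains alembic.ini; the path below is a placeholder.
subprocess.run(
    ["python", "-m", "alembic", "upgrade", "head"],
    cwd="/path/to/cognee",
    check=True,  # raise on failure instead of inspecting returncode manually
)
```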

### cognee-mcp/uv.lock (generated)

File diff suppressed because it is too large (6231 changed lines).

### docker-compose.yml

```diff
@@ -1,34 +1,3 @@
-# Cognee Docker Compose Configuration
-#
-# This docker-compose file includes the main Cognee API server and optional services:
-#
-# BASIC USAGE:
-#   Start main Cognee API server:
-#     docker-compose up cognee
-#
-# MCP SERVER USAGE:
-#   The MCP (Model Context Protocol) server enables IDE integration with tools like Cursor, Claude Desktop, etc.
-#
-#   Start with MCP server (stdio transport - recommended):
-#     docker-compose --profile mcp up
-#
-#   Start with MCP server (SSE transport for HTTP access):
-#     TRANSPORT_MODE=sse docker-compose --profile mcp up
-#
-# PORT CONFIGURATION:
-#   - Main Cognee API: http://localhost:8000
-#   - MCP Server (SSE mode): http://localhost:8001
-#   - Frontend (UI): http://localhost:3000 (with --profile ui)
-#
-# DEBUGGING:
-#   Enable debug mode by setting DEBUG=true in your .env file or:
-#     DEBUG=true docker-compose --profile mcp up
-#
-#   This exposes debugger ports:
-#   - Main API debugger: localhost:5678
-#   - MCP Server debugger: localhost:5679
 services:
   cognee:
     container_name: cognee
@@ -69,15 +38,13 @@ services:
       dockerfile: cognee-mcp/Dockerfile
     volumes:
       - .env:/app/.env
-      # Optional: Mount local data for ingestion
-      - ./examples/data:/app/data:ro
     environment:
       - DEBUG=false # Change to true if debugging
       - ENVIRONMENT=local
       - LOG_LEVEL=INFO
-      - TRANSPORT_MODE=stdio # Use 'sse' for Server-Sent Events over HTTP
+      - TRANSPORT_MODE=sse
       # Database configuration - should match the main cognee service
-      - DB_TYPE=${DB_TYPE:-sqlite}
+      - DB_PROVIDER=${DB_PROVIDER:-sqlite}
       - DB_HOST=${DB_HOST:-host.docker.internal}
       - DB_PORT=${DB_PORT:-5432}
       - DB_NAME=${DB_NAME:-cognee_db}
@@ -89,11 +56,8 @@ services:
     extra_hosts:
       - "host.docker.internal:host-gateway"
     ports:
-      # Only expose ports when using SSE transport
-      - "8001:8000" # MCP SSE port (mapped to avoid conflict with main API)
-      - "5679:5678" # MCP debugger port (different from main service)
+      - "8000:8000" # MCP port
+      - "5678:5678" # MCP debugger port
-    depends_on:
-      - cognee
     deploy:
       resources:
         limits:
```
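With `TRANSPORT_MODE=sse` and the `8000:8000` mapping, the MCP container is reachable over HTTP. A minimal connectivity check using the `mcp` SDK's SSE client, assuming the server exposes the SDK's conventional `/sse` endpoint:

```python
import asyncio

from mcp import ClientSession
from mcp.client.sse import sse_client


async def main():
    # /sse is the SDK's default SSE endpoint; adjust if the server mounts it elsewhere.
    async with sse_client("http://localhost:8000/sse") as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            tools = await session.list_tools()
            print("Available tools:", [tool.name for tool in tools.tools])


asyncio.run(main())
```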