Cognee mcp fixes main (#1196)

## Description Fix Cognee mcp issues ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
2025-08-04 15:48:04 +02:00 · 2025-08-04 15:48:04 +02:00 · b46833b476
commit b46833b476
parent 5fcc8b7813
6 changed files with 3334 additions and 3201 deletions
--- a/cognee-mcp/Dockerfile
+++ b/cognee-mcp/Dockerfile
@ -52,6 +52,7 @@ RUN apt-get update && apt-get install -y \
 WORKDIR /app

 # Copy the virtual environment from the uv stage
+COPY --from=uv /usr/local /usr/local
 COPY --from=uv /app /app

 RUN chmod +x /app/entrypoint.sh
--- a/cognee-mcp/entrypoint.sh
+++ b/cognee-mcp/entrypoint.sh
@ -48,27 +48,27 @@ if [ "$ENVIRONMENT" = "dev" ] || [ "$ENVIRONMENT" = "local" ]; then
    if [ "$DEBUG" = "true" ]; then
        echo "Waiting for the debugger to attach..."
        if [ "$TRANSPORT_MODE" = "sse" ]; then
-            exec python -m debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m cognee --transport sse
+            exec python -m debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m cognee --transport sse --host 0.0.0.0 --port $HTTP_PORT --no-migration
        elif [ "$TRANSPORT_MODE" = "http" ]; then
-            exec python -m debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m cognee --transport http --host 0.0.0.0 --port $HTTP_PORT
+            exec python -m debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m cognee --transport http --host 0.0.0.0 --port $HTTP_PORT --no-migration
        else
-            exec python -m debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m cognee --transport stdio
+            exec python -m debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m cognee --transport stdio --no-migration
        fi
    else
        if [ "$TRANSPORT_MODE" = "sse" ]; then
-            exec cognee --transport sse
+            exec cognee --transport sse --host 0.0.0.0 --port $HTTP_PORT --no-migration
        elif [ "$TRANSPORT_MODE" = "http" ]; then
-            exec cognee --transport http --host 0.0.0.0 --port $HTTP_PORT
+            exec cognee --transport http --host 0.0.0.0 --port $HTTP_PORT --no-migration
        else
-            exec cognee --transport stdio
+            exec cognee --transport stdio --no-migration
        fi
    fi
 else
    if [ "$TRANSPORT_MODE" = "sse" ]; then
-        exec cognee --transport sse
+        exec cognee --transport sse --host 0.0.0.0 --port $HTTP_PORT --no-migration
    elif [ "$TRANSPORT_MODE" = "http" ]; then
-        exec cognee --transport http --host 0.0.0.0 --port $HTTP_PORT
+        exec cognee --transport http --host 0.0.0.0 --port $HTTP_PORT --no-migration
    else
-        exec cognee --transport stdio
+        exec cognee --transport stdio --no-migration
    fi
 fi
--- a/cognee-mcp/pyproject.toml
+++ b/cognee-mcp/pyproject.toml
@ -8,7 +8,7 @@ requires-python = ">=3.10"
 dependencies = [
    # For local cognee repo usage, remove the comment below and add the absolute path to cognee. Then run `uv sync --reinstall` in the mcp folder on local cognee changes.
 #    "cognee[postgres,codegraph,gemini,huggingface,docs,neo4j] @ file:/Users/vasilije/Projects/tiktok/cognee",
-    "cognee[postgres,codegraph,gemini,huggingface,docs,neo4j]>=0.2.0,<1.0.0",
+    "cognee[postgres,codegraph,gemini,huggingface,docs,neo4j]==0.2.1",
    "fastmcp>=2.10.0,<3.0.0",
    "mcp>=1.12.0,<2.0.0",
    "uv>=0.6.3,<1.0.0",
--- a/cognee-mcp/src/server.py
+++ b/cognee-mcp/src/server.py
@ -123,11 +123,34 @@ async def cognee_add_developer_rules(
@mcp.tool()
 async def cognify(data: str, graph_model_file: str = None, graph_model_name: str = None) -> list:
    """
-    Transform data into a structured knowledge graph in Cognee's memory layer.
+    Transform ingested data into a structured knowledge graph.

-    This function launches a background task that processes the provided text/file location and
-    generates a knowledge graph representation. The function returns immediately while
-    the processing continues in the background due to MCP timeout constraints.
+    This is the core processing step in Cognee that converts raw text and documents
+    into an intelligent knowledge graph. It analyzes content, extracts entities and
+    relationships, and creates semantic connections for enhanced search and reasoning.
+
+    Prerequisites:
+        - **LLM_API_KEY**: Must be configured (required for entity extraction and graph generation)
+        - **Data Added**: Must have data previously added via `cognee.add()`
+        - **Vector Database**: Must be accessible for embeddings storage
+        - **Graph Database**: Must be accessible for relationship storage
+
+    Input Requirements:
+        - **Content Types**: Works with any text-extractable content including:
+            * Natural language documents
+            * Structured data (CSV, JSON)
+            * Code repositories
+            * Academic papers and technical documentation
+            * Mixed multimedia content (with text extraction)
+
+    Processing Pipeline:
+        1. **Document Classification**: Identifies document types and structures
+        2. **Permission Validation**: Ensures user has processing rights
+        3. **Text Chunking**: Breaks content into semantically meaningful segments
+        4. **Entity Extraction**: Identifies key concepts, people, places, organizations
+        5. **Relationship Detection**: Discovers connections between entities
+        6. **Graph Construction**: Builds semantic knowledge graph with embeddings
+        7. **Content Summarization**: Creates hierarchical summaries for navigation

    Parameters
    ----------
@ -152,11 +175,60 @@ async def cognify(data: str, graph_model_file: str = None, graph_model_name: str
        A list containing a single TextContent object with information about the
        background task launch and how to check its status.

+    Next Steps:
+        After successful cognify processing, use search functions to query the knowledge:
+
+        ```python
+        import cognee
+        from cognee import SearchType
+
+        # Process your data into knowledge graph
+        await cognee.cognify()
+
+        # Query for insights using different search types:
+
+        # 1. Natural language completion with graph context
+        insights = await cognee.search(
+            "What are the main themes?",
+            query_type=SearchType.GRAPH_COMPLETION
+        )
+
+        # 2. Get entity relationships and connections
+        relationships = await cognee.search(
+            "connections between concepts",
+            query_type=SearchType.INSIGHTS
+        )
+
+        # 3. Find relevant document chunks
+        chunks = await cognee.search(
+            "specific topic",
+            query_type=SearchType.CHUNKS
+        )
+        ```
+
+    Environment Variables:
+        Required:
+        - LLM_API_KEY: API key for your LLM provider
+
+        Optional:
+        - LLM_PROVIDER, LLM_MODEL, VECTOR_DB_PROVIDER, GRAPH_DATABASE_PROVIDER
+        - LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False)
+        - LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60)
+
    Notes
    -----
    - The function launches a background task and returns immediately
    - The actual cognify process may take significant time depending on text length
    - Use the cognify_status tool to check the progress of the operation
+
+    Raises
+    ------
+    InvalidValueError
+        If LLM_API_KEY is not set
+    ValueError
+        If chunks exceed max token limits (reduce chunk_size)
+    DatabaseNotCreatedError
+        If databases are not properly initialized
    """

    async def cognify_task(
@ -327,17 +399,69 @@ async def codify(repo_path: str) -> list:
@mcp.tool()
 async def search(search_query: str, search_type: str) -> list:
    """
-    Search the Cognee knowledge graph for information relevant to the query.
+    Search and query the knowledge graph for insights, information, and connections.

-    This function executes a search against the Cognee knowledge graph using the
-    specified query and search type. It returns formatted results based on the
-    search type selected.
+    This is the final step in the Cognee workflow that retrieves information from the
+    processed knowledge graph. It supports multiple search modes optimized for different
+    use cases - from simple fact retrieval to complex reasoning and code analysis.
+
+    Search Prerequisites:
+        - **LLM_API_KEY**: Required for GRAPH_COMPLETION and RAG_COMPLETION search types
+        - **Data Added**: Must have data previously added via `cognee.add()`
+        - **Knowledge Graph Built**: Must have processed data via `cognee.cognify()`
+        - **Vector Database**: Must be accessible for semantic search functionality
+
+    Search Types & Use Cases:
+
+        **GRAPH_COMPLETION** (Recommended):
+            Natural language Q&A using full graph context and LLM reasoning.
+            Best for: Complex questions, analysis, summaries, insights.
+            Returns: Conversational AI responses with graph-backed context.
+
+        **RAG_COMPLETION**:
+            Traditional RAG using document chunks without graph structure.
+            Best for: Direct document retrieval, specific fact-finding.
+            Returns: LLM responses based on relevant text chunks.
+
+        **INSIGHTS**:
+            Structured entity relationships and semantic connections.
+            Best for: Understanding concept relationships, knowledge mapping.
+            Returns: Formatted relationship data and entity connections.
+
+        **CHUNKS**:
+            Raw text segments that match the query semantically.
+            Best for: Finding specific passages, citations, exact content.
+            Returns: Ranked list of relevant text chunks with metadata.
+
+        **SUMMARIES**:
+            Pre-generated hierarchical summaries of content.
+            Best for: Quick overviews, document abstracts, topic summaries.
+            Returns: Multi-level summaries from detailed to high-level.
+
+        **CODE**:
+            Code-specific search with syntax and semantic understanding.
+            Best for: Finding functions, classes, implementation patterns.
+            Returns: Structured code information with context and relationships.
+
+        **CYPHER**:
+            Direct graph database queries using Cypher syntax.
+            Best for: Advanced users, specific graph traversals, debugging.
+            Returns: Raw graph query results.
+
+        **FEELING_LUCKY**:
+            Intelligently selects and runs the most appropriate search type.
+            Best for: General-purpose queries or when you're unsure which search type is best.
+            Returns: The results from the automatically selected search type.

    Parameters
    ----------
    search_query : str
-        The search query in natural language. This can be a question, instruction, or
-        any text that expresses what information is needed from the knowledge graph.
+        Your question or search query in natural language.
+        Examples:
+        - "What are the main themes in this research?"
+        - "How do these concepts relate to each other?"
+        - "Find information about machine learning algorithms"
+        - "What functions handle user authentication?"

    search_type : str
        The type of search to perform. Valid options include:
@ -346,6 +470,9 @@ async def search(search_query: str, search_type: str) -> list:
        - "CODE": Returns code-related knowledge in JSON format
        - "CHUNKS": Returns raw text chunks from the knowledge graph
        - "INSIGHTS": Returns relationships between nodes in readable format
+        - "SUMMARIES": Returns pre-generated hierarchical summaries
+        - "CYPHER": Direct graph database queries
+        - "FEELING_LUCKY": Automatically selects best search type

        The search_type is case-insensitive and will be converted to uppercase.

@ -354,16 +481,45 @@ async def search(search_query: str, search_type: str) -> list:
    list
        A list containing a single TextContent object with the search results.
        The format of the result depends on the search_type:
-        - For CODE: JSON-formatted search results
-        - For GRAPH_COMPLETION/RAG_COMPLETION: A single text completion
-        - For CHUNKS: String representation of the raw chunks
-        - For INSIGHTS: Formatted string showing node relationships
-        - For other types: String representation of the search results
+        - **GRAPH_COMPLETION/RAG_COMPLETION**: Conversational AI response strings
+        - **INSIGHTS**: Formatted relationship descriptions and entity connections
+        - **CHUNKS**: Relevant text passages with source metadata
+        - **SUMMARIES**: Hierarchical summaries from general to specific
+        - **CODE**: Structured code information with context
+        - **FEELING_LUCKY**: Results in format of automatically selected search type
+        - **CYPHER**: Raw graph query results
+
+    Performance & Optimization:
+        - **GRAPH_COMPLETION**: Slower but most intelligent, uses LLM + graph context
+        - **RAG_COMPLETION**: Medium speed, uses LLM + document chunks (no graph traversal)
+        - **INSIGHTS**: Fast, returns structured relationships without LLM processing
+        - **CHUNKS**: Fastest, pure vector similarity search without LLM
+        - **SUMMARIES**: Fast, returns pre-computed summaries
+        - **CODE**: Medium speed, specialized for code understanding
+        - **FEELING_LUCKY**: Variable speed, uses an LLM to select the most appropriate search type and then runs it
+
+    Environment Variables:
+        Required for LLM-based search types (GRAPH_COMPLETION, RAG_COMPLETION):
+        - LLM_API_KEY: API key for your LLM provider
+
+        Optional:
+        - LLM_PROVIDER, LLM_MODEL: Configure LLM for search responses
+        - VECTOR_DB_PROVIDER: Must match what was used during cognify
+        - GRAPH_DATABASE_PROVIDER: Must match what was used during cognify

    Notes
    -----
    - Different search types produce different output formats
    - The function handles the conversion between Cognee's internal result format and MCP's output format
+
+    Raises
+    ------
+    InvalidValueError
+        If LLM_API_KEY is not set (for LLM-based search types)
+    ValueError
+        If search_query is empty or search parameters are invalid
+    NoDataError
+        If no relevant data found for the search query
    """

    async def search_task(search_query: str, search_type: str) -> str:
@ -782,30 +938,41 @@ async def main():
        help="Log level for the HTTP server (default: info)",
    )

-    args = parser.parse_args()
-
-    # Run Alembic migrations from the main cognee directory where alembic.ini is located
-    print("Running database migrations...")
-    migration_result = subprocess.run(
-        ["python", "-m", "alembic", "upgrade", "head"],
-        capture_output=True,
-        text=True,
-        cwd=Path(__file__).resolve().parent.parent.parent,
+    parser.add_argument(
+        "--no-migration",
+        default=False,
+        action="store_true",
+        help="Argument stops database migration from being attempted",
    )

-    if migration_result.returncode != 0:
-        migration_output = migration_result.stderr + migration_result.stdout
-        # Check for the expected UserAlreadyExists error (which is not critical)
-        if (
-            "UserAlreadyExists" in migration_output
-            or "User default_user@example.com already exists" in migration_output
-        ):
-            print("Warning: Default user already exists, continuing startup...")
-        else:
-            print(f"Migration failed with unexpected error: {migration_output}")
-            sys.exit(1)
+    args = parser.parse_args()

-    print("Database migrations done.")
+    mcp.settings.host = args.host
+    mcp.settings.port = args.port
+
+    if not args.no_migration:
+        # Run Alembic migrations from the main cognee directory where alembic.ini is located
+        logger.info("Running database migrations...")
+        migration_result = subprocess.run(
+            ["python", "-m", "alembic", "upgrade", "head"],
+            capture_output=True,
+            text=True,
+            cwd=Path(__file__).resolve().parent.parent.parent,
+        )
+
+        if migration_result.returncode != 0:
+            migration_output = migration_result.stderr + migration_result.stdout
+            # Check for the expected UserAlreadyExists error (which is not critical)
+            if (
+                "UserAlreadyExists" in migration_output
+                or "User default_user@example.com already exists" in migration_output
+            ):
+                logger.warning("Warning: Default user already exists, continuing startup...")
+            else:
+                logger.error(f"Migration failed with unexpected error: {migration_output}")
+                sys.exit(1)
+
+        logger.info("Database migrations done.")

    logger.info(f"Starting MCP server with transport: {args.transport}")
    if args.transport == "stdio":
--- a/cognee-mcp/uv.lock
+++ b/cognee-mcp/uv.lock
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -1,34 +1,3 @@
-# Cognee Docker Compose Configuration
-#
-# This docker-compose file includes the main Cognee API server and optional services:
-#
-# BASIC USAGE:
-# Start main Cognee API server:
-#   docker-compose up cognee
-#
-# MCP SERVER USAGE:
-# The MCP (Model Context Protocol) server enables IDE integration with tools like Cursor, Claude Desktop, etc.
-# 
-# Start with MCP server (stdio transport - recommended):
-#   docker-compose --profile mcp up
-#
-# Start with MCP server (SSE transport for HTTP access):
-#   TRANSPORT_MODE=sse docker-compose --profile mcp up
-#
-# PORT CONFIGURATION:
-# - Main Cognee API: http://localhost:8000
-# - MCP Server (SSE mode): http://localhost:8001 
-# - Frontend (UI): http://localhost:3000 (with --profile ui)
-
-#
-# DEBUGGING:
-# Enable debug mode by setting DEBUG=true in your .env file or:
-#   DEBUG=true docker-compose --profile mcp up
-#
-# This exposes debugger ports:
-# - Main API debugger: localhost:5678
-# - MCP Server debugger: localhost:5679
-
 services:
  cognee:
    container_name: cognee
@ -69,15 +38,13 @@ services:
      dockerfile: cognee-mcp/Dockerfile
    volumes:
      - .env:/app/.env
-      # Optional: Mount local data for ingestion
-      - ./examples/data:/app/data:ro
    environment:
      - DEBUG=false # Change to true if debugging
      - ENVIRONMENT=local
      - LOG_LEVEL=INFO
-      - TRANSPORT_MODE=stdio # Use 'sse' for Server-Sent Events over HTTP
+      - TRANSPORT_MODE=sse
      # Database configuration - should match the main cognee service
-      - DB_TYPE=${DB_TYPE:-sqlite}
+      - DB_PROVIDER=${DB_PROVIDER:-sqlite}
      - DB_HOST=${DB_HOST:-host.docker.internal}
      - DB_PORT=${DB_PORT:-5432}
      - DB_NAME=${DB_NAME:-cognee_db}
@ -89,11 +56,8 @@ services:
    extra_hosts:
      - "host.docker.internal:host-gateway"
    ports:
-      # Only expose ports when using SSE transport
-      - "8001:8000" # MCP SSE port (mapped to avoid conflict with main API)
-      - "5679:5678" # MCP debugger port (different from main service)
-    depends_on:
-      - cognee
+      - "8000:8000" # MCP port
+      - "5678:5678" # MCP debugger port
    deploy:
      resources:
        limits: