# Cognee MCP fixes main (#1196)


## Description
Fix Cognee MCP issues: add a `--no-migration` flag, pass `--host`/`--port` explicitly in the entrypoint, pin cognee to 0.2.1, expand the `cognify` and `search` tool docstrings, and simplify the MCP service in docker-compose.

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
Igor Ilic, 2025-08-04 15:48:04 +02:00, committed via GitHub
parent 5fcc8b7813, commit b46833b476
6 changed files with 3334 additions and 3201 deletions

### cognee-mcp/Dockerfile

```diff
@@ -52,6 +52,7 @@ RUN apt-get update && apt-get install -y \
 WORKDIR /app
 # Copy the virtual environment from the uv stage
+COPY --from=uv /usr/local /usr/local
 COPY --from=uv /app /app
 RUN chmod +x /app/entrypoint.sh
```

### cognee-mcp/entrypoint.sh

```diff
@@ -48,27 +48,27 @@ if [ "$ENVIRONMENT" = "dev" ] || [ "$ENVIRONMENT" = "local" ]; then
     if [ "$DEBUG" = "true" ]; then
         echo "Waiting for the debugger to attach..."
         if [ "$TRANSPORT_MODE" = "sse" ]; then
-            exec python -m debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m cognee --transport sse
+            exec python -m debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m cognee --transport sse --host 0.0.0.0 --port $HTTP_PORT --no-migration
         elif [ "$TRANSPORT_MODE" = "http" ]; then
-            exec python -m debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m cognee --transport http --host 0.0.0.0 --port $HTTP_PORT
+            exec python -m debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m cognee --transport http --host 0.0.0.0 --port $HTTP_PORT --no-migration
         else
-            exec python -m debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m cognee --transport stdio
+            exec python -m debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m cognee --transport stdio --no-migration
         fi
     else
         if [ "$TRANSPORT_MODE" = "sse" ]; then
-            exec cognee --transport sse
+            exec cognee --transport sse --host 0.0.0.0 --port $HTTP_PORT --no-migration
         elif [ "$TRANSPORT_MODE" = "http" ]; then
-            exec cognee --transport http --host 0.0.0.0 --port $HTTP_PORT
+            exec cognee --transport http --host 0.0.0.0 --port $HTTP_PORT --no-migration
         else
-            exec cognee --transport stdio
+            exec cognee --transport stdio --no-migration
         fi
     fi
 else
     if [ "$TRANSPORT_MODE" = "sse" ]; then
-        exec cognee --transport sse
+        exec cognee --transport sse --host 0.0.0.0 --port $HTTP_PORT --no-migration
     elif [ "$TRANSPORT_MODE" = "http" ]; then
-        exec cognee --transport http --host 0.0.0.0 --port $HTTP_PORT
+        exec cognee --transport http --host 0.0.0.0 --port $HTTP_PORT --no-migration
     else
-        exec cognee --transport stdio
+        exec cognee --transport stdio --no-migration
     fi
 fi
```
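For context on the debug branch: `python -m debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT` holds the server until an IDE attaches. A minimal in-process sketch of the same behavior, using debugpy's documented `listen`/`wait_for_client` entry points (the port here is illustrative; the container reads it from `$DEBUG_PORT`):

```python
# Rough in-process equivalent of the entrypoint's debug branch (sketch).
import debugpy

debugpy.listen(("0.0.0.0", 5678))  # port is illustrative; the entrypoint uses $DEBUG_PORT
print("Waiting for the debugger to attach...")
debugpy.wait_for_client()  # blocks until an IDE attaches, like --wait-for-client
```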

### cognee-mcp/pyproject.toml

```diff
@@ -8,7 +8,7 @@ requires-python = ">=3.10"
 dependencies = [
     # For local cognee repo usage remove comment bellow and add absolute path to cognee. Then run `uv sync --reinstall` in the mcp folder on local cognee changes.
     # "cognee[postgres,codegraph,gemini,huggingface,docs,neo4j] @ file:/Users/vasilije/Projects/tiktok/cognee",
-    "cognee[postgres,codegraph,gemini,huggingface,docs,neo4j]>=0.2.0,<1.0.0",
+    "cognee[postgres,codegraph,gemini,huggingface,docs,neo4j]==0.2.1",
     "fastmcp>=2.10.0,<3.0.0",
     "mcp>=1.12.0,<2.0.0",
     "uv>=0.6.3,<1.0.0",
```

### cognee-mcp/src/server.py

````diff
@@ -123,11 +123,34 @@ async def cognee_add_developer_rules(
 @mcp.tool()
 async def cognify(data: str, graph_model_file: str = None, graph_model_name: str = None) -> list:
     """
-    Transform data into a structured knowledge graph in Cognee's memory layer.
-
-    This function launches a background task that processes the provided text/file location and
-    generates a knowledge graph representation. The function returns immediately while
-    the processing continues in the background due to MCP timeout constraints.
+    Transform ingested data into a structured knowledge graph.
+
+    This is the core processing step in Cognee that converts raw text and documents
+    into an intelligent knowledge graph. It analyzes content, extracts entities and
+    relationships, and creates semantic connections for enhanced search and reasoning.
+
+    Prerequisites:
+        - **LLM_API_KEY**: Must be configured (required for entity extraction and graph generation)
+        - **Data Added**: Must have data previously added via `cognee.add()`
+        - **Vector Database**: Must be accessible for embeddings storage
+        - **Graph Database**: Must be accessible for relationship storage
+
+    Input Requirements:
+        - **Content Types**: Works with any text-extractable content including:
+          * Natural language documents
+          * Structured data (CSV, JSON)
+          * Code repositories
+          * Academic papers and technical documentation
+          * Mixed multimedia content (with text extraction)
+
+    Processing Pipeline:
+        1. **Document Classification**: Identifies document types and structures
+        2. **Permission Validation**: Ensures user has processing rights
+        3. **Text Chunking**: Breaks content into semantically meaningful segments
+        4. **Entity Extraction**: Identifies key concepts, people, places, organizations
+        5. **Relationship Detection**: Discovers connections between entities
+        6. **Graph Construction**: Builds semantic knowledge graph with embeddings
+        7. **Content Summarization**: Creates hierarchical summaries for navigation
 
     Parameters
     ----------
@@ -152,11 +175,60 @@ async def cognify(data: str, graph_model_file: str = None, graph_model_name: str
         A list containing a single TextContent object with information about the
         background task launch and how to check its status.
 
+    Next Steps:
+        After successful cognify processing, use search functions to query the knowledge:
+
+        ```python
+        import cognee
+        from cognee import SearchType
+
+        # Process your data into knowledge graph
+        await cognee.cognify()
+
+        # Query for insights using different search types:
+
+        # 1. Natural language completion with graph context
+        insights = await cognee.search(
+            "What are the main themes?",
+            query_type=SearchType.GRAPH_COMPLETION
+        )
+
+        # 2. Get entity relationships and connections
+        relationships = await cognee.search(
+            "connections between concepts",
+            query_type=SearchType.INSIGHTS
+        )
+
+        # 3. Find relevant document chunks
+        chunks = await cognee.search(
+            "specific topic",
+            query_type=SearchType.CHUNKS
+        )
+        ```
+
+    Environment Variables:
+        Required:
+        - LLM_API_KEY: API key for your LLM provider
+
+        Optional:
+        - LLM_PROVIDER, LLM_MODEL, VECTOR_DB_PROVIDER, GRAPH_DATABASE_PROVIDER
+        - LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False)
+        - LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60)
+
     Notes
     -----
     - The function launches a background task and returns immediately
     - The actual cognify process may take significant time depending on text length
     - Use the cognify_status tool to check the progress of the operation
+
+    Raises
+    ------
+    InvalidValueError
+        If LLM_API_KEY is not set
+    ValueError
+        If chunks exceed max token limits (reduce chunk_size)
+    DatabaseNotCreatedError
+        If databases are not properly initialized
     """
 
     async def cognify_task(
````
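Because `cognify` returns immediately and keeps working in the background, a client is expected to launch it and then poll `cognify_status`. A minimal sketch of that flow with the `mcp` Python SDK over stdio; the sample text is illustrative, and `--no-migration` is the flag added in this PR:

```python
import asyncio

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client


async def main():
    # Spawn the Cognee MCP server as a stdio subprocess (assumes `cognee` is on PATH,
    # as in the entrypoint above).
    params = StdioServerParameters(
        command="cognee", args=["--transport", "stdio", "--no-migration"]
    )
    async with stdio_client(params) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            # Launches the background cognify task and returns right away.
            await session.call_tool("cognify", {"data": "Cognee turns documents into graphs."})
            # Poll the companion status tool to track pipeline progress.
            status = await session.call_tool("cognify_status", {})
            print(status.content)


asyncio.run(main())
```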
````diff
@@ -327,17 +399,69 @@ async def codify(repo_path: str) -> list:
 @mcp.tool()
 async def search(search_query: str, search_type: str) -> list:
     """
-    Search the Cognee knowledge graph for information relevant to the query.
-
-    This function executes a search against the Cognee knowledge graph using the
-    specified query and search type. It returns formatted results based on the
-    search type selected.
+    Search and query the knowledge graph for insights, information, and connections.
+
+    This is the final step in the Cognee workflow that retrieves information from the
+    processed knowledge graph. It supports multiple search modes optimized for different
+    use cases - from simple fact retrieval to complex reasoning and code analysis.
+
+    Search Prerequisites:
+        - **LLM_API_KEY**: Required for GRAPH_COMPLETION and RAG_COMPLETION search types
+        - **Data Added**: Must have data previously added via `cognee.add()`
+        - **Knowledge Graph Built**: Must have processed data via `cognee.cognify()`
+        - **Vector Database**: Must be accessible for semantic search functionality
+
+    Search Types & Use Cases:
+        **GRAPH_COMPLETION** (Recommended):
+            Natural language Q&A using full graph context and LLM reasoning.
+            Best for: Complex questions, analysis, summaries, insights.
+            Returns: Conversational AI responses with graph-backed context.
+
+        **RAG_COMPLETION**:
+            Traditional RAG using document chunks without graph structure.
+            Best for: Direct document retrieval, specific fact-finding.
+            Returns: LLM responses based on relevant text chunks.
+
+        **INSIGHTS**:
+            Structured entity relationships and semantic connections.
+            Best for: Understanding concept relationships, knowledge mapping.
+            Returns: Formatted relationship data and entity connections.
+
+        **CHUNKS**:
+            Raw text segments that match the query semantically.
+            Best for: Finding specific passages, citations, exact content.
+            Returns: Ranked list of relevant text chunks with metadata.
+
+        **SUMMARIES**:
+            Pre-generated hierarchical summaries of content.
+            Best for: Quick overviews, document abstracts, topic summaries.
+            Returns: Multi-level summaries from detailed to high-level.
+
+        **CODE**:
+            Code-specific search with syntax and semantic understanding.
+            Best for: Finding functions, classes, implementation patterns.
+            Returns: Structured code information with context and relationships.
+
+        **CYPHER**:
+            Direct graph database queries using Cypher syntax.
+            Best for: Advanced users, specific graph traversals, debugging.
+            Returns: Raw graph query results.
+
+        **FEELING_LUCKY**:
+            Intelligently selects and runs the most appropriate search type.
+            Best for: General-purpose queries or when you're unsure which search type is best.
+            Returns: The results from the automatically selected search type.
 
     Parameters
     ----------
     search_query : str
-        The search query in natural language. This can be a question, instruction, or
-        any text that expresses what information is needed from the knowledge graph.
+        Your question or search query in natural language.
+
+        Examples:
+        - "What are the main themes in this research?"
+        - "How do these concepts relate to each other?"
+        - "Find information about machine learning algorithms"
+        - "What functions handle user authentication?"
 
     search_type : str
         The type of search to perform. Valid options include:
@@ -346,6 +470,9 @@ async def search(search_query: str, search_type: str) -> list:
         - "CODE": Returns code-related knowledge in JSON format
         - "CHUNKS": Returns raw text chunks from the knowledge graph
         - "INSIGHTS": Returns relationships between nodes in readable format
+        - "SUMMARIES": Returns pre-generated hierarchical summaries
+        - "CYPHER": Direct graph database queries
+        - "FEELING_LUCKY": Automatically selects best search type
 
         The search_type is case-insensitive and will be converted to uppercase.
@@ -354,16 +481,45 @@ async def search(search_query: str, search_type: str) -> list:
     list
         A list containing a single TextContent object with the search results.
         The format of the result depends on the search_type:
-        - For CODE: JSON-formatted search results
-        - For GRAPH_COMPLETION/RAG_COMPLETION: A single text completion
-        - For CHUNKS: String representation of the raw chunks
-        - For INSIGHTS: Formatted string showing node relationships
-        - For other types: String representation of the search results
+        - **GRAPH_COMPLETION/RAG_COMPLETION**: Conversational AI response strings
+        - **INSIGHTS**: Formatted relationship descriptions and entity connections
+        - **CHUNKS**: Relevant text passages with source metadata
+        - **SUMMARIES**: Hierarchical summaries from general to specific
+        - **CODE**: Structured code information with context
+        - **FEELING_LUCKY**: Results in format of automatically selected search type
+        - **CYPHER**: Raw graph query results
+
+    Performance & Optimization:
+        - **GRAPH_COMPLETION**: Slower but most intelligent, uses LLM + graph context
+        - **RAG_COMPLETION**: Medium speed, uses LLM + document chunks (no graph traversal)
+        - **INSIGHTS**: Fast, returns structured relationships without LLM processing
+        - **CHUNKS**: Fastest, pure vector similarity search without LLM
+        - **SUMMARIES**: Fast, returns pre-computed summaries
+        - **CODE**: Medium speed, specialized for code understanding
+        - **FEELING_LUCKY**: Variable speed, uses LLM + search type selection intelligently
+
+    Environment Variables:
+        Required for LLM-based search types (GRAPH_COMPLETION, RAG_COMPLETION):
+        - LLM_API_KEY: API key for your LLM provider
+
+        Optional:
+        - LLM_PROVIDER, LLM_MODEL: Configure LLM for search responses
+        - VECTOR_DB_PROVIDER: Must match what was used during cognify
+        - GRAPH_DATABASE_PROVIDER: Must match what was used during cognify
 
     Notes
     -----
     - Different search types produce different output formats
     - The function handles the conversion between Cognee's internal result format and MCP's output format
+
+    Raises
+    ------
+    InvalidValueError
+        If LLM_API_KEY is not set (for LLM-based search types)
+    ValueError
+        If query_text is empty or search parameters are invalid
+    NoDataError
+        If no relevant data found for the search query
     """
 
     async def search_task(search_query: str, search_type: str) -> str:
````
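Once cognify has finished, the same client session can exercise the search modes described above. A hedged sketch (the query is illustrative, and `search_type` is case-insensitive per the docstring):

```python
async def query_graph(session):
    # `session` is an initialized mcp.ClientSession, as in the previous sketch.
    for search_type in ("GRAPH_COMPLETION", "INSIGHTS", "CHUNKS"):
        result = await session.call_tool(
            "search",
            {"search_query": "What are the main themes?", "search_type": search_type},
        )
        # Each call returns a list with a single TextContent object.
        print(search_type, "->", result.content[0].text)
```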
```diff
@@ -782,30 +938,41 @@ async def main():
         help="Log level for the HTTP server (default: info)",
     )
-    args = parser.parse_args()
-
-    # Run Alembic migrations from the main cognee directory where alembic.ini is located
-    print("Running database migrations...")
-    migration_result = subprocess.run(
-        ["python", "-m", "alembic", "upgrade", "head"],
-        capture_output=True,
-        text=True,
-        cwd=Path(__file__).resolve().parent.parent.parent,
-    )
-    if migration_result.returncode != 0:
-        migration_output = migration_result.stderr + migration_result.stdout
-        # Check for the expected UserAlreadyExists error (which is not critical)
-        if (
-            "UserAlreadyExists" in migration_output
-            or "User default_user@example.com already exists" in migration_output
-        ):
-            print("Warning: Default user already exists, continuing startup...")
-        else:
-            print(f"Migration failed with unexpected error: {migration_output}")
-            sys.exit(1)
-    print("Database migrations done.")
+    parser.add_argument(
+        "--no-migration",
+        default=False,
+        action="store_true",
+        help="Argument stops database migration from being attempted",
+    )
+
+    args = parser.parse_args()
+
+    mcp.settings.host = args.host
+    mcp.settings.port = args.port
+
+    if not args.no_migration:
+        # Run Alembic migrations from the main cognee directory where alembic.ini is located
+        logger.info("Running database migrations...")
+        migration_result = subprocess.run(
+            ["python", "-m", "alembic", "upgrade", "head"],
+            capture_output=True,
+            text=True,
+            cwd=Path(__file__).resolve().parent.parent.parent,
+        )
+        if migration_result.returncode != 0:
+            migration_output = migration_result.stderr + migration_result.stdout
+            # Check for the expected UserAlreadyExists error (which is not critical)
+            if (
+                "UserAlreadyExists" in migration_output
+                or "User default_user@example.com already exists" in migration_output
+            ):
+                logger.warning("Warning: Default user already exists, continuing startup...")
+            else:
+                logger.error(f"Migration failed with unexpected error: {migration_output}")
+                sys.exit(1)
+        logger.info("Database migrations done.")
 
     logger.info(f"Starting MCP server with transport: {args.transport}")
     if args.transport == "stdio":
```
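When the server starts with `--no-migration`, the Alembic upgrade still has to run once before first use, for example from the main cognee service. A sketch of running it by hand, mirroring the subprocess call above (the repository path is a placeholder):

```python
import subprocess

# Run from the directory that contains alembic.ini; the path below is a placeholder.
subprocess.run(
    ["python", "-m", "alembic", "upgrade", "head"],
    cwd="/path/to/cognee",
    check=True,  # raise on failure instead of inspecting returncode manually
)
```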

### cognee-mcp/uv.lock (generated)

File diff suppressed because it is too large (6231 changed lines).

### docker-compose.yml

```diff
@@ -1,34 +1,3 @@
-# Cognee Docker Compose Configuration
-#
-# This docker-compose file includes the main Cognee API server and optional services:
-#
-# BASIC USAGE:
-#   Start main Cognee API server:
-#     docker-compose up cognee
-#
-# MCP SERVER USAGE:
-#   The MCP (Model Context Protocol) server enables IDE integration with tools like Cursor, Claude Desktop, etc.
-#
-#   Start with MCP server (stdio transport - recommended):
-#     docker-compose --profile mcp up
-#
-#   Start with MCP server (SSE transport for HTTP access):
-#     TRANSPORT_MODE=sse docker-compose --profile mcp up
-#
-# PORT CONFIGURATION:
-#   - Main Cognee API: http://localhost:8000
-#   - MCP Server (SSE mode): http://localhost:8001
-#   - Frontend (UI): http://localhost:3000 (with --profile ui)
-#
-# DEBUGGING:
-#   Enable debug mode by setting DEBUG=true in your .env file or:
-#     DEBUG=true docker-compose --profile mcp up
-#
-#   This exposes debugger ports:
-#   - Main API debugger: localhost:5678
-#   - MCP Server debugger: localhost:5679
 services:
   cognee:
     container_name: cognee
@@ -69,15 +38,13 @@ services:
       dockerfile: cognee-mcp/Dockerfile
     volumes:
       - .env:/app/.env
-      # Optional: Mount local data for ingestion
-      - ./examples/data:/app/data:ro
     environment:
       - DEBUG=false # Change to true if debugging
       - ENVIRONMENT=local
       - LOG_LEVEL=INFO
-      - TRANSPORT_MODE=stdio # Use 'sse' for Server-Sent Events over HTTP
+      - TRANSPORT_MODE=sse
       # Database configuration - should match the main cognee service
-      - DB_TYPE=${DB_TYPE:-sqlite}
+      - DB_PROVIDER=${DB_PROVIDER:-sqlite}
       - DB_HOST=${DB_HOST:-host.docker.internal}
       - DB_PORT=${DB_PORT:-5432}
       - DB_NAME=${DB_NAME:-cognee_db}
@@ -89,11 +56,8 @@ services:
     extra_hosts:
       - "host.docker.internal:host-gateway"
     ports:
-      # Only expose ports when using SSE transport
-      - "8001:8000" # MCP SSE port (mapped to avoid conflict with main API)
-      - "5679:5678" # MCP debugger port (different from main service)
+      - "8000:8000" # MCP port
+      - "5678:5678" # MCP debugger port
-    depends_on:
-      - cognee
     deploy:
       resources:
         limits:
```
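With `TRANSPORT_MODE=sse` and the `8000:8000` mapping, the MCP container is reachable over HTTP. A minimal connectivity check using the `mcp` SDK's SSE client, assuming the server exposes the SDK's conventional `/sse` endpoint:

```python
import asyncio

from mcp import ClientSession
from mcp.client.sse import sse_client


async def main():
    # /sse is the SDK's default SSE endpoint; adjust if the server mounts it elsewhere.
    async with sse_client("http://localhost:8000/sse") as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            tools = await session.list_tools()
            print("Available tools:", [tool.name for tool in tools.tools])


asyncio.run(main())
```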