Cognee mcp fixes main (#1196)
<!-- .github/pull_request_template.md --> ## Description Fix Cognee mcp issues ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
parent
5fcc8b7813
commit
b46833b476
6 changed files with 3334 additions and 3201 deletions
|
|
@ -52,6 +52,7 @@ RUN apt-get update && apt-get install -y \
|
|||
WORKDIR /app
|
||||
|
||||
# Copy the virtual environment from the uv stage
|
||||
COPY --from=uv /usr/local /usr/local
|
||||
COPY --from=uv /app /app
|
||||
|
||||
RUN chmod +x /app/entrypoint.sh
|
||||
|
|
|
|||
|
|
@ -48,27 +48,27 @@ if [ "$ENVIRONMENT" = "dev" ] || [ "$ENVIRONMENT" = "local" ]; then
|
|||
if [ "$DEBUG" = "true" ]; then
|
||||
echo "Waiting for the debugger to attach..."
|
||||
if [ "$TRANSPORT_MODE" = "sse" ]; then
|
||||
exec python -m debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m cognee --transport sse
|
||||
exec python -m debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m cognee --transport sse --host 0.0.0.0 --port $HTTP_PORT --no-migration
|
||||
elif [ "$TRANSPORT_MODE" = "http" ]; then
|
||||
exec python -m debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m cognee --transport http --host 0.0.0.0 --port $HTTP_PORT
|
||||
exec python -m debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m cognee --transport http --host 0.0.0.0 --port $HTTP_PORT --no-migration
|
||||
else
|
||||
exec python -m debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m cognee --transport stdio
|
||||
exec python -m debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m cognee --transport stdio --no-migration
|
||||
fi
|
||||
else
|
||||
if [ "$TRANSPORT_MODE" = "sse" ]; then
|
||||
exec cognee --transport sse
|
||||
exec cognee --transport sse --host 0.0.0.0 --port $HTTP_PORT --no-migration
|
||||
elif [ "$TRANSPORT_MODE" = "http" ]; then
|
||||
exec cognee --transport http --host 0.0.0.0 --port $HTTP_PORT
|
||||
exec cognee --transport http --host 0.0.0.0 --port $HTTP_PORT --no-migration
|
||||
else
|
||||
exec cognee --transport stdio
|
||||
exec cognee --transport stdio --no-migration
|
||||
fi
|
||||
fi
|
||||
else
|
||||
if [ "$TRANSPORT_MODE" = "sse" ]; then
|
||||
exec cognee --transport sse
|
||||
exec cognee --transport sse --host 0.0.0.0 --port $HTTP_PORT --no-migration
|
||||
elif [ "$TRANSPORT_MODE" = "http" ]; then
|
||||
exec cognee --transport http --host 0.0.0.0 --port $HTTP_PORT
|
||||
exec cognee --transport http --host 0.0.0.0 --port $HTTP_PORT --no-migration
|
||||
else
|
||||
exec cognee --transport stdio
|
||||
exec cognee --transport stdio --no-migration
|
||||
fi
|
||||
fi
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ requires-python = ">=3.10"
|
|||
dependencies = [
|
||||
# For local cognee repo usage, uncomment the line below and set the absolute path to cognee. Then run `uv sync --reinstall` in the mcp folder after local cognee changes.
|
||||
# "cognee[postgres,codegraph,gemini,huggingface,docs,neo4j] @ file:/Users/vasilije/Projects/tiktok/cognee",
|
||||
"cognee[postgres,codegraph,gemini,huggingface,docs,neo4j]>=0.2.0,<1.0.0",
|
||||
"cognee[postgres,codegraph,gemini,huggingface,docs,neo4j]==0.2.1",
|
||||
"fastmcp>=2.10.0,<3.0.0",
|
||||
"mcp>=1.12.0,<2.0.0",
|
||||
"uv>=0.6.3,<1.0.0",
|
||||
|
|
|
|||
|
|
@ -123,11 +123,34 @@ async def cognee_add_developer_rules(
|
|||
@mcp.tool()
|
||||
async def cognify(data: str, graph_model_file: str = None, graph_model_name: str = None) -> list:
|
||||
"""
|
||||
Transform data into a structured knowledge graph in Cognee's memory layer.
|
||||
Transform ingested data into a structured knowledge graph.
|
||||
|
||||
This function launches a background task that processes the provided text/file location and
|
||||
generates a knowledge graph representation. The function returns immediately while
|
||||
the processing continues in the background due to MCP timeout constraints.
|
||||
This is the core processing step in Cognee that converts raw text and documents
|
||||
into an intelligent knowledge graph. It analyzes content, extracts entities and
|
||||
relationships, and creates semantic connections for enhanced search and reasoning.
|
||||
|
||||
Prerequisites:
|
||||
- **LLM_API_KEY**: Must be configured (required for entity extraction and graph generation)
|
||||
- **Data Added**: Must have data previously added via `cognee.add()`
|
||||
- **Vector Database**: Must be accessible for embeddings storage
|
||||
- **Graph Database**: Must be accessible for relationship storage
|
||||
|
||||
Input Requirements:
|
||||
- **Content Types**: Works with any text-extractable content including:
|
||||
* Natural language documents
|
||||
* Structured data (CSV, JSON)
|
||||
* Code repositories
|
||||
* Academic papers and technical documentation
|
||||
* Mixed multimedia content (with text extraction)
|
||||
|
||||
Processing Pipeline:
|
||||
1. **Document Classification**: Identifies document types and structures
|
||||
2. **Permission Validation**: Ensures user has processing rights
|
||||
3. **Text Chunking**: Breaks content into semantically meaningful segments
|
||||
4. **Entity Extraction**: Identifies key concepts, people, places, organizations
|
||||
5. **Relationship Detection**: Discovers connections between entities
|
||||
6. **Graph Construction**: Builds semantic knowledge graph with embeddings
|
||||
7. **Content Summarization**: Creates hierarchical summaries for navigation
|
||||
|
||||
Parameters
|
||||
----------
|
||||
|
|
@ -152,11 +175,60 @@ async def cognify(data: str, graph_model_file: str = None, graph_model_name: str
|
|||
A list containing a single TextContent object with information about the
|
||||
background task launch and how to check its status.
|
||||
|
||||
Next Steps:
|
||||
After successful cognify processing, use search functions to query the knowledge:
|
||||
|
||||
```python
|
||||
import cognee
|
||||
from cognee import SearchType
|
||||
|
||||
# Process your data into knowledge graph
|
||||
await cognee.cognify()
|
||||
|
||||
# Query for insights using different search types:
|
||||
|
||||
# 1. Natural language completion with graph context
|
||||
insights = await cognee.search(
|
||||
"What are the main themes?",
|
||||
query_type=SearchType.GRAPH_COMPLETION
|
||||
)
|
||||
|
||||
# 2. Get entity relationships and connections
|
||||
relationships = await cognee.search(
|
||||
"connections between concepts",
|
||||
query_type=SearchType.INSIGHTS
|
||||
)
|
||||
|
||||
# 3. Find relevant document chunks
|
||||
chunks = await cognee.search(
|
||||
"specific topic",
|
||||
query_type=SearchType.CHUNKS
|
||||
)
|
||||
```
|
||||
|
||||
Environment Variables:
|
||||
Required:
|
||||
- LLM_API_KEY: API key for your LLM provider
|
||||
|
||||
Optional:
|
||||
- LLM_PROVIDER, LLM_MODEL, VECTOR_DB_PROVIDER, GRAPH_DATABASE_PROVIDER
|
||||
- LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False)
|
||||
- LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60)
|
||||
|
||||
Notes
|
||||
-----
|
||||
- The function launches a background task and returns immediately
|
||||
- The actual cognify process may take significant time depending on text length
|
||||
- Use the cognify_status tool to check the progress of the operation
|
||||
|
||||
Raises
|
||||
------
|
||||
InvalidValueError
|
||||
If LLM_API_KEY is not set
|
||||
ValueError
|
||||
If chunks exceed max token limits (reduce chunk_size)
|
||||
DatabaseNotCreatedError
|
||||
If databases are not properly initialized
|
||||
"""
|
||||
|
||||
async def cognify_task(
|
||||
|
|
@ -327,17 +399,69 @@ async def codify(repo_path: str) -> list:
|
|||
@mcp.tool()
|
||||
async def search(search_query: str, search_type: str) -> list:
|
||||
"""
|
||||
Search the Cognee knowledge graph for information relevant to the query.
|
||||
Search and query the knowledge graph for insights, information, and connections.
|
||||
|
||||
This function executes a search against the Cognee knowledge graph using the
|
||||
specified query and search type. It returns formatted results based on the
|
||||
search type selected.
|
||||
This is the final step in the Cognee workflow that retrieves information from the
|
||||
processed knowledge graph. It supports multiple search modes optimized for different
|
||||
use cases - from simple fact retrieval to complex reasoning and code analysis.
|
||||
|
||||
Search Prerequisites:
|
||||
- **LLM_API_KEY**: Required for GRAPH_COMPLETION and RAG_COMPLETION search types
|
||||
- **Data Added**: Must have data previously added via `cognee.add()`
|
||||
- **Knowledge Graph Built**: Must have processed data via `cognee.cognify()`
|
||||
- **Vector Database**: Must be accessible for semantic search functionality
|
||||
|
||||
Search Types & Use Cases:
|
||||
|
||||
**GRAPH_COMPLETION** (Recommended):
|
||||
Natural language Q&A using full graph context and LLM reasoning.
|
||||
Best for: Complex questions, analysis, summaries, insights.
|
||||
Returns: Conversational AI responses with graph-backed context.
|
||||
|
||||
**RAG_COMPLETION**:
|
||||
Traditional RAG using document chunks without graph structure.
|
||||
Best for: Direct document retrieval, specific fact-finding.
|
||||
Returns: LLM responses based on relevant text chunks.
|
||||
|
||||
**INSIGHTS**:
|
||||
Structured entity relationships and semantic connections.
|
||||
Best for: Understanding concept relationships, knowledge mapping.
|
||||
Returns: Formatted relationship data and entity connections.
|
||||
|
||||
**CHUNKS**:
|
||||
Raw text segments that match the query semantically.
|
||||
Best for: Finding specific passages, citations, exact content.
|
||||
Returns: Ranked list of relevant text chunks with metadata.
|
||||
|
||||
**SUMMARIES**:
|
||||
Pre-generated hierarchical summaries of content.
|
||||
Best for: Quick overviews, document abstracts, topic summaries.
|
||||
Returns: Multi-level summaries from detailed to high-level.
|
||||
|
||||
**CODE**:
|
||||
Code-specific search with syntax and semantic understanding.
|
||||
Best for: Finding functions, classes, implementation patterns.
|
||||
Returns: Structured code information with context and relationships.
|
||||
|
||||
**CYPHER**:
|
||||
Direct graph database queries using Cypher syntax.
|
||||
Best for: Advanced users, specific graph traversals, debugging.
|
||||
Returns: Raw graph query results.
|
||||
|
||||
**FEELING_LUCKY**:
|
||||
Intelligently selects and runs the most appropriate search type.
|
||||
Best for: General-purpose queries or when you're unsure which search type is best.
|
||||
Returns: The results from the automatically selected search type.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
search_query : str
|
||||
The search query in natural language. This can be a question, instruction, or
|
||||
any text that expresses what information is needed from the knowledge graph.
|
||||
Your question or search query in natural language.
|
||||
Examples:
|
||||
- "What are the main themes in this research?"
|
||||
- "How do these concepts relate to each other?"
|
||||
- "Find information about machine learning algorithms"
|
||||
- "What functions handle user authentication?"
|
||||
|
||||
search_type : str
|
||||
The type of search to perform. Valid options include:
|
||||
|
|
@ -346,6 +470,9 @@ async def search(search_query: str, search_type: str) -> list:
|
|||
- "CODE": Returns code-related knowledge in JSON format
|
||||
- "CHUNKS": Returns raw text chunks from the knowledge graph
|
||||
- "INSIGHTS": Returns relationships between nodes in readable format
|
||||
- "SUMMARIES": Returns pre-generated hierarchical summaries
|
||||
- "CYPHER": Direct graph database queries
|
||||
- "FEELING_LUCKY": Automatically selects best search type
|
||||
|
||||
The search_type is case-insensitive and will be converted to uppercase.
|
||||
|
||||
|
|
@ -354,16 +481,45 @@ async def search(search_query: str, search_type: str) -> list:
|
|||
list
|
||||
A list containing a single TextContent object with the search results.
|
||||
The format of the result depends on the search_type:
|
||||
- For CODE: JSON-formatted search results
|
||||
- For GRAPH_COMPLETION/RAG_COMPLETION: A single text completion
|
||||
- For CHUNKS: String representation of the raw chunks
|
||||
- For INSIGHTS: Formatted string showing node relationships
|
||||
- For other types: String representation of the search results
|
||||
- **GRAPH_COMPLETION/RAG_COMPLETION**: Conversational AI response strings
|
||||
- **INSIGHTS**: Formatted relationship descriptions and entity connections
|
||||
- **CHUNKS**: Relevant text passages with source metadata
|
||||
- **SUMMARIES**: Hierarchical summaries from general to specific
|
||||
- **CODE**: Structured code information with context
|
||||
- **FEELING_LUCKY**: Results in format of automatically selected search type
|
||||
- **CYPHER**: Raw graph query results
|
||||
|
||||
Performance & Optimization:
|
||||
- **GRAPH_COMPLETION**: Slower but most intelligent, uses LLM + graph context
|
||||
- **RAG_COMPLETION**: Medium speed, uses LLM + document chunks (no graph traversal)
|
||||
- **INSIGHTS**: Fast, returns structured relationships without LLM processing
|
||||
- **CHUNKS**: Fastest, pure vector similarity search without LLM
|
||||
- **SUMMARIES**: Fast, returns pre-computed summaries
|
||||
- **CODE**: Medium speed, specialized for code understanding
|
||||
- **FEELING_LUCKY**: Variable speed, uses LLM + search type selection intelligently
|
||||
|
||||
Environment Variables:
|
||||
Required for LLM-based search types (GRAPH_COMPLETION, RAG_COMPLETION):
|
||||
- LLM_API_KEY: API key for your LLM provider
|
||||
|
||||
Optional:
|
||||
- LLM_PROVIDER, LLM_MODEL: Configure LLM for search responses
|
||||
- VECTOR_DB_PROVIDER: Must match what was used during cognify
|
||||
- GRAPH_DATABASE_PROVIDER: Must match what was used during cognify
|
||||
|
||||
Notes
|
||||
-----
|
||||
- Different search types produce different output formats
|
||||
- The function handles the conversion between Cognee's internal result format and MCP's output format
|
||||
|
||||
Raises
|
||||
------
|
||||
InvalidValueError
|
||||
If LLM_API_KEY is not set (for LLM-based search types)
|
||||
ValueError
|
||||
If search_query is empty or search parameters are invalid
|
||||
NoDataError
|
||||
If no relevant data found for the search query
|
||||
"""
|
||||
|
||||
async def search_task(search_query: str, search_type: str) -> str:
|
||||
|
|
@ -782,30 +938,41 @@ async def main():
|
|||
help="Log level for the HTTP server (default: info)",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Run Alembic migrations from the main cognee directory where alembic.ini is located
|
||||
print("Running database migrations...")
|
||||
migration_result = subprocess.run(
|
||||
["python", "-m", "alembic", "upgrade", "head"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
cwd=Path(__file__).resolve().parent.parent.parent,
|
||||
parser.add_argument(
|
||||
"--no-migration",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="Argument stops database migration from being attempted",
|
||||
)
|
||||
|
||||
if migration_result.returncode != 0:
|
||||
migration_output = migration_result.stderr + migration_result.stdout
|
||||
# Check for the expected UserAlreadyExists error (which is not critical)
|
||||
if (
|
||||
"UserAlreadyExists" in migration_output
|
||||
or "User default_user@example.com already exists" in migration_output
|
||||
):
|
||||
print("Warning: Default user already exists, continuing startup...")
|
||||
else:
|
||||
print(f"Migration failed with unexpected error: {migration_output}")
|
||||
sys.exit(1)
|
||||
args = parser.parse_args()
|
||||
|
||||
print("Database migrations done.")
|
||||
mcp.settings.host = args.host
|
||||
mcp.settings.port = args.port
|
||||
|
||||
if not args.no_migration:
|
||||
# Run Alembic migrations from the main cognee directory where alembic.ini is located
|
||||
logger.info("Running database migrations...")
|
||||
migration_result = subprocess.run(
|
||||
["python", "-m", "alembic", "upgrade", "head"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
cwd=Path(__file__).resolve().parent.parent.parent,
|
||||
)
|
||||
|
||||
if migration_result.returncode != 0:
|
||||
migration_output = migration_result.stderr + migration_result.stdout
|
||||
# Check for the expected UserAlreadyExists error (which is not critical)
|
||||
if (
|
||||
"UserAlreadyExists" in migration_output
|
||||
or "User default_user@example.com already exists" in migration_output
|
||||
):
|
||||
logger.warning("Warning: Default user already exists, continuing startup...")
|
||||
else:
|
||||
logger.error(f"Migration failed with unexpected error: {migration_output}")
|
||||
sys.exit(1)
|
||||
|
||||
logger.info("Database migrations done.")
|
||||
|
||||
logger.info(f"Starting MCP server with transport: {args.transport}")
|
||||
if args.transport == "stdio":
|
||||
|
|
|
|||
6231
cognee-mcp/uv.lock
generated
6231
cognee-mcp/uv.lock
generated
File diff suppressed because it is too large
Load diff
|
|
@ -1,34 +1,3 @@
|
|||
# Cognee Docker Compose Configuration
|
||||
#
|
||||
# This docker-compose file includes the main Cognee API server and optional services:
|
||||
#
|
||||
# BASIC USAGE:
|
||||
# Start main Cognee API server:
|
||||
# docker-compose up cognee
|
||||
#
|
||||
# MCP SERVER USAGE:
|
||||
# The MCP (Model Context Protocol) server enables IDE integration with tools like Cursor, Claude Desktop, etc.
|
||||
#
|
||||
# Start with MCP server (stdio transport - recommended):
|
||||
# docker-compose --profile mcp up
|
||||
#
|
||||
# Start with MCP server (SSE transport for HTTP access):
|
||||
# TRANSPORT_MODE=sse docker-compose --profile mcp up
|
||||
#
|
||||
# PORT CONFIGURATION:
|
||||
# - Main Cognee API: http://localhost:8000
|
||||
# - MCP Server (SSE mode): http://localhost:8001
|
||||
# - Frontend (UI): http://localhost:3000 (with --profile ui)
|
||||
|
||||
#
|
||||
# DEBUGGING:
|
||||
# Enable debug mode by setting DEBUG=true in your .env file or:
|
||||
# DEBUG=true docker-compose --profile mcp up
|
||||
#
|
||||
# This exposes debugger ports:
|
||||
# - Main API debugger: localhost:5678
|
||||
# - MCP Server debugger: localhost:5679
|
||||
|
||||
services:
|
||||
cognee:
|
||||
container_name: cognee
|
||||
|
|
@ -69,15 +38,13 @@ services:
|
|||
dockerfile: cognee-mcp/Dockerfile
|
||||
volumes:
|
||||
- .env:/app/.env
|
||||
# Optional: Mount local data for ingestion
|
||||
- ./examples/data:/app/data:ro
|
||||
environment:
|
||||
- DEBUG=false # Change to true if debugging
|
||||
- ENVIRONMENT=local
|
||||
- LOG_LEVEL=INFO
|
||||
- TRANSPORT_MODE=stdio # Use 'sse' for Server-Sent Events over HTTP
|
||||
- TRANSPORT_MODE=sse
|
||||
# Database configuration - should match the main cognee service
|
||||
- DB_TYPE=${DB_TYPE:-sqlite}
|
||||
- DB_PROVIDER=${DB_PROVIDER:-sqlite}
|
||||
- DB_HOST=${DB_HOST:-host.docker.internal}
|
||||
- DB_PORT=${DB_PORT:-5432}
|
||||
- DB_NAME=${DB_NAME:-cognee_db}
|
||||
|
|
@ -89,11 +56,8 @@ services:
|
|||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
ports:
|
||||
# Only expose ports when using SSE transport
|
||||
- "8001:8000" # MCP SSE port (mapped to avoid conflict with main API)
|
||||
- "5679:5678" # MCP debugger port (different from main service)
|
||||
depends_on:
|
||||
- cognee
|
||||
- "8000:8000" # MCP port
|
||||
- "5678:5678" # MCP debugger port
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue