From fb2ebeba501ae2d8919c1ac8f803b88231770009 Mon Sep 17 00:00:00 2001 From: Daniel Chalef <131175+danielchalef@users.noreply.github.com> Date: Sat, 30 Aug 2025 08:43:17 -0700 Subject: [PATCH] feat: Configure KuzuDB Docker deployment with persistent storage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Update docker-compose-kuzu.yml to use persistent volume by default - Set KUZU_DB to /data/graphiti.kuzu for persistent storage - Increase max concurrent queries from 1 to 10 for better performance - Add dedicated config-docker-kuzu.yaml for Docker deployments - Create README-kuzu.md with usage instructions and troubleshooting - Configure volume with local driver for data persistence The setup now provides: - Automatic data persistence across container restarts - No external database dependencies - Simple backup/restore procedures - Lower resource usage compared to Neo4j 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- mcp_server/config/config-docker-kuzu.yaml | 90 ++++++++++++++++ mcp_server/docker/README-kuzu.md | 122 ++++++++++++++++++++++ mcp_server/docker/docker-compose-kuzu.yml | 20 ++-- 3 files changed, 222 insertions(+), 10 deletions(-) create mode 100644 mcp_server/config/config-docker-kuzu.yaml create mode 100644 mcp_server/docker/README-kuzu.md diff --git a/mcp_server/config/config-docker-kuzu.yaml b/mcp_server/config/config-docker-kuzu.yaml new file mode 100644 index 00000000..fe405744 --- /dev/null +++ b/mcp_server/config/config-docker-kuzu.yaml @@ -0,0 +1,90 @@ +# Graphiti MCP Server Configuration for Docker with KuzuDB +# This configuration is optimized for running with docker-compose-kuzu.yml +# It uses persistent KuzuDB storage at /data/graphiti.kuzu + +server: + transport: "sse" # SSE for HTTP access from Docker + host: "0.0.0.0" + port: 8000 + +llm: + provider: "openai" # Options: openai, azure_openai, anthropic, gemini, groq + model: "gpt-4o" + temperature: 0.0 + 
max_tokens: 4096 + + providers: + openai: + api_key: ${OPENAI_API_KEY} + api_url: ${OPENAI_API_URL:https://api.openai.com/v1} + organization_id: ${OPENAI_ORGANIZATION_ID:} + + azure_openai: + api_key: ${AZURE_OPENAI_API_KEY} + api_url: ${AZURE_OPENAI_ENDPOINT} + api_version: ${AZURE_OPENAI_API_VERSION:2024-10-21} + deployment_name: ${AZURE_OPENAI_DEPLOYMENT} + use_azure_ad: ${USE_AZURE_AD:false} + + anthropic: + api_key: ${ANTHROPIC_API_KEY} + api_url: ${ANTHROPIC_API_URL:https://api.anthropic.com} + max_retries: 3 + + gemini: + api_key: ${GOOGLE_API_KEY} + project_id: ${GOOGLE_PROJECT_ID:} + location: ${GOOGLE_LOCATION:us-central1} + + groq: + api_key: ${GROQ_API_KEY} + api_url: ${GROQ_API_URL:https://api.groq.com/openai/v1} + +embedder: + provider: "openai" # Options: openai, azure_openai, gemini, voyage + model: "text-embedding-ada-002" + dimensions: 1536 + + providers: + openai: + api_key: ${OPENAI_API_KEY} + api_url: ${OPENAI_API_URL:https://api.openai.com/v1} + organization_id: ${OPENAI_ORGANIZATION_ID:} + + azure_openai: + api_key: ${AZURE_OPENAI_API_KEY} + api_url: ${AZURE_OPENAI_EMBEDDINGS_ENDPOINT} + api_version: ${AZURE_OPENAI_API_VERSION:2024-10-21} + deployment_name: ${AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT} + use_azure_ad: ${USE_AZURE_AD:false} + + gemini: + api_key: ${GOOGLE_API_KEY} + project_id: ${GOOGLE_PROJECT_ID:} + location: ${GOOGLE_LOCATION:us-central1} + + voyage: + api_key: ${VOYAGE_API_KEY} + api_url: ${VOYAGE_API_URL:https://api.voyageai.com/v1} + model: "voyage-3" + +database: + provider: "kuzu" # Using KuzuDB for this configuration + + providers: + kuzu: + # Use environment variable if set, otherwise use persistent storage at /data + db: ${KUZU_DB:/data/graphiti.kuzu} + max_concurrent_queries: ${KUZU_MAX_CONCURRENT_QUERIES:10} + +graphiti: + group_id: ${GRAPHITI_GROUP_ID:main} + episode_id_prefix: ${EPISODE_ID_PREFIX:} + user_id: ${USER_ID:mcp_user} + entity_types: + - name: "Requirement" + description: "Represents a requirement" + - name: 
"Preference" + description: "User preferences and settings" + - name: "Procedure" + description: "Standard operating procedures" \ No newline at end of file diff --git a/mcp_server/docker/README-kuzu.md b/mcp_server/docker/README-kuzu.md new file mode 100644 index 00000000..6bfd0709 --- /dev/null +++ b/mcp_server/docker/README-kuzu.md @@ -0,0 +1,122 @@ +# Running MCP Server with KuzuDB + +This guide explains how to run the Graphiti MCP server with KuzuDB as the graph database backend using Docker Compose. + +## Why KuzuDB? + +KuzuDB is an embedded graph database that provides several advantages: +- **No external dependencies**: Unlike Neo4j, KuzuDB runs embedded within the application +- **Persistent storage**: Data is stored in a single file/directory +- **High performance**: Optimized for analytical workloads +- **Low resource usage**: Minimal memory and CPU requirements + +## Quick Start + +1. **Set up environment variables**: + Create a `.env` file in the `docker` directory: + ```bash + OPENAI_API_KEY=your-api-key-here + # Optional: Override default settings + GRAPHITI_GROUP_ID=my-group + KUZU_MAX_CONCURRENT_QUERIES=20 + ``` + +2. **Start the server**: + ```bash + docker-compose -f docker-compose-kuzu.yml up + ``` + +3. **Access the MCP server**: + The server will be available at `http://localhost:8000` + +## Configuration + +### Persistent Storage + +By default, KuzuDB data is stored in a Docker volume at `/data/graphiti.kuzu`. This ensures your data persists across container restarts. + +To use a different location, set the `KUZU_DB` environment variable: +```bash +KUZU_DB=/data/my-custom-db.kuzu +``` + +### In-Memory Mode + +For testing or temporary usage, you can run KuzuDB in memory-only mode: +```bash +KUZU_DB=:memory: +``` +Note: Data will be lost when the container stops. 
+ +### Performance Tuning + +Adjust the maximum number of concurrent queries: +```bash +KUZU_MAX_CONCURRENT_QUERIES=20 # Default is 10 +``` + +## Data Management + +### Backup + +To back up your KuzuDB data: +```bash +# Create a backup of the volume +docker run --rm -v docker_kuzu_data:/data -v $(pwd):/backup alpine tar czf /backup/kuzu-backup.tar.gz -C /data . +``` + +### Restore + +To restore from a backup: +```bash +# Restore from backup +docker run --rm -v docker_kuzu_data:/data -v $(pwd):/backup alpine tar xzf /backup/kuzu-backup.tar.gz -C /data +``` + +### Clear Data + +To completely clear the KuzuDB data: +```bash +# Stop the container +docker-compose -f docker-compose-kuzu.yml down + +# Remove the volume +docker volume rm docker_kuzu_data + +# Restart (will create fresh volume) +docker-compose -f docker-compose-kuzu.yml up +``` + +## Switching from Neo4j + +If you're migrating from Neo4j to KuzuDB: + +1. Export your data from Neo4j (if needed) +2. Stop the Neo4j-based setup: `docker-compose down` +3. Start the KuzuDB setup: `docker-compose -f docker-compose-kuzu.yml up` +4. 
Re-import your data through the MCP API + +## Troubleshooting + +### Container won't start +- Check that port 8000 is not in use: `lsof -i :8000` +- Verify your `.env` file has valid API keys + +### Data not persisting +- Ensure the volume is properly mounted: `docker volume ls` +- Check volume permissions: `docker exec graphiti-mcp ls -la /data` + +### Performance issues +- Increase `KUZU_MAX_CONCURRENT_QUERIES` for better parallelism +- Monitor container resources: `docker stats graphiti-mcp` + +## Comparison with Neo4j Setup + +| Feature | KuzuDB | Neo4j | +|---------|--------|-------| +| External Database | No | Yes | +| Memory Usage | Low (~100MB) | High (~512MB min) | +| Startup Time | Instant | 30+ seconds | +| Persistent Storage | Single file | Multiple files | +| Docker Services | 1 | 2 | +| Default Port | 8000 (MCP only) | 8000, 7474, 7687 | \ No newline at end of file diff --git a/mcp_server/docker/docker-compose-kuzu.yml b/mcp_server/docker/docker-compose-kuzu.yml index 7183659c..84ad72a7 100644 --- a/mcp_server/docker/docker-compose-kuzu.yml +++ b/mcp_server/docker/docker-compose-kuzu.yml @@ -8,9 +8,9 @@ services: - path: .env required: false # Makes the file optional. 
Default value is 'true' environment: - # Database configuration for KuzuDB - - KUZU_DB=${KUZU_DB:-:memory:} - - KUZU_MAX_CONCURRENT_QUERIES=${KUZU_MAX_CONCURRENT_QUERIES:-1} + # Database configuration for KuzuDB - using persistent storage + - KUZU_DB=${KUZU_DB:-/data/graphiti.kuzu} + - KUZU_MAX_CONCURRENT_QUERIES=${KUZU_MAX_CONCURRENT_QUERIES:-10} # LLM provider configurations - OPENAI_API_KEY=${OPENAI_API_KEY} - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} @@ -29,14 +29,14 @@ services: - CONFIG_PATH=/app/config/config.yaml - PATH=/root/.local/bin:${PATH} volumes: - - ./config.yaml:/app/config/config.yaml:ro - # Optional: If you want to persist KuzuDB data, uncomment the following line - # and change KUZU_DB to /data/kuzu.db - # - kuzu_data:/data + - ../config/config-docker-kuzu.yaml:/app/config/config.yaml:ro + # Persistent KuzuDB data storage + - kuzu_data:/data ports: - "8000:8000" # Expose the MCP server via HTTP for SSE transport command: ["uv", "run", "graphiti_mcp_server.py", "--transport", "sse", "--config", "/app/config/config.yaml"] -# Optional: Volume for persistent KuzuDB storage -# volumes: -# kuzu_data: \ No newline at end of file +# Volume for persistent KuzuDB storage +volumes: + kuzu_data: + driver: local \ No newline at end of file