From d7290df5bedd7a3c601452350aec1849d3841743 Mon Sep 17 00:00:00 2001 From: Brandt Weary Date: Fri, 19 Sep 2025 16:20:05 -0700 Subject: [PATCH] feat(mcp): add configurable similarity thresholds Allows users to tune search quality for their domain via environment variables: - GRAPHITI_MIN_SIMILARITY_SCORE: Controls semantic search filtering (default 0.6) - GRAPHITI_RERANKER_MIN_SCORE: Controls post-RRF filtering (default 0.0) This enables users to optimize search precision/recall for their specific use case. For example, technical documentation benefits from higher thresholds (0.8) to filter noise. --- mcp_server/README.md | 2 ++ mcp_server/graphiti_mcp_server.py | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/mcp_server/README.md b/mcp_server/README.md index d957feb8..ef995bc3 100644 --- a/mcp_server/README.md +++ b/mcp_server/README.md @@ -101,6 +101,8 @@ The server uses the following environment variables: - `AZURE_OPENAI_EMBEDDING_API_VERSION`: Optional Azure OpenAI API version - `AZURE_OPENAI_USE_MANAGED_IDENTITY`: Optional use Azure Managed Identities for authentication - `SEMAPHORE_LIMIT`: Episode processing concurrency. See [Concurrency and LLM Provider 429 Rate Limit Errors](#concurrency-and-llm-provider-429-rate-limit-errors) +- `GRAPHITI_MIN_SIMILARITY_SCORE`: Minimum similarity score applied during the semantic stage of node search (`search_memory_nodes`) (default: `0.6`, range: 0.0-1.0). Higher values filter more aggressively. +- `GRAPHITI_RERANKER_MIN_SCORE`: Minimum score applied to node search results after RRF fusion (default: `0.0`). Typically left at `0.0` because RRF scores are derived from ranks, not similarity. You can set these variables in a `.env` file in the project directory. 
diff --git a/mcp_server/graphiti_mcp_server.py b/mcp_server/graphiti_mcp_server.py index 9b382074..a8d7ffc5 100644 --- a/mcp_server/graphiti_mcp_server.py +++ b/mcp_server/graphiti_mcp_server.py @@ -864,6 +864,17 @@ async def search_memory_nodes( search_config = NODE_HYBRID_SEARCH_RRF.model_copy(deep=True) search_config.limit = max_nodes + # Apply configurable similarity thresholds from environment + # sim_min_score filters during initial semantic search (recommended to tune for your domain) + sim_min_score = float(os.getenv('GRAPHITI_MIN_SIMILARITY_SCORE', '0.6')) + # reranker_min_score filters after RRF fusion (recommended to keep at 0.0 per industry consensus, + # as RRF is rank-based and doesn't require score thresholds) + reranker_min_score = float(os.getenv('GRAPHITI_RERANKER_MIN_SCORE', '0.0')) + + if search_config.node_config: + search_config.node_config.sim_min_score = sim_min_score + search_config.reranker_min_score = reranker_min_score + filters = SearchFilters() if entity != '': filters.node_labels = [entity]