From 2be658435a21de9ca3cd6ca2ed789f44d927b145 Mon Sep 17 00:00:00 2001
From: supmo668
Date: Mon, 3 Nov 2025 17:33:32 -0800
Subject: [PATCH] Add dynamic max_tokens configuration for Anthropic models

Implements model-specific max output token limits for AnthropicClient,
following the same pattern as GeminiClient. This replaces the previous
hardcoded min() cap, which prevented models from using their full output
capacity.

Changes:
- Added ANTHROPIC_MODEL_MAX_TOKENS mapping with limits for all supported
  Claude models (ranging from 4K to 65K tokens)
- Implemented _get_max_tokens_for_model() to look up model-specific limits
- Implemented _resolve_max_tokens() with clear precedence rules:
  1. Explicit max_tokens parameter
  2. Instance max_tokens from initialization
  3. Model-specific limit from mapping
  4. Default fallback (8192 tokens)

This allows edge_operations.py to request 16384 tokens for edge extraction
without being artificially capped, while ensuring cheaper models with lower
limits are still handled correctly.

Resolves TODO in anthropic_client.py:207-208.
---
 graphiti_core/llm_client/anthropic_client.py | 73 ++++++++++++++++++--
 1 file changed, 67 insertions(+), 6 deletions(-)

diff --git a/graphiti_core/llm_client/anthropic_client.py b/graphiti_core/llm_client/anthropic_client.py
index 1f2916b3..b757011a 100644
--- a/graphiti_core/llm_client/anthropic_client.py
+++ b/graphiti_core/llm_client/anthropic_client.py
@@ -64,6 +64,31 @@ AnthropicModel = Literal[
 
 DEFAULT_MODEL: AnthropicModel = 'claude-3-7-sonnet-latest'
 
+# Maximum output tokens for different Anthropic models
+# Based on official Anthropic documentation (as of 2025)
+ANTHROPIC_MODEL_MAX_TOKENS = {
+    # Claude 3.7 models - 64K tokens (128K with beta header)
+    'claude-3-7-sonnet-latest': 65536,
+    'claude-3-7-sonnet-20250219': 65536,
+    # Claude 3.5 models
+    'claude-3-5-haiku-latest': 8192,
+    'claude-3-5-haiku-20241022': 8192,
+    'claude-3-5-sonnet-latest': 8192,
+    'claude-3-5-sonnet-20241022': 8192,
+    'claude-3-5-sonnet-20240620': 8192,
+    # Claude 3 models - 4K tokens
+    'claude-3-opus-latest': 4096,
+    'claude-3-opus-20240229': 4096,
+    'claude-3-sonnet-20240229': 4096,
+    'claude-3-haiku-20240307': 4096,
+    # Claude 2 models - 4K tokens
+    'claude-2.1': 4096,
+    'claude-2.0': 4096,
+}
+
+# Default max tokens for models not in the mapping
+DEFAULT_ANTHROPIC_MAX_TOKENS = 8192
+
 
 class AnthropicClient(LLMClient):
     """
@@ -177,6 +202,45 @@ class AnthropicClient(LLMClient):
         tool_choice_cast = typing.cast(ToolChoiceParam, tool_choice)
         return tool_list_cast, tool_choice_cast
 
+    def _get_max_tokens_for_model(self, model: str) -> int:
+        """Get the maximum output tokens for a specific Anthropic model.
+
+        Args:
+            model: The model name to look up
+
+        Returns:
+            int: The maximum output tokens for the model
+        """
+        return ANTHROPIC_MODEL_MAX_TOKENS.get(model, DEFAULT_ANTHROPIC_MAX_TOKENS)
+
+    def _resolve_max_tokens(self, requested_max_tokens: int | None, model: str) -> int:
+        """
+        Resolve the maximum output tokens to use based on precedence rules.
+
+        Precedence order (highest to lowest):
+        1. Explicit max_tokens parameter passed to generate_response()
+        2. Instance max_tokens set during client initialization
+        3. Model-specific maximum tokens from ANTHROPIC_MODEL_MAX_TOKENS mapping
+        4. DEFAULT_ANTHROPIC_MAX_TOKENS as final fallback
+
+        Args:
+            requested_max_tokens: The max_tokens parameter passed to generate_response()
+            model: The model name to look up model-specific limits
+
+        Returns:
+            int: The resolved maximum tokens to use
+        """
+        # 1. Use explicit parameter if provided
+        if requested_max_tokens is not None:
+            return requested_max_tokens
+
+        # 2. Use instance max_tokens if set during initialization
+        if self.max_tokens is not None:
+            return self.max_tokens
+
+        # 3. Use model-specific maximum or return DEFAULT_ANTHROPIC_MAX_TOKENS
+        return self._get_max_tokens_for_model(model)
+
     async def _generate_response(
         self,
         messages: list[Message],
@@ -204,12 +268,9 @@ class AnthropicClient(LLMClient):
         user_messages = [{'role': m.role, 'content': m.content} for m in messages[1:]]
         user_messages_cast = typing.cast(list[MessageParam], user_messages)
 
-        # TODO: Replace hacky min finding solution after fixing hardcoded EXTRACT_EDGES_MAX_TOKENS = 16384 in
-        # edge_operations.py. Throws errors with cheaper models that lower max_tokens.
-        max_creation_tokens: int = min(
-            max_tokens if max_tokens is not None else self.config.max_tokens,
-            DEFAULT_MAX_TOKENS,
-        )
+        # Resolve max_tokens dynamically based on the model's capabilities
+        # This allows different models to use their full output capacity
+        max_creation_tokens: int = self._resolve_max_tokens(max_tokens, self.model)
 
         try:
             # Create the appropriate tool based on whether response_model is provided
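
Note for reviewers: a minimal standalone sketch of the precedence rules described in the
commit message. It mirrors the new _resolve_max_tokens logic for illustration only; the
function name resolve_max_tokens, its instance_max parameter, and the trimmed-down mapping
below are assumptions made for this example and are not part of the patch.

    # Standalone illustration of the precedence:
    # explicit request > instance setting > model-specific limit > default fallback.
    ANTHROPIC_MODEL_MAX_TOKENS = {
        'claude-3-7-sonnet-latest': 65536,
        'claude-3-opus-latest': 4096,
    }
    DEFAULT_ANTHROPIC_MAX_TOKENS = 8192

    def resolve_max_tokens(requested: int | None, instance_max: int | None, model: str) -> int:
        # 1. Explicit per-call value wins
        if requested is not None:
            return requested
        # 2. Then the value set at client initialization
        if instance_max is not None:
            return instance_max
        # 3. Then the model-specific limit, falling back to the default
        return ANTHROPIC_MODEL_MAX_TOKENS.get(model, DEFAULT_ANTHROPIC_MAX_TOKENS)

    assert resolve_max_tokens(16384, None, 'claude-3-7-sonnet-latest') == 16384  # explicit request
    assert resolve_max_tokens(None, 2048, 'claude-3-7-sonnet-latest') == 2048    # instance setting
    assert resolve_max_tokens(None, None, 'claude-3-opus-latest') == 4096        # model-specific limit
    assert resolve_max_tokens(None, None, 'claude-unknown-model') == 8192        # default fallback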