diff --git a/graphiti_core/llm_client/anthropic_client.py b/graphiti_core/llm_client/anthropic_client.py
index 1f2916b3..b757011a 100644
--- a/graphiti_core/llm_client/anthropic_client.py
+++ b/graphiti_core/llm_client/anthropic_client.py
@@ -64,6 +64,31 @@ AnthropicModel = Literal[
 
 DEFAULT_MODEL: AnthropicModel = 'claude-3-7-sonnet-latest'
 
+# Maximum output tokens for different Anthropic models
+# Based on official Anthropic documentation (as of 2025)
+ANTHROPIC_MODEL_MAX_TOKENS = {
+    # Claude 3.7 models - 64K tokens (128K with beta header)
+    'claude-3-7-sonnet-latest': 64000,
+    'claude-3-7-sonnet-20250219': 64000,
+    # Claude 3.5 models - 8K tokens
+    'claude-3-5-haiku-latest': 8192,
+    'claude-3-5-haiku-20241022': 8192,
+    'claude-3-5-sonnet-latest': 8192,
+    'claude-3-5-sonnet-20241022': 8192,
+    'claude-3-5-sonnet-20240620': 8192,
+    # Claude 3 models - 4K tokens
+    'claude-3-opus-latest': 4096,
+    'claude-3-opus-20240229': 4096,
+    'claude-3-sonnet-20240229': 4096,
+    'claude-3-haiku-20240307': 4096,
+    # Claude 2 models - 4K tokens
+    'claude-2.1': 4096,
+    'claude-2.0': 4096,
+}
+
+# Default max tokens for models not in the mapping
+DEFAULT_ANTHROPIC_MAX_TOKENS = 8192
+
 
 class AnthropicClient(LLMClient):
     """
@@ -177,6 +202,45 @@ class AnthropicClient(LLMClient):
         tool_choice_cast = typing.cast(ToolChoiceParam, tool_choice)
         return tool_list_cast, tool_choice_cast
 
+    def _get_max_tokens_for_model(self, model: str) -> int:
+        """Get the maximum output tokens for a specific Anthropic model.
+
+        Args:
+            model: The model name to look up
+
+        Returns:
+            int: The maximum output tokens for the model
+        """
+        return ANTHROPIC_MODEL_MAX_TOKENS.get(model, DEFAULT_ANTHROPIC_MAX_TOKENS)
+
+    def _resolve_max_tokens(self, requested_max_tokens: int | None, model: str) -> int:
+        """
+        Resolve the maximum output tokens to use based on precedence rules.
+
+        Precedence order (highest to lowest):
+        1. Explicit max_tokens parameter passed to generate_response()
+        2. Instance max_tokens set during client initialization
+        3. Model-specific maximum from the ANTHROPIC_MODEL_MAX_TOKENS mapping
+        4. DEFAULT_ANTHROPIC_MAX_TOKENS as the final fallback
+
+        Args:
+            requested_max_tokens: The max_tokens parameter passed to generate_response()
+            model: The model name used to look up model-specific limits
+
+        Returns:
+            int: The resolved maximum output tokens to use
+        """
+        # 1. Use the explicit parameter if provided
+        if requested_max_tokens is not None:
+            return requested_max_tokens
+
+        # 2. Use the instance max_tokens if set during initialization
+        if self.max_tokens is not None:
+            return self.max_tokens
+
+        # 3./4. Use the model-specific maximum, or DEFAULT_ANTHROPIC_MAX_TOKENS for unknown models
+        return self._get_max_tokens_for_model(model)
+
     async def _generate_response(
         self,
         messages: list[Message],
@@ -204,12 +268,9 @@ class AnthropicClient(LLMClient):
         user_messages = [{'role': m.role, 'content': m.content} for m in messages[1:]]
         user_messages_cast = typing.cast(list[MessageParam], user_messages)
 
-        # TODO: Replace hacky min finding solution after fixing hardcoded EXTRACT_EDGES_MAX_TOKENS = 16384 in
-        # edge_operations.py. Throws errors with cheaper models that lower max_tokens.
-        max_creation_tokens: int = min(
-            max_tokens if max_tokens is not None else self.config.max_tokens,
-            DEFAULT_MAX_TOKENS,
-        )
+        # Resolve max_tokens dynamically based on the model's capabilities,
+        # so each model can use its full output capacity
+        max_creation_tokens: int = self._resolve_max_tokens(max_tokens, self.model)
 
         try:
             # Create the appropriate tool based on whether response_model is provided
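For reviewers, here is a minimal standalone sketch of the precedence that `_resolve_max_tokens` implements. `StubClient`, the abbreviated token table, and the asserted values are illustrative stand-ins for the real `AnthropicClient`, not part of this diff:

```python
# Standalone sketch of the new max-token resolution (illustrative, not the PR code).
ANTHROPIC_MODEL_MAX_TOKENS = {
    'claude-3-7-sonnet-latest': 64000,
    'claude-3-5-sonnet-latest': 8192,
}
DEFAULT_ANTHROPIC_MAX_TOKENS = 8192


class StubClient:
    """Stand-in for AnthropicClient, carrying only the instance-level max_tokens."""

    def __init__(self, max_tokens: int | None = None):
        self.max_tokens = max_tokens

    def resolve_max_tokens(self, requested: int | None, model: str) -> int:
        if requested is not None:        # 1. explicit call-site argument wins
            return requested
        if self.max_tokens is not None:  # 2. then the client-level setting
            return self.max_tokens
        # 3./4. then the model table, with a default for unknown models
        return ANTHROPIC_MODEL_MAX_TOKENS.get(model, DEFAULT_ANTHROPIC_MAX_TOKENS)


client = StubClient()
assert client.resolve_max_tokens(1024, 'claude-3-7-sonnet-latest') == 1024
assert StubClient(2048).resolve_max_tokens(None, 'claude-3-7-sonnet-latest') == 2048
assert client.resolve_max_tokens(None, 'claude-3-7-sonnet-latest') == 64000
assert client.resolve_max_tokens(None, 'claude-unknown') == 8192
```

One behavioral consequence worth noting: unlike the removed `min(..., DEFAULT_MAX_TOKENS)` clamp, an explicit `max_tokens` argument is now passed through unmodified, so a caller can still request more than a cheaper model supports and receive an API error; the model table only protects the default path.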