From 4cfd7baf369dada0b1d568ce9e041ce5bc6afbbc Mon Sep 17 00:00:00 2001
From: realugbun
Date: Fri, 23 May 2025 09:22:31 -0400
Subject: [PATCH] allow adding thinking config to support current and future gemini models

---
 graphiti_core/llm_client/gemini_client.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/graphiti_core/llm_client/gemini_client.py b/graphiti_core/llm_client/gemini_client.py
index e876d089..53aed7f8 100644
--- a/graphiti_core/llm_client/gemini_client.py
+++ b/graphiti_core/llm_client/gemini_client.py
@@ -51,10 +51,10 @@ class GeminiClient(LLMClient):
         model (str): The model name to use for generating responses.
         temperature (float): The temperature to use for generating responses.
         max_tokens (int): The maximum number of tokens to generate in a response.
-        thinking_budget (int): The maximum number of tokens to spend on thinking for 2.5 version models. 0 disables thinking.
+        thinking_config (types.ThinkingConfig | None): Optional thinking configuration for models that support it.
 
     Methods:
-        __init__(config: LLMConfig | None = None, cache: bool = False):
-            Initializes the GeminiClient with the provided configuration and cache setting.
+        __init__(config: LLMConfig | None = None, cache: bool = False, thinking_config: types.ThinkingConfig | None = None):
+            Initializes the GeminiClient with the provided configuration, cache setting, and optional thinking config.
         _generate_response(messages: list[Message]) -> dict[str, typing.Any]:
             Generates a response from the language model based on the provided messages.
@@ -65,15 +65,16 @@ class GeminiClient(LLMClient):
         config: LLMConfig | None = None,
         cache: bool = False,
         max_tokens: int = DEFAULT_MAX_TOKENS,
-        thinking_budget: int = DEFAULT_THINKING_BUDGET,
+        thinking_config: types.ThinkingConfig | None = None,
     ):
         """
-        Initialize the GeminiClient with the provided configuration and cache setting.
+        Initialize the GeminiClient with the provided configuration, cache setting, and optional thinking config.
 
         Args:
             config (LLMConfig | None): The configuration for the LLM client, including API key, model, temperature, and max tokens.
             cache (bool): Whether to use caching for responses. Defaults to False.
-            thinking_budget (int): The maximum number of tokens to spend on thinking for 2.5 version models. 0 disables thinking.
+            thinking_config (types.ThinkingConfig | None): Optional thinking configuration for models that support it.
+                Only use with models that support thinking (gemini-2.5+). Defaults to None.
         """
 
         if config is None:
@@ -87,7 +88,7 @@ class GeminiClient(LLMClient):
             api_key=config.api_key,
         )
         self.max_tokens = max_tokens
-        self.thinking_budget = thinking_budget
+        self.thinking_config = thinking_config
 
     async def _generate_response(
         self,
@@ -157,7 +158,7 @@ class GeminiClient(LLMClient):
             response_mime_type='application/json' if response_model else None,
             response_schema=response_model if response_model else None,
             system_instruction=system_prompt,
-            thinking_config=thinking_config_arg,
+            thinking_config=self.thinking_config,
         )
 
         # Generate content using the simple string approach
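
A minimal usage sketch of the new parameter, for reviewers. It assumes the `google-genai` SDK's `types.ThinkingConfig` and that `LLMConfig` is importable from `graphiti_core.llm_client.config`; the model names and the budget value are illustrative, not recommendations.

```python
from google.genai import types  # google-genai SDK, whose ThinkingConfig is passed through

# Assumed import paths based on the file touched by this patch.
from graphiti_core.llm_client.config import LLMConfig
from graphiti_core.llm_client.gemini_client import GeminiClient

# Only pass a thinking config for models that support thinking (gemini-2.5+).
# The budget value below is illustrative.
client = GeminiClient(
    config=LLMConfig(api_key='...', model='gemini-2.5-flash'),
    thinking_config=types.ThinkingConfig(thinking_budget=1024),
)

# For older models, omit thinking_config; it defaults to None and nothing
# extra is forwarded to generate_content.
legacy_client = GeminiClient(config=LLMConfig(api_key='...', model='gemini-2.0-flash'))
```

Passing the whole `ThinkingConfig` object instead of a bare integer budget is what lets the client track future fields the SDK adds (e.g. returning thought summaries) without further changes here.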