From 756734be016ea9490585b89139c7be354d5ad938 Mon Sep 17 00:00:00 2001
From: realugbun
Date: Wed, 21 May 2025 22:11:19 -0400
Subject: [PATCH] add support for Gemini 2.5 model thinking budget

---
 graphiti_core/llm_client/gemini_client.py | 30 ++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/graphiti_core/llm_client/gemini_client.py b/graphiti_core/llm_client/gemini_client.py
index 107c4c60..e876d089 100644
--- a/graphiti_core/llm_client/gemini_client.py
+++ b/graphiti_core/llm_client/gemini_client.py
@@ -30,6 +30,14 @@ from .errors import RateLimitError
 logger = logging.getLogger(__name__)
 
 DEFAULT_MODEL = 'gemini-2.0-flash'
+DEFAULT_THINKING_BUDGET = 0
+
+# Gemini models that support thinking capabilities
+GEMINI_THINKING_MODELS = [
+    'gemini-2.5-pro',
+    'gemini-2.5-flash',
+    'gemini-2.5-flash-lite',
+]
 
 
 class GeminiClient(LLMClient):
@@ -43,7 +51,7 @@ class GeminiClient(LLMClient):
         model (str): The model name to use for generating responses.
         temperature (float): The temperature to use for generating responses.
         max_tokens (int): The maximum number of tokens to generate in a response.
-
+        thinking_budget (int): The maximum number of tokens to spend on thinking for 2.5 version models. 0 disables thinking.
     Methods:
         __init__(config: LLMConfig | None = None, cache: bool = False):
             Initializes the GeminiClient with the provided configuration and cache setting.
@@ -57,6 +65,7 @@ class GeminiClient(LLMClient):
         config: LLMConfig | None = None,
         cache: bool = False,
         max_tokens: int = DEFAULT_MAX_TOKENS,
+        thinking_budget: int = DEFAULT_THINKING_BUDGET,
     ):
         """
         Initialize the GeminiClient with the provided configuration and cache setting.
@@ -64,6 +73,8 @@
         Args:
             config (LLMConfig | None): The configuration for the LLM client, including API key, model, temperature, and max tokens.
             cache (bool): Whether to use caching for responses. Defaults to False.
+            thinking_budget (int): The maximum number of tokens to spend on thinking for 2.5 version models. 0 disables thinking.
+
         """
         if config is None:
             config = LLMConfig()
@@ -76,6 +87,7 @@
             api_key=config.api_key,
         )
         self.max_tokens = max_tokens
+        self.thinking_budget = thinking_budget
 
     async def _generate_response(
         self,
@@ -127,6 +139,17 @@
                 types.Content(role=m.role, parts=[types.Part.from_text(text=m.content)])
             )
 
+        # Determine the model to be used
+        model_to_use = self.model or DEFAULT_MODEL
+
+        # Conditionally create thinking_config for models that support thinking
+        thinking_config_arg = None
+        if model_to_use in GEMINI_THINKING_MODELS:
+            thinking_config_arg = types.ThinkingConfig(
+                include_thoughts=False,
+                thinking_budget=self.thinking_budget,
+            )
+
         # Create generation config
         generation_config = types.GenerateContentConfig(
             temperature=self.temperature,
@@ -134,12 +157,13 @@
             response_mime_type='application/json' if response_model else None,
             response_schema=response_model if response_model else None,
             system_instruction=system_prompt,
+            thinking_config=thinking_config_arg,
         )
 
         # Generate content using the simple string approach
         response = await self.client.aio.models.generate_content(
-            model=self.model or DEFAULT_MODEL,
-            contents=gemini_messages,
             config=generation_config,
         )
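
Usage note (reviewer sketch, not part of the patch): the snippet below shows how a caller
could opt into thinking on a 2.5-series model once this lands. It assumes the LLMConfig
fields named in the docstring (api_key, model, temperature) and the module path
graphiti_core.llm_client.config for LLMConfig; adjust the import if the config class lives
elsewhere.

    from graphiti_core.llm_client.config import LLMConfig
    from graphiti_core.llm_client.gemini_client import GeminiClient

    # 'gemini-2.5-flash' is listed in GEMINI_THINKING_MODELS, so the client attaches a
    # ThinkingConfig with the given budget; other models (e.g. the default
    # gemini-2.0-flash) skip thinking_config entirely.
    config = LLMConfig(api_key='...', model='gemini-2.5-flash', temperature=0.0)
    client = GeminiClient(config=config, thinking_budget=1024)

    # Leaving thinking_budget at its default (DEFAULT_THINKING_BUDGET = 0) keeps
    # thinking disabled even on 2.5 models.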