allow adding thinking config to support current and future gemini models

realugbun 2025-05-23 09:22:31 -04:00 committed by Daniel Chalef
parent 756734be01
commit 4cfd7baf36


@@ -51,10 +51,10 @@ class GeminiClient(LLMClient):
         model (str): The model name to use for generating responses.
         temperature (float): The temperature to use for generating responses.
         max_tokens (int): The maximum number of tokens to generate in a response.
-        thinking_budget (int): The maximum number of tokens to spend on thinking for 2.5 version models. 0 disables thinking.
+        thinking_config (types.ThinkingConfig | None): Optional thinking configuration for models that support it.
 
     Methods:
-        __init__(config: LLMConfig | None = None, cache: bool = False):
-            Initializes the GeminiClient with the provided configuration and cache setting.
+        __init__(config: LLMConfig | None = None, cache: bool = False, thinking_config: types.ThinkingConfig | None = None):
+            Initializes the GeminiClient with the provided configuration, cache setting, and optional thinking config.
         _generate_response(messages: list[Message]) -> dict[str, typing.Any]:
             Generates a response from the language model based on the provided messages.
@@ -65,15 +65,16 @@ class GeminiClient(LLMClient):
         config: LLMConfig | None = None,
         cache: bool = False,
         max_tokens: int = DEFAULT_MAX_TOKENS,
-        thinking_budget: int = DEFAULT_THINKING_BUDGET,
+        thinking_config: types.ThinkingConfig | None = None,
     ):
         """
-        Initialize the GeminiClient with the provided configuration and cache setting.
+        Initialize the GeminiClient with the provided configuration, cache setting, and optional thinking config.
 
         Args:
             config (LLMConfig | None): The configuration for the LLM client, including API key, model, temperature, and max tokens.
             cache (bool): Whether to use caching for responses. Defaults to False.
-            thinking_budget (int): The maximum number of tokens to spend on thinking for 2.5 version models. 0 disables thinking.
+            thinking_config (types.ThinkingConfig | None): Optional thinking configuration for models that support it.
+                Only use with models that support thinking (gemini-2.5+). Defaults to None.
         """
 
         if config is None:
@@ -87,7 +88,7 @@ class GeminiClient(LLMClient):
             api_key=config.api_key,
         )
         self.max_tokens = max_tokens
-        self.thinking_budget = thinking_budget
+        self.thinking_config = thinking_config
 
     async def _generate_response(
         self,
@@ -157,7 +158,7 @@ class GeminiClient(LLMClient):
             response_mime_type='application/json' if response_model else None,
             response_schema=response_model if response_model else None,
             system_instruction=system_prompt,
-            thinking_config=thinking_config_arg,
+            thinking_config=self.thinking_config,
         )
 
         # Generate content using the simple string approach
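
For reference, a minimal usage sketch of the new parameter (not part of this commit; it assumes graphiti_core's module layout and the google-genai types module, and the model name and budget value are illustrative):

from google.genai import types

from graphiti_core.llm_client.config import LLMConfig
from graphiti_core.llm_client.gemini_client import GeminiClient

# Enable thinking with an explicit token budget. Per the docstring above,
# only pass thinking_config for models that support thinking (gemini-2.5+).
client = GeminiClient(
    config=LLMConfig(
        api_key='...',
        model='gemini-2.5-flash',  # illustrative; any thinking-capable model
    ),
    thinking_config=types.ThinkingConfig(thinking_budget=1024),
)

# Leaving thinking_config as the default None preserves the previous behavior,
# so the client keeps working with models that do not support thinking.

Passing the whole types.ThinkingConfig through, rather than a bare thinking_budget int, is what lets the client track future Gemini thinking options without further signature changes.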