Use self.max_tokens when max_tokens isn't specified (#382)
* Fix: use self.max_tokens when max_tokens isn't specified
* Fix: use self.max_tokens in OpenAI clients
* Fix: use self.max_tokens in Anthropic client
* Fix: use self.max_tokens in Gemini client
parent c36652eb75
commit 17c177e91a
5 changed files with 20 additions and 5 deletions
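The change is the same in every client: the max_tokens parameter becomes optional, and when a caller omits it the client falls back to the instance-level self.max_tokens instead of a hard-coded default in the signature. Below is a minimal sketch of that pattern in isolation; SketchClient, generate_text, and the placeholder DEFAULT_MAX_TOKENS value are hypothetical stand-ins, not the real client classes shown in the diff.

    # Minimal sketch of the fallback introduced in this commit; all names here
    # are illustrative stand-ins, not the actual clients changed below.
    DEFAULT_MAX_TOKENS = 1024  # placeholder default for illustration only


    class SketchClient:
        def __init__(self, max_tokens: int = DEFAULT_MAX_TOKENS):
            self.max_tokens = max_tokens  # per-instance cap set at construction

        def generate_text(self, prompt: str, max_tokens: int | None = None) -> str:
            # Fall back to the instance setting when the caller omits max_tokens,
            # rather than baking DEFAULT_MAX_TOKENS into the method signature.
            if max_tokens is None:
                max_tokens = self.max_tokens
            return f'would call the model with max_tokens={max_tokens}'


    client = SketchClient(max_tokens=2048)
    print(client.generate_text('hi'))                   # uses the instance value, 2048
    print(client.generate_text('hi', max_tokens=256))   # an explicit value still wins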
@@ -262,7 +262,7 @@ class AnthropicClient(LLMClient):
         self,
         messages: list[Message],
         response_model: type[BaseModel] | None = None,
-        max_tokens: int = DEFAULT_MAX_TOKENS,
+        max_tokens: int | None = None,
     ) -> dict[str, typing.Any]:
         """
         Generate a response from the LLM.
@@ -280,6 +280,9 @@ class AnthropicClient(LLMClient):
             RefusalError: If the LLM refuses to respond.
             Exception: If an error occurs during the generation process.
         """
+        if max_tokens is None:
+            max_tokens = self.max_tokens
+
         retry_count = 0
         max_retries = 2
         last_error: Exception | None = None
@@ -127,8 +127,11 @@ class LLMClient(ABC):
         self,
         messages: list[Message],
         response_model: type[BaseModel] | None = None,
-        max_tokens: int = DEFAULT_MAX_TOKENS,
+        max_tokens: int | None = None,
     ) -> dict[str, typing.Any]:
+        if max_tokens is None:
+            max_tokens = self.max_tokens
+
         if response_model is not None:
             serialized_model = json.dumps(response_model.model_json_schema())
             messages[
@@ -166,7 +166,7 @@ class GeminiClient(LLMClient):
         self,
         messages: list[Message],
         response_model: type[BaseModel] | None = None,
-        max_tokens: int = DEFAULT_MAX_TOKENS,
+        max_tokens: int | None = None,
     ) -> dict[str, typing.Any]:
         """
         Generate a response from the Gemini language model.
@@ -180,6 +180,9 @@ class GeminiClient(LLMClient):
         Returns:
             dict[str, typing.Any]: The response from the language model.
         """
+        if max_tokens is None:
+            max_tokens = self.max_tokens
+
         # Call the internal _generate_response method
         return await self._generate_response(
             messages=messages, response_model=response_model, max_tokens=max_tokens
@@ -131,8 +131,11 @@ class OpenAIClient(LLMClient):
         self,
         messages: list[Message],
         response_model: type[BaseModel] | None = None,
-        max_tokens: int = DEFAULT_MAX_TOKENS,
+        max_tokens: int | None = None,
     ) -> dict[str, typing.Any]:
+        if max_tokens is None:
+            max_tokens = self.max_tokens
+
         retry_count = 0
         last_error = None

@@ -117,8 +117,11 @@ class OpenAIGenericClient(LLMClient):
         self,
         messages: list[Message],
         response_model: type[BaseModel] | None = None,
-        max_tokens: int = DEFAULT_MAX_TOKENS,
+        max_tokens: int | None = None,
     ) -> dict[str, typing.Any]:
+        if max_tokens is None:
+            max_tokens = self.max_tokens
+
         retry_count = 0
         last_error = None
