diff --git a/graphiti_core/llm_client/anthropic_client.py b/graphiti_core/llm_client/anthropic_client.py
index efa837c8..7078fe30 100644
--- a/graphiti_core/llm_client/anthropic_client.py
+++ b/graphiti_core/llm_client/anthropic_client.py
@@ -262,7 +262,7 @@ class AnthropicClient(LLMClient):
         self,
         messages: list[Message],
         response_model: type[BaseModel] | None = None,
-        max_tokens: int = DEFAULT_MAX_TOKENS,
+        max_tokens: int | None = None,
     ) -> dict[str, typing.Any]:
         """
         Generate a response from the LLM.
@@ -280,6 +280,9 @@ class AnthropicClient(LLMClient):
             RefusalError: If the LLM refuses to respond.
             Exception: If an error occurs during the generation process.
         """
+        if max_tokens is None:
+            max_tokens = self.max_tokens
+
         retry_count = 0
         max_retries = 2
         last_error: Exception | None = None
diff --git a/graphiti_core/llm_client/client.py b/graphiti_core/llm_client/client.py
index aa1a0f8a..c466e3fa 100644
--- a/graphiti_core/llm_client/client.py
+++ b/graphiti_core/llm_client/client.py
@@ -127,8 +127,11 @@ class LLMClient(ABC):
         self,
         messages: list[Message],
         response_model: type[BaseModel] | None = None,
-        max_tokens: int = DEFAULT_MAX_TOKENS,
+        max_tokens: int | None = None,
     ) -> dict[str, typing.Any]:
+        if max_tokens is None:
+            max_tokens = self.max_tokens
+
         if response_model is not None:
             serialized_model = json.dumps(response_model.model_json_schema())
             messages[
diff --git a/graphiti_core/llm_client/gemini_client.py b/graphiti_core/llm_client/gemini_client.py
index 02186fc4..5c668e3a 100644
--- a/graphiti_core/llm_client/gemini_client.py
+++ b/graphiti_core/llm_client/gemini_client.py
@@ -166,7 +166,7 @@ class GeminiClient(LLMClient):
         self,
         messages: list[Message],
         response_model: type[BaseModel] | None = None,
-        max_tokens: int = DEFAULT_MAX_TOKENS,
+        max_tokens: int | None = None,
     ) -> dict[str, typing.Any]:
         """
         Generate a response from the Gemini language model.
@@ -180,6 +180,9 @@ class GeminiClient(LLMClient):
         Returns:
             dict[str, typing.Any]: The response from the language model.
         """
+        if max_tokens is None:
+            max_tokens = self.max_tokens
+
         # Call the internal _generate_response method
         return await self._generate_response(
             messages=messages, response_model=response_model, max_tokens=max_tokens
diff --git a/graphiti_core/llm_client/openai_client.py b/graphiti_core/llm_client/openai_client.py
index 6726e3d2..fb72926a 100644
--- a/graphiti_core/llm_client/openai_client.py
+++ b/graphiti_core/llm_client/openai_client.py
@@ -131,8 +131,11 @@ class OpenAIClient(LLMClient):
         self,
         messages: list[Message],
         response_model: type[BaseModel] | None = None,
-        max_tokens: int = DEFAULT_MAX_TOKENS,
+        max_tokens: int | None = None,
     ) -> dict[str, typing.Any]:
+        if max_tokens is None:
+            max_tokens = self.max_tokens
+
         retry_count = 0
         last_error = None

diff --git a/graphiti_core/llm_client/openai_generic_client.py b/graphiti_core/llm_client/openai_generic_client.py
index ba799d38..d48a56fc 100644
--- a/graphiti_core/llm_client/openai_generic_client.py
+++ b/graphiti_core/llm_client/openai_generic_client.py
@@ -117,8 +117,11 @@ class OpenAIGenericClient(LLMClient):
         self,
         messages: list[Message],
         response_model: type[BaseModel] | None = None,
-        max_tokens: int = DEFAULT_MAX_TOKENS,
+        max_tokens: int | None = None,
     ) -> dict[str, typing.Any]:
+        if max_tokens is None:
+            max_tokens = self.max_tokens
+
         retry_count = 0
         last_error = None

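Each hunk applies the same pattern: the signature default changes from the module-level DEFAULT_MAX_TOKENS constant to None, and the method resolves None to the per-instance self.max_tokens at call time, so a client constructed with a custom limit is honored even when callers omit the argument. A minimal standalone sketch of that fallback pattern follows; the class below is illustrative only and mirrors the diff's names (LLMClient, max_tokens, generate_response) rather than reproducing the actual graphiti_core implementation, and the constant's value is assumed.

import asyncio

# Assumed value, for illustration only.
DEFAULT_MAX_TOKENS = 8192


class LLMClient:
    def __init__(self, max_tokens: int = DEFAULT_MAX_TOKENS) -> None:
        # Per-instance limit, configurable at construction time.
        self.max_tokens = max_tokens

    async def generate_response(
        self,
        messages: list[str],
        max_tokens: int | None = None,
    ) -> dict:
        # Fall back to the instance-level limit instead of a module-level
        # constant when the caller does not pass max_tokens explicitly.
        if max_tokens is None:
            max_tokens = self.max_tokens
        return {"max_tokens": max_tokens, "messages": messages}


# Usage: a client configured with a smaller limit keeps it on every call.
client = LLMClient(max_tokens=1024)
print(asyncio.run(client.generate_response(["hello"]))["max_tokens"])  # 1024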