diff --git a/graphiti_core/llm_client/anthropic_client.py b/graphiti_core/llm_client/anthropic_client.py
index efa837c8..7078fe30 100644
--- a/graphiti_core/llm_client/anthropic_client.py
+++ b/graphiti_core/llm_client/anthropic_client.py
@@ -262,7 +262,7 @@ class AnthropicClient(LLMClient):
         self,
         messages: list[Message],
         response_model: type[BaseModel] | None = None,
-        max_tokens: int = DEFAULT_MAX_TOKENS,
+        max_tokens: int | None = None,
     ) -> dict[str, typing.Any]:
         """
         Generate a response from the LLM.
@@ -280,6 +280,9 @@ class AnthropicClient(LLMClient):
             RefusalError: If the LLM refuses to respond.
             Exception: If an error occurs during the generation process.
         """
+        if max_tokens is None:
+            max_tokens = self.max_tokens
+
         retry_count = 0
         max_retries = 2
         last_error: Exception | None = None
diff --git a/graphiti_core/llm_client/client.py b/graphiti_core/llm_client/client.py
index aa1a0f8a..c466e3fa 100644
--- a/graphiti_core/llm_client/client.py
+++ b/graphiti_core/llm_client/client.py
@@ -127,8 +127,11 @@ class LLMClient(ABC):
         self,
         messages: list[Message],
         response_model: type[BaseModel] | None = None,
-        max_tokens: int = DEFAULT_MAX_TOKENS,
+        max_tokens: int | None = None,
     ) -> dict[str, typing.Any]:
+        if max_tokens is None:
+            max_tokens = self.max_tokens
+
         if response_model is not None:
             serialized_model = json.dumps(response_model.model_json_schema())
             messages[
diff --git a/graphiti_core/llm_client/gemini_client.py b/graphiti_core/llm_client/gemini_client.py
index 02186fc4..5c668e3a 100644
--- a/graphiti_core/llm_client/gemini_client.py
+++ b/graphiti_core/llm_client/gemini_client.py
@@ -166,7 +166,7 @@ class GeminiClient(LLMClient):
         self,
         messages: list[Message],
         response_model: type[BaseModel] | None = None,
-        max_tokens: int = DEFAULT_MAX_TOKENS,
+        max_tokens: int | None = None,
     ) -> dict[str, typing.Any]:
         """
         Generate a response from the Gemini language model.
@@ -180,6 +180,9 @@ class GeminiClient(LLMClient):
         Returns:
             dict[str, typing.Any]: The response from the language model.
         """
+        if max_tokens is None:
+            max_tokens = self.max_tokens
+
         # Call the internal _generate_response method
         return await self._generate_response(
             messages=messages, response_model=response_model, max_tokens=max_tokens
diff --git a/graphiti_core/llm_client/openai_client.py b/graphiti_core/llm_client/openai_client.py
index 6726e3d2..fb72926a 100644
--- a/graphiti_core/llm_client/openai_client.py
+++ b/graphiti_core/llm_client/openai_client.py
@@ -131,8 +131,11 @@ class OpenAIClient(LLMClient):
         self,
         messages: list[Message],
         response_model: type[BaseModel] | None = None,
-        max_tokens: int = DEFAULT_MAX_TOKENS,
+        max_tokens: int | None = None,
     ) -> dict[str, typing.Any]:
+        if max_tokens is None:
+            max_tokens = self.max_tokens
+
         retry_count = 0
         last_error = None

diff --git a/graphiti_core/llm_client/openai_generic_client.py b/graphiti_core/llm_client/openai_generic_client.py
index ba799d38..d48a56fc 100644
--- a/graphiti_core/llm_client/openai_generic_client.py
+++ b/graphiti_core/llm_client/openai_generic_client.py
@@ -117,8 +117,11 @@ class OpenAIGenericClient(LLMClient):
         self,
         messages: list[Message],
         response_model: type[BaseModel] | None = None,
-        max_tokens: int = DEFAULT_MAX_TOKENS,
+        max_tokens: int | None = None,
     ) -> dict[str, typing.Any]:
+        if max_tokens is None:
+            max_tokens = self.max_tokens
+
         retry_count = 0
         last_error = None

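Each hunk applies the same pattern: the signature default changes from the module-level DEFAULT_MAX_TOKENS constant to None, and the method resolves None to the per-instance self.max_tokens at call time, so a client constructed with a custom limit is honored even when callers omit the argument. A minimal standalone sketch of that fallback pattern follows; the class below is illustrative only and mirrors the diff's names (LLMClient, max_tokens, generate_response) rather than reproducing the actual graphiti_core implementation, and the constant's value is assumed.

import asyncio

# Assumed value, for illustration only.
DEFAULT_MAX_TOKENS = 8192


class LLMClient:
    def __init__(self, max_tokens: int = DEFAULT_MAX_TOKENS) -> None:
        # Per-instance limit, configurable at construction time.
        self.max_tokens = max_tokens

    async def generate_response(
        self,
        messages: list[str],
        max_tokens: int | None = None,
    ) -> dict:
        # Fall back to the instance-level limit instead of a module-level
        # constant when the caller does not pass max_tokens explicitly.
        if max_tokens is None:
            max_tokens = self.max_tokens
        return {"max_tokens": max_tokens, "messages": messages}


# Usage: a client configured with a smaller limit keeps it on every call.
client = LLMClient(max_tokens=1024)
print(asyncio.run(client.generate_response(["hello"]))["max_tokens"])  # 1024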