From 29b04a08fe4db8239d74966780430241c8040d3d Mon Sep 17 00:00:00 2001
From: Daniel Chalef <131175+danielchalef@users.noreply.github.com>
Date: Mon, 10 Nov 2025 22:39:45 -0800
Subject: [PATCH] Increase OpenAIGenericClient max_tokens to 16K and update docs

- Set default max_tokens to 16384 (16K) for OpenAIGenericClient to better support local models
- Add documentation note clarifying OpenAIGenericClient should be used for Ollama and LM Studio
- Previous default was 8192 (8K)
---
 README.md                                         | 2 ++
 graphiti_core/llm_client/openai_generic_client.py | 4 ++++
 2 files changed, 6 insertions(+)

diff --git a/README.md b/README.md
index 4deeaccf..16f36d7f 100644
--- a/README.md
+++ b/README.md
@@ -523,6 +523,8 @@ reranker, leveraging Gemini's log probabilities feature to rank passage relevance
 
 Graphiti supports Ollama for running local LLMs and embedding models via Ollama's OpenAI-compatible API. This is ideal for privacy-focused applications or when you want to avoid API costs.
 
+**Note:** Use `OpenAIGenericClient` (not `OpenAIClient`) for Ollama and other OpenAI-compatible providers like LM Studio. The `OpenAIGenericClient` is optimized for local models with a higher default max token limit (16K vs 8K) and full support for structured outputs.
+
 Install the models:
 
 ```bash
diff --git a/graphiti_core/llm_client/openai_generic_client.py b/graphiti_core/llm_client/openai_generic_client.py
index 5493c55a..50ad68a3 100644
--- a/graphiti_core/llm_client/openai_generic_client.py
+++ b/graphiti_core/llm_client/openai_generic_client.py
@@ -77,6 +77,10 @@ class OpenAIGenericClient(LLMClient):
         if config is None:
             config = LLMConfig()
 
+        # Override max_tokens default to 16K for better compatibility with local models
+        if config.max_tokens == DEFAULT_MAX_TOKENS:
+            config.max_tokens = 16384
+
         super().__init__(config, cache)
 
         if client is None:
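
For context, a minimal usage sketch of the pattern the new README note recommends: pointing `OpenAIGenericClient` at a local OpenAI-compatible endpoint. Only `OpenAIGenericClient`, `LLMConfig`, `max_tokens`, and `DEFAULT_MAX_TOKENS` appear in the patch itself; the `api_key`/`model`/`base_url` fields, the Ollama URL, and the model name below are illustrative assumptions, not part of this change.

```python
# Sketch only (not part of the patch): wiring OpenAIGenericClient to a local
# Ollama server via its OpenAI-compatible API. Field names other than
# max_tokens are assumed from Graphiti's LLMConfig; adjust to your setup.
from graphiti_core.llm_client.config import LLMConfig
from graphiti_core.llm_client.openai_generic_client import OpenAIGenericClient

llm_client = OpenAIGenericClient(
    config=LLMConfig(
        api_key="ollama",                      # Ollama ignores the key's value
        model="llama3.1:8b",                   # assumed local model name
        base_url="http://localhost:11434/v1",  # Ollama's OpenAI-compatible endpoint
        # max_tokens is deliberately left unset: with this patch, the client
        # raises the library default (DEFAULT_MAX_TOKENS, 8192) to 16384.
    )
)
```

Because the override only fires when `config.max_tokens == DEFAULT_MAX_TOKENS`, a caller who explicitly sets `max_tokens` keeps their chosen value; only the untouched default is bumped to 16K.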