GPT-5 default (#849)

* gpt-5-mini and gpt-5-nano default

* bump version

* remove unused imports

* linter

* update

* disable neptune errors while we get a fixture in place

* update pyright

* revert non-structured completions

* fix typo
This commit is contained in:
Preston Rasmussen 2025-08-21 12:10:57 -04:00 committed by GitHub
parent ef56dc779a
commit 1edcbaa9e9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 2051 additions and 2033 deletions

View file

@@ -17,7 +17,7 @@ limitations under the License.
from enum import Enum
DEFAULT_MAX_TOKENS = 8192
DEFAULT_TEMPERATURE = 0
DEFAULT_TEMPERATURE = 1
class ModelSize(Enum):

View file

@@ -31,8 +31,10 @@ from .errors import RateLimitError, RefusalError
logger = logging.getLogger(__name__)
DEFAULT_MODEL = 'gpt-4.1-mini'
DEFAULT_SMALL_MODEL = 'gpt-4.1-nano'
DEFAULT_MODEL = 'gpt-5-mini'
DEFAULT_SMALL_MODEL = 'gpt-5-nano'
DEFAULT_REASONING = 'minimal'
DEFAULT_VERBOSITY = 'low'
class BaseOpenAIClient(LLMClient):
@@ -51,6 +53,8 @@ class BaseOpenAIClient(LLMClient):
config: LLMConfig | None = None,
cache: bool = False,
max_tokens: int = DEFAULT_MAX_TOKENS,
reasoning: str | None = DEFAULT_REASONING,
verbosity: str | None = DEFAULT_VERBOSITY,
):
if cache:
raise NotImplementedError('Caching is not implemented for OpenAI-based clients')
@@ -60,6 +64,8 @@ class BaseOpenAIClient(LLMClient):
super().__init__(config, cache)
self.max_tokens = max_tokens
self.reasoning = reasoning
self.verbosity = verbosity
@abstractmethod
async def _create_completion(
@@ -81,6 +87,8 @@ class BaseOpenAIClient(LLMClient):
temperature: float | None,
max_tokens: int,
response_model: type[BaseModel],
reasoning: str | None,
verbosity: str | None,
) -> Any:
"""Create a structured completion using the specific client implementation."""
pass
@@ -140,6 +148,8 @@ class BaseOpenAIClient(LLMClient):
temperature=self.temperature,
max_tokens=max_tokens or self.max_tokens,
response_model=response_model,
reasoning=self.reasoning,
verbosity=self.verbosity,
)
return self._handle_structured_response(response)
else:

View file

@@ -21,7 +21,7 @@ from openai.types.chat import ChatCompletionMessageParam
from pydantic import BaseModel
from .config import DEFAULT_MAX_TOKENS, LLMConfig
from .openai_base_client import BaseOpenAIClient
from .openai_base_client import DEFAULT_REASONING, DEFAULT_VERBOSITY, BaseOpenAIClient
class OpenAIClient(BaseOpenAIClient):
@@ -41,6 +41,8 @@ class OpenAIClient(BaseOpenAIClient):
cache: bool = False,
client: typing.Any = None,
max_tokens: int = DEFAULT_MAX_TOKENS,
reasoning: str = DEFAULT_REASONING,
verbosity: str = DEFAULT_VERBOSITY,
):
"""
Initialize the OpenAIClient with the provided configuration, cache setting, and client.
@@ -50,7 +52,7 @@ class OpenAIClient(BaseOpenAIClient):
cache (bool): Whether to use caching for responses. Defaults to False.
client (Any | None): An optional async client instance to use. If not provided, a new AsyncOpenAI client is created.
"""
super().__init__(config, cache, max_tokens)
super().__init__(config, cache, max_tokens, reasoning, verbosity)
if config is None:
config = LLMConfig()
@@ -67,6 +69,8 @@ class OpenAIClient(BaseOpenAIClient):
temperature: float | None,
max_tokens: int,
response_model: type[BaseModel],
reasoning: str | None = None,
verbosity: str | None = None,
):
"""Create a structured completion using OpenAI's beta parse API."""
response = await self.client.responses.parse(
@@ -75,6 +79,8 @@ class OpenAIClient(BaseOpenAIClient):
temperature=temperature,
max_output_tokens=max_tokens,
text_format=response_model, # type: ignore
reasoning={'effort': reasoning} if reasoning is not None else None, # type: ignore
text={'verbosity': verbosity} if verbosity is not None else None, # type: ignore
)
return response
@@ -86,6 +92,8 @@ class OpenAIClient(BaseOpenAIClient):
temperature: float | None,
max_tokens: int,
response_model: type[BaseModel] | None = None,
reasoning: str | None = None,
verbosity: str | None = None,
):
"""Create a regular completion with JSON format."""
return await self.client.chat.completions.create(

View file

@@ -1,7 +1,7 @@
[project]
name = "graphiti-core"
description = "A temporal graph building library"
version = "0.18.9"
version = "0.19.0"
authors = [
{ name = "Paul Paliychuk", email = "paul@getzep.com" },
{ name = "Preston Rasmussen", email = "preston@getzep.com" },
@@ -34,7 +34,7 @@ voyageai = ["voyageai>=0.2.3"]
sentence-transformers = ["sentence-transformers>=3.2.1"]
neptune = ["langchain-aws>=0.2.29", "opensearch-py>=3.0.0", "boto3>=1.39.16"]
dev = [
"pyright>=1.1.380",
"pyright>=1.1.404",
"groq>=0.2.0",
"anthropic>=0.49.0",
"google-genai>=1.8.0",

View file

@@ -48,7 +48,7 @@ if os.getenv('DISABLE_NEPTUNE') is None:
try:
from graphiti_core.driver.neptune_driver import NeptuneDriver
HAS_NEPTUNE = True
HAS_NEPTUNE = False
except ImportError:
pass

4050
uv.lock generated

File diff suppressed because it is too large Load diff