GPT-5 default (#849)

* gpt-5-mini and gpt-5-nano default

* bump version

* remove unused imports

* linter

* update

* disable neptune errors while we get a fixture in place

* update pyright

* revert non-structured completions

* fix typo
This commit is contained in:
Preston Rasmussen 2025-08-21 12:10:57 -04:00 committed by GitHub
parent ef56dc779a
commit 1edcbaa9e9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 2051 additions and 2033 deletions

View file

@@ -17,7 +17,7 @@ limitations under the License.
from enum import Enum
DEFAULT_MAX_TOKENS = 8192
DEFAULT_TEMPERATURE = 0
DEFAULT_TEMPERATURE = 1
class ModelSize(Enum):

View file

@@ -31,8 +31,10 @@ from .errors import RateLimitError, RefusalError
logger = logging.getLogger(__name__)
DEFAULT_MODEL = 'gpt-4.1-mini'
DEFAULT_SMALL_MODEL = 'gpt-4.1-nano'
DEFAULT_MODEL = 'gpt-5-mini'
DEFAULT_SMALL_MODEL = 'gpt-5-nano'
DEFAULT_REASONING = 'minimal'
DEFAULT_VERBOSITY = 'low'
class BaseOpenAIClient(LLMClient):
@@ -51,6 +53,8 @@ class BaseOpenAIClient(LLMClient):
config: LLMConfig | None = None,
cache: bool = False,
max_tokens: int = DEFAULT_MAX_TOKENS,
reasoning: str | None = DEFAULT_REASONING,
verbosity: str | None = DEFAULT_VERBOSITY,
):
if cache:
raise NotImplementedError('Caching is not implemented for OpenAI-based clients')
@@ -60,6 +64,8 @@ class BaseOpenAIClient(LLMClient):
super().__init__(config, cache)
self.max_tokens = max_tokens
self.reasoning = reasoning
self.verbosity = verbosity
@abstractmethod
async def _create_completion(
@@ -81,6 +87,8 @@ class BaseOpenAIClient(LLMClient):
temperature: float | None,
max_tokens: int,
response_model: type[BaseModel],
reasoning: str | None,
verbosity: str | None,
) -> Any:
"""Create a structured completion using the specific client implementation."""
pass
@@ -140,6 +148,8 @@ class BaseOpenAIClient(LLMClient):
temperature=self.temperature,
max_tokens=max_tokens or self.max_tokens,
response_model=response_model,
reasoning=self.reasoning,
verbosity=self.verbosity,
)
return self._handle_structured_response(response)
else:

View file

@@ -21,7 +21,7 @@ from openai.types.chat import ChatCompletionMessageParam
from pydantic import BaseModel
from .config import DEFAULT_MAX_TOKENS, LLMConfig
from .openai_base_client import BaseOpenAIClient
from .openai_base_client import DEFAULT_REASONING, DEFAULT_VERBOSITY, BaseOpenAIClient
class OpenAIClient(BaseOpenAIClient):
@@ -41,6 +41,8 @@ class OpenAIClient(BaseOpenAIClient):
cache: bool = False,
client: typing.Any = None,
max_tokens: int = DEFAULT_MAX_TOKENS,
reasoning: str = DEFAULT_REASONING,
verbosity: str = DEFAULT_VERBOSITY,
):
"""
Initialize the OpenAIClient with the provided configuration, cache setting, and client.
@@ -50,7 +52,7 @@ class OpenAIClient(BaseOpenAIClient):
cache (bool): Whether to use caching for responses. Defaults to False.
client (Any | None): An optional async client instance to use. If not provided, a new AsyncOpenAI client is created.
"""
super().__init__(config, cache, max_tokens)
super().__init__(config, cache, max_tokens, reasoning, verbosity)
if config is None:
config = LLMConfig()
@@ -67,6 +69,8 @@ class OpenAIClient(BaseOpenAIClient):
temperature: float | None,
max_tokens: int,
response_model: type[BaseModel],
reasoning: str | None = None,
verbosity: str | None = None,
):
"""Create a structured completion using OpenAI's beta parse API."""
response = await self.client.responses.parse(
@@ -75,6 +79,8 @@ class OpenAIClient(BaseOpenAIClient):
temperature=temperature,
max_output_tokens=max_tokens,
text_format=response_model, # type: ignore
reasoning={'effort': reasoning} if reasoning is not None else None, # type: ignore
text={'verbosity': verbosity} if verbosity is not None else None, # type: ignore
)
return response
@@ -86,6 +92,8 @@ class OpenAIClient(BaseOpenAIClient):
temperature: float | None,
max_tokens: int,
response_model: type[BaseModel] | None = None,
reasoning: str | None = None,
verbosity: str | None = None,
):
"""Create a regular completion with JSON format."""
return await self.client.chat.completions.create(

View file

@@ -1,7 +1,7 @@
[project]
name = "graphiti-core"
description = "A temporal graph building library"
version = "0.18.9"
version = "0.19.0"
authors = [
{ name = "Paul Paliychuk", email = "paul@getzep.com" },
{ name = "Preston Rasmussen", email = "preston@getzep.com" },
@@ -34,7 +34,7 @@ voyageai = ["voyageai>=0.2.3"]
sentence-transformers = ["sentence-transformers>=3.2.1"]
neptune = ["langchain-aws>=0.2.29", "opensearch-py>=3.0.0", "boto3>=1.39.16"]
dev = [
"pyright>=1.1.380",
"pyright>=1.1.404",
"groq>=0.2.0",
"anthropic>=0.49.0",
"google-genai>=1.8.0",

View file

@@ -48,7 +48,7 @@ if os.getenv('DISABLE_NEPTUNE') is None:
try:
from graphiti_core.driver.neptune_driver import NeptuneDriver
HAS_NEPTUNE = True
HAS_NEPTUNE = False
except ImportError:
pass

4050
uv.lock generated

File diff suppressed because it is too large Load diff