feat(openai): add JSON-schema fallback for structured outputs
Prefer responses.parse for structured parsing; on clear non-support
(404/NotFound/AttributeError or error mentioning “responses”), fall back
to chat.completions.create with response_format: {type: "json_schema"}
Build JSON Schema from Pydantic v2 (model_json_schema) with v1
(schema) fallback
Preserve reasoning-model temperature behavior (gpt-5/o1/o3) in both
primary and fallback paths
Normalize the fallback provider's output to a JSON string and wrap it in a minimal
response object exposing .output_text, matching the Responses API shape
Update imports and minor lint fixes
Motivation: Improve compatibility with OpenAI-compatible providers that lack
/v1/responses while keeping the native OpenAI path unchanged.
Notes: No breaking changes; existing tests pass.
This commit is contained in:
parent
299b581bbe
commit
998e60baf8
1 changed file with 72 additions and 13 deletions
|
|
@ -15,7 +15,8 @@ limitations under the License.
|
|||
"""
|
||||
|
||||
import typing
|
||||
|
||||
import json
|
||||
import openai
|
||||
from openai import AsyncOpenAI
|
||||
from openai.types.chat import ChatCompletionMessageParam
|
||||
from pydantic import BaseModel
|
||||
|
|
@ -72,21 +73,79 @@ class OpenAIClient(BaseOpenAIClient):
|
|||
reasoning: str | None = None,
|
||||
verbosity: str | None = None,
|
||||
):
|
||||
"""Create a structured completion using OpenAI's beta parse API."""
|
||||
# Reasoning models (gpt-5 family) don't support temperature
|
||||
"""Create a structured completion.
|
||||
|
||||
Prefer the Responses API with beta parse when available; otherwise fall back to
|
||||
Chat Completions with JSON schema (json mode) compatible with providers like SiliconFlow.
|
||||
"""
|
||||
# Reasoning models (gpt-5/o1/o3 family) often don't support temperature
|
||||
is_reasoning_model = model.startswith('gpt-5') or model.startswith('o1') or model.startswith('o3')
|
||||
|
||||
response = await self.client.responses.parse(
|
||||
model=model,
|
||||
input=messages, # type: ignore
|
||||
temperature=temperature if not is_reasoning_model else None,
|
||||
max_output_tokens=max_tokens,
|
||||
text_format=response_model, # type: ignore
|
||||
reasoning={'effort': reasoning} if reasoning is not None else None, # type: ignore
|
||||
text={'verbosity': verbosity} if verbosity is not None else None, # type: ignore
|
||||
)
|
||||
try:
|
||||
# Primary path: use OpenAI Responses API with structured parsing
|
||||
response = await self.client.responses.parse(
|
||||
model=model,
|
||||
input=messages, # type: ignore
|
||||
temperature=temperature if not is_reasoning_model else None,
|
||||
max_output_tokens=max_tokens,
|
||||
text_format=response_model, # type: ignore
|
||||
reasoning={'effort': reasoning} if reasoning is not None else None, # type: ignore
|
||||
text={'verbosity': verbosity} if verbosity is not None else None, # type: ignore
|
||||
)
|
||||
return response
|
||||
except Exception as e:
|
||||
# Fallback path: use chat.completions with JSON schema when /v1/responses isn't supported
|
||||
# Only fall back for clear non-support cases (e.g., 404 NotFound) or attribute issues
|
||||
should_fallback = isinstance(e, (openai.NotFoundError, AttributeError))
|
||||
if not should_fallback:
|
||||
# Some SDKs may wrap errors differently; be permissive if message hints 404/unknown endpoint
|
||||
msg = str(e).lower()
|
||||
if '404' in msg or 'not found' in msg or 'responses' in msg:
|
||||
should_fallback = True
|
||||
|
||||
return response
|
||||
if not should_fallback:
|
||||
raise
|
||||
|
||||
# Build JSON schema from the Pydantic model (Pydantic v2 preferred)
|
||||
try:
|
||||
json_schema = response_model.model_json_schema()
|
||||
except Exception:
|
||||
# Pydantic v1 compatibility
|
||||
json_schema = response_model.schema() # type: ignore[attr-defined]
|
||||
|
||||
# Some providers require a schema name; use model class name by default
|
||||
schema_name = getattr(response_model, '__name__', 'structured_response')
|
||||
|
||||
print(f'Falling back to chat.completions with JSON schema for model {model}...')
|
||||
completion = await self.client.chat.completions.create(
|
||||
model=model,
|
||||
messages=messages,
|
||||
temperature=temperature if not is_reasoning_model else None,
|
||||
max_tokens=max_tokens,
|
||||
response_format={
|
||||
'type': 'json_schema',
|
||||
'json_schema': {
|
||||
'name': schema_name,
|
||||
'schema': json_schema,
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
content = completion.choices[0].message.content if completion.choices else None
|
||||
output_text = content if content is not None else '{}'
|
||||
|
||||
# Ensure return a JSON string; serialize dict-like outputs defensively
|
||||
if not isinstance(output_text, str):
|
||||
try:
|
||||
output_text = json.dumps(output_text)
|
||||
except Exception:
|
||||
output_text = '{}'
|
||||
|
||||
class _SimpleResponse:
|
||||
def __init__(self, text: str):
|
||||
self.output_text = text
|
||||
|
||||
return _SimpleResponse(output_text)
|
||||
|
||||
async def _create_completion(
|
||||
self,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue