feat(openai): add JSON-schema fallback for structured outputs

Prefer responses.parse for structured parsing; on clear non-support
(404/NotFound/AttributeError, or an error message mentioning "responses"),
fall back to chat.completions.create with response_format: {type: "json_schema"}.
Build the JSON Schema from Pydantic v2 (model_json_schema), with a
Pydantic v1 (schema) fallback.
Preserve reasoning-model temperature behavior (gpt-5/o1/o3) in both
primary and fallback paths
Normalize provider output to a JSON string and wrap with a minimal
response exposing .output_text
Update imports and minor lint fixes
Motivation: Improve compatibility with OpenAI-compatible providers that lack
/v1/responses while keeping the native OpenAI path unchanged.

Notes: No breaking changes; existing tests pass.
This commit is contained in:
Galleons2029 2025-11-06 16:11:08 +08:00
parent 299b581bbe
commit 998e60baf8

View file

@@ -15,7 +15,8 @@ limitations under the License.
""" """
import typing import typing
import json
import openai
from openai import AsyncOpenAI from openai import AsyncOpenAI
from openai.types.chat import ChatCompletionMessageParam from openai.types.chat import ChatCompletionMessageParam
from pydantic import BaseModel from pydantic import BaseModel
@@ -72,21 +73,79 @@ class OpenAIClient(BaseOpenAIClient):
reasoning: str | None = None, reasoning: str | None = None,
verbosity: str | None = None, verbosity: str | None = None,
): ):
"""Create a structured completion using OpenAI's beta parse API.""" """Create a structured completion.
# Reasoning models (gpt-5 family) don't support temperature
Prefer the Responses API with beta parse when available; otherwise fall back to
Chat Completions with JSON schema (json mode) compatible with providers like SiliconFlow.
"""
# Reasoning models (gpt-5/o1/o3 family) often don't support temperature
is_reasoning_model = model.startswith('gpt-5') or model.startswith('o1') or model.startswith('o3') is_reasoning_model = model.startswith('gpt-5') or model.startswith('o1') or model.startswith('o3')
response = await self.client.responses.parse( try:
model=model, # Primary path: use OpenAI Responses API with structured parsing
input=messages, # type: ignore response = await self.client.responses.parse(
temperature=temperature if not is_reasoning_model else None, model=model,
max_output_tokens=max_tokens, input=messages, # type: ignore
text_format=response_model, # type: ignore temperature=temperature if not is_reasoning_model else None,
reasoning={'effort': reasoning} if reasoning is not None else None, # type: ignore max_output_tokens=max_tokens,
text={'verbosity': verbosity} if verbosity is not None else None, # type: ignore text_format=response_model, # type: ignore
) reasoning={'effort': reasoning} if reasoning is not None else None, # type: ignore
text={'verbosity': verbosity} if verbosity is not None else None, # type: ignore
)
return response
except Exception as e:
# Fallback path: use chat.completions with JSON schema when /v1/responses isn't supported
# Only fall back for clear non-support cases (e.g., 404 NotFound) or attribute issues
should_fallback = isinstance(e, (openai.NotFoundError, AttributeError))
if not should_fallback:
# Some SDKs may wrap errors differently; be permissive if message hints 404/unknown endpoint
msg = str(e).lower()
if '404' in msg or 'not found' in msg or 'responses' in msg:
should_fallback = True
return response if not should_fallback:
raise
# Build JSON schema from the Pydantic model (Pydantic v2 preferred)
try:
json_schema = response_model.model_json_schema()
except Exception:
# Pydantic v1 compatibility
json_schema = response_model.schema() # type: ignore[attr-defined]
# Some providers require a schema name; use model class name by default
schema_name = getattr(response_model, '__name__', 'structured_response')
print(f'Falling back to chat.completions with JSON schema for model {model}...')
completion = await self.client.chat.completions.create(
model=model,
messages=messages,
temperature=temperature if not is_reasoning_model else None,
max_tokens=max_tokens,
response_format={
'type': 'json_schema',
'json_schema': {
'name': schema_name,
'schema': json_schema,
},
},
)
content = completion.choices[0].message.content if completion.choices else None
output_text = content if content is not None else '{}'
# Ensure return a JSON string; serialize dict-like outputs defensively
if not isinstance(output_text, str):
try:
output_text = json.dumps(output_text)
except Exception:
output_text = '{}'
class _SimpleResponse:
def __init__(self, text: str):
self.output_text = text
return _SimpleResponse(output_text)
async def _create_completion( async def _create_completion(
self, self,