feat(openai): add JSON-schema fallback for structured outputs

Prefer responses.parse for structured parsing; on clear non-support
(404/NotFound/AttributeError or error mentioning “responses”), fall back
to chat.completions.create with response_format: {type: "json_schema"}
Build JSON Schema from Pydantic v2 (model_json_schema) with v1
(schema) fallback
Preserve reasoning-model temperature behavior (gpt-5/o1/o3) in both
primary and fallback paths
Normalize provider output to a JSON string and wrap with a minimal
response exposing .output_text
Update imports and minor lint fixes
Motivation: Improve compatibility with OpenAI-compatible providers that lack
/v1/responses while keeping the native OpenAI path unchanged.

Notes: No breaking changes; existing tests pass.
This commit is contained in:
Galleons2029 2025-11-06 16:11:08 +08:00
parent 299b581bbe
commit 998e60baf8

View file

@ -15,7 +15,8 @@ limitations under the License.
"""
import typing
import json
import openai
from openai import AsyncOpenAI
from openai.types.chat import ChatCompletionMessageParam
from pydantic import BaseModel
@ -72,21 +73,79 @@ class OpenAIClient(BaseOpenAIClient):
reasoning: str | None = None,
verbosity: str | None = None,
):
"""Create a structured completion using OpenAI's beta parse API."""
# Reasoning models (gpt-5 family) don't support temperature
"""Create a structured completion.
Prefer the Responses API with beta parse when available; otherwise fall back to
Chat Completions with JSON schema (json mode) compatible with providers like SiliconFlow.
"""
# Reasoning models (gpt-5/o1/o3 family) often don't support temperature
is_reasoning_model = model.startswith('gpt-5') or model.startswith('o1') or model.startswith('o3')
response = await self.client.responses.parse(
model=model,
input=messages, # type: ignore
temperature=temperature if not is_reasoning_model else None,
max_output_tokens=max_tokens,
text_format=response_model, # type: ignore
reasoning={'effort': reasoning} if reasoning is not None else None, # type: ignore
text={'verbosity': verbosity} if verbosity is not None else None, # type: ignore
)
try:
# Primary path: use OpenAI Responses API with structured parsing
response = await self.client.responses.parse(
model=model,
input=messages, # type: ignore
temperature=temperature if not is_reasoning_model else None,
max_output_tokens=max_tokens,
text_format=response_model, # type: ignore
reasoning={'effort': reasoning} if reasoning is not None else None, # type: ignore
text={'verbosity': verbosity} if verbosity is not None else None, # type: ignore
)
return response
except Exception as e:
# Fallback path: use chat.completions with JSON schema when /v1/responses isn't supported
# Only fall back for clear non-support cases (e.g., 404 NotFound) or attribute issues
should_fallback = isinstance(e, (openai.NotFoundError, AttributeError))
if not should_fallback:
# Some SDKs may wrap errors differently; be permissive if message hints 404/unknown endpoint
msg = str(e).lower()
if '404' in msg or 'not found' in msg or 'responses' in msg:
should_fallback = True
return response
if not should_fallback:
raise
# Build JSON schema from the Pydantic model (Pydantic v2 preferred)
try:
json_schema = response_model.model_json_schema()
except Exception:
# Pydantic v1 compatibility
json_schema = response_model.schema() # type: ignore[attr-defined]
# Some providers require a schema name; use model class name by default
schema_name = getattr(response_model, '__name__', 'structured_response')
print(f'Falling back to chat.completions with JSON schema for model {model}...')
completion = await self.client.chat.completions.create(
model=model,
messages=messages,
temperature=temperature if not is_reasoning_model else None,
max_tokens=max_tokens,
response_format={
'type': 'json_schema',
'json_schema': {
'name': schema_name,
'schema': json_schema,
},
},
)
content = completion.choices[0].message.content if completion.choices else None
output_text = content if content is not None else '{}'
# Ensure return a JSON string; serialize dict-like outputs defensively
if not isinstance(output_text, str):
try:
output_text = json.dumps(output_text)
except Exception:
output_text = '{}'
class _SimpleResponse:
def __init__(self, text: str):
self.output_text = text
return _SimpleResponse(output_text)
async def _create_completion(
self,