From 998e60baf865aa73d71db7bfca0f4141b942bca7 Mon Sep 17 00:00:00 2001
From: Galleons2029
Date: Thu, 6 Nov 2025 16:11:08 +0800
Subject: [PATCH] feat(openai): add JSON-schema fallback for structured outputs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Prefer responses.parse for structured parsing; on clear non-support
  (404/NotFound/AttributeError, or an error message mentioning “responses”),
  fall back to chat.completions.create with response_format
  {type: "json_schema"}
- Build the JSON schema from Pydantic v2 (model_json_schema), with a
  Pydantic v1 (schema) fallback
- Preserve reasoning-model temperature behavior (gpt-5/o1/o3) on both the
  primary and fallback paths
- Normalize provider output to a JSON string and wrap it in a minimal
  response object exposing .output_text
- Update imports and apply minor lint fixes

Motivation: Improve compatibility with OpenAI-compatible providers that lack
/v1/responses while keeping the native OpenAI path unchanged.

Notes: No breaking changes; existing tests pass.
---
 graphiti_core/llm_client/openai_client.py | 85 +++++++++++++++++++----
 1 file changed, 72 insertions(+), 13 deletions(-)

diff --git a/graphiti_core/llm_client/openai_client.py b/graphiti_core/llm_client/openai_client.py
index a586f9f0..30f5e722 100644
--- a/graphiti_core/llm_client/openai_client.py
+++ b/graphiti_core/llm_client/openai_client.py
@@ -15,7 +15,8 @@ limitations under the License.
 """
 
 import typing
-
+import json
+import openai
 from openai import AsyncOpenAI
 from openai.types.chat import ChatCompletionMessageParam
 from pydantic import BaseModel
@@ -72,21 +73,79 @@ class OpenAIClient(BaseOpenAIClient):
         reasoning: str | None = None,
         verbosity: str | None = None,
     ):
-        """Create a structured completion using OpenAI's beta parse API."""
-        # Reasoning models (gpt-5 family) don't support temperature
+        """Create a structured completion.
+
+        Prefer the Responses API's structured parsing when available; otherwise fall back
+        to Chat Completions with a JSON-schema response_format, as supported by providers like SiliconFlow.
+        """
+        # Reasoning models (gpt-5/o1/o3 family) often don't support temperature
         is_reasoning_model = model.startswith('gpt-5') or model.startswith('o1') or model.startswith('o3')
-        response = await self.client.responses.parse(
-            model=model,
-            input=messages,  # type: ignore
-            temperature=temperature if not is_reasoning_model else None,
-            max_output_tokens=max_tokens,
-            text_format=response_model,  # type: ignore
-            reasoning={'effort': reasoning} if reasoning is not None else None,  # type: ignore
-            text={'verbosity': verbosity} if verbosity is not None else None,  # type: ignore
-        )
+        try:
+            # Primary path: the OpenAI Responses API with structured parsing
+            response = await self.client.responses.parse(
+                model=model,
+                input=messages,  # type: ignore
+                temperature=temperature if not is_reasoning_model else None,
+                max_output_tokens=max_tokens,
+                text_format=response_model,  # type: ignore
+                reasoning={'effort': reasoning} if reasoning is not None else None,  # type: ignore
+                text={'verbosity': verbosity} if verbosity is not None else None,  # type: ignore
+            )
+            return response
+        except Exception as e:
+            # Fallback path: chat.completions with a JSON schema when /v1/responses isn't supported.
+            # Only fall back on clear non-support (e.g., 404 NotFound) or attribute errors.
+            should_fallback = isinstance(e, (openai.NotFoundError, AttributeError))
+            if not should_fallback:
+                # Some SDKs wrap errors differently; be permissive if the message hints at a 404/unknown endpoint
+                msg = str(e).lower()
+                if '404' in msg or 'not found' in msg or 'responses' in msg:
+                    should_fallback = True
 
-        return response
+            if not should_fallback:
+                raise
+
+            # Build a JSON schema from the Pydantic model (Pydantic v2 preferred)
+            try:
+                json_schema = response_model.model_json_schema()
+            except Exception:
+                # Pydantic v1 compatibility
+                json_schema = response_model.schema()  # type: ignore[attr-defined]
+
+            # Some providers require a schema name; use the model class name by default
+            schema_name = getattr(response_model, '__name__', 'structured_response')
+
+            print(f'Falling back to chat.completions with JSON schema for model {model}...')
+            completion = await self.client.chat.completions.create(
+                model=model,
+                messages=messages,
+                temperature=temperature if not is_reasoning_model else None,
+                max_tokens=max_tokens,
+                response_format={
+                    'type': 'json_schema',
+                    'json_schema': {
+                        'name': schema_name,
+                        'schema': json_schema,
+                    },
+                },
+            )
+
+            content = completion.choices[0].message.content if completion.choices else None
+            output_text = content if content is not None else '{}'
+
+            # Ensure we return a JSON string; serialize dict-like outputs defensively
+            if not isinstance(output_text, str):
+                try:
+                    output_text = json.dumps(output_text)
+                except Exception:
+                    output_text = '{}'
+
+            class _SimpleResponse:
+                def __init__(self, text: str):
+                    self.output_text = text
+
+            return _SimpleResponse(output_text)
 
     async def _create_completion(
         self,
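
Reviewer note (illustrative, not part of the patch): the sketch below shows the
request shape the fallback path emits, for exercising a provider by hand. The
base_url, api_key, model name, and the Entities model are placeholders; the
response_format payload mirrors what _create_structured_completion builds from
the Pydantic model on the fallback path.

    import asyncio
    import json

    from openai import AsyncOpenAI
    from pydantic import BaseModel


    class Entities(BaseModel):
        # Placeholder model standing in for graphiti's real response models
        names: list[str]


    async def main() -> None:
        # Placeholder endpoint/key: any OpenAI-compatible provider lacking /v1/responses
        client = AsyncOpenAI(base_url='https://api.example.com/v1', api_key='sk-placeholder')
        completion = await client.chat.completions.create(
            model='provider-model-name',  # placeholder
            messages=[{'role': 'user', 'content': 'Extract person names: Ada met Alan.'}],
            # Same response_format shape the fallback path constructs
            response_format={
                'type': 'json_schema',
                'json_schema': {
                    'name': Entities.__name__,
                    'schema': Entities.model_json_schema(),
                },
            },
        )
        text = completion.choices[0].message.content or '{}'
        # The patch normalizes output to a JSON string; callers then validate with Pydantic
        print(Entities.model_validate(json.loads(text)))


    asyncio.run(main())

Normalizing both paths to an object exposing .output_text keeps callers
agnostic to whether the Responses API or the Chat Completions fallback
produced the result.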