Use OpenAI structured output API for response validation

Replace prompt-based schema injection with the native json_schema response format. This improves token efficiency and reliability: OpenAI enforces the schema directly instead of receiving it embedded in the prompt message.
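For illustration, a minimal sketch of the request shape this change switches to (the model name and the `Entity` class are assumptions for this example, not part of the diff):

```python
from openai import AsyncOpenAI
from pydantic import BaseModel


class Entity(BaseModel):
    """Illustrative response model; any Pydantic model works here."""

    name: str
    summary: str


async def extract() -> Entity:
    client = AsyncOpenAI()
    response = await client.chat.completions.create(
        model='gpt-4o-mini',  # assumed model for this sketch
        messages=[{'role': 'user', 'content': 'Extract the entity: Ada Lovelace, mathematician.'}],
        # The schema is enforced by the API itself rather than pasted into the prompt.
        response_format={
            'type': 'json_schema',
            'json_schema': {
                'name': 'Entity',
                'schema': Entity.model_json_schema(),
            },
        },
    )
    return Entity.model_validate_json(response.choices[0].message.content or '{}')
```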

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Daniel Chalef 2025-11-09 20:51:46 -08:00
parent d4a92772ec
commit f8a17ce435


@@ -99,12 +99,25 @@ class OpenAIGenericClient(LLMClient):
             elif m.role == 'system':
                 openai_messages.append({'role': 'system', 'content': m.content})
         try:
+            # Prepare response format: schema-enforced when a response_model is given
+            response_format = {'type': 'json_object'}
+            if response_model is not None:
+                schema_name = getattr(response_model, '__name__', 'structured_response')
+                json_schema = response_model.model_json_schema()
+                response_format = {
+                    'type': 'json_schema',
+                    'json_schema': {
+                        'name': schema_name,
+                        'schema': json_schema,
+                    },
+                }
+
             response = await self.client.chat.completions.create(
                 model=self.model or DEFAULT_MODEL,
                 messages=openai_messages,
                 temperature=self.temperature,
                 max_tokens=self.max_tokens,
-                response_format={'type': 'json_object'},
+                response_format=response_format,
             )
             result = response.choices[0].message.content or ''
             return json.loads(result)
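Not part of this diff, but related: the json_schema format also accepts a `strict` flag that makes the API guarantee schema adherence. A sketch of what opting in could look like (strict mode requires a closed schema, so `additionalProperties` must be set to `false`):

```python
from pydantic import BaseModel


class Entity(BaseModel):
    """Illustrative model; not part of this change."""

    name: str
    summary: str


schema = Entity.model_json_schema()
# Strict mode requires objects to forbid unknown keys.
schema['additionalProperties'] = False

response_format = {
    'type': 'json_schema',
    'json_schema': {
        'name': 'Entity',
        'schema': schema,
        'strict': True,  # the API will not emit output that deviates from the schema
    },
}
```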
@@ -126,14 +139,6 @@ class OpenAIGenericClient(LLMClient):
         if max_tokens is None:
             max_tokens = self.max_tokens
 
-        if response_model is not None:
-            serialized_model = json.dumps(response_model.model_json_schema())
-            messages[
-                -1
-            ].content += (
-                f'\n\nRespond with a JSON object in the following format:\n\n{serialized_model}'
-            )
-
         # Add multilingual extraction instructions
         messages[0].content += get_extraction_language_instruction(group_id)
 
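With the schema no longer appended to the prompt, the remaining client-side step is validating what comes back. A hedged sketch of that guard (the helper below is hypothetical, not part of this change):

```python
import json

from pydantic import BaseModel, ValidationError


class Entity(BaseModel):
    """Illustrative model; not part of this change."""

    name: str
    summary: str


def parse_structured(raw: str, model: type[BaseModel]) -> BaseModel:
    """Hypothetical helper: parse the API's JSON string and validate it."""
    try:
        return model.model_validate(json.loads(raw))
    except (json.JSONDecodeError, ValidationError) as exc:
        # Even with server-side enforcement, a local guard keeps failures explicit.
        raise ValueError(f'Malformed structured response: {exc}') from exc


entity = parse_structured('{"name": "Ada", "summary": "Mathematician"}', Entity)
```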