From 9f69c5bf85a6b48fea683eb7c1b0dce5ae03a000 Mon Sep 17 00:00:00 2001
From: yangdx <gzdaniel@me.com>
Date: Fri, 21 Nov 2025 12:46:31 +0800
Subject: [PATCH] feat: Support structured output `parsed` from OpenAI

Added support for structured output (typed responses requested via `response_format` and returned in `message.parsed`) from the OpenAI API in `openai.py` and `azure_openai.py`.

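When `response_format` is used to request structured data, the new logic checks the `message.parsed` attribute. If it is present and not `None`, it is serialized to a JSON string with `model_dump_json()` and used as the final content. Otherwise, the code falls back to the existing `message.content` handling (including the `enable_cot` reasoning logic in `openai.py`), ensuring backward compatibility. Two side effects are worth noting: in `openai.py` the empty-content validation now sits inside the fallback branch, so it is not applied to parsed responses; in `azure_openai.py` the Unicode-decoding check is now guarded so that a `None` `message.content` no longer raises.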
---
 lightrag/llm/azure_openai.py | 17 ++++++--
 lightrag/llm/openai.py       | 77 ++++++++++++++++++++----------------
 2 files changed, 57 insertions(+), 37 deletions(-)

diff --git a/lightrag/llm/azure_openai.py b/lightrag/llm/azure_openai.py
index c183c3a9..c67bae10 100644
--- a/lightrag/llm/azure_openai.py
+++ b/lightrag/llm/azure_openai.py
@@ -113,9 +113,20 @@ async def azure_openai_complete_if_cache(
 
         return inner()
     else:
-        content = response.choices[0].message.content
-        if r"\u" in content:
-            content = safe_unicode_decode(content.encode("utf-8"))
+        message = response.choices[0].message
+        
+        # Handle parsed responses (structured output via response_format)
+        # When using beta.chat.completions.parse(), the response is in message.parsed
+        if hasattr(message, "parsed") and message.parsed is not None:
+            # Serialize the parsed structured response to JSON
+            content = message.parsed.model_dump_json()
+            logger.debug("Using parsed structured response from API")
+        else:
+            # Handle regular content responses
+            content = message.content
+            if content and r"\u" in content:
+                content = safe_unicode_decode(content.encode("utf-8"))
+        
         return content
 
 
diff --git a/lightrag/llm/openai.py b/lightrag/llm/openai.py
index 948ae270..cea85b04 100644
--- a/lightrag/llm/openai.py
+++ b/lightrag/llm/openai.py
@@ -453,46 +453,55 @@ async def openai_complete_if_cache(
                 raise InvalidResponseError("Invalid response from OpenAI API")
 
             message = response.choices[0].message
-            content = getattr(message, "content", None)
-            reasoning_content = getattr(message, "reasoning_content", "")
+            
+            # Handle parsed responses (structured output via response_format)
+            # When using beta.chat.completions.parse(), the response is in message.parsed
+            if hasattr(message, "parsed") and message.parsed is not None:
+                # Serialize the parsed structured response to JSON
+                final_content = message.parsed.model_dump_json()
+                logger.debug("Using parsed structured response from API")
+            else:
+                # Handle regular content responses
+                content = getattr(message, "content", None)
+                reasoning_content = getattr(message, "reasoning_content", "")
 
-            # Handle COT logic for non-streaming responses (only if enabled)
-            final_content = ""
+                # Handle COT logic for non-streaming responses (only if enabled)
+                final_content = ""
 
-            if enable_cot:
-                # Check if we should include reasoning content
-                should_include_reasoning = False
-                if reasoning_content and reasoning_content.strip():
-                    if not content or content.strip() == "":
-                        # Case 1: Only reasoning content, should include COT
-                        should_include_reasoning = True
-                        final_content = (
-                            content or ""
-                        )  # Use empty string if content is None
+                if enable_cot:
+                    # Check if we should include reasoning content
+                    should_include_reasoning = False
+                    if reasoning_content and reasoning_content.strip():
+                        if not content or content.strip() == "":
+                            # Case 1: Only reasoning content, should include COT
+                            should_include_reasoning = True
+                            final_content = (
+                                content or ""
+                            )  # Use empty string if content is None
+                        else:
+                            # Case 3: Both content and reasoning_content present, ignore reasoning
+                            should_include_reasoning = False
+                            final_content = content
                     else:
-                        # Case 3: Both content and reasoning_content present, ignore reasoning
-                        should_include_reasoning = False
-                        final_content = content
+                        # No reasoning content, use regular content
+                        final_content = content or ""
+
+                    # Apply COT wrapping if needed
+                    if should_include_reasoning:
+                        if r"\u" in reasoning_content:
+                            reasoning_content = safe_unicode_decode(
+                                reasoning_content.encode("utf-8")
+                            )
+                        final_content = f"<think>{reasoning_content}</think>{final_content}"
                 else:
-                    # No reasoning content, use regular content
+                    # COT disabled, only use regular content
                     final_content = content or ""
 
-                # Apply COT wrapping if needed
-                if should_include_reasoning:
-                    if r"\u" in reasoning_content:
-                        reasoning_content = safe_unicode_decode(
-                            reasoning_content.encode("utf-8")
-                        )
-                    final_content = f"<think>{reasoning_content}</think>{final_content}"
-            else:
-                # COT disabled, only use regular content
-                final_content = content or ""
-
-            # Validate final content
-            if not final_content or final_content.strip() == "":
-                logger.error("Received empty content from OpenAI API")
-                await openai_async_client.close()  # Ensure client is closed
-                raise InvalidResponseError("Received empty content from OpenAI API")
+                # Validate final content
+                if not final_content or final_content.strip() == "":
+                    logger.error("Received empty content from OpenAI API")
+                    await openai_async_client.close()  # Ensure client is closed
+                    raise InvalidResponseError("Received empty content from OpenAI API")
 
             # Apply Unicode decoding to final content if needed
             if r"\u" in final_content: