From 88ab73f6ae2fac3edb5e6add25bb8783df1f862c Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 9 Nov 2025 11:52:26 +0800 Subject: [PATCH] HotFix: Restore streaming response in OpenAI LLM The stream and timeout parameters were moved from **kwargs to explicit parameters in a previous commit, but were not being passed to the OpenAI API, causing streaming responses to fail and fall back to non-streaming mode. Fixes the issue where stream=True was being silently ignored, resulting in unexpected non-streaming behavior. --- lightrag/llm/openai.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/lightrag/llm/openai.py b/lightrag/llm/openai.py index 3339ea3a..dd84db71 100644 --- a/lightrag/llm/openai.py +++ b/lightrag/llm/openai.py @@ -138,7 +138,6 @@ async def openai_complete_if_cache( base_url: str | None = None, api_key: str | None = None, token_tracker: Any | None = None, - keyword_extraction: bool = False, # Will be removed from kwargs before passing to OpenAI stream: bool | None = None, timeout: int | None = None, **kwargs: Any, @@ -170,14 +169,14 @@ async def openai_complete_if_cache( api_key: Optional OpenAI API key. If None, uses the OPENAI_API_KEY environment variable. token_tracker: Optional token usage tracker for monitoring API usage. enable_cot: Whether to enable Chain of Thought (COT) processing. Default is False. + stream: Whether to stream the response. Default is False. + timeout: Request timeout in seconds. Default is None. **kwargs: Additional keyword arguments to pass to the OpenAI API. Special kwargs: - openai_client_configs: Dict of configuration options for the AsyncOpenAI client. These will be passed to the client constructor but will be overridden by explicit parameters (api_key, base_url). - keyword_extraction: Will be removed from kwargs before passing to OpenAI. - - stream: Whether to stream the response. Default is False. - - timeout: Request timeout in seconds. Default is None. 
Returns: The completed text (with integrated COT content if available) or an async iterator @@ -228,6 +227,12 @@ async def openai_complete_if_cache( messages = kwargs.pop("messages", messages) + # Add explicit parameters back to kwargs so they're passed to OpenAI API + if stream is not None: + kwargs["stream"] = stream + if timeout is not None: + kwargs["timeout"] = timeout + try: # Don't use async with context manager, use client directly if "response_format" in kwargs: