From 88ab73f6ae2fac3edb5e6add25bb8783df1f862c Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 9 Nov 2025 11:52:26 +0800 Subject: [PATCH] HotFix: Restore streaming response in OpenAI LLM The stream and timeout parameters were moved from **kwargs to explicit parameters in a previous commit, but were not being passed to the OpenAI API, causing streaming responses to fail and fall back to non-streaming mode. Fixes the issue where stream=True was being silently ignored, resulting in unexpected non-streaming behavior. --- lightrag/llm/openai.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/lightrag/llm/openai.py b/lightrag/llm/openai.py index 3339ea3a..dd84db71 100644 --- a/lightrag/llm/openai.py +++ b/lightrag/llm/openai.py @@ -138,7 +138,6 @@ async def openai_complete_if_cache( base_url: str | None = None, api_key: str | None = None, token_tracker: Any | None = None, - keyword_extraction: bool = False, # Will be removed from kwargs before passing to OpenAI stream: bool | None = None, timeout: int | None = None, **kwargs: Any, @@ -170,14 +169,14 @@ async def openai_complete_if_cache( api_key: Optional OpenAI API key. If None, uses the OPENAI_API_KEY environment variable. token_tracker: Optional token usage tracker for monitoring API usage. enable_cot: Whether to enable Chain of Thought (COT) processing. Default is False. + stream: Whether to stream the response. Default is False. + timeout: Request timeout in seconds. Default is None. **kwargs: Additional keyword arguments to pass to the OpenAI API. Special kwargs: - openai_client_configs: Dict of configuration options for the AsyncOpenAI client. These will be passed to the client constructor but will be overridden by explicit parameters (api_key, base_url). - keyword_extraction: Will be removed from kwargs before passing to OpenAI. - - stream: Whether to stream the response. Default is False. - - timeout: Request timeout in seconds. Default is None. 
Returns: The completed text (with integrated COT content if available) or an async iterator @@ -228,6 +227,12 @@ async def openai_complete_if_cache( messages = kwargs.pop("messages", messages) + # Add explicit parameters back to kwargs so they're passed to OpenAI API + if stream is not None: + kwargs["stream"] = stream + if timeout is not None: + kwargs["timeout"] = timeout + try: # Don't use async with context manager, use client directly if "response_format" in kwargs: