From 09cbcc457213b5e101e4167d7b9405c2673af68a Mon Sep 17 00:00:00 2001
From: kwilt
Date: Mon, 9 Jun 2025 08:28:14 -0500
Subject: [PATCH 1/8] fix typo: "extrat" -> extract

---
 lightrag/lightrag.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 0bf7de83..6979680c 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -1006,7 +1006,7 @@ class LightRAG:
         except Exception as e:
             # Log error and update pipeline status
             logger.error(traceback.format_exc())
-            error_msg = f"Failed to extrat document {current_file_number}/{total_files}: {file_path}"
+            error_msg = f"Failed to extract document {current_file_number}/{total_files}: {file_path}"
             logger.error(error_msg)
             async with pipeline_status_lock:
                 pipeline_status["latest_message"] = error_msg

From ad81e59d9a9c4c14c92a0d91b60324c3814b81c6 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Wed, 11 Jun 2025 13:30:45 +0800
Subject: [PATCH 2/8] Fix logger problem

---
 lightrag/api/routers/ollama_api.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lightrag/api/routers/ollama_api.py b/lightrag/api/routers/ollama_api.py
index 93394fbb..8d0895bb 100644
--- a/lightrag/api/routers/ollama_api.py
+++ b/lightrag/api/routers/ollama_api.py
@@ -1,7 +1,7 @@
 from fastapi import APIRouter, HTTPException, Request
 from pydantic import BaseModel
 from typing import List, Dict, Any, Optional
-import logging
+from lightrag.utils import logger
 import time
 import json
 import re
@@ -278,7 +278,7 @@
                 else:
                     error_msg = f"Provider error: {error_msg}"

-                logging.error(f"Stream error: {error_msg}")
+                logger.error(f"Stream error: {error_msg}")

                 # Send error message to client
                 error_data = {
@@ -496,7 +496,7 @@
                 else:
                     error_msg = f"Provider error: {error_msg}"

-                logging.error(f"Stream error: {error_msg}")
+                logger.error(f"Stream error: {error_msg}")

                 # Send error message to client
                 error_data = {
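Worth noting on [PATCH 2/8]: logging.error(...) writes through Python's root logger, which ignores the handlers and level that LightRAG configures, while lightrag.utils exposes the library's own logger object, so stream errors now land in the same place as every other LightRAG message. A minimal sketch of the difference; the exact logger name is an assumption about lightrag.utils, not something shown in this diff:

    import logging

    logging.error("stream error")  # root logger: default handlers, own level

    # What the patch switches to (assumed to be a configured
    # logging.getLogger(...) instance owned by the library):
    from lightrag.utils import logger
    logger.error("stream error")   # routed through LightRAG's handlers/level
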
From 7b07d4c9174ad140a8c7dd99d192010b7f103c9d Mon Sep 17 00:00:00 2001
From: yangdx
Date: Wed, 11 Jun 2025 13:42:30 +0800
Subject: [PATCH 3/8] feat: support application/octet-stream requests from
 LiteLLM clients
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Modify the Ollama API route handlers so that the /chat and /generate
endpoints accept requests whose Content-Type is application/octet-stream.
Bypassing FastAPI's automatic request validation and parsing the request
body manually resolves the 422 errors that occurred when LiteLLM clients
connected. The change preserves backward compatibility with existing
application/json requests.
---
 lightrag/api/routers/ollama_api.py | 59 +++++++++++++++++++++++++++---
 1 file changed, 54 insertions(+), 5 deletions(-)

diff --git a/lightrag/api/routers/ollama_api.py b/lightrag/api/routers/ollama_api.py
index 8d0895bb..625c0162 100644
--- a/lightrag/api/routers/ollama_api.py
+++ b/lightrag/api/routers/ollama_api.py
@@ -1,6 +1,6 @@
 from fastapi import APIRouter, HTTPException, Request
 from pydantic import BaseModel
-from typing import List, Dict, Any, Optional
+from typing import List, Dict, Any, Optional, Type
 from lightrag.utils import logger
 import time
 import json
@@ -95,6 +95,47 @@ class OllamaTagResponse(BaseModel):
     models: List[OllamaModel]


+async def parse_request_body(request: Request, model_class: Type[BaseModel]) -> BaseModel:
+    """
+    Parse request body based on Content-Type header.
+    Supports both application/json and application/octet-stream.
+
+    Args:
+        request: The FastAPI Request object
+        model_class: The Pydantic model class to parse the request into
+
+    Returns:
+        An instance of the provided model_class
+    """
+    content_type = request.headers.get("content-type", "").lower()
+
+    try:
+        if content_type.startswith("application/json"):
+            # FastAPI already handles JSON parsing for us
+            body = await request.json()
+        elif content_type.startswith("application/octet-stream"):
+            # Manually parse octet-stream as JSON
+            body_bytes = await request.body()
+            body = json.loads(body_bytes.decode('utf-8'))
+        else:
+            # Try to parse as JSON for any other content type
+            body_bytes = await request.body()
+            body = json.loads(body_bytes.decode('utf-8'))
+
+        # Create an instance of the model
+        return model_class(**body)
+    except json.JSONDecodeError:
+        raise HTTPException(
+            status_code=400,
+            detail="Invalid JSON in request body"
+        )
+    except Exception as e:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Error parsing request body: {str(e)}"
+        )
+
+
 def estimate_tokens(text: str) -> int:
     """Estimate the number of tokens in text using tiktoken"""
     tokens = TiktokenTokenizer().encode(text)
@@ -197,13 +238,17 @@ class OllamaAPI:
             ]
         )

-        @self.router.post("/generate", dependencies=[Depends(combined_auth)])
-        async def generate(raw_request: Request, request: OllamaGenerateRequest):
+        @self.router.post("/generate", dependencies=[Depends(combined_auth)], include_in_schema=True)
+        async def generate(raw_request: Request):
             """Handle generate completion requests acting as an Ollama model
             For compatibility purpose, the request is not processed by LightRAG,
             and will be handled by underlying LLM model.
+            Supports both application/json and application/octet-stream Content-Types.
             """
             try:
+                # Parse the request body manually
+                request = await parse_request_body(raw_request, OllamaGenerateRequest)
+
                 query = request.prompt
                 start_time = time.time_ns()
                 prompt_tokens = estimate_tokens(query)
@@ -363,13 +408,17 @@ class OllamaAPI:
                 trace_exception(e)
                 raise HTTPException(status_code=500, detail=str(e))

-        @self.router.post("/chat", dependencies=[Depends(combined_auth)])
-        async def chat(raw_request: Request, request: OllamaChatRequest):
+        @self.router.post("/chat", dependencies=[Depends(combined_auth)], include_in_schema=True)
+        async def chat(raw_request: Request):
             """Process chat completion requests acting as an Ollama model
             Routes user queries through LightRAG by selecting query mode based on prefix indicators.
             Detects and forwards OpenWebUI session-related requests (for meta data generation task) directly to LLM.
+            Supports both application/json and application/octet-stream Content-Types.
             """
             try:
+                # Parse the request body manually
+                request = await parse_request_body(raw_request, OllamaChatRequest)
+
                 # Get all messages
                 messages = request.messages
                 if not messages:
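A quick way to exercise the new code path from [PATCH 3/8] end to end: a request like the one below was previously rejected with 422 by FastAPI's automatic validation and should now return normally. This is only a sketch; the host, port, /api route prefix, and model name are assumptions that depend on how the LightRAG server is deployed, and an API key header may be needed when auth is enabled:

    import json
    import urllib.request

    payload = {
        "model": "lightrag:latest",  # assumed pseudo-model name (LIGHTRAG_MODEL)
        "messages": [{"role": "user", "content": "What is LightRAG?"}],
        "stream": False,
    }
    req = urllib.request.Request(
        "http://localhost:9621/api/chat",  # assumed host/port and route prefix
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/octet-stream"},  # formerly a 422
        method="POST",
    )
    with urllib.request.urlopen(req) as resp:
        print(json.loads(resp.read().decode("utf-8")))

The same payload sent with Content-Type: application/json should behave exactly as before, since parse_request_body treats JSON as the default.
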
""" try: + # Parse the request body manually + request = await parse_request_body(raw_request, OllamaChatRequest) + # Get all messages messages = request.messages if not messages: From 9351b09cc7fa2b29bd8974cd2bff651fc74ee466 Mon Sep 17 00:00:00 2001 From: yangdx Date: Wed, 11 Jun 2025 15:16:32 +0800 Subject: [PATCH 4/8] Enhance Ollama interface compatibility --- lightrag/api/routers/ollama_api.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lightrag/api/routers/ollama_api.py b/lightrag/api/routers/ollama_api.py index 625c0162..8aeb21c8 100644 --- a/lightrag/api/routers/ollama_api.py +++ b/lightrag/api/routers/ollama_api.py @@ -579,6 +579,11 @@ class OllamaAPI: data = { "model": self.ollama_server_infos.LIGHTRAG_MODEL, "created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT, + "message": { + "role": "assistant", + "content": "", + "images": None, + }, "done": True, "total_duration": total_time, "load_duration": 0, From c3f5c413fa2bffa6f714e379d02157a8868b270c Mon Sep 17 00:00:00 2001 From: yangdx Date: Wed, 11 Jun 2025 16:23:02 +0800 Subject: [PATCH 5/8] feat(api): add /ps endpoint to list running models for Ollama API --- lightrag/api/routers/ollama_api.py | 49 ++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/lightrag/api/routers/ollama_api.py b/lightrag/api/routers/ollama_api.py index 8aeb21c8..25e0521f 100644 --- a/lightrag/api/routers/ollama_api.py +++ b/lightrag/api/routers/ollama_api.py @@ -95,6 +95,29 @@ class OllamaTagResponse(BaseModel): models: List[OllamaModel] +class OllamaRunningModelDetails(BaseModel): + parent_model: str + format: str + family: str + families: List[str] + parameter_size: str + quantization_level: str + + +class OllamaRunningModel(BaseModel): + name: str + model: str + size: int + digest: str + details: OllamaRunningModelDetails + expires_at: str + size_vram: int + + +class OllamaPsResponse(BaseModel): + models: List[OllamaRunningModel] + + async def parse_request_body(request: Request, model_class: Type[BaseModel]) -> BaseModel: """ Parse request body based on Content-Type header. 
From 888be97b061613ccd1c94dcf0b4bdc142e8508be Mon Sep 17 00:00:00 2001
From: yangdx
Date: Wed, 11 Jun 2025 16:36:05 +0800
Subject: [PATCH 6/8] Fix linting

---
 lightrag/api/routers/ollama_api.py | 46 +++++++++++++++---------------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/lightrag/api/routers/ollama_api.py b/lightrag/api/routers/ollama_api.py
index 25e0521f..f77184e6 100644
--- a/lightrag/api/routers/ollama_api.py
+++ b/lightrag/api/routers/ollama_api.py
@@ -118,20 +118,22 @@ class OllamaPsResponse(BaseModel):
     models: List[OllamaRunningModel]


-async def parse_request_body(request: Request, model_class: Type[BaseModel]) -> BaseModel:
+async def parse_request_body(
+    request: Request, model_class: Type[BaseModel]
+) -> BaseModel:
     """
     Parse request body based on Content-Type header.
     Supports both application/json and application/octet-stream.
-
+
     Args:
         request: The FastAPI Request object
         model_class: The Pydantic model class to parse the request into
-
+
     Returns:
         An instance of the provided model_class
     """
     content_type = request.headers.get("content-type", "").lower()
-
+
     try:
         if content_type.startswith("application/json"):
             # FastAPI already handles JSON parsing for us
@@ -139,23 +141,19 @@
             body = await request.json()
         elif content_type.startswith("application/octet-stream"):
             # Manually parse octet-stream as JSON
             body_bytes = await request.body()
-            body = json.loads(body_bytes.decode('utf-8'))
+            body = json.loads(body_bytes.decode("utf-8"))
         else:
             # Try to parse as JSON for any other content type
             body_bytes = await request.body()
-            body = json.loads(body_bytes.decode('utf-8'))
-
+            body = json.loads(body_bytes.decode("utf-8"))
+
         # Create an instance of the model
         return model_class(**body)
     except json.JSONDecodeError:
-        raise HTTPException(
-            status_code=400,
-            detail="Invalid JSON in request body"
-        )
+        raise HTTPException(status_code=400, detail="Invalid JSON in request body")
     except Exception as e:
         raise HTTPException(
-            status_code=400,
-            detail=f"Error parsing request body: {str(e)}"
+            status_code=400, detail=f"Error parsing request body: {str(e)}"
         )

@@ -260,7 +258,7 @@ class OllamaAPI:
                 }
             ]
         )
-
+
         @self.router.get("/ps", dependencies=[Depends(combined_auth)])
         async def get_running_models():
             """List Running Models - returns currently running models"""
@@ -275,29 +273,29 @@ class OllamaAPI:
                         "parent_model": "",
                         "format": "gguf",
                         "family": "llama",
-                        "families": [
-                            "llama"
-                        ],
+                        "families": ["llama"],
                         "parameter_size": "7.2B",
-                        "quantization_level": "Q4_0"
+                        "quantization_level": "Q4_0",
                     },
                     "expires_at": "2050-12-31T14:38:31.83753-07:00",
-                    "size_vram": self.ollama_server_infos.LIGHTRAG_SIZE
+                    "size_vram": self.ollama_server_infos.LIGHTRAG_SIZE,
                 }
             ]
         )

-        @self.router.post("/generate", dependencies=[Depends(combined_auth)], include_in_schema=True)
+        @self.router.post(
+            "/generate", dependencies=[Depends(combined_auth)], include_in_schema=True
+        )
         async def generate(raw_request: Request):
             """Handle generate completion requests acting as an Ollama model
             For compatibility purpose, the request is not processed by LightRAG,
             and will be handled by underlying LLM model.
             Supports both application/json and application/octet-stream Content-Types.
             """
             try:
                 # Parse the request body manually
                 request = await parse_request_body(raw_request, OllamaGenerateRequest)
-
+
                 query = request.prompt
                 start_time = time.time_ns()
                 prompt_tokens = estimate_tokens(query)
@@ -457,17 +455,19 @@ class OllamaAPI:
                 trace_exception(e)
                 raise HTTPException(status_code=500, detail=str(e))
-        @self.router.post("/chat", dependencies=[Depends(combined_auth)], include_in_schema=True)
+        @self.router.post(
+            "/chat", dependencies=[Depends(combined_auth)], include_in_schema=True
+        )
         async def chat(raw_request: Request):
             """Process chat completion requests acting as an Ollama model
             Routes user queries through LightRAG by selecting query mode based on prefix indicators.
             Detects and forwards OpenWebUI session-related requests (for meta data generation task) directly to LLM.
             Supports both application/json and application/octet-stream Content-Types.
             """
             try:
                 # Parse the request body manually
                 request = await parse_request_body(raw_request, OllamaChatRequest)
-
+
                 # Get all messages
                 messages = request.messages
                 if not messages:

From 62621979f09a152b8fd8c076df8e0bd964134957 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Wed, 11 Jun 2025 16:37:52 +0800
Subject: [PATCH 7/8] Bump api version to 0173

---
 lightrag/api/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lightrag/api/__init__.py b/lightrag/api/__init__.py
index d4b6cc8f..5d14dfb6 100644
--- a/lightrag/api/__init__.py
+++ b/lightrag/api/__init__.py
@@ -1 +1 @@
-__api_version__ = "0172"
+__api_version__ = "0173"

From 13892656952784880d53d68654eaebb930de2400 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Wed, 11 Jun 2025 17:03:27 +0800
Subject: [PATCH 8/8] Bump api version to 1.3.9

---
 lightrag/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lightrag/__init__.py b/lightrag/__init__.py
index db182c42..1b88de1e 100644
--- a/lightrag/__init__.py
+++ b/lightrag/__init__.py
@@ -1,5 +1,5 @@
 from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam

-__version__ = "1.3.8"
+__version__ = "1.3.9"
 __author__ = "Zirui Guo"
 __url__ = "https://github.com/HKUDS/LightRAG"
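
After the two bumps in [PATCH 7/8] and [PATCH 8/8], both version strings can be checked from a Python shell; the attribute names are taken directly from the diffs above, the expected values from the patches themselves:

    import lightrag
    from lightrag.api import __api_version__

    print(lightrag.__version__)  # expected: 1.3.9
    print(__api_version__)       # expected: 0173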