From 09cbcc457213b5e101e4167d7b9405c2673af68a Mon Sep 17 00:00:00 2001
From: kwilt
Date: Mon, 9 Jun 2025 08:28:14 -0500
Subject: [PATCH 1/8] fix typo: "extrat" -> extract

---
 lightrag/lightrag.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 0bf7de83..6979680c 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -1006,7 +1006,7 @@ class LightRAG:
         except Exception as e:
             # Log error and update pipeline status
             logger.error(traceback.format_exc())
-            error_msg = f"Failed to extrat document {current_file_number}/{total_files}: {file_path}"
+            error_msg = f"Failed to extract document {current_file_number}/{total_files}: {file_path}"
             logger.error(error_msg)
             async with pipeline_status_lock:
                 pipeline_status["latest_message"] = error_msg

From ad81e59d9a9c4c14c92a0d91b60324c3814b81c6 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Wed, 11 Jun 2025 13:30:45 +0800
Subject: [PATCH 2/8] Fix logger problem

---
 lightrag/api/routers/ollama_api.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lightrag/api/routers/ollama_api.py b/lightrag/api/routers/ollama_api.py
index 93394fbb..8d0895bb 100644
--- a/lightrag/api/routers/ollama_api.py
+++ b/lightrag/api/routers/ollama_api.py
@@ -1,7 +1,7 @@
 from fastapi import APIRouter, HTTPException, Request
 from pydantic import BaseModel
 from typing import List, Dict, Any, Optional
-import logging
+from lightrag.utils import logger
 import time
 import json
 import re
@@ -278,7 +278,7 @@
                 else:
                     error_msg = f"Provider error: {error_msg}"

-                logging.error(f"Stream error: {error_msg}")
+                logger.error(f"Stream error: {error_msg}")

                 # Send error message to client
                 error_data = {
@@ -496,7 +496,7 @@
                 else:
                     error_msg = f"Provider error: {error_msg}"

-                logging.error(f"Stream error: {error_msg}")
+                logger.error(f"Stream error: {error_msg}")

                 # Send error message to client
                 error_data = {
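Worth noting on [PATCH 2/8]: logging.error(...) writes through Python's root logger, which ignores the handlers and level that LightRAG configures, while lightrag.utils exposes the library's own logger object, so stream errors now land in the same place as every other LightRAG message. A minimal sketch of the difference; the exact logger name is an assumption about lightrag.utils, not something shown in this diff:

    import logging

    logging.error("stream error")  # root logger: default handlers, own level

    # What the patch switches to (assumed to be a configured
    # logging.getLogger(...) instance owned by the library):
    from lightrag.utils import logger
    logger.error("stream error")   # routed through LightRAG's handlers/level
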
From 7b07d4c9174ad140a8c7dd99d192010b7f103c9d Mon Sep 17 00:00:00 2001
From: yangdx
Date: Wed, 11 Jun 2025 13:42:30 +0800
Subject: [PATCH 3/8] feat: support application/octet-stream requests from
 LiteLLM clients
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Modify the Ollama API route handlers so that the /chat and /generate
endpoints accept requests whose Content-Type is application/octet-stream.
Bypassing FastAPI's automatic request validation and parsing the request
body manually resolves the 422 errors that occurred when LiteLLM clients
connected. The change preserves backward compatibility with existing
application/json requests.
---
 lightrag/api/routers/ollama_api.py | 59 +++++++++++++++++++++++++++---
 1 file changed, 54 insertions(+), 5 deletions(-)

diff --git a/lightrag/api/routers/ollama_api.py b/lightrag/api/routers/ollama_api.py
index 8d0895bb..625c0162 100644
--- a/lightrag/api/routers/ollama_api.py
+++ b/lightrag/api/routers/ollama_api.py
@@ -1,6 +1,6 @@
 from fastapi import APIRouter, HTTPException, Request
 from pydantic import BaseModel
-from typing import List, Dict, Any, Optional
+from typing import List, Dict, Any, Optional, Type
 from lightrag.utils import logger
 import time
 import json
@@ -95,6 +95,47 @@ class OllamaTagResponse(BaseModel):
     models: List[OllamaModel]


+async def parse_request_body(request: Request, model_class: Type[BaseModel]) -> BaseModel:
+    """
+    Parse request body based on Content-Type header.
+    Supports both application/json and application/octet-stream.
+
+    Args:
+        request: The FastAPI Request object
+        model_class: The Pydantic model class to parse the request into
+
+    Returns:
+        An instance of the provided model_class
+    """
+    content_type = request.headers.get("content-type", "").lower()
+
+    try:
+        if content_type.startswith("application/json"):
+            # FastAPI already handles JSON parsing for us
+            body = await request.json()
+        elif content_type.startswith("application/octet-stream"):
+            # Manually parse octet-stream as JSON
+            body_bytes = await request.body()
+            body = json.loads(body_bytes.decode('utf-8'))
+        else:
+            # Try to parse as JSON for any other content type
+            body_bytes = await request.body()
+            body = json.loads(body_bytes.decode('utf-8'))
+
+        # Create an instance of the model
+        return model_class(**body)
+    except json.JSONDecodeError:
+        raise HTTPException(
+            status_code=400,
+            detail="Invalid JSON in request body"
+        )
+    except Exception as e:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Error parsing request body: {str(e)}"
+        )
+
+
 def estimate_tokens(text: str) -> int:
     """Estimate the number of tokens in text using tiktoken"""
     tokens = TiktokenTokenizer().encode(text)
@@ -197,13 +238,17 @@ class OllamaAPI:
             ]
         )

-        @self.router.post("/generate", dependencies=[Depends(combined_auth)])
-        async def generate(raw_request: Request, request: OllamaGenerateRequest):
+        @self.router.post("/generate", dependencies=[Depends(combined_auth)], include_in_schema=True)
+        async def generate(raw_request: Request):
             """Handle generate completion requests acting as an Ollama model
             For compatibility purpose, the request is not processed by LightRAG,
             and will be handled by underlying LLM model.
+            Supports both application/json and application/octet-stream Content-Types.
             """
             try:
+                # Parse the request body manually
+                request = await parse_request_body(raw_request, OllamaGenerateRequest)
+
                 query = request.prompt
                 start_time = time.time_ns()
                 prompt_tokens = estimate_tokens(query)
@@ -363,13 +408,17 @@ class OllamaAPI:
                 trace_exception(e)
                 raise HTTPException(status_code=500, detail=str(e))

-        @self.router.post("/chat", dependencies=[Depends(combined_auth)])
-        async def chat(raw_request: Request, request: OllamaChatRequest):
+        @self.router.post("/chat", dependencies=[Depends(combined_auth)], include_in_schema=True)
+        async def chat(raw_request: Request):
             """Process chat completion requests acting as an Ollama model
             Routes user queries through LightRAG by selecting query mode based on prefix indicators.
             Detects and forwards OpenWebUI session-related requests (for meta data generation task) directly to LLM.
+            Supports both application/json and application/octet-stream Content-Types.
             """
             try:
+                # Parse the request body manually
+                request = await parse_request_body(raw_request, OllamaChatRequest)
+
                 # Get all messages
                 messages = request.messages
                 if not messages:
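A quick way to exercise the new code path from [PATCH 3/8] end to end: a request like the one below was previously rejected with 422 by FastAPI's automatic validation and should now return normally. This is only a sketch; the host, port, /api route prefix, and model name are assumptions that depend on how the LightRAG server is deployed, and an API key header may be needed when auth is enabled:

    import json
    import urllib.request

    payload = {
        "model": "lightrag:latest",  # assumed pseudo-model name (LIGHTRAG_MODEL)
        "messages": [{"role": "user", "content": "What is LightRAG?"}],
        "stream": False,
    }
    req = urllib.request.Request(
        "http://localhost:9621/api/chat",  # assumed host/port and route prefix
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/octet-stream"},  # formerly a 422
        method="POST",
    )
    with urllib.request.urlopen(req) as resp:
        print(json.loads(resp.read().decode("utf-8")))

The same payload sent with Content-Type: application/json should behave exactly as before, since parse_request_body treats JSON as the default.
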
""" try: + # Parse the request body manually + request = await parse_request_body(raw_request, OllamaChatRequest) + # Get all messages messages = request.messages if not messages: From 9351b09cc7fa2b29bd8974cd2bff651fc74ee466 Mon Sep 17 00:00:00 2001 From: yangdx Date: Wed, 11 Jun 2025 15:16:32 +0800 Subject: [PATCH 4/8] Enhance Ollama interface compatibility --- lightrag/api/routers/ollama_api.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lightrag/api/routers/ollama_api.py b/lightrag/api/routers/ollama_api.py index 625c0162..8aeb21c8 100644 --- a/lightrag/api/routers/ollama_api.py +++ b/lightrag/api/routers/ollama_api.py @@ -579,6 +579,11 @@ class OllamaAPI: data = { "model": self.ollama_server_infos.LIGHTRAG_MODEL, "created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT, + "message": { + "role": "assistant", + "content": "", + "images": None, + }, "done": True, "total_duration": total_time, "load_duration": 0, From c3f5c413fa2bffa6f714e379d02157a8868b270c Mon Sep 17 00:00:00 2001 From: yangdx Date: Wed, 11 Jun 2025 16:23:02 +0800 Subject: [PATCH 5/8] feat(api): add /ps endpoint to list running models for Ollama API --- lightrag/api/routers/ollama_api.py | 49 ++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/lightrag/api/routers/ollama_api.py b/lightrag/api/routers/ollama_api.py index 8aeb21c8..25e0521f 100644 --- a/lightrag/api/routers/ollama_api.py +++ b/lightrag/api/routers/ollama_api.py @@ -95,6 +95,29 @@ class OllamaTagResponse(BaseModel): models: List[OllamaModel] +class OllamaRunningModelDetails(BaseModel): + parent_model: str + format: str + family: str + families: List[str] + parameter_size: str + quantization_level: str + + +class OllamaRunningModel(BaseModel): + name: str + model: str + size: int + digest: str + details: OllamaRunningModelDetails + expires_at: str + size_vram: int + + +class OllamaPsResponse(BaseModel): + models: List[OllamaRunningModel] + + async def parse_request_body(request: Request, model_class: Type[BaseModel]) -> BaseModel: """ Parse request body based on Content-Type header. 
From 888be97b061613ccd1c94dcf0b4bdc142e8508be Mon Sep 17 00:00:00 2001
From: yangdx
Date: Wed, 11 Jun 2025 16:36:05 +0800
Subject: [PATCH 6/8] Fix linting

---
 lightrag/api/routers/ollama_api.py | 46 +++++++++++++++---------------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/lightrag/api/routers/ollama_api.py b/lightrag/api/routers/ollama_api.py
index 25e0521f..f77184e6 100644
--- a/lightrag/api/routers/ollama_api.py
+++ b/lightrag/api/routers/ollama_api.py
@@ -118,20 +118,22 @@ class OllamaPsResponse(BaseModel):
     models: List[OllamaRunningModel]


-async def parse_request_body(request: Request, model_class: Type[BaseModel]) -> BaseModel:
+async def parse_request_body(
+    request: Request, model_class: Type[BaseModel]
+) -> BaseModel:
     """
     Parse request body based on Content-Type header.
     Supports both application/json and application/octet-stream.
-
+
     Args:
         request: The FastAPI Request object
         model_class: The Pydantic model class to parse the request into
-
+
     Returns:
         An instance of the provided model_class
     """
     content_type = request.headers.get("content-type", "").lower()
-
+
     try:
         if content_type.startswith("application/json"):
             # FastAPI already handles JSON parsing for us
@@ -139,23 +141,19 @@
             body = await request.json()
         elif content_type.startswith("application/octet-stream"):
             # Manually parse octet-stream as JSON
             body_bytes = await request.body()
-            body = json.loads(body_bytes.decode('utf-8'))
+            body = json.loads(body_bytes.decode("utf-8"))
         else:
             # Try to parse as JSON for any other content type
             body_bytes = await request.body()
-            body = json.loads(body_bytes.decode('utf-8'))
-
+            body = json.loads(body_bytes.decode("utf-8"))
+
         # Create an instance of the model
         return model_class(**body)
     except json.JSONDecodeError:
-        raise HTTPException(
-            status_code=400,
-            detail="Invalid JSON in request body"
-        )
+        raise HTTPException(status_code=400, detail="Invalid JSON in request body")
     except Exception as e:
         raise HTTPException(
-            status_code=400,
-            detail=f"Error parsing request body: {str(e)}"
+            status_code=400, detail=f"Error parsing request body: {str(e)}"
         )

@@ -260,7 +258,7 @@ class OllamaAPI:
                 }
             ]
         )
-
+
         @self.router.get("/ps", dependencies=[Depends(combined_auth)])
         async def get_running_models():
             """List Running Models - returns currently running models"""
@@ -275,29 +273,29 @@ class OllamaAPI:
                         "parent_model": "",
                         "format": "gguf",
                         "family": "llama",
-                        "families": [
-                            "llama"
-                        ],
+                        "families": ["llama"],
                         "parameter_size": "7.2B",
-                        "quantization_level": "Q4_0"
+                        "quantization_level": "Q4_0",
                     },
                     "expires_at": "2050-12-31T14:38:31.83753-07:00",
-                    "size_vram": self.ollama_server_infos.LIGHTRAG_SIZE
+                    "size_vram": self.ollama_server_infos.LIGHTRAG_SIZE,
                 }
             ]
         )

-        @self.router.post("/generate", dependencies=[Depends(combined_auth)], include_in_schema=True)
+        @self.router.post(
+            "/generate", dependencies=[Depends(combined_auth)], include_in_schema=True
+        )
         async def generate(raw_request: Request):
             """Handle generate completion requests acting as an Ollama model
             For compatibility purpose, the request is not processed by LightRAG,
             and will be handled by underlying LLM model.
             Supports both application/json and application/octet-stream Content-Types.
             """
             try:
                 # Parse the request body manually
                 request = await parse_request_body(raw_request, OllamaGenerateRequest)
-
+
                 query = request.prompt
                 start_time = time.time_ns()
                 prompt_tokens = estimate_tokens(query)
@@ -457,17 +455,19 @@ class OllamaAPI:
                 trace_exception(e)
                 raise HTTPException(status_code=500, detail=str(e))
-        @self.router.post("/chat", dependencies=[Depends(combined_auth)], include_in_schema=True)
+        @self.router.post(
+            "/chat", dependencies=[Depends(combined_auth)], include_in_schema=True
+        )
         async def chat(raw_request: Request):
             """Process chat completion requests acting as an Ollama model
             Routes user queries through LightRAG by selecting query mode based on prefix indicators.
             Detects and forwards OpenWebUI session-related requests (for meta data generation task) directly to LLM.
             Supports both application/json and application/octet-stream Content-Types.
             """
             try:
                 # Parse the request body manually
                 request = await parse_request_body(raw_request, OllamaChatRequest)
-
+
                 # Get all messages
                 messages = request.messages
                 if not messages:

From 62621979f09a152b8fd8c076df8e0bd964134957 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Wed, 11 Jun 2025 16:37:52 +0800
Subject: [PATCH 7/8] Bump api version to 0173

---
 lightrag/api/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lightrag/api/__init__.py b/lightrag/api/__init__.py
index d4b6cc8f..5d14dfb6 100644
--- a/lightrag/api/__init__.py
+++ b/lightrag/api/__init__.py
@@ -1 +1 @@
-__api_version__ = "0172"
+__api_version__ = "0173"

From 13892656952784880d53d68654eaebb930de2400 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Wed, 11 Jun 2025 17:03:27 +0800
Subject: [PATCH 8/8] Bump api version to 1.3.9

---
 lightrag/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lightrag/__init__.py b/lightrag/__init__.py
index db182c42..1b88de1e 100644
--- a/lightrag/__init__.py
+++ b/lightrag/__init__.py
@@ -1,5 +1,5 @@
 from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam

-__version__ = "1.3.8"
+__version__ = "1.3.9"
 __author__ = "Zirui Guo"
 __url__ = "https://github.com/HKUDS/LightRAG"
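
After the two bumps in [PATCH 7/8] and [PATCH 8/8], both version strings can be checked from a Python shell; the attribute names are taken directly from the diffs above, the expected values from the patches themselves:

    import lightrag
    from lightrag.api import __api_version__

    print(lightrag.__version__)  # expected: 1.3.9
    print(__api_version__)       # expected: 0173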