From 7b07d4c9174ad140a8c7dd99d192010b7f103c9d Mon Sep 17 00:00:00 2001
From: yangdx
Date: Wed, 11 Jun 2025 13:42:30 +0800
Subject: [PATCH] feat: support application/octet-stream requests from LiteLLM
 clients
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Modify the Ollama API route handlers so that the /chat and /generate
endpoints accept requests whose Content-Type is
application/octet-stream. Bypassing FastAPI's automatic request
validation and parsing the request body manually resolves the 422
error that occurred when LiteLLM clients connected. The change keeps
backward compatibility with existing application/json requests.
---
 lightrag/api/routers/ollama_api.py | 59 +++++++++++++++++++++++++++---
 1 file changed, 54 insertions(+), 5 deletions(-)

diff --git a/lightrag/api/routers/ollama_api.py b/lightrag/api/routers/ollama_api.py
index 8d0895bb..625c0162 100644
--- a/lightrag/api/routers/ollama_api.py
+++ b/lightrag/api/routers/ollama_api.py
@@ -1,6 +1,6 @@
 from fastapi import APIRouter, HTTPException, Request
 from pydantic import BaseModel
-from typing import List, Dict, Any, Optional
+from typing import List, Dict, Any, Optional, Type
 from lightrag.utils import logger
 import time
 import json
@@ -95,6 +95,47 @@ class OllamaTagResponse(BaseModel):
     models: List[OllamaModel]
 
 
+async def parse_request_body(request: Request, model_class: Type[BaseModel]) -> BaseModel:
+    """
+    Parse request body based on Content-Type header.
+    Supports both application/json and application/octet-stream.
+
+    Args:
+        request: The FastAPI Request object
+        model_class: The Pydantic model class to parse the request into
+
+    Returns:
+        An instance of the provided model_class
+    """
+    content_type = request.headers.get("content-type", "").lower()
+
+    try:
+        if content_type.startswith("application/json"):
+            # FastAPI already handles JSON parsing for us
+            body = await request.json()
+        elif content_type.startswith("application/octet-stream"):
+            # Manually parse octet-stream as JSON
+            body_bytes = await request.body()
+            body = json.loads(body_bytes.decode('utf-8'))
+        else:
+            # Try to parse as JSON for any other content type
+            body_bytes = await request.body()
+            body = json.loads(body_bytes.decode('utf-8'))
+
+        # Create an instance of the model
+        return model_class(**body)
+    except json.JSONDecodeError:
+        raise HTTPException(
+            status_code=400,
+            detail="Invalid JSON in request body"
+        )
+    except Exception as e:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Error parsing request body: {str(e)}"
+        )
+
+
 def estimate_tokens(text: str) -> int:
     """Estimate the number of tokens in text using tiktoken"""
     tokens = TiktokenTokenizer().encode(text)
@@ -197,13 +238,17 @@ class OllamaAPI:
             ]
         )
 
-        @self.router.post("/generate", dependencies=[Depends(combined_auth)])
-        async def generate(raw_request: Request, request: OllamaGenerateRequest):
+        @self.router.post("/generate", dependencies=[Depends(combined_auth)], include_in_schema=True)
+        async def generate(raw_request: Request):
             """Handle generate completion requests acting as an Ollama model
             For compatibility purpose, the request is not processed by LightRAG,
             and will be handled by underlying LLM model.
+            Supports both application/json and application/octet-stream Content-Types.
             """
             try:
+                # Parse the request body manually
+                request = await parse_request_body(raw_request, OllamaGenerateRequest)
+
                 query = request.prompt
                 start_time = time.time_ns()
                 prompt_tokens = estimate_tokens(query)
@@ -363,13 +408,17 @@ class OllamaAPI:
                 trace_exception(e)
                 raise HTTPException(status_code=500, detail=str(e))
 
-        @self.router.post("/chat", dependencies=[Depends(combined_auth)])
-        async def chat(raw_request: Request, request: OllamaChatRequest):
+        @self.router.post("/chat", dependencies=[Depends(combined_auth)], include_in_schema=True)
+        async def chat(raw_request: Request):
             """Process chat completion requests acting as an Ollama model
             Routes user queries through LightRAG by selecting query mode based on prefix indicators.
             Detects and forwards OpenWebUI session-related requests (for meta data generation task) directly to LLM.
+            Supports both application/json and application/octet-stream Content-Types.
             """
             try:
+                # Parse the request body manually
+                request = await parse_request_body(raw_request, OllamaChatRequest)
+
                 # Get all messages
                 messages = request.messages
                 if not messages: