diff --git a/lightrag/__init__.py b/lightrag/__init__.py
index db182c42..1b88de1e 100644
--- a/lightrag/__init__.py
+++ b/lightrag/__init__.py
@@ -1,5 +1,5 @@
 from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
 
-__version__ = "1.3.8"
+__version__ = "1.3.9"
 __author__ = "Zirui Guo"
 __url__ = "https://github.com/HKUDS/LightRAG"
diff --git a/lightrag/api/__init__.py b/lightrag/api/__init__.py
index d4b6cc8f..5d14dfb6 100644
--- a/lightrag/api/__init__.py
+++ b/lightrag/api/__init__.py
@@ -1 +1 @@
-__api_version__ = "0172"
+__api_version__ = "0173"
diff --git a/lightrag/api/routers/ollama_api.py b/lightrag/api/routers/ollama_api.py
index 93394fbb..f77184e6 100644
--- a/lightrag/api/routers/ollama_api.py
+++ b/lightrag/api/routers/ollama_api.py
@@ -1,7 +1,7 @@
 from fastapi import APIRouter, HTTPException, Request
 from pydantic import BaseModel
-from typing import List, Dict, Any, Optional
-import logging
+from typing import List, Dict, Any, Optional, Type
+from lightrag.utils import logger
 import time
 import json
 import re
@@ -95,6 +95,68 @@ class OllamaTagResponse(BaseModel):
     models: List[OllamaModel]
 
 
+class OllamaRunningModelDetails(BaseModel):
+    parent_model: str
+    format: str
+    family: str
+    families: List[str]
+    parameter_size: str
+    quantization_level: str
+
+
+class OllamaRunningModel(BaseModel):
+    name: str
+    model: str
+    size: int
+    digest: str
+    details: OllamaRunningModelDetails
+    expires_at: str
+    size_vram: int
+
+
+class OllamaPsResponse(BaseModel):
+    models: List[OllamaRunningModel]
+
+
+async def parse_request_body(
+    request: Request, model_class: Type[BaseModel]
+) -> BaseModel:
+    """
+    Parse request body based on Content-Type header.
+    Supports both application/json and application/octet-stream.
+
+    Args:
+        request: The FastAPI Request object
+        model_class: The Pydantic model class to parse the request into
+
+    Returns:
+        An instance of the provided model_class
+    """
+    content_type = request.headers.get("content-type", "").lower()
+
+    try:
+        if content_type.startswith("application/json"):
+            # FastAPI already handles JSON parsing for us
+            body = await request.json()
+        elif content_type.startswith("application/octet-stream"):
+            # Manually parse octet-stream as JSON
+            body_bytes = await request.body()
+            body = json.loads(body_bytes.decode("utf-8"))
+        else:
+            # Try to parse as JSON for any other content type
+            body_bytes = await request.body()
+            body = json.loads(body_bytes.decode("utf-8"))
+
+        # Create an instance of the model
+        return model_class(**body)
+    except json.JSONDecodeError:
+        raise HTTPException(status_code=400, detail="Invalid JSON in request body")
+    except Exception as e:
+        raise HTTPException(
+            status_code=400, detail=f"Error parsing request body: {str(e)}"
+        )
+
+
 def estimate_tokens(text: str) -> int:
     """Estimate the number of tokens in text using tiktoken"""
     tokens = TiktokenTokenizer().encode(text)
@@ -197,13 +259,43 @@ class OllamaAPI:
                 ]
             )
 
-        @self.router.post("/generate", dependencies=[Depends(combined_auth)])
-        async def generate(raw_request: Request, request: OllamaGenerateRequest):
+        @self.router.get("/ps", dependencies=[Depends(combined_auth)])
+        async def get_running_models():
+            """List Running Models - returns currently running models"""
+            return OllamaPsResponse(
+                models=[
+                    {
+                        "name": self.ollama_server_infos.LIGHTRAG_MODEL,
+                        "model": self.ollama_server_infos.LIGHTRAG_MODEL,
+                        "size": self.ollama_server_infos.LIGHTRAG_SIZE,
+                        "digest": self.ollama_server_infos.LIGHTRAG_DIGEST,
+                        "details": {
+                            "parent_model": "",
+                            "format": "gguf",
+                            "family": "llama",
+                            "families": ["llama"],
+                            "parameter_size": "7.2B",
+                            "quantization_level": "Q4_0",
+                        },
+                        "expires_at": "2050-12-31T14:38:31.83753-07:00",
+                        "size_vram": self.ollama_server_infos.LIGHTRAG_SIZE,
+                    }
+                ]
+            )
+
+        @self.router.post(
+            "/generate", dependencies=[Depends(combined_auth)], include_in_schema=True
+        )
+        async def generate(raw_request: Request):
             """Handle generate completion requests acting as an Ollama model
             For compatibility purpose, the request is not processed by LightRAG,
             and will be handled by underlying LLM model.
+            Supports both application/json and application/octet-stream Content-Types.
             """
             try:
+                # Parse the request body manually
+                request = await parse_request_body(raw_request, OllamaGenerateRequest)
+
                 query = request.prompt
                 start_time = time.time_ns()
                 prompt_tokens = estimate_tokens(query)
@@ -278,7 +370,7 @@ class OllamaAPI:
                         else:
                             error_msg = f"Provider error: {error_msg}"
 
-                        logging.error(f"Stream error: {error_msg}")
+                        logger.error(f"Stream error: {error_msg}")
 
                         # Send error message to client
                         error_data = {
@@ -363,13 +455,19 @@ class OllamaAPI:
                 trace_exception(e)
                 raise HTTPException(status_code=500, detail=str(e))
 
-        @self.router.post("/chat", dependencies=[Depends(combined_auth)])
-        async def chat(raw_request: Request, request: OllamaChatRequest):
+        @self.router.post(
+            "/chat", dependencies=[Depends(combined_auth)], include_in_schema=True
+        )
+        async def chat(raw_request: Request):
             """Process chat completion requests acting as an Ollama model
             Routes user queries through LightRAG by selecting query mode based on prefix indicators.
             Detects and forwards OpenWebUI session-related requests (for meta data generation task) directly to LLM.
+            Supports both application/json and application/octet-stream Content-Types.
             """
             try:
+                # Parse the request body manually
+                request = await parse_request_body(raw_request, OllamaChatRequest)
+
                 # Get all messages
                 messages = request.messages
                 if not messages:
@@ -496,7 +594,7 @@ class OllamaAPI:
                         else:
                             error_msg = f"Provider error: {error_msg}"
 
-                        logging.error(f"Stream error: {error_msg}")
+                        logger.error(f"Stream error: {error_msg}")
 
                         # Send error message to client
                         error_data = {
@@ -530,6 +628,11 @@ class OllamaAPI:
                     data = {
                         "model": self.ollama_server_infos.LIGHTRAG_MODEL,
                         "created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
+                        "message": {
+                            "role": "assistant",
+                            "content": "",
+                            "images": None,
+                        },
                         "done": True,
                         "total_duration": total_time,
                         "load_duration": 0,
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 6d206d5c..f1c3747b 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -1007,7 +1007,7 @@ class LightRAG:
         except Exception as e:
             # Log error and update pipeline status
             logger.error(traceback.format_exc())
-            error_msg = f"Failed to extrat document {current_file_number}/{total_files}: {file_path}"
+            error_msg = f"Failed to extract document {current_file_number}/{total_files}: {file_path}"
             logger.error(error_msg)
             async with pipeline_status_lock:
                 pipeline_status["latest_message"] = error_msg