diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index 8fbc869e..8a9b5b77 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -25,6 +25,7 @@ from lightrag.api.utils_api import ( display_splash_screen, check_env_file, ) +from lightrag.llm.openai import openai_complete_if_cache, openai_embed from .config import ( global_args, update_uvicorn_mode_config, @@ -645,7 +646,7 @@ def create_app(args): embedding_func=raganything_embedding_func, ) - logger.info("Check the download status of the RAGANything parser...") + logger.info("Check the download status of the RAGAnything parser...") rag_anything.verify_parser_installation_once() RAGManager.set_rag(rag_anything) diff --git a/lightrag/api/routers/document_routes.py b/lightrag/api/routers/document_routes.py index c25bac50..2293a0e6 100644 --- a/lightrag/api/routers/document_routes.py +++ b/lightrag/api/routers/document_routes.py @@ -111,17 +111,6 @@ def sanitize_filename(filename: str, input_dir: Path) -> str: return clean_name -class ScanRequest(BaseModel): - """Request model for document scanning operations. - - Attributes: - framework (str | None): Processing framework to use for scanning. - Can be "lightrag" or "raganything". If None, uses default framework. - """ - - framework: str | None = None - - class SchemeConfig(BaseModel): """Configuration model for processing schemes. @@ -135,10 +124,16 @@ class SchemeConfig(BaseModel): - "mineru": MinerU parser for comprehensive document parsing - "docling": Docling parser for office document processing - "": Default/automatic extractor selection + modelSource (Literal["huggingface", "modelscope", "local", ""]): The model source used by Mineru. 
+ - "huggingface": Using pre-trained models from the Hugging Face model library + - "modelscope": Using model resources on the ModelScope platform + - "local": Use custom models deployed locally + - "": Maintain the default model source configuration of the system (usually huggingface) """ framework: Literal["lightrag", "raganything"] extractor: Literal["mineru", "docling", ""] = "" # 默认值 + modelSource: Literal["huggingface", "modelscope", "local", ""] = "" class Scheme(BaseModel): @@ -185,6 +180,12 @@ class SchemesResponse(BaseModel): data: Optional[List[Dict[str, Any]]] = Field(None, description="List of schemes") +class ScanRequest(BaseModel): + """Request model for document scanning operations. + """ + schemeConfig: SchemeConfig = Field(..., description="Scanning scheme configuration") + + class ScanResponse(BaseModel): """Response model for document scanning operation @@ -1403,8 +1404,9 @@ async def pipeline_index_files( async def pipeline_index_files_raganything( rag_anything: RAGAnything, file_paths: List[Path], - track_id: str = None, scheme_name: str = None, + parser: str = None, + source: str = None ): """Index multiple files using RAGAnything framework for multimodal processing. @@ -1414,6 +1416,10 @@ async def pipeline_index_files_raganything( track_id (str, optional): Tracking ID for batch monitoring. Defaults to None. scheme_name (str, optional): Processing scheme name for categorization. Defaults to None. + parser (str, optional): Document extraction tool to use. + Defaults to None. + source (str, optional): The model source used by MinerU. + Defaults to None. 
Note: - Uses RAGAnything's process_document_complete_lightrag_api method for each file @@ -1438,6 +1444,8 @@ async def pipeline_index_files_raganything( output_dir="./output", parse_method="auto", scheme_name=scheme_name, + parser=parser, + source=source ) if success: pass @@ -1481,7 +1489,7 @@ async def run_scanning_process( rag_anything: RAGAnything, doc_manager: DocumentManager, track_id: str = None, - scheme_name: str = None, + schemeConfig = None, ): """Background task to scan and index documents @@ -1490,8 +1498,8 @@ async def run_scanning_process( rag_anythingL: RAGAnything instance doc_manager: DocumentManager instance track_id: Optional tracking ID to pass to all scanned files - scheme_name (str, optional): Processing scheme name for categorization. - Defaults to None + schemeConfig: Scanning scheme configuration. + Defaults to None """ try: new_files = doc_manager.scan_directory_for_new_files() @@ -1504,6 +1512,10 @@ async def run_scanning_process( is_pipeline_scan_busy = pipeline_status.get("scan_disabled", False) is_pipeline_busy = pipeline_status.get("busy", False) + scheme_name = schemeConfig.framework + extractor = schemeConfig.extractor + modelSource = schemeConfig.modelSource + if new_files: # Process all files at once with track_id if is_pipeline_busy: @@ -1525,7 +1537,7 @@ async def run_scanning_process( ) elif scheme_name == "raganything": await pipeline_index_files_raganything( - rag_anything, new_files, track_id, scheme_name=scheme_name + rag_anything, new_files, scheme_name=scheme_name, parser=extractor, source=modelSource ) logger.info( f"Scanning process completed with raganything: {total_files} files Processed." 
@@ -1834,6 +1846,7 @@ def create_document_routes( "config": { "framework": schemes[0].config.framework, "extractor": schemes[0].config.extractor, + "modelSource": schemes[0].config.modelSource, }, } # 保存新方案 @@ -1842,6 +1855,7 @@ def create_document_routes( item["name"] = updated_item["name"] item["config"]["framework"] = updated_item["config"]["framework"] item["config"]["extractor"] = updated_item["config"]["extractor"] + item["config"]["modelSource"] = updated_item["config"]["modelSource"] break # 写回文件 @@ -1909,6 +1923,7 @@ def create_document_routes( "config": { "framework": scheme.config.framework, "extractor": scheme.config.extractor, + "modelSource": scheme.config.modelSource, }, } @@ -1989,7 +2004,6 @@ def create_document_routes( Returns: ScanResponse: A response object containing the scanning status and track_id """ - scheme_name = request.framework # Generate track_id with "scan" prefix for scanning operation track_id = generate_track_id("scan") @@ -2000,7 +2014,7 @@ def create_document_routes( rag_anything, doc_manager, track_id, - scheme_name=scheme_name, + schemeConfig=request.schemeConfig, ) return ScanResponse( status="scanning_started", @@ -2077,6 +2091,8 @@ def create_document_routes( config = load_config() current_framework = config.get("framework") + current_extractor = config.get("extractor") + current_modelSource = config.get("modelSource") doc_pre_id = f"doc-pre-{safe_filename}" if current_framework and current_framework == "lightrag": @@ -2095,6 +2111,8 @@ def create_document_routes( output_dir="./output", parse_method="auto", scheme_name=current_framework, + parser=current_extractor, + source=current_modelSource ) await rag.doc_status.upsert(