From 82a0f8cc1fe5ddea189850cbd7ce60957539cfb1 Mon Sep 17 00:00:00 2001 From: hzywhite <1569582518@qq.com> Date: Thu, 4 Sep 2025 10:57:41 +0800 Subject: [PATCH] merge --- lightrag/api/lightrag_server.py | 19 +++++++++++-------- lightrag/api/routers/document_routes.py | 19 ++++++++++++++----- lightrag/lightrag.py | 16 +++++++++++----- 3 files changed, 36 insertions(+), 18 deletions(-) diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index 7268208a..8fbc869e 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -543,6 +543,9 @@ def create_app(args): }, ollama_server_infos=ollama_server_infos, ) + except Exception as e: + logger.error(f"Failed to initialize LightRAG: {e}") + raise # Initialize RAGAnything with comprehensive error handling rag_anything = None @@ -648,33 +651,33 @@ def create_app(args): RAGManager.set_rag(rag_anything) raganything_enabled = True logger.info( - "✅ The RAGAnything feature has been successfully enabled, supporting multimodal document processing functionality" + "The RAGAnything feature has been successfully enabled, supporting multimodal document processing functionality" ) except ImportError as e: raganything_error_message = ( f"RAGAnything dependency package not installed: {str(e)}" ) - logger.warning(f"⚠️ {raganything_error_message}") + logger.warning(f"{raganything_error_message}") logger.info( - "💡 Please run 'pip install raganything' to install dependency packages to enable multimodal document processing functionality" + "Please run 'pip install raganything' to install dependency packages to enable multimodal document processing functionality" ) except ValueError as e: raganything_error_message = f"RAGAnything configuration error: {str(e)}" - logger.warning(f"⚠️ {raganything_error_message}") + logger.warning(f"{raganything_error_message}") logger.info( - "💡 Please check if the environment variables LLM-BINDING_API_KEY and LLM-BINDING_HOST are set correctly" + "Please check 
if the environment variables LLM-BINDING_API_KEY and LLM-BINDING_HOST are set correctly" ) except Exception as e: raganything_error_message = f"RAGAnything initialization failed: {str(e)}" - logger.error(f"❌ {raganything_error_message}") + logger.error(f" {raganything_error_message}") logger.info( - "💡 The system will run in basic mode and only support standard document processing functions" + "The system will run in basic mode and only support standard document processing functions" ) if not raganything_enabled: logger.info( - "🔄 The system has been downgraded to basic mode, but LightRAG core functions are still available" + "The system has been downgraded to basic mode, but LightRAG core functions are still available" ) # Add routes diff --git a/lightrag/api/routers/document_routes.py b/lightrag/api/routers/document_routes.py index 632e2f38..c25bac50 100644 --- a/lightrag/api/routers/document_routes.py +++ b/lightrag/api/routers/document_routes.py @@ -5,7 +5,6 @@ This module contains all document-related routes for the LightRAG API. 
import asyncio import json import uuid - from lightrag.utils import logger, get_pinyin_sort_key import aiofiles import shutil @@ -464,7 +463,7 @@ class DocStatusResponse(BaseModel): "id": "doc_123456", "content_summary": "Research paper on machine learning", "scheme_name": "lightrag", - "multimodal_content": None, + "multimodal_content": [], "content_length": 15240, "status": "PROCESSED", "created_at": "2025-03-31T12:34:56", @@ -499,7 +498,7 @@ class DocsStatusesResponse(BaseModel): "id": "doc_123", "content_summary": "Pending document", "scheme_name": "lightrag", - "multimodal_content": None, + "multimodal_content": [], "content_length": 5000, "status": "PENDING", "created_at": "2025-03-31T10:00:00", @@ -516,7 +515,7 @@ class DocsStatusesResponse(BaseModel): "id": "doc_456", "content_summary": "Processed document", "scheme_name": "lightrag", - "multimodal_content": None, + "multimodal_content": [], "content_length": 8000, "status": "PROCESSED", "created_at": "2025-03-31T09:00:00", @@ -878,6 +877,8 @@ async def pipeline_enqueue_file( rag: LightRAG instance file_path: Path to the saved file track_id: Optional tracking ID, if not provided will be generated + scheme_name (str, optional): Processing scheme name for categorization. + Defaults to None Returns: tuple: (success: bool, track_id: str) """ @@ -1346,6 +1347,8 @@ async def pipeline_index_file( rag: LightRAG instance file_path: Path to the saved file track_id: Optional tracking ID + scheme_name (str, optional): Processing scheme name for categorization. + Defaults to None """ try: success, returned_track_id = await pipeline_enqueue_file( @@ -1368,6 +1371,8 @@ async def pipeline_index_files( rag: LightRAG instance file_paths: Paths to the files to index track_id: Optional tracking ID to pass to all files + scheme_name (str, optional): Processing scheme name for categorization. 
+ Defaults to None """ if not file_paths: return @@ -1482,8 +1487,11 @@ async def run_scanning_process( Args: rag: LightRAG instance + rag_anything: RAGAnything instance doc_manager: DocumentManager instance track_id: Optional tracking ID to pass to all scanned files + scheme_name (str, optional): Processing scheme name for categorization. + Defaults to None """ try: new_files = doc_manager.scan_directory_for_new_files() @@ -2017,7 +2025,7 @@ def create_document_routes( Args: background_tasks: FastAPI BackgroundTasks for async processing - file (UploadFile): The file to be uploaded. It must have an allowed extension. + file (UploadFile): The file to be uploaded. It must have an allowed extension schemeId (str): ID of the processing scheme to use for this file. The scheme determines whether to use LightRAG or RAGAnything framework for processing. @@ -2544,6 +2552,7 @@ def create_document_routes( DocStatusResponse( id=doc_id, content_summary=doc_status.content_summary, + multimodal_content=doc_status.multimodal_content, content_length=doc_status.content_length, status=doc_status.status, created_at=format_datetime(doc_status.created_at), diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index b4842924..57c2714e 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -881,6 +881,8 @@ class LightRAG: Args: input: Single document string or list of document strings + multimodal_content (list[dict[str, Any]] | list[list[dict[str, Any]]] | None, optional): + Multimodal content (images, tables, equations) associated with documents split_by_character: if split_by_character is not None, split the string by character, if chunk longer than chunk_token_size, it will be split again by token size. 
split_by_character_only: if split_by_character_only is True, split the string by character only, when @@ -888,6 +890,7 @@ class LightRAG: ids: single string of the document ID or list of unique document IDs, if not provided, MD5 hash IDs will be generated file_paths: single string of the file path or list of file paths, used for citation track_id: tracking ID for monitoring processing status, if not provided, will be generated + scheme_name (str | None, optional): Scheme name for categorizing documents Returns: str: tracking ID for monitoring processing status @@ -896,8 +899,8 @@ class LightRAG: return loop.run_until_complete( self.ainsert( input, - split_by_character, multimodal_content, + split_by_character, split_by_character_only, ids, file_paths, @@ -963,13 +966,14 @@ class LightRAG: self.move_file_to_enqueue(current_file_path) else: continue - + await self.apipeline_process_enqueue_documents( split_by_character, split_by_character_only ) return track_id + def move_file_to_enqueue(self, file_path): try: enqueued_dir = file_path.parent / "__enqueued__" @@ -1093,7 +1097,9 @@ class LightRAG: async def apipeline_enqueue_documents( self, input: str | list[str], - multimodal_content: list[dict[str, Any]] | None = None, + multimodal_content: list[dict[str, Any]] + | list[list[dict[str, Any]]] + | None = None, ids: list[str] | None = None, file_paths: str | list[str] | None = None, track_id: str | None = None, @@ -1192,8 +1198,8 @@ class LightRAG: "file_path": content_data[ "file_path" ], # Store file path in document status - "track_id": track_id, - "scheme_name": scheme_name, # Store track_id in document status + "track_id": track_id, # Store track_id in document status + "scheme_name": scheme_name, } for id_, content_data in contents.items() }