From 82a0f8cc1fe5ddea189850cbd7ce60957539cfb1 Mon Sep 17 00:00:00 2001
From: hzywhite <1569582518@qq.com>
Date: Thu, 4 Sep 2025 10:57:41 +0800
Subject: [PATCH] merge: log LightRAG init errors, drop log emoji, add docs

---
 lightrag/api/lightrag_server.py         | 19 +++++++++++--------
 lightrag/api/routers/document_routes.py | 19 ++++++++++++++-----
 lightrag/lightrag.py                    | 16 +++++++++++-----
 3 files changed, 36 insertions(+), 18 deletions(-)

diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py
index 7268208a..8fbc869e 100644
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -543,6 +543,9 @@ def create_app(args):
             },
             ollama_server_infos=ollama_server_infos,
         )
+    except Exception as e:
+        logger.error(f"Failed to initialize LightRAG: {e}")
+        raise
 
     # Initialize RAGAnything with comprehensive error handling
     rag_anything = None
@@ -648,33 +651,33 @@ def create_app(args):
         RAGManager.set_rag(rag_anything)
         raganything_enabled = True
         logger.info(
-            "✅ The RAGAnything feature has been successfully enabled, supporting multimodal document processing functionality"
+            "The RAGAnything feature has been successfully enabled, supporting multimodal document processing functionality"
         )
 
     except ImportError as e:
         raganything_error_message = (
             f"RAGAnything dependency package not installed: {str(e)}"
         )
-        logger.warning(f"⚠️  {raganything_error_message}")
+        logger.warning(f"{raganything_error_message}")
         logger.info(
-            "💡 Please run 'pip install raganything' to install dependency packages to enable multimodal document processing functionality"
+            "Please run 'pip install raganything' to install dependency packages to enable multimodal document processing functionality"
         )
     except ValueError as e:
         raganything_error_message = f"RAGAnything configuration error: {str(e)}"
-        logger.warning(f"⚠️  {raganything_error_message}")
+        logger.warning(f"{raganything_error_message}")
         logger.info(
-            "💡 Please check if the environment variables LLM-BINDING_API_KEY and LLM-BINDING_HOST are set correctly"
+            "Please check if the environment variables LLM-BINDING_API_KEY and LLM-BINDING_HOST are set correctly"
         )
     except Exception as e:
         raganything_error_message = f"RAGAnything initialization failed: {str(e)}"
-        logger.error(f"❌ {raganything_error_message}")
+        logger.error(f" {raganything_error_message}")
         logger.info(
-            "💡 The system will run in basic mode and only support standard document processing functions"
+            "The system will run in basic mode and only support standard document processing functions"
         )
 
     if not raganything_enabled:
         logger.info(
-            "🔄 The system has been downgraded to basic mode, but LightRAG core functions are still available"
+            "The system has been downgraded to basic mode, but LightRAG core functions are still available"
         )
 
     # Add routes
diff --git a/lightrag/api/routers/document_routes.py b/lightrag/api/routers/document_routes.py
index 632e2f38..c25bac50 100644
--- a/lightrag/api/routers/document_routes.py
+++ b/lightrag/api/routers/document_routes.py
@@ -5,7 +5,6 @@ This module contains all document-related routes for the LightRAG API.
 import asyncio
 import json
 import uuid
-
 from lightrag.utils import logger, get_pinyin_sort_key
 import aiofiles
 import shutil
@@ -464,7 +463,7 @@ class DocStatusResponse(BaseModel):
                 "id": "doc_123456",
                 "content_summary": "Research paper on machine learning",
                 "scheme_name": "lightrag",
-                "multimodal_content": None,
+                "multimodal_content": [],
                 "content_length": 15240,
                 "status": "PROCESSED",
                 "created_at": "2025-03-31T12:34:56",
@@ -499,7 +498,7 @@ class DocsStatusesResponse(BaseModel):
                             "id": "doc_123",
                             "content_summary": "Pending document",
                             "scheme_name": "lightrag",
-                            "multimodal_content": None,
+                            "multimodal_content": [],
                             "content_length": 5000,
                             "status": "PENDING",
                             "created_at": "2025-03-31T10:00:00",
@@ -516,7 +515,7 @@ class DocsStatusesResponse(BaseModel):
                             "id": "doc_456",
                             "content_summary": "Processed document",
                             "scheme_name": "lightrag",
-                            "multimodal_content": None,
+                            "multimodal_content": [],
                             "content_length": 8000,
                             "status": "PROCESSED",
                             "created_at": "2025-03-31T09:00:00",
@@ -878,6 +877,8 @@ async def pipeline_enqueue_file(
         rag: LightRAG instance
         file_path: Path to the saved file
         track_id: Optional tracking ID, if not provided will be generated
+        scheme_name (str, optional): Processing scheme name for categorization.
+            Defaults to None
     Returns:
         tuple: (success: bool, track_id: str)
     """
@@ -1346,6 +1347,8 @@ async def pipeline_index_file(
         rag: LightRAG instance
         file_path: Path to the saved file
         track_id: Optional tracking ID
+        scheme_name (str, optional): Processing scheme name for categorization.
+            Defaults to None
     """
     try:
         success, returned_track_id = await pipeline_enqueue_file(
@@ -1368,6 +1371,8 @@ async def pipeline_index_files(
         rag: LightRAG instance
         file_paths: Paths to the files to index
         track_id: Optional tracking ID to pass to all files
+        scheme_name (str, optional): Processing scheme name for categorization.
+            Defaults to None
     """
     if not file_paths:
         return
@@ -1482,8 +1487,11 @@ async def run_scanning_process(
 
     Args:
         rag: LightRAG instance
+        rag_anything: RAGAnything instance
         doc_manager: DocumentManager instance
         track_id: Optional tracking ID to pass to all scanned files
+        scheme_name (str, optional): Processing scheme name for categorization.
+            Defaults to None
     """
     try:
         new_files = doc_manager.scan_directory_for_new_files()
@@ -2017,7 +2025,7 @@ def create_document_routes(
 
         Args:
             background_tasks: FastAPI BackgroundTasks for async processing
-            file (UploadFile): The file to be uploaded. It must have an allowed extension.
+            file (UploadFile): The file to be uploaded. It must have an allowed extension
             schemeId (str): ID of the processing scheme to use for this file. The scheme
                 determines whether to use LightRAG or RAGAnything framework for processing.
 
@@ -2544,6 +2552,7 @@ def create_document_routes(
                         DocStatusResponse(
                             id=doc_id,
                             content_summary=doc_status.content_summary,
+                            multimodal_content=doc_status.multimodal_content,
                             content_length=doc_status.content_length,
                             status=doc_status.status,
                             created_at=format_datetime(doc_status.created_at),
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index b4842924..57c2714e 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -881,6 +881,8 @@ class LightRAG:
 
         Args:
             input: Single document string or list of document strings
+            multimodal_content (list[dict[str, Any]] | list[list[dict[str, Any]]] | None, optional):
+                Multimodal content (images, tables, equations) associated with documents
             split_by_character: if split_by_character is not None, split the string by character, if chunk longer than
             chunk_token_size, it will be split again by token size.
             split_by_character_only: if split_by_character_only is True, split the string by character only, when
@@ -888,6 +890,7 @@ class LightRAG:
             ids: single string of the document ID or list of unique document IDs, if not provided, MD5 hash IDs will be generated
             file_paths: single string of the file path or list of file paths, used for citation
             track_id: tracking ID for monitoring processing status, if not provided, will be generated
+            scheme_name (str | None, optional): Scheme name for categorizing documents
 
         Returns:
             str: tracking ID for monitoring processing status
@@ -896,8 +899,8 @@ class LightRAG:
         return loop.run_until_complete(
             self.ainsert(
                 input,
-                split_by_character,
                 multimodal_content,
+                split_by_character,
                 split_by_character_only,
                 ids,
                 file_paths,
@@ -963,13 +966,14 @@ class LightRAG:
                 self.move_file_to_enqueue(current_file_path)
             else:
                 continue
-
+        
         await self.apipeline_process_enqueue_documents(
             split_by_character, split_by_character_only
         )
 
         return track_id
 
+
     def move_file_to_enqueue(self, file_path):
         try:
             enqueued_dir = file_path.parent / "__enqueued__"
@@ -1093,7 +1097,9 @@ class LightRAG:
     async def apipeline_enqueue_documents(
         self,
         input: str | list[str],
-        multimodal_content: list[dict[str, Any]] | None = None,
+        multimodal_content: list[dict[str, Any]]
+        | list[list[dict[str, Any]]]
+        | None = None,
         ids: list[str] | None = None,
         file_paths: str | list[str] | None = None,
         track_id: str | None = None,
@@ -1192,8 +1198,8 @@ class LightRAG:
                 "file_path": content_data[
                     "file_path"
                 ],  # Store file path in document status
-                "track_id": track_id,
-                "scheme_name": scheme_name,  # Store track_id in document status
+                "track_id": track_id,  # Store track_id in document status
+                "scheme_name": scheme_name,
             }
             for id_, content_data in contents.items()
         }