merge

2025-09-04 10:57:41 +08:00 · 2025-09-04 10:57:41 +08:00 · 82a0f8cc1f
commit 82a0f8cc1f
parent e27031587d
3 changed files with 36 additions and 18 deletions
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@ -543,6 +543,9 @@ def create_app(args):
            },
            ollama_server_infos=ollama_server_infos,
        )
+    except Exception as e:
+        logger.error(f"Failed to initialize LightRAG: {e}")
+        raise

    # Initialize RAGAnything with comprehensive error handling
    rag_anything = None
@ -648,33 +651,33 @@ def create_app(args):
        RAGManager.set_rag(rag_anything)
        raganything_enabled = True
        logger.info(
-            "✅ The RAGAnything feature has been successfully enabled, supporting multimodal document processing functionality"
+            "The RAGAnything feature has been successfully enabled, supporting multimodal document processing functionality"
        )

    except ImportError as e:
        raganything_error_message = (
            f"RAGAnything dependency package not installed: {str(e)}"
        )
-        logger.warning(f"⚠️  {raganything_error_message}")
+        logger.warning(f"{raganything_error_message}")
        logger.info(
-            "💡 Please run 'pip install raganything' to install dependency packages to enable multimodal document processing functionality"
+            "Please run 'pip install raganything' to install dependency packages to enable multimodal document processing functionality"
        )
    except ValueError as e:
        raganything_error_message = f"RAGAnything configuration error: {str(e)}"
-        logger.warning(f"⚠️  {raganything_error_message}")
+        logger.warning(f"{raganything_error_message}")
        logger.info(
-            "💡 Please check if the environment variables LLM-BINDING_API_KEY and LLM-BINDING_HOST are set correctly"
+            "Please check if the environment variables LLM-BINDING_API_KEY and LLM-BINDING_HOST are set correctly"
        )
    except Exception as e:
        raganything_error_message = f"RAGAnything initialization failed: {str(e)}"
-        logger.error(f"❌ {raganything_error_message}")
+        logger.error(f" {raganything_error_message}")
        logger.info(
-            "💡 The system will run in basic mode and only support standard document processing functions"
+            "The system will run in basic mode and only support standard document processing functions"
        )

    if not raganything_enabled:
        logger.info(
-            "🔄 The system has been downgraded to basic mode, but LightRAG core functions are still available"
+            "The system has been downgraded to basic mode, but LightRAG core functions are still available"
        )

    # Add routes
--- a/lightrag/api/routers/document_routes.py
+++ b/lightrag/api/routers/document_routes.py
@ -5,7 +5,6 @@ This module contains all document-related routes for the LightRAG API.
 import asyncio
 import json
 import uuid
-
 from lightrag.utils import logger, get_pinyin_sort_key
 import aiofiles
 import shutil
@ -464,7 +463,7 @@ class DocStatusResponse(BaseModel):
                "id": "doc_123456",
                "content_summary": "Research paper on machine learning",
                "scheme_name": "lightrag",
-                "multimodal_content": None,
+                "multimodal_content": [],
                "content_length": 15240,
                "status": "PROCESSED",
                "created_at": "2025-03-31T12:34:56",
@ -499,7 +498,7 @@ class DocsStatusesResponse(BaseModel):
                            "id": "doc_123",
                            "content_summary": "Pending document",
                            "scheme_name": "lightrag",
-                            "multimodal_content": None,
+                            "multimodal_content": [],
                            "content_length": 5000,
                            "status": "PENDING",
                            "created_at": "2025-03-31T10:00:00",
@ -516,7 +515,7 @@ class DocsStatusesResponse(BaseModel):
                            "id": "doc_456",
                            "content_summary": "Processed document",
                            "scheme_name": "lightrag",
-                            "multimodal_content": None,
+                            "multimodal_content": [],
                            "content_length": 8000,
                            "status": "PROCESSED",
                            "created_at": "2025-03-31T09:00:00",
@ -878,6 +877,8 @@ async def pipeline_enqueue_file(
        rag: LightRAG instance
        file_path: Path to the saved file
        track_id: Optional tracking ID, if not provided will be generated
+        scheme_name (str, optional): Processing scheme name for categorization.
+            Defaults to None
    Returns:
        tuple: (success: bool, track_id: str)
    """
@ -1346,6 +1347,8 @@ async def pipeline_index_file(
        rag: LightRAG instance
        file_path: Path to the saved file
        track_id: Optional tracking ID
+        scheme_name (str, optional): Processing scheme name for categorization.
+            Defaults to None
    """
    try:
        success, returned_track_id = await pipeline_enqueue_file(
@ -1368,6 +1371,8 @@ async def pipeline_index_files(
        rag: LightRAG instance
        file_paths: Paths to the files to index
        track_id: Optional tracking ID to pass to all files
+        scheme_name (str, optional): Processing scheme name for categorization.
+            Defaults to None
    """
    if not file_paths:
        return
@ -1482,8 +1487,11 @@ async def run_scanning_process(

    Args:
        rag: LightRAG instance
+        rag_anything: RAGAnything instance
        doc_manager: DocumentManager instance
        track_id: Optional tracking ID to pass to all scanned files
+        scheme_name (str, optional): Processing scheme name for categorization.
+            Defaults to None
    """
    try:
        new_files = doc_manager.scan_directory_for_new_files()
@ -2017,7 +2025,7 @@ def create_document_routes(

        Args:
            background_tasks: FastAPI BackgroundTasks for async processing
-            file (UploadFile): The file to be uploaded. It must have an allowed extension.
+            file (UploadFile): The file to be uploaded. It must have an allowed extension
            schemeId (str): ID of the processing scheme to use for this file. The scheme
                determines whether to use LightRAG or RAGAnything framework for processing.

@ -2544,6 +2552,7 @@ def create_document_routes(
                        DocStatusResponse(
                            id=doc_id,
                            content_summary=doc_status.content_summary,
+                            multimodal_content=doc_status.multimodal_content,
                            content_length=doc_status.content_length,
                            status=doc_status.status,
                            created_at=format_datetime(doc_status.created_at),
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@ -881,6 +881,8 @@ class LightRAG:

        Args:
            input: Single document string or list of document strings
+            multimodal_content (list[dict[str, Any]] | list[list[dict[str, Any]]] | None, optional):
+                Multimodal content (images, tables, equations) associated with documents
            split_by_character: if split_by_character is not None, split the string by character, if chunk longer than
            chunk_token_size, it will be split again by token size.
            split_by_character_only: if split_by_character_only is True, split the string by character only, when
@ -888,6 +890,7 @@ class LightRAG:
            ids: single string of the document ID or list of unique document IDs, if not provided, MD5 hash IDs will be generated
            file_paths: single string of the file path or list of file paths, used for citation
            track_id: tracking ID for monitoring processing status, if not provided, will be generated
+            scheme_name (str | None, optional): Scheme name for categorizing documents

        Returns:
            str: tracking ID for monitoring processing status
@ -896,8 +899,8 @@ class LightRAG:
        return loop.run_until_complete(
            self.ainsert(
                input,
-                split_by_character,
                multimodal_content,
+                split_by_character,
                split_by_character_only,
                ids,
                file_paths,
@ -963,13 +966,14 @@ class LightRAG:
                self.move_file_to_enqueue(current_file_path)
            else:
                continue
-
+        
        await self.apipeline_process_enqueue_documents(
            split_by_character, split_by_character_only
        )

        return track_id

+
    def move_file_to_enqueue(self, file_path):
        try:
            enqueued_dir = file_path.parent / "__enqueued__"
@ -1093,7 +1097,9 @@ class LightRAG:
    async def apipeline_enqueue_documents(
        self,
        input: str | list[str],
-        multimodal_content: list[dict[str, Any]] | None = None,
+        multimodal_content: list[dict[str, Any]]
+        | list[list[dict[str, Any]]]
+        | None = None,
        ids: list[str] | None = None,
        file_paths: str | list[str] | None = None,
        track_id: str | None = None,
@ -1192,8 +1198,8 @@ class LightRAG:
                "file_path": content_data[
                    "file_path"
                ],  # Store file path in document status
-                "track_id": track_id,
-                "scheme_name": scheme_name,  # Store track_id in document status
+                "track_id": track_id,  # Store track_id in document status
+                "scheme_name": scheme_name,
            }
            for id_, content_data in contents.items()
        }