fix lint

2025-06-05 17:37:11 +08:00 · 2025-06-05 17:37:11 +08:00 · cc9040d70c
commit cc9040d70c
parent 962974589a
8 changed files with 673 additions and 517 deletions
--- a/examples/mineru_example.py
+++ b/examples/mineru_example.py
@ -10,10 +10,12 @@ This example shows how to:
 import os
 import argparse
 from pathlib import Path
 from lightrag.mineru_parser import MineruParser
-def parse_document(file_path: str, output_dir: str = None, method: str = "auto", stats: bool = False):
+
 def parse_document(
    file_path: str, output_dir: str = None, method: str = "auto", stats: bool = False
 ):
    """
    Parse a document using MinerU parser
@ -26,9 +28,7 @@ def parse_document(file_path: str, output_dir: str = None, method: str = "auto",
    try:
        # Parse the document
        content_list, md_content = MineruParser.parse_document(
-            file_path=file_path,
+            file_path=file_path, parse_method=method, output_dir=output_dir
            parse_method=method,
            output_dir=output_dir
        )
        # Display statistics if requested
@ -39,7 +39,7 @@ def parse_document(file_path: str, output_dir: str = None, method: str = "auto",
            # Count different types of content
            content_types = {}
            for item in content_list:
-                content_type = item.get('type', 'unknown')
+                content_type = item.get("type", "unknown")
                content_types[content_type] = content_types.get(content_type, 0) + 1
            print("\nContent Type Distribution:")
@ -52,17 +52,22 @@ def parse_document(file_path: str, output_dir: str = None, method: str = "auto",
        print(f"Error parsing document: {str(e)}")
        return None, None
 def main():
    """Main function to run the example"""
-    parser = argparse.ArgumentParser(description='MinerU Parser Example')
+    parser = argparse.ArgumentParser(description="MinerU Parser Example")
-    parser.add_argument('file_path', help='Path to the document to parse')
+    parser.add_argument("file_path", help="Path to the document to parse")
-    parser.add_argument('--output', '-o', help='Output directory path')
+    parser.add_argument("--output", "-o", help="Output directory path")
-    parser.add_argument('--method', '-m', 
+    parser.add_argument(
-                       choices=['auto', 'ocr', 'txt'],
+        "--method",
-                       default='auto',
+        "-m",
-                       help='Parsing method (auto, ocr, txt)')
+        choices=["auto", "ocr", "txt"],
-    parser.add_argument('--stats', action='store_true',
+        default="auto",
-                       help='Display content statistics')
+        help="Parsing method (auto, ocr, txt)",
    )
    parser.add_argument(
        "--stats", action="store_true", help="Display content statistics"
    )
    args = parser.parse_args()
@ -72,11 +77,9 @@ def main():
    # Parse document
    content_list, md_content = parse_document(
-        args.file_path,
+        args.file_path, args.output, args.method, args.stats
        args.output,
        args.method,
        args.stats
    )
-if __name__ == '__main__':
+
 if __name__ == "__main__":
    main()
--- a/examples/modalprocessors_example.py
+++ b/examples/modalprocessors_example.py
@ -8,72 +8,90 @@ import asyncio
 import argparse
 from lightrag.llm.openai import openai_complete_if_cache, openai_embed
 from lightrag.kg.shared_storage import initialize_pipeline_status
 from pathlib import Path
 from lightrag import LightRAG
 from lightrag.modalprocessors import (
    ImageModalProcessor,
    TableModalProcessor,
    EquationModalProcessor,
    GenericModalProcessor
 )
 WORKING_DIR = "./rag_storage"
 def get_llm_model_func(api_key: str, base_url: str = None):
    return lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
        "gpt-4o-mini",
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages,
        api_key=api_key,
        base_url=base_url,
        **kwargs,
    )
-def get_vision_model_func(api_key: str, base_url: str = None):
+def get_llm_model_func(api_key: str, base_url: str = None):
-    return lambda prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs: openai_complete_if_cache(
+    return (
-        "gpt-4o",
+        lambda prompt,
        "",
        system_prompt=None,
        history_messages=[],
-        messages=[
+        **kwargs: openai_complete_if_cache(
-            {"role": "system", "content": system_prompt} if system_prompt else None,
+            "gpt-4o-mini",
-            {"role": "user", "content": [
+            prompt,
-                {"type": "text", "text": prompt},
+            system_prompt=system_prompt,
-                {
+            history_messages=history_messages,
-                    "type": "image_url",
+            api_key=api_key,
-                    "image_url": {
+            base_url=base_url,
-                        "url": f"data:image/jpeg;base64,{image_data}"
+            **kwargs,
-                    }
+        )
                }
            ]} if image_data else {"role": "user", "content": prompt}
        ],
        api_key=api_key,
        base_url=base_url,
        **kwargs,
    ) if image_data else openai_complete_if_cache(
        "gpt-4o-mini",
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages,
        api_key=api_key,
        base_url=base_url,
        **kwargs,
    )
 def get_vision_model_func(api_key: str, base_url: str = None):
    return (
        lambda prompt,
        system_prompt=None,
        history_messages=[],
        image_data=None,
        **kwargs: openai_complete_if_cache(
            "gpt-4o",
            "",
            system_prompt=None,
            history_messages=[],
            messages=[
                {"role": "system", "content": system_prompt} if system_prompt else None,
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{image_data}"
                            },
                        },
                    ],
                }
                if image_data
                else {"role": "user", "content": prompt},
            ],
            api_key=api_key,
            base_url=base_url,
            **kwargs,
        )
        if image_data
        else openai_complete_if_cache(
            "gpt-4o-mini",
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages,
            api_key=api_key,
            base_url=base_url,
            **kwargs,
        )
    )
 async def process_image_example(lightrag: LightRAG, vision_model_func):
    """Example of processing an image"""
    # Create image processor
    image_processor = ImageModalProcessor(
-        lightrag=lightrag,
+        lightrag=lightrag, modal_caption_func=vision_model_func
        modal_caption_func=vision_model_func
    )
    # Prepare image content
    image_content = {
        "img_path": "image.jpg",
        "img_caption": ["Example image caption"],
-        "img_footnote": ["Example image footnote"]
+        "img_footnote": ["Example image footnote"],
    }
    # Process image
@ -81,19 +99,19 @@ async def process_image_example(lightrag: LightRAG, vision_model_func):
        modal_content=image_content,
        content_type="image",
        file_path="image_example.jpg",
-        entity_name="Example Image"
+        entity_name="Example Image",
    )
    print("Image Processing Results:")
    print(f"Description: {description}")
    print(f"Entity Info: {entity_info}")
 async def process_table_example(lightrag: LightRAG, llm_model_func):
    """Example of processing a table"""
    # Create table processor
    table_processor = TableModalProcessor(
-        lightrag=lightrag,
+        lightrag=lightrag, modal_caption_func=llm_model_func
        modal_caption_func=llm_model_func
    )
    # Prepare table content
@ -105,7 +123,7 @@ async def process_table_example(lightrag: LightRAG, llm_model_func):
        | Mary | 30  | Designer   |
        """,
        "table_caption": ["Employee Information Table"],
-        "table_footnote": ["Data updated as of 2024"]
+        "table_footnote": ["Data updated as of 2024"],
    }
    # Process table
@ -113,39 +131,37 @@ async def process_table_example(lightrag: LightRAG, llm_model_func):
        modal_content=table_content,
        content_type="table",
        file_path="table_example.md",
-        entity_name="Employee Table"
+        entity_name="Employee Table",
    )
    print("\nTable Processing Results:")
    print(f"Description: {description}")
    print(f"Entity Info: {entity_info}")
 async def process_equation_example(lightrag: LightRAG, llm_model_func):
    """Example of processing a mathematical equation"""
    # Create equation processor
    equation_processor = EquationModalProcessor(
-        lightrag=lightrag,
+        lightrag=lightrag, modal_caption_func=llm_model_func
        modal_caption_func=llm_model_func
    )
    # Prepare equation content
-    equation_content = {
+    equation_content = {"text": "E = mc^2", "text_format": "LaTeX"}
        "text": "E = mc^2",
        "text_format": "LaTeX"
    }
    # Process equation
    description, entity_info = await equation_processor.process_multimodal_content(
        modal_content=equation_content,
        content_type="equation",
        file_path="equation_example.txt",
-        entity_name="Mass-Energy Equivalence"
+        entity_name="Mass-Energy Equivalence",
    )
    print("\nEquation Processing Results:")
    print(f"Description: {description}")
    print(f"Entity Info: {entity_info}")
 async def initialize_rag(api_key: str, base_url: str = None):
    rag = LightRAG(
        working_dir=WORKING_DIR,
@ -155,7 +171,10 @@ async def initialize_rag(api_key: str, base_url: str = None):
            api_key=api_key,
            base_url=base_url,
        ),
-        llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
+        llm_model_func=lambda prompt,
        system_prompt=None,
        history_messages=[],
        **kwargs: openai_complete_if_cache(
            "gpt-4o-mini",
            prompt,
            system_prompt=system_prompt,
@ -171,18 +190,22 @@ async def initialize_rag(api_key: str, base_url: str = None):
    return rag
 def main():
    """Main function to run the example"""
-    parser = argparse.ArgumentParser(description='Modal Processors Example')
+    parser = argparse.ArgumentParser(description="Modal Processors Example")
-    parser.add_argument('--api-key', required=True, help='OpenAI API key')
+    parser.add_argument("--api-key", required=True, help="OpenAI API key")
-    parser.add_argument('--base-url', help='Optional base URL for API')
+    parser.add_argument("--base-url", help="Optional base URL for API")
-    parser.add_argument('--working-dir', '-w', default=WORKING_DIR, help='Working directory path')
+    parser.add_argument(
        "--working-dir", "-w", default=WORKING_DIR, help="Working directory path"
    )
    args = parser.parse_args()
    # Run examples
    asyncio.run(main_async(args.api_key, args.base_url))
 async def main_async(api_key: str, base_url: str = None):
    # Initialize LightRAG
    lightrag = await initialize_rag(api_key, base_url)
@ -196,5 +219,6 @@ async def main_async(api_key: str, base_url: str = None):
    await process_table_example(lightrag, llm_model_func)
    await process_equation_example(lightrag, llm_model_func)
 if __name__ == "__main__":
    main()
--- a/examples/raganything_example.py
+++ b/examples/raganything_example.py
@ -11,12 +11,17 @@ This example shows how to:
 import os
 import argparse
 import asyncio
 from pathlib import Path
 from lightrag.mineru_parser import MineruParser
 from lightrag.llm.openai import openai_complete_if_cache, openai_embed
 from lightrag.raganything import RAGAnything
-async def process_with_rag(file_path: str, output_dir: str, api_key: str, base_url: str = None, working_dir: str = None):
+
 async def process_with_rag(
    file_path: str,
    output_dir: str,
    api_key: str,
    base_url: str = None,
    working_dir: str = None,
 ):
    """
    Process document with RAGAnything
@ -30,7 +35,10 @@ async def process_with_rag(file_path: str, output_dir: str, api_key: str, base_u
        # Initialize RAGAnything
        rag = RAGAnything(
            working_dir=working_dir,
-            llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
+            llm_model_func=lambda prompt,
            system_prompt=None,
            history_messages=[],
            **kwargs: openai_complete_if_cache(
                "gpt-4o-mini",
                prompt,
                system_prompt=system_prompt,
@ -39,27 +47,40 @@ async def process_with_rag(file_path: str, output_dir: str, api_key: str, base_u
                base_url=base_url,
                **kwargs,
            ),
-            vision_model_func=lambda prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs: openai_complete_if_cache(
+            vision_model_func=lambda prompt,
            system_prompt=None,
            history_messages=[],
            image_data=None,
            **kwargs: openai_complete_if_cache(
                "gpt-4o",
                "",
                system_prompt=None,
                history_messages=[],
                messages=[
-                    {"role": "system", "content": system_prompt} if system_prompt else None,
+                    {"role": "system", "content": system_prompt}
-                    {"role": "user", "content": [
+                    if system_prompt
-                        {"type": "text", "text": prompt},
+                    else None,
-                        {
+                    {
-                            "type": "image_url",
+                        "role": "user",
-                            "image_url": {
+                        "content": [
-                                "url": f"data:image/jpeg;base64,{image_data}"
+                            {"type": "text", "text": prompt},
-                            }
+                            {
-                        }
+                                "type": "image_url",
-                    ]} if image_data else {"role": "user", "content": prompt}
+                                "image_url": {
                                    "url": f"data:image/jpeg;base64,{image_data}"
                                },
                            },
                        ],
                    }
                    if image_data
                    else {"role": "user", "content": prompt},
                ],
                api_key=api_key,
                base_url=base_url,
                **kwargs,
-            ) if image_data else openai_complete_if_cache(
+            )
            if image_data
            else openai_complete_if_cache(
                "gpt-4o-mini",
                prompt,
                system_prompt=system_prompt,
@ -75,21 +96,19 @@ async def process_with_rag(file_path: str, output_dir: str, api_key: str, base_u
                base_url=base_url,
            ),
            embedding_dim=3072,
-            max_token_size=8192
+            max_token_size=8192,
        )
        # Process document
        await rag.process_document_complete(
-            file_path=file_path,
+            file_path=file_path, output_dir=output_dir, parse_method="auto"
            output_dir=output_dir,
            parse_method="auto"
        )
        # Example queries
        queries = [
            "What is the main content of the document?",
            "Describe the images and figures in the document",
-            "Tell me about the experimental results and data tables"
+            "Tell me about the experimental results and data tables",
        ]
        print("\nQuerying processed document:")
@ -101,14 +120,21 @@ async def process_with_rag(file_path: str, output_dir: str, api_key: str, base_u
    except Exception as e:
        print(f"Error processing with RAG: {str(e)}")
 def main():
    """Main function to run the example"""
-    parser = argparse.ArgumentParser(description='MinerU RAG Example')
+    parser = argparse.ArgumentParser(description="MinerU RAG Example")
-    parser.add_argument('file_path', help='Path to the document to process')
+    parser.add_argument("file_path", help="Path to the document to process")
-    parser.add_argument('--working_dir', '-w', default="./rag_storage", help='Working directory path')
+    parser.add_argument(
-    parser.add_argument('--output', '-o', default="./output", help='Output directory path')
+        "--working_dir", "-w", default="./rag_storage", help="Working directory path"
-    parser.add_argument('--api-key', required=True, help='OpenAI API key for RAG processing')
+    )
-    parser.add_argument('--base-url', help='Optional base URL for API')
+    parser.add_argument(
        "--output", "-o", default="./output", help="Output directory path"
    )
    parser.add_argument(
        "--api-key", required=True, help="OpenAI API key for RAG processing"
    )
    parser.add_argument("--base-url", help="Optional base URL for API")
    args = parser.parse_args()
@ -117,13 +143,12 @@ def main():
        os.makedirs(args.output, exist_ok=True)
    # Process with RAG
-    asyncio.run(process_with_rag(
+    asyncio.run(
-        args.file_path,
+        process_with_rag(
-        args.output,
+            args.file_path, args.output, args.api_key, args.base_url, args.working_dir
-        args.api_key,
+        )
-        args.base_url,
+    )
        args.working_dir
    ))
-if __name__ == '__main__':
+
 if __name__ == "__main__":
    main()
--- a/lightrag/mineru_parser.py
+++ b/lightrag/mineru_parser.py
@ -1,4 +1,4 @@
-# type: ignore
+# type: ignore
 """
 MinerU Document Parser Utility
@ -14,7 +14,18 @@ import os
 import json
 import argparse
 from pathlib import Path
-from typing import Dict, List, Optional, Union, Tuple, Any, TypeVar, cast, TYPE_CHECKING, ClassVar
+from typing import (
    Dict,
    List,
    Optional,
    Union,
    Tuple,
    Any,
    TypeVar,
    cast,
    TYPE_CHECKING,
    ClassVar,
 )
 # Type stubs for magic_pdf
 FileBasedDataWriter = Any
@ -28,20 +39,27 @@ read_local_office = Any
 read_local_images = Any
 if TYPE_CHECKING:
-    from magic_pdf.data.data_reader_writer import FileBasedDataWriter, FileBasedDataReader
+    from magic_pdf.data.data_reader_writer import (
        FileBasedDataWriter,
        FileBasedDataReader,
    )
    from magic_pdf.data.dataset import PymuDocDataset
    from magic_pdf.model.doc_analyze_by_custom_model import doc_analyze
    from magic_pdf.config.enums import SupportedPdfParseMethod
    from magic_pdf.data.read_api import read_local_office, read_local_images
 else:
    # MinerU imports
-    from magic_pdf.data.data_reader_writer import FileBasedDataWriter, FileBasedDataReader
+    from magic_pdf.data.data_reader_writer import (
        FileBasedDataWriter,
        FileBasedDataReader,
    )
    from magic_pdf.data.dataset import PymuDocDataset
    from magic_pdf.model.doc_analyze_by_custom_model import doc_analyze
    from magic_pdf.config.enums import SupportedPdfParseMethod
    from magic_pdf.data.read_api import read_local_office, read_local_images
-T = TypeVar('T')
+T = TypeVar("T")
 class MineruParser:
    """
@ -58,7 +76,11 @@ class MineruParser:
        pass
    @staticmethod
-    def safe_write(writer: Any, content: Union[str, bytes, Dict[str, Any], List[Any]], filename: str) -> None:
+    def safe_write(
        writer: Any,
        content: Union[str, bytes, Dict[str, Any], List[Any]],
        filename: str,
    ) -> None:
        """
        Safely write content to a file, ensuring the filename is valid
@ -80,15 +102,22 @@ class MineruParser:
                writer.write(content, filename)
            except TypeError:
                # If the writer expects bytes, convert string to bytes
-                writer.write(content.encode('utf-8'), filename)
+                writer.write(content.encode("utf-8"), filename)
        else:
            # For dict/list content, always encode as JSON string first
            if isinstance(content, (dict, list)):
                try:
-                    writer.write(json.dumps(content, ensure_ascii=False, indent=4), filename)
+                    writer.write(
                        json.dumps(content, ensure_ascii=False, indent=4), filename
                    )
                except TypeError:
                    # If the writer expects bytes, convert JSON string to bytes
-                    writer.write(json.dumps(content, ensure_ascii=False, indent=4).encode('utf-8'), filename)
+                    writer.write(
                        json.dumps(content, ensure_ascii=False, indent=4).encode(
                            "utf-8"
                        ),
                        filename,
                    )
            else:
                # Regular content (assumed to be bytes or compatible)
                writer.write(content, filename)
@ -97,7 +126,7 @@ class MineruParser:
    def parse_pdf(
        pdf_path: Union[str, Path],
        output_dir: Optional[str] = None,
-        use_ocr: bool = False
+        use_ocr: bool = False,
    ) -> Tuple[List[Dict[str, Any]], str]:
        """
        Parse PDF document
@ -150,9 +179,15 @@ class MineruParser:
            # Draw visualizations
            try:
-                infer_result.draw_model(os.path.join(local_md_dir, f"{name_without_suff}_model.pdf"))  # type: ignore
+                infer_result.draw_model(
-                pipe_result.draw_layout(os.path.join(local_md_dir, f"{name_without_suff}_layout.pdf"))  # type: ignore
+                    os.path.join(local_md_dir, f"{name_without_suff}_model.pdf")
-                pipe_result.draw_span(os.path.join(local_md_dir, f"{name_without_suff}_spans.pdf"))  # type: ignore
+                )  # type: ignore
                pipe_result.draw_layout(
                    os.path.join(local_md_dir, f"{name_without_suff}_layout.pdf")
                )  # type: ignore
                pipe_result.draw_span(
                    os.path.join(local_md_dir, f"{name_without_suff}_spans.pdf")
                )  # type: ignore
            except Exception as e:
                print(f"Warning: Failed to draw visualizations: {str(e)}")
@ -162,7 +197,9 @@ class MineruParser:
            # Save files using dump methods (consistent with API)
            pipe_result.dump_md(md_writer, f"{name_without_suff}.md", image_dir)  # type: ignore
-            pipe_result.dump_content_list(md_writer, f"{name_without_suff}_content_list.json", image_dir)  # type: ignore
+            pipe_result.dump_content_list(
                md_writer, f"{name_without_suff}_content_list.json", image_dir
            )  # type: ignore
            pipe_result.dump_middle_json(md_writer, f"{name_without_suff}_middle.json")  # type: ignore
            # Save model result - convert JSON string to bytes before writing
@ -171,16 +208,24 @@ class MineruParser:
            try:
                # Try to write to a file manually to avoid FileBasedDataWriter issues
-                model_file_path = os.path.join(local_md_dir, f"{name_without_suff}_model.json")
+                model_file_path = os.path.join(
-                with open(model_file_path, 'w', encoding='utf-8') as f:
+                    local_md_dir, f"{name_without_suff}_model.json"
                )
                with open(model_file_path, "w", encoding="utf-8") as f:
                    f.write(json_str)
            except Exception as e:
-                print(f"Warning: Failed to save model result using file write: {str(e)}")
+                print(
                    f"Warning: Failed to save model result using file write: {str(e)}"
                )
                try:
                    # If direct file write fails, try using the writer with bytes encoding
-                    md_writer.write(json_str.encode('utf-8'), f"{name_without_suff}_model.json")  # type: ignore
+                    md_writer.write(
                        json_str.encode("utf-8"), f"{name_without_suff}_model.json"
                    )  # type: ignore
                except Exception as e2:
-                    print(f"Warning: Failed to save model result using writer: {str(e2)}")
+                    print(
                        f"Warning: Failed to save model result using writer: {str(e2)}"
                    )
            return cast(Tuple[List[Dict[str, Any]], str], (content_list, md_content))
@ -190,8 +235,7 @@ class MineruParser:
    @staticmethod
    def parse_office_doc(
-        doc_path: Union[str, Path],
+        doc_path: Union[str, Path], output_dir: Optional[str] = None
        output_dir: Optional[str] = None
    ) -> Tuple[List[Dict[str, Any]], str]:
        """
        Parse office document (Word, PPT, etc.)
@ -231,9 +275,9 @@ class MineruParser:
            # Apply chain of operations according to API documentation
            # This follows the pattern shown in MS-Office example in the API docs
-            ds.apply(doc_analyze, ocr=True)\
+            ds.apply(doc_analyze, ocr=True).pipe_txt_mode(image_writer).dump_md(
-              .pipe_txt_mode(image_writer)\
+                md_writer, f"{name_without_suff}.md", image_dir
-              .dump_md(md_writer, f"{name_without_suff}.md", image_dir)  # type: ignore
+            )  # type: ignore
            # Re-execute for getting the content data
            infer_result = ds.apply(doc_analyze, ocr=True)  # type: ignore
@ -244,7 +288,9 @@ class MineruParser:
            content_list = pipe_result.get_content_list(image_dir)  # type: ignore
            # Save additional output files
-            pipe_result.dump_content_list(md_writer, f"{name_without_suff}_content_list.json", image_dir)  # type: ignore
+            pipe_result.dump_content_list(
                md_writer, f"{name_without_suff}_content_list.json", image_dir
            )  # type: ignore
            pipe_result.dump_middle_json(md_writer, f"{name_without_suff}_middle.json")  # type: ignore
            # Save model result - convert JSON string to bytes before writing
@ -253,16 +299,24 @@ class MineruParser:
            try:
                # Try to write to a file manually to avoid FileBasedDataWriter issues
-                model_file_path = os.path.join(local_md_dir, f"{name_without_suff}_model.json")
+                model_file_path = os.path.join(
-                with open(model_file_path, 'w', encoding='utf-8') as f:
+                    local_md_dir, f"{name_without_suff}_model.json"
                )
                with open(model_file_path, "w", encoding="utf-8") as f:
                    f.write(json_str)
            except Exception as e:
-                print(f"Warning: Failed to save model result using file write: {str(e)}")
+                print(
                    f"Warning: Failed to save model result using file write: {str(e)}"
                )
                try:
                    # If direct file write fails, try using the writer with bytes encoding
-                    md_writer.write(json_str.encode('utf-8'), f"{name_without_suff}_model.json")  # type: ignore
+                    md_writer.write(
                        json_str.encode("utf-8"), f"{name_without_suff}_model.json"
                    )  # type: ignore
                except Exception as e2:
-                    print(f"Warning: Failed to save model result using writer: {str(e2)}")
+                    print(
                        f"Warning: Failed to save model result using writer: {str(e2)}"
                    )
            return cast(Tuple[List[Dict[str, Any]], str], (content_list, md_content))
@ -272,8 +326,7 @@ class MineruParser:
    @staticmethod
    def parse_image(
-        image_path: Union[str, Path],
+        image_path: Union[str, Path], output_dir: Optional[str] = None
        output_dir: Optional[str] = None
    ) -> Tuple[List[Dict[str, Any]], str]:
        """
        Parse image document
@ -313,9 +366,9 @@ class MineruParser:
            # Apply chain of operations according to API documentation
            # This follows the pattern shown in Image example in the API docs
-            ds.apply(doc_analyze, ocr=True)\
+            ds.apply(doc_analyze, ocr=True).pipe_ocr_mode(image_writer).dump_md(
-              .pipe_ocr_mode(image_writer)\
+                md_writer, f"{name_without_suff}.md", image_dir
-              .dump_md(md_writer, f"{name_without_suff}.md", image_dir)  # type: ignore
+            )  # type: ignore
            # Re-execute for getting the content data
            infer_result = ds.apply(doc_analyze, ocr=True)  # type: ignore
@ -326,7 +379,9 @@ class MineruParser:
            content_list = pipe_result.get_content_list(image_dir)  # type: ignore
            # Save additional output files
-            pipe_result.dump_content_list(md_writer, f"{name_without_suff}_content_list.json", image_dir)  # type: ignore
+            pipe_result.dump_content_list(
                md_writer, f"{name_without_suff}_content_list.json", image_dir
            )  # type: ignore
            pipe_result.dump_middle_json(md_writer, f"{name_without_suff}_middle.json")  # type: ignore
            # Save model result - convert JSON string to bytes before writing
@ -335,16 +390,24 @@ class MineruParser:
            try:
                # Try to write to a file manually to avoid FileBasedDataWriter issues
-                model_file_path = os.path.join(local_md_dir, f"{name_without_suff}_model.json")
+                model_file_path = os.path.join(
-                with open(model_file_path, 'w', encoding='utf-8') as f:
+                    local_md_dir, f"{name_without_suff}_model.json"
                )
                with open(model_file_path, "w", encoding="utf-8") as f:
                    f.write(json_str)
            except Exception as e:
-                print(f"Warning: Failed to save model result using file write: {str(e)}")
+                print(
                    f"Warning: Failed to save model result using file write: {str(e)}"
                )
                try:
                    # If direct file write fails, try using the writer with bytes encoding
-                    md_writer.write(json_str.encode('utf-8'), f"{name_without_suff}_model.json")  # type: ignore
+                    md_writer.write(
                        json_str.encode("utf-8"), f"{name_without_suff}_model.json"
                    )  # type: ignore
                except Exception as e2:
-                    print(f"Warning: Failed to save model result using writer: {str(e2)}")
+                    print(
                        f"Warning: Failed to save model result using writer: {str(e2)}"
                    )
            return cast(Tuple[List[Dict[str, Any]], str], (content_list, md_content))
@ -357,7 +420,7 @@ class MineruParser:
        file_path: Union[str, Path],
        parse_method: str = "auto",
        output_dir: Optional[str] = None,
-        save_results: bool = True
+        save_results: bool = True,
    ) -> Tuple[List[Dict[str, Any]], str]:
        """
        Parse document using MinerU based on file extension
@ -382,51 +445,46 @@ class MineruParser:
        # Choose appropriate parser based on file type
        if ext in [".pdf"]:
            return MineruParser.parse_pdf(
-                file_path,
+                file_path, output_dir, use_ocr=(parse_method == "ocr")
                output_dir,
                use_ocr=(parse_method == "ocr")
            )
        elif ext in [".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif"]:
-            return MineruParser.parse_image(
+            return MineruParser.parse_image(file_path, output_dir)
                file_path,
                output_dir
            )
        elif ext in [".doc", ".docx", ".ppt", ".pptx"]:
-            return MineruParser.parse_office_doc(
+            return MineruParser.parse_office_doc(file_path, output_dir)
                file_path,
                output_dir
            )
        else:
            # For unsupported file types, default to PDF parsing
-            print(f"Warning: Unsupported file extension '{ext}', trying generic PDF parser")
+            print(
-            return MineruParser.parse_pdf(
+                f"Warning: Unsupported file extension '{ext}', trying generic PDF parser"
                file_path,
                output_dir,
                use_ocr=(parse_method == "ocr")
            )
            return MineruParser.parse_pdf(
                file_path, output_dir, use_ocr=(parse_method == "ocr")
            )
 def main():
    """
    Main function to run the MinerU parser from command line
    """
-    parser = argparse.ArgumentParser(description='Parse documents using MinerU')
+    parser = argparse.ArgumentParser(description="Parse documents using MinerU")
-    parser.add_argument('file_path', help='Path to the document to parse')
+    parser.add_argument("file_path", help="Path to the document to parse")
-    parser.add_argument('--output', '-o', help='Output directory path')
+    parser.add_argument("--output", "-o", help="Output directory path")
-    parser.add_argument('--method', '-m', 
+    parser.add_argument(
-                       choices=['auto', 'ocr', 'txt'],
+        "--method",
-                       default='auto',
+        "-m",
-                       help='Parsing method (auto, ocr, txt)')
+        choices=["auto", "ocr", "txt"],
-    parser.add_argument('--stats', action='store_true',
+        default="auto",
-                       help='Display content statistics')
+        help="Parsing method (auto, ocr, txt)",
    )
    parser.add_argument(
        "--stats", action="store_true", help="Display content statistics"
    )
    args = parser.parse_args()
    try:
        # Parse the document
        content_list, md_content = MineruParser.parse_document(
-            file_path=args.file_path,
+            file_path=args.file_path, parse_method=args.method, output_dir=args.output
            parse_method=args.method,
            output_dir=args.output
        )
        # Display statistics if requested
@ -437,7 +495,7 @@ def main():
            # Count different types of content
            content_types = {}
            for item in content_list:
-                content_type = item.get('type', 'unknown')
+                content_type = item.get("type", "unknown")
                content_types[content_type] = content_types.get(content_type, 0) + 1
            print("\nContent Type Distribution:")
@ -450,5 +508,6 @@ def main():
    return 0
-if __name__ == '__main__':
+
 if __name__ == "__main__":
    exit(main())
--- a/lightrag/modalprocessors.py
+++ b/lightrag/modalprocessors.py
@ -65,8 +65,8 @@ class BaseModalProcessor:
        raise NotImplementedError("Subclasses must implement this method")
    async def _create_entity_and_chunk(
-            self, modal_chunk: str, entity_info: Dict[str, Any],
+        self, modal_chunk: str, entity_info: Dict[str, Any], file_path: str
-            file_path: str) -> Tuple[str, Dict[str, Any]]:
+    ) -> Tuple[str, Dict[str, Any]]:
        """Create entity and text chunk"""
        # Create chunk
        chunk_id = compute_mdhash_id(str(modal_chunk), prefix="chunk-")
@ -93,16 +93,16 @@ class BaseModalProcessor:
            "created_at": int(time.time()),
        }
-        await self.knowledge_graph_inst.upsert_node(entity_info["entity_name"],
+        await self.knowledge_graph_inst.upsert_node(
-                                                    node_data)
+            entity_info["entity_name"], node_data
        )
        # Insert entity into vector database
        entity_vdb_data = {
            compute_mdhash_id(entity_info["entity_name"], prefix="ent-"): {
                "entity_name": entity_info["entity_name"],
                "entity_type": entity_info["entity_type"],
-                "content":
+                "content": f"{entity_info['entity_name']}\n{entity_info['summary']}",
                f"{entity_info['entity_name']}\n{entity_info['summary']}",
                "source_id": chunk_id,
                "file_path": file_path,
            }
@ -110,8 +110,7 @@ class BaseModalProcessor:
        await self.entities_vdb.upsert(entity_vdb_data)
        # Process entity and relationship extraction
-        await self._process_chunk_for_extraction(chunk_id,
+        await self._process_chunk_for_extraction(chunk_id, entity_info["entity_name"])
                                                 entity_info["entity_name"])
        # Ensure all storage updates are complete
        await self._insert_done()
@ -120,11 +119,12 @@ class BaseModalProcessor:
            "entity_name": entity_info["entity_name"],
            "entity_type": entity_info["entity_type"],
            "description": entity_info["summary"],
-            "chunk_id": chunk_id
+            "chunk_id": chunk_id,
        }
-    async def _process_chunk_for_extraction(self, chunk_id: str,
+    async def _process_chunk_for_extraction(
-                                            modal_entity_name: str):
+        self, chunk_id: str, modal_entity_name: str
    ):
        """Process chunk for entity and relationship extraction"""
        chunk_data = await self.text_chunks_db.get_by_id(chunk_id)
        if not chunk_data:
@ -168,37 +168,27 @@ class BaseModalProcessor:
                if entity_name != modal_entity_name:  # Skip self-relationship
                    # Create belongs_to relationship
                    relation_data = {
-                        "description":
+                        "description": f"Entity {entity_name} belongs to {modal_entity_name}",
-                        f"Entity {entity_name} belongs to {modal_entity_name}",
+                        "keywords": "belongs_to,part_of,contained_in",
-                        "keywords":
+                        "source_id": chunk_id,
-                        "belongs_to,part_of,contained_in",
+                        "weight": 10.0,
-                        "source_id":
+                        "file_path": chunk_data.get("file_path", "manual_creation"),
                        chunk_id,
                        "weight":
                        10.0,
                        "file_path":
                        chunk_data.get("file_path", "manual_creation"),
                    }
                    await self.knowledge_graph_inst.upsert_edge(
-                        entity_name, modal_entity_name, relation_data)
+                        entity_name, modal_entity_name, relation_data
                    )
-                    relation_id = compute_mdhash_id(entity_name +
+                    relation_id = compute_mdhash_id(
-                                                    modal_entity_name,
+                        entity_name + modal_entity_name, prefix="rel-"
-                                                    prefix="rel-")
+                    )
                    relation_vdb_data = {
                        relation_id: {
-                            "src_id":
+                            "src_id": entity_name,
-                            entity_name,
+                            "tgt_id": modal_entity_name,
-                            "tgt_id":
+                            "keywords": relation_data["keywords"],
-                            modal_entity_name,
+                            "content": f"{relation_data['keywords']}\t{entity_name}\n{modal_entity_name}\n{relation_data['description']}",
-                            "keywords":
+                            "source_id": chunk_id,
-                            relation_data["keywords"],
+                            "file_path": chunk_data.get("file_path", "manual_creation"),
                            "content":
                            f"{relation_data['keywords']}\t{entity_name}\n{modal_entity_name}\n{relation_data['description']}",
                            "source_id":
                            chunk_id,
                            "file_path":
                            chunk_data.get("file_path", "manual_creation"),
                        }
                    }
                    await self.relationships_vdb.upsert(relation_vdb_data)
@ -215,16 +205,18 @@ class BaseModalProcessor:
        )
    async def _insert_done(self) -> None:
-        await asyncio.gather(*[
+        await asyncio.gather(
-            cast(StorageNameSpace, storage_inst).index_done_callback()
+            *[
-            for storage_inst in [
+                cast(StorageNameSpace, storage_inst).index_done_callback()
-                self.text_chunks_db,
+                for storage_inst in [
-                self.chunks_vdb,
+                    self.text_chunks_db,
-                self.entities_vdb,
+                    self.chunks_vdb,
-                self.relationships_vdb,
+                    self.entities_vdb,
-                self.knowledge_graph_inst,
+                    self.relationships_vdb,
                    self.knowledge_graph_inst,
                ]
            ]
-        ])
+        )
 class ImageModalProcessor(BaseModalProcessor):
@ -243,8 +235,7 @@ class ImageModalProcessor(BaseModalProcessor):
        """Encode image to base64"""
        try:
            with open(image_path, "rb") as image_file:
-                encoded_string = base64.b64encode(
+                encoded_string = base64.b64encode(image_file.read()).decode("utf-8")
                    image_file.read()).decode('utf-8')
            return encoded_string
        except Exception as e:
            logger.error(f"Failed to encode image {image_path}: {e}")
@ -309,8 +300,7 @@ class ImageModalProcessor(BaseModalProcessor):
                response = await self.modal_caption_func(
                    vision_prompt,
                    image_data=image_base64,
-                    system_prompt=
+                    system_prompt="You are an expert image analyst. Provide detailed, accurate descriptions.",
                    "You are an expert image analyst. Provide detailed, accurate descriptions."
                )
            else:
                # Analyze based on existing text information
@ -324,13 +314,11 @@ class ImageModalProcessor(BaseModalProcessor):
                response = await self.modal_caption_func(
                    text_prompt,
-                    system_prompt=
+                    system_prompt="You are an expert image analyst. Provide detailed analysis based on available information.",
                    "You are an expert image analyst. Provide detailed analysis based on available information."
                )
            # Parse response
-            enhanced_caption, entity_info = self._parse_response(
+            enhanced_caption, entity_info = self._parse_response(response, entity_name)
                response, entity_name)
            # Build complete image content
            modal_chunk = f"""
@ -341,27 +329,30 @@ class ImageModalProcessor(BaseModalProcessor):
                    Visual Analysis: {enhanced_caption}"""
-            return await self._create_entity_and_chunk(modal_chunk,
+            return await self._create_entity_and_chunk(
-                                                       entity_info, file_path)
+                modal_chunk, entity_info, file_path
            )
        except Exception as e:
            logger.error(f"Error processing image content: {e}")
            # Fallback processing
            fallback_entity = {
-                "entity_name": entity_name if entity_name else
+                "entity_name": entity_name
-                f"image_{compute_mdhash_id(str(modal_content))}",
+                if entity_name
                else f"image_{compute_mdhash_id(str(modal_content))}",
                "entity_type": "image",
-                "summary": f"Image content: {str(modal_content)[:100]}"
+                "summary": f"Image content: {str(modal_content)[:100]}",
            }
            return str(modal_content), fallback_entity
-    def _parse_response(self,
+    def _parse_response(
-                        response: str,
+        self, response: str, entity_name: str = None
-                        entity_name: str = None) -> Tuple[str, Dict[str, Any]]:
+    ) -> Tuple[str, Dict[str, Any]]:
        """Parse model response"""
        try:
            response_data = json.loads(
-                re.search(r"\{.*\}", response, re.DOTALL).group(0))
+                re.search(r"\{.*\}", response, re.DOTALL).group(0)
            )
            description = response_data.get("detailed_description", "")
            entity_data = response_data.get("entity_info", {})
@ -369,11 +360,14 @@ class ImageModalProcessor(BaseModalProcessor):
            if not description or not entity_data:
                raise ValueError("Missing required fields in response")
-            if not all(key in entity_data
+            if not all(
-                       for key in ["entity_name", "entity_type", "summary"]):
+                key in entity_data for key in ["entity_name", "entity_type", "summary"]
            ):
                raise ValueError("Missing required fields in entity_info")
-            entity_data["entity_name"] = entity_data["entity_name"] + f" ({entity_data['entity_type']})"
+            entity_data["entity_name"] = (
                entity_data["entity_name"] + f" ({entity_data['entity_type']})"
            )
            if entity_name:
                entity_data["entity_name"] = entity_name
@ -382,13 +376,11 @@ class ImageModalProcessor(BaseModalProcessor):
        except (json.JSONDecodeError, AttributeError, ValueError) as e:
            logger.error(f"Error parsing image analysis response: {e}")
            fallback_entity = {
-                "entity_name":
+                "entity_name": entity_name
-                entity_name
+                if entity_name
-                if entity_name else f"image_{compute_mdhash_id(response)}",
+                else f"image_{compute_mdhash_id(response)}",
-                "entity_type":
+                "entity_type": "image",
-                "image",
+                "summary": response[:100] + "..." if len(response) > 100 else response,
                "summary":
                response[:100] + "..." if len(response) > 100 else response
            }
            return response, fallback_entity
@ -447,15 +439,15 @@ class TableModalProcessor(BaseModalProcessor):
        response = await self.modal_caption_func(
            table_prompt,
-            system_prompt=
+            system_prompt="You are an expert data analyst. Provide detailed table analysis with specific insights.",
            "You are an expert data analyst. Provide detailed table analysis with specific insights."
        )
        # Parse response
        enhanced_caption, entity_info = self._parse_table_response(
-            response, entity_name)
+            response, entity_name
        )
-        #TODO: Add Retry Mechanism
+        # TODO: Add Retry Mechanism
        # Build complete table content
        modal_chunk = f"""Table Analysis:
@ -466,17 +458,16 @@ class TableModalProcessor(BaseModalProcessor):
            Analysis: {enhanced_caption}"""
-        return await self._create_entity_and_chunk(modal_chunk, entity_info,
+        return await self._create_entity_and_chunk(modal_chunk, entity_info, file_path)
                                                   file_path)
    def _parse_table_response(
-            self,
+        self, response: str, entity_name: str = None
-            response: str,
+    ) -> Tuple[str, Dict[str, Any]]:
            entity_name: str = None) -> Tuple[str, Dict[str, Any]]:
        """Parse table analysis response"""
        try:
            response_data = json.loads(
-                re.search(r"\{.*\}", response, re.DOTALL).group(0))
+                re.search(r"\{.*\}", response, re.DOTALL).group(0)
            )
            description = response_data.get("detailed_description", "")
            entity_data = response_data.get("entity_info", {})
@ -484,11 +475,14 @@ class TableModalProcessor(BaseModalProcessor):
            if not description or not entity_data:
                raise ValueError("Missing required fields in response")
-            if not all(key in entity_data
+            if not all(
-                       for key in ["entity_name", "entity_type", "summary"]):
+                key in entity_data for key in ["entity_name", "entity_type", "summary"]
            ):
                raise ValueError("Missing required fields in entity_info")
-            entity_data["entity_name"] = entity_data["entity_name"] + f" ({entity_data['entity_type']})"
+            entity_data["entity_name"] = (
                entity_data["entity_name"] + f" ({entity_data['entity_type']})"
            )
            if entity_name:
                entity_data["entity_name"] = entity_name
@ -497,13 +491,11 @@ class TableModalProcessor(BaseModalProcessor):
        except (json.JSONDecodeError, AttributeError, ValueError) as e:
            logger.error(f"Error parsing table analysis response: {e}")
            fallback_entity = {
-                "entity_name":
+                "entity_name": entity_name
-                entity_name
+                if entity_name
-                if entity_name else f"table_{compute_mdhash_id(response)}",
+                else f"table_{compute_mdhash_id(response)}",
-                "entity_type":
+                "entity_type": "table",
-                "table",
+                "summary": response[:100] + "..." if len(response) > 100 else response,
                "summary":
                response[:100] + "..." if len(response) > 100 else response
            }
            return response, fallback_entity
@ -559,13 +551,13 @@ class EquationModalProcessor(BaseModalProcessor):
        response = await self.modal_caption_func(
            equation_prompt,
-            system_prompt=
+            system_prompt="You are an expert mathematician. Provide detailed mathematical analysis.",
            "You are an expert mathematician. Provide detailed mathematical analysis."
        )
        # Parse response
        enhanced_caption, entity_info = self._parse_equation_response(
-            response, entity_name)
+            response, entity_name
        )
        # Build complete equation content
        modal_chunk = f"""Mathematical Equation Analysis:
@ -574,17 +566,16 @@ class EquationModalProcessor(BaseModalProcessor):
            Mathematical Analysis: {enhanced_caption}"""
-        return await self._create_entity_and_chunk(modal_chunk, entity_info,
+        return await self._create_entity_and_chunk(modal_chunk, entity_info, file_path)
                                                   file_path)
    def _parse_equation_response(
-            self,
+        self, response: str, entity_name: str = None
-            response: str,
+    ) -> Tuple[str, Dict[str, Any]]:
            entity_name: str = None) -> Tuple[str, Dict[str, Any]]:
        """Parse equation analysis response"""
        try:
            response_data = json.loads(
-                re.search(r"\{.*\}", response, re.DOTALL).group(0))
+                re.search(r"\{.*\}", response, re.DOTALL).group(0)
            )
            description = response_data.get("detailed_description", "")
            entity_data = response_data.get("entity_info", {})
@ -592,11 +583,14 @@ class EquationModalProcessor(BaseModalProcessor):
            if not description or not entity_data:
                raise ValueError("Missing required fields in response")
-            if not all(key in entity_data
+            if not all(
-                       for key in ["entity_name", "entity_type", "summary"]):
+                key in entity_data for key in ["entity_name", "entity_type", "summary"]
            ):
                raise ValueError("Missing required fields in entity_info")
-            entity_data["entity_name"] = entity_data["entity_name"] + f" ({entity_data['entity_type']})"
+            entity_data["entity_name"] = (
                entity_data["entity_name"] + f" ({entity_data['entity_type']})"
            )
            if entity_name:
                entity_data["entity_name"] = entity_name
@ -605,13 +599,11 @@ class EquationModalProcessor(BaseModalProcessor):
        except (json.JSONDecodeError, AttributeError, ValueError) as e:
            logger.error(f"Error parsing equation analysis response: {e}")
            fallback_entity = {
-                "entity_name":
+                "entity_name": entity_name
-                entity_name
+                if entity_name
-                if entity_name else f"equation_{compute_mdhash_id(response)}",
+                else f"equation_{compute_mdhash_id(response)}",
-                "entity_type":
+                "entity_type": "equation",
-                "equation",
+                "summary": response[:100] + "..." if len(response) > 100 else response,
                "summary":
                response[:100] + "..." if len(response) > 100 else response
            }
            return response, fallback_entity
@ -651,13 +643,13 @@ class GenericModalProcessor(BaseModalProcessor):
        response = await self.modal_caption_func(
            generic_prompt,
-            system_prompt=
+            system_prompt=f"You are an expert content analyst specializing in {content_type} content.",
            f"You are an expert content analyst specializing in {content_type} content."
        )
        # Parse response
        enhanced_caption, entity_info = self._parse_generic_response(
-            response, entity_name, content_type)
+            response, entity_name, content_type
        )
        # Build complete content
        modal_chunk = f"""{content_type.title()} Content Analysis:
@ -665,18 +657,16 @@ class GenericModalProcessor(BaseModalProcessor):
            Analysis: {enhanced_caption}"""
-        return await self._create_entity_and_chunk(modal_chunk, entity_info,
+        return await self._create_entity_and_chunk(modal_chunk, entity_info, file_path)
                                                   file_path)
    def _parse_generic_response(
-            self,
+        self, response: str, entity_name: str = None, content_type: str = "content"
-            response: str,
+    ) -> Tuple[str, Dict[str, Any]]:
            entity_name: str = None,
            content_type: str = "content") -> Tuple[str, Dict[str, Any]]:
        """Parse generic analysis response"""
        try:
            response_data = json.loads(
-                re.search(r"\{.*\}", response, re.DOTALL).group(0))
+                re.search(r"\{.*\}", response, re.DOTALL).group(0)
            )
            description = response_data.get("detailed_description", "")
            entity_data = response_data.get("entity_info", {})
@ -684,11 +674,14 @@ class GenericModalProcessor(BaseModalProcessor):
            if not description or not entity_data:
                raise ValueError("Missing required fields in response")
-            if not all(key in entity_data
+            if not all(
-                       for key in ["entity_name", "entity_type", "summary"]):
+                key in entity_data for key in ["entity_name", "entity_type", "summary"]
            ):
                raise ValueError("Missing required fields in entity_info")
-            entity_data["entity_name"] = entity_data["entity_name"] + f" ({entity_data['entity_type']})"
+            entity_data["entity_name"] = (
                entity_data["entity_name"] + f" ({entity_data['entity_type']})"
            )
            if entity_name:
                entity_data["entity_name"] = entity_name
@ -697,12 +690,10 @@ class GenericModalProcessor(BaseModalProcessor):
        except (json.JSONDecodeError, AttributeError, ValueError) as e:
            logger.error(f"Error parsing generic analysis response: {e}")
            fallback_entity = {
-                "entity_name":
+                "entity_name": entity_name
-                entity_name if entity_name else
+                if entity_name
-                f"{content_type}_{compute_mdhash_id(response)}",
+                else f"{content_type}_{compute_mdhash_id(response)}",
-                "entity_type":
+                "entity_type": content_type,
-                content_type,
+                "summary": response[:100] + "..." if len(response) > 100 else response,
                "summary":
                response[:100] + "..." if len(response) > 100 else response
            }
            return response, fallback_entity
--- a/lightrag/raganything.py
+++ b/lightrag/raganything.py
@ -28,7 +28,7 @@ from lightrag.modalprocessors import (
    ImageModalProcessor,
    TableModalProcessor,
    EquationModalProcessor,
-    GenericModalProcessor
+    GenericModalProcessor,
 )
@ -43,7 +43,7 @@ class RAGAnything:
        embedding_func: Optional[Callable] = None,
        working_dir: str = "./rag_storage",
        embedding_dim: int = 3072,
-        max_token_size: int = 8192
+        max_token_size: int = 8192,
    ):
        """
        Initialize Multimodal Document Processing Pipeline
@ -83,26 +83,25 @@ class RAGAnything:
    def _initialize_processors(self):
        """Initialize multimodal processors with appropriate model functions"""
        if self.lightrag is None:
-            raise ValueError("LightRAG instance must be initialized before creating processors")
+            raise ValueError(
                "LightRAG instance must be initialized before creating processors"
            )
        # Create different multimodal processors
        self.modal_processors = {
            "image": ImageModalProcessor(
                lightrag=self.lightrag,
-                modal_caption_func=self.vision_model_func or self.llm_model_func
+                modal_caption_func=self.vision_model_func or self.llm_model_func,
            ),
            "table": TableModalProcessor(
-                lightrag=self.lightrag,
+                lightrag=self.lightrag, modal_caption_func=self.llm_model_func
                modal_caption_func=self.llm_model_func
            ),
            "equation": EquationModalProcessor(
-                lightrag=self.lightrag,
+                lightrag=self.lightrag, modal_caption_func=self.llm_model_func
                modal_caption_func=self.llm_model_func
            ),
            "generic": GenericModalProcessor(
-                lightrag=self.lightrag,
+                lightrag=self.lightrag, modal_caption_func=self.llm_model_func
-                modal_caption_func=self.llm_model_func
+            ),
            )
        }
        self.logger.info("Multimodal processors initialized")
@ -115,9 +114,13 @@ class RAGAnything:
        # Validate required functions
        if self.llm_model_func is None:
-            raise ValueError("llm_model_func must be provided when LightRAG is not pre-initialized")
+            raise ValueError(
                "llm_model_func must be provided when LightRAG is not pre-initialized"
            )
        if self.embedding_func is None:
-            raise ValueError("embedding_func must be provided when LightRAG is not pre-initialized")
+            raise ValueError(
                "embedding_func must be provided when LightRAG is not pre-initialized"
            )
        from lightrag.kg.shared_storage import initialize_pipeline_status
@ -145,7 +148,7 @@ class RAGAnything:
        file_path: str,
        output_dir: str = "./output",
        parse_method: str = "auto",
-        display_stats: bool = True
+        display_stats: bool = True,
    ) -> Tuple[List[Dict[str, Any]], str]:
        """
        Parse document using MinerU
@ -170,31 +173,29 @@ class RAGAnything:
        try:
            if ext in [".pdf"]:
-                self.logger.info(f"Detected PDF file, using PDF parser (OCR={parse_method == 'ocr'})...")
+                self.logger.info(
                    f"Detected PDF file, using PDF parser (OCR={parse_method == 'ocr'})..."
                )
                content_list, md_content = MineruParser.parse_pdf(
-                    file_path,
+                    file_path, output_dir, use_ocr=(parse_method == "ocr")
                    output_dir,
                    use_ocr=(parse_method == "ocr")
                )
            elif ext in [".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif"]:
                self.logger.info("Detected image file, using image parser...")
                content_list, md_content = MineruParser.parse_image(
-                    file_path,
+                    file_path, output_dir
                    output_dir
                )
            elif ext in [".doc", ".docx", ".ppt", ".pptx"]:
                self.logger.info("Detected Office document, using Office parser...")
                content_list, md_content = MineruParser.parse_office_doc(
-                    file_path,
+                    file_path, output_dir
                    output_dir
                )
            else:
                # For other or unknown formats, use generic parser
-                self.logger.info(f"Using generic parser for {ext} file (method={parse_method})...")
+                self.logger.info(
                    f"Using generic parser for {ext} file (method={parse_method})..."
                )
                content_list, md_content = MineruParser.parse_document(
-                    file_path,
+                    file_path, parse_method=parse_method, output_dir=output_dir
                    parse_method=parse_method,
                    output_dir=output_dir
                )
        except Exception as e:
@ -202,12 +203,12 @@ class RAGAnything:
            self.logger.warning("Falling back to generic parser...")
            # If specific parser fails, fall back to generic parser
            content_list, md_content = MineruParser.parse_document(
-                file_path,
+                file_path, parse_method=parse_method, output_dir=output_dir
                parse_method=parse_method,
                output_dir=output_dir
            )
-        self.logger.info(f"Parsing complete! Extracted {len(content_list)} content blocks")
+        self.logger.info(
            f"Parsing complete! Extracted {len(content_list)} content blocks"
        )
        self.logger.info(f"Markdown text length: {len(md_content)} characters")
        # Display content statistics if requested
@ -230,7 +231,9 @@ class RAGAnything:
        return content_list, md_content
-    def _separate_content(self, content_list: List[Dict[str, Any]]) -> Tuple[str, List[Dict[str, Any]]]:
+    def _separate_content(
        self, content_list: List[Dict[str, Any]]
    ) -> Tuple[str, List[Dict[str, Any]]]:
        """
        Separate text content and multimodal content
@ -258,7 +261,7 @@ class RAGAnything:
        # Merge all text content
        text_content = "\n\n".join(text_parts)
-        self.logger.info(f"Content separation complete:")
+        self.logger.info("Content separation complete:")
        self.logger.info(f"  - Text content length: {len(text_content)} characters")
        self.logger.info(f"  - Multimodal items count: {len(multimodal_items)}")
@ -301,12 +304,14 @@ class RAGAnything:
            file_paths=file_paths,
            split_by_character=split_by_character,
            split_by_character_only=split_by_character_only,
-            ids=ids
+            ids=ids,
        )
        self.logger.info("Text content insertion complete")
-    async def _process_multimodal_content(self, multimodal_items: List[Dict[str, Any]], file_path: str):
+    async def _process_multimodal_content(
        self, multimodal_items: List[Dict[str, Any]], file_path: str
    ):
        """
        Process multimodal content (using specialized processors)
@ -325,20 +330,29 @@ class RAGAnything:
        for i, item in enumerate(multimodal_items):
            try:
                content_type = item.get("type", "unknown")
-                self.logger.info(f"Processing item {i+1}/{len(multimodal_items)}: {content_type} content")
+                self.logger.info(
                    f"Processing item {i+1}/{len(multimodal_items)}: {content_type} content"
                )
                # Select appropriate processor
                processor = self._get_processor_for_type(content_type)
                if processor:
-                    enhanced_caption, entity_info = await processor.process_multimodal_content(
+                    (
                        enhanced_caption,
                        entity_info,
                    ) = await processor.process_multimodal_content(
                        modal_content=item,
                        content_type=content_type,
-                        file_path=file_name
+                        file_path=file_name,
                    )
                    self.logger.info(
                        f"{content_type} processing complete: {entity_info.get('entity_name', 'Unknown')}"
                    )
                    self.logger.info(f"{content_type} processing complete: {entity_info.get('entity_name', 'Unknown')}")
                else:
-                    self.logger.warning(f"No suitable processor found for {content_type} type content")
+                    self.logger.warning(
                        f"No suitable processor found for {content_type} type content"
                    )
            except Exception as e:
                self.logger.error(f"Error processing multimodal content: {str(e)}")
@ -376,7 +390,7 @@ class RAGAnything:
        display_stats: bool = True,
        split_by_character: str | None = None,
        split_by_character_only: bool = False,
-        doc_id: str | None = None
+        doc_id: str | None = None,
    ):
        """
        Complete document processing workflow
@ -397,10 +411,7 @@ class RAGAnything:
        # Step 1: Parse document using MinerU
        content_list, md_content = self.parse_document(
-            file_path, 
+            file_path, output_dir, parse_method, display_stats
            output_dir, 
            parse_method,
            display_stats
        )
        # Step 2: Separate text and multimodal content
@ -414,7 +425,7 @@ class RAGAnything:
                file_paths=file_name,
                split_by_character=split_by_character,
                split_by_character_only=split_by_character_only,
-                ids=doc_id
+                ids=doc_id,
            )
        # Step 4: Process multimodal content (using specialized processors)
@ -433,7 +444,7 @@ class RAGAnything:
        split_by_character_only: bool = False,
        file_extensions: Optional[List[str]] = None,
        recursive: bool = True,
-        max_workers: int = 1
+        max_workers: int = 1,
    ):
        """
        Process all files in a folder in batch
@ -454,12 +465,25 @@ class RAGAnything:
        folder_path = Path(folder_path)
        if not folder_path.exists() or not folder_path.is_dir():
-            raise ValueError(f"Folder does not exist or is not a valid directory: {folder_path}")
+            raise ValueError(
                f"Folder does not exist or is not a valid directory: {folder_path}"
            )
        # Supported file formats
        supported_extensions = {
-            ".pdf", ".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif",
+            ".pdf",
-            ".doc", ".docx", ".ppt", ".pptx", ".txt", ".md"
+            ".jpg",
            ".jpeg",
            ".png",
            ".bmp",
            ".tiff",
            ".tif",
            ".doc",
            ".docx",
            ".ppt",
            ".pptx",
            ".txt",
            ".md",
        }
        # Use specified extensions or all supported formats
@ -468,7 +492,9 @@ class RAGAnything:
            # Validate if all are supported formats
            unsupported = target_extensions - supported_extensions
            if unsupported:
-                self.logger.warning(f"The following file formats may not be fully supported: {unsupported}")
+                self.logger.warning(
                    f"The following file formats may not be fully supported: {unsupported}"
                )
        else:
            target_extensions = supported_extensions
@ -478,12 +504,18 @@ class RAGAnything:
        if recursive:
            # Recursively traverse all subfolders
            for file_path in folder_path.rglob("*"):
-                if file_path.is_file() and file_path.suffix.lower() in target_extensions:
+                if (
                    file_path.is_file()
                    and file_path.suffix.lower() in target_extensions
                ):
                    files_to_process.append(file_path)
        else:
            # Process only current folder
            for file_path in folder_path.glob("*"):
-                if file_path.is_file() and file_path.suffix.lower() in target_extensions:
+                if (
                    file_path.is_file()
                    and file_path.suffix.lower() in target_extensions
                ):
                    files_to_process.append(file_path)
        if not files_to_process:
@ -491,7 +523,7 @@ class RAGAnything:
            return
        self.logger.info(f"Found {len(files_to_process)} files to process")
-        self.logger.info(f"File type distribution:")
+        self.logger.info("File type distribution:")
        # Count file types
        file_type_count = {}
@ -514,7 +546,9 @@ class RAGAnything:
            async with semaphore:
                nonlocal processed_count
                try:
-                    self.logger.info(f"[{index}/{len(files_to_process)}] Processing: {file_path}")
+                    self.logger.info(
                        f"[{index}/{len(files_to_process)}] Processing: {file_path}"
                    )
                    # Create separate output directory for each file
                    file_output_dir = Path(output_dir) / file_path.stem
@ -527,14 +561,18 @@ class RAGAnything:
                        parse_method=parse_method,
                        display_stats=display_stats,
                        split_by_character=split_by_character,
-                        split_by_character_only=split_by_character_only
+                        split_by_character_only=split_by_character_only,
                    )
                    processed_count += 1
-                    self.logger.info(f"[{index}/{len(files_to_process)}] Successfully processed: {file_path}")
+                    self.logger.info(
                        f"[{index}/{len(files_to_process)}] Successfully processed: {file_path}"
                    )
                except Exception as e:
-                    self.logger.error(f"[{index}/{len(files_to_process)}] Failed to process: {file_path}")
+                    self.logger.error(
                        f"[{index}/{len(files_to_process)}] Failed to process: {file_path}"
                    )
                    self.logger.error(f"Error: {str(e)}")
                    failed_files.append((file_path, str(e)))
@ -562,14 +600,10 @@ class RAGAnything:
            "total": len(files_to_process),
            "success": processed_count,
            "failed": len(failed_files),
-            "failed_files": failed_files
+            "failed_files": failed_files,
        }
-    async def query_with_multimodal(
+    async def query_with_multimodal(self, query: str, mode: str = "hybrid") -> str:
        self, 
        query: str, 
        mode: str = "hybrid"
    ) -> str:
        """
        Query with multimodal content support
@ -589,10 +623,7 @@ class RAGAnything:
                "to create and populate the LightRAG instance."
            )
-        result = await self.lightrag.aquery(
+        result = await self.lightrag.aquery(query, param=QueryParam(mode=mode))
            query,
            param=QueryParam(mode=mode)
        )
        return result
@ -605,16 +636,22 @@ class RAGAnything:
            "status": "Initialized",
            "processors": {},
            "models": {
-                "llm_model": "External function" if self.llm_model_func else "Not provided",
+                "llm_model": "External function"
-                "vision_model": "External function" if self.vision_model_func else "Not provided",
+                if self.llm_model_func
-                "embedding_model": "External function" if self.embedding_func else "Not provided"
+                else "Not provided",
-            }
+                "vision_model": "External function"
                if self.vision_model_func
                else "Not provided",
                "embedding_model": "External function"
                if self.embedding_func
                else "Not provided",
            },
        }
        for proc_type, processor in self.modal_processors.items():
            info["processors"][proc_type] = {
                "class": processor.__class__.__name__,
-                "supports": self._get_processor_supports(proc_type)
+                "supports": self._get_processor_supports(proc_type),
            }
        return info
@ -622,11 +659,28 @@ class RAGAnything:
    def _get_processor_supports(self, proc_type: str) -> List[str]:
        """Get processor supported features"""
        supports_map = {
-            "image": ["Image content analysis", "Visual understanding", "Image description generation", "Image entity extraction"],
+            "image": [
-            "table": ["Table structure analysis", "Data statistics", "Trend identification", "Table entity extraction"],
+                "Image content analysis",
-            "equation": ["Mathematical formula parsing", "Variable identification", "Formula meaning explanation", "Formula entity extraction"],
+                "Visual understanding",
-            "generic": ["General content analysis", "Structured processing", "Entity extraction"]
+                "Image description generation",
                "Image entity extraction",
            ],
            "table": [
                "Table structure analysis",
                "Data statistics",
                "Trend identification",
                "Table entity extraction",
            ],
            "equation": [
                "Mathematical formula parsing",
                "Variable identification",
                "Formula meaning explanation",
                "Formula entity extraction",
            ],
            "generic": [
                "General content analysis",
                "Structured processing",
                "Entity extraction",
            ],
        }
        return supports_map.get(proc_type, ["Basic processing"])