Improve docling integration with macOS compatibility and CLI flag
- Add --docling CLI flag for easier setup
- Add numpy version constraints
- Exclude docling on macOS (fork-safety)
(cherry picked from commit a24d8181c2)
This commit is contained in:
parent
033ee5c0f5
commit
95d47566c1
4 changed files with 2713 additions and 745 deletions
|
|
@ -265,6 +265,14 @@ def parse_args() -> argparse.Namespace:
|
||||||
help=f"Rerank binding type (default: from env or {DEFAULT_RERANK_BINDING})",
|
help=f"Rerank binding type (default: from env or {DEFAULT_RERANK_BINDING})",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Document loading engine configuration
|
||||||
|
parser.add_argument(
|
||||||
|
"--docling",
|
||||||
|
action="store_true",
|
||||||
|
default=False,
|
||||||
|
help="Enable DOCLING document loading engine (default: from env or DEFAULT)",
|
||||||
|
)
|
||||||
|
|
||||||
# Conditionally add binding options defined in binding_options module
|
# Conditionally add binding options defined in binding_options module
|
||||||
# This will add command line arguments for all binding options (e.g., --ollama-embedding-num_ctx)
|
# This will add command line arguments for all binding options (e.g., --ollama-embedding-num_ctx)
|
||||||
# and corresponding environment variables (e.g., OLLAMA_EMBEDDING_NUM_CTX)
|
# and corresponding environment variables (e.g., OLLAMA_EMBEDDING_NUM_CTX)
|
||||||
|
|
@ -364,8 +372,13 @@ def parse_args() -> argparse.Namespace:
|
||||||
)
|
)
|
||||||
args.enable_llm_cache = get_env_value("ENABLE_LLM_CACHE", True, bool)
|
args.enable_llm_cache = get_env_value("ENABLE_LLM_CACHE", True, bool)
|
||||||
|
|
||||||
# Select Document loading tool (DOCLING, DEFAULT)
|
# Set document_loading_engine: --docling flag wins, otherwise use the DOCUMENT_LOADING_ENGINE env value
|
||||||
args.document_loading_engine = get_env_value("DOCUMENT_LOADING_ENGINE", "DEFAULT")
|
if args.docling:
|
||||||
|
args.document_loading_engine = "DOCLING"
|
||||||
|
else:
|
||||||
|
args.document_loading_engine = get_env_value(
|
||||||
|
"DOCUMENT_LOADING_ENGINE", "DEFAULT"
|
||||||
|
)
|
||||||
|
|
||||||
# Add environment variables that were previously read directly
|
# Add environment variables that were previously read directly
|
||||||
args.cors_origins = get_env_value("CORS_ORIGINS", "*")
|
args.cors_origins = get_env_value("CORS_ORIGINS", "*")
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -24,10 +24,12 @@ dependencies = [
|
||||||
"aiohttp",
|
"aiohttp",
|
||||||
"configparser",
|
"configparser",
|
||||||
"future",
|
"future",
|
||||||
|
"google-api-core>=2.0.0,<3.0.0",
|
||||||
|
"google-genai>=1.0.0,<2.0.0",
|
||||||
"json_repair",
|
"json_repair",
|
||||||
"nano-vectordb",
|
"nano-vectordb",
|
||||||
"networkx",
|
"networkx",
|
||||||
"numpy",
|
"numpy>=1.24.0,<2.0.0",
|
||||||
"pandas>=2.0.0,<2.4.0",
|
"pandas>=2.0.0,<2.4.0",
|
||||||
"pipmaster",
|
"pipmaster",
|
||||||
"pydantic",
|
"pydantic",
|
||||||
|
|
@ -48,7 +50,7 @@ api = [
|
||||||
"json_repair",
|
"json_repair",
|
||||||
"nano-vectordb",
|
"nano-vectordb",
|
||||||
"networkx",
|
"networkx",
|
||||||
"numpy",
|
"numpy>=1.24.0,<2.0.0",
|
||||||
"openai>=1.0.0,<3.0.0",
|
"openai>=1.0.0,<3.0.0",
|
||||||
"pandas>=2.0.0,<2.4.0",
|
"pandas>=2.0.0,<2.4.0",
|
||||||
"pipmaster",
|
"pipmaster",
|
||||||
|
|
@ -59,6 +61,8 @@ api = [
|
||||||
"tenacity",
|
"tenacity",
|
||||||
"tiktoken",
|
"tiktoken",
|
||||||
"xlsxwriter>=3.1.0",
|
"xlsxwriter>=3.1.0",
|
||||||
|
"google-api-core>=2.0.0,<3.0.0",
|
||||||
|
"google-genai>=1.0.0,<2.0.0",
|
||||||
# API-specific dependencies
|
# API-specific dependencies
|
||||||
"aiofiles",
|
"aiofiles",
|
||||||
"ascii_colors",
|
"ascii_colors",
|
||||||
|
|
@ -75,18 +79,23 @@ api = [
|
||||||
"python-multipart",
|
"python-multipart",
|
||||||
"pytz",
|
"pytz",
|
||||||
"uvicorn",
|
"uvicorn",
|
||||||
|
"gunicorn",
|
||||||
|
# Document processing dependencies (required for API document upload functionality)
|
||||||
|
"openpyxl>=3.0.0,<4.0.0", # XLSX processing
|
||||||
|
"pycryptodome>=3.0.0,<4.0.0", # PDF encryption support
|
||||||
|
"pypdf>=6.1.0", # PDF processing
|
||||||
|
"python-docx>=0.8.11,<2.0.0", # DOCX processing
|
||||||
|
"python-pptx>=0.6.21,<2.0.0", # PPTX processing
|
||||||
|
]
|
||||||
|
|
||||||
|
# Advanced document processing engine (optional)
|
||||||
|
docling = [
|
||||||
|
# On macOS, PyTorch and frameworks that use Objective-C are not fork-safe,
|
||||||
|
# and are not compatible with gunicorn multi-worker mode
|
||||||
|
"docling>=2.0.0,<3.0.0; sys_platform != 'darwin'",
|
||||||
]
|
]
|
||||||
|
|
||||||
# Offline deployment dependencies (layered design for flexibility)
|
# Offline deployment dependencies (layered design for flexibility)
|
||||||
offline-docs = [
|
|
||||||
# Document processing dependencies
|
|
||||||
"openpyxl>=3.0.0,<4.0.0",
|
|
||||||
"pycryptodome>=3.0.0,<4.0.0",
|
|
||||||
"pypdf2>=3.0.0",
|
|
||||||
"python-docx>=0.8.11,<2.0.0",
|
|
||||||
"python-pptx>=0.6.21,<2.0.0",
|
|
||||||
]
|
|
||||||
|
|
||||||
offline-storage = [
|
offline-storage = [
|
||||||
# Storage backend dependencies
|
# Storage backend dependencies
|
||||||
"redis>=5.0.0,<8.0.0",
|
"redis>=5.0.0,<8.0.0",
|
||||||
|
|
@ -94,7 +103,7 @@ offline-storage = [
|
||||||
"pymilvus>=2.6.2,<3.0.0",
|
"pymilvus>=2.6.2,<3.0.0",
|
||||||
"pymongo>=4.0.0,<5.0.0",
|
"pymongo>=4.0.0,<5.0.0",
|
||||||
"asyncpg>=0.29.0,<1.0.0",
|
"asyncpg>=0.29.0,<1.0.0",
|
||||||
"qdrant-client>=1.7.0,<2.0.0",
|
"qdrant-client>=1.11.0,<2.0.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
offline-llm = [
|
offline-llm = [
|
||||||
|
|
@ -106,11 +115,22 @@ offline-llm = [
|
||||||
"aioboto3>=12.0.0,<16.0.0",
|
"aioboto3>=12.0.0,<16.0.0",
|
||||||
"voyageai>=0.2.0,<1.0.0",
|
"voyageai>=0.2.0,<1.0.0",
|
||||||
"llama-index>=0.9.0,<1.0.0",
|
"llama-index>=0.9.0,<1.0.0",
|
||||||
|
"google-api-core>=2.0.0,<3.0.0",
|
||||||
|
"google-genai>=1.0.0,<2.0.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
offline = [
|
offline = [
|
||||||
# Complete offline package (includes all offline dependencies)
|
# Complete offline package (includes api for document processing, plus storage and LLM)
|
||||||
"lightrag-hku[offline-docs,offline-storage,offline-llm]",
|
"lightrag-hku[api,offline-storage,offline-llm]",
|
||||||
|
]
|
||||||
|
|
||||||
|
evaluation = [
|
||||||
|
# RAG evaluation dependencies (RAGAS framework)
|
||||||
|
"ragas>=0.3.7",
|
||||||
|
"datasets>=4.3.0",
|
||||||
|
"httpx>=0.28.1",
|
||||||
|
"pytest>=8.4.2",
|
||||||
|
"pytest-asyncio>=1.2.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
observability = [
|
observability = [
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue