Improve docling integration with macOS compatibility and CLI flag
- Add --docling CLI flag for easier setup
- Add numpy version constraints
- Exclude docling on macOS (fork-safety)
(cherry picked from commit c246eff725)
This commit is contained in:
parent
95d47566c1
commit
9cf7476dd4
1 changed files with 125 additions and 41 deletions
|
|
@ -8,6 +8,8 @@ import logging
|
|||
from dotenv import load_dotenv
|
||||
from lightrag.utils import get_env_value
|
||||
from lightrag.llm.binding_options import (
|
||||
GeminiEmbeddingOptions,
|
||||
GeminiLLMOptions,
|
||||
OllamaEmbeddingOptions,
|
||||
OllamaLLMOptions,
|
||||
OpenAILLMOptions,
|
||||
|
|
@ -41,14 +43,6 @@ from lightrag.constants import (
|
|||
DEFAULT_ENTITY_TYPES,
|
||||
)
|
||||
|
||||
# Multi-tenant security configuration
|
||||
# When True, enforces tenant context on all data endpoints (SEC-001 fix)
|
||||
MULTI_TENANT_STRICT_MODE = get_env_value("LIGHTRAG_MULTI_TENANT_STRICT", True, bool)
|
||||
# When True, requires user authentication for tenant access (SEC-003 fix)
|
||||
REQUIRE_USER_AUTH = get_env_value("LIGHTRAG_REQUIRE_USER_AUTH", True, bool)
|
||||
# Comma-separated list of super-admin usernames (SEC-002 fix)
|
||||
SUPER_ADMIN_USERS = get_env_value("LIGHTRAG_SUPER_ADMIN_USERS", "admin")
|
||||
|
||||
# use the .env that is inside the current folder
|
||||
# allows to use different .env file for each lightrag instance
|
||||
# the OS environment variables take precedence over the .env file
|
||||
|
|
@ -71,23 +65,15 @@ def get_default_host(binding_type: str) -> str:
|
|||
"lollms": os.getenv("LLM_BINDING_HOST", "http://localhost:9600"),
|
||||
"azure_openai": os.getenv("AZURE_OPENAI_ENDPOINT", "https://api.openai.com/v1"),
|
||||
"openai": os.getenv("LLM_BINDING_HOST", "https://api.openai.com/v1"),
|
||||
"gemini": os.getenv(
|
||||
"LLM_BINDING_HOST", "https://generativelanguage.googleapis.com"
|
||||
),
|
||||
}
|
||||
return default_hosts.get(
|
||||
binding_type, os.getenv("LLM_BINDING_HOST", "http://localhost:11434")
|
||||
) # fallback to ollama if unknown
|
||||
|
||||
|
||||
def _is_running_under_test() -> bool:
|
||||
"""Check if we're running under pytest or other test framework."""
|
||||
return (
|
||||
"pytest" in sys.modules or
|
||||
"py.test" in sys.modules or
|
||||
"_pytest" in sys.modules or
|
||||
os.getenv("PYTEST_CURRENT_TEST") is not None or
|
||||
any("pytest" in arg for arg in sys.argv)
|
||||
)
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
"""
|
||||
Parse command line arguments with environment variable fallback
|
||||
|
|
@ -98,8 +84,6 @@ def parse_args() -> argparse.Namespace:
|
|||
Returns:
|
||||
argparse.Namespace: Parsed arguments
|
||||
"""
|
||||
# When running under pytest, use parse_known_args to avoid conflicts
|
||||
use_known_args = _is_running_under_test()
|
||||
|
||||
parser = argparse.ArgumentParser(description="LightRAG API Server")
|
||||
|
||||
|
|
@ -239,7 +223,7 @@ def parse_args() -> argparse.Namespace:
|
|||
parser.add_argument(
|
||||
"--llm-binding",
|
||||
type=str,
|
||||
default=get_env_value("LLM_BINDING", "openai"),
|
||||
default=get_env_value("LLM_BINDING", "ollama"),
|
||||
choices=[
|
||||
"lollms",
|
||||
"ollama",
|
||||
|
|
@ -247,15 +231,24 @@ def parse_args() -> argparse.Namespace:
|
|||
"openai-ollama",
|
||||
"azure_openai",
|
||||
"aws_bedrock",
|
||||
"gemini",
|
||||
],
|
||||
help="LLM binding type (default: from env or openai)",
|
||||
help="LLM binding type (default: from env or ollama)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--embedding-binding",
|
||||
type=str,
|
||||
default=get_env_value("EMBEDDING_BINDING", "openai"),
|
||||
choices=["lollms", "ollama", "openai", "azure_openai", "aws_bedrock", "jina"],
|
||||
help="Embedding binding type (default: from env or openai)",
|
||||
default=get_env_value("EMBEDDING_BINDING", "ollama"),
|
||||
choices=[
|
||||
"lollms",
|
||||
"ollama",
|
||||
"openai",
|
||||
"azure_openai",
|
||||
"aws_bedrock",
|
||||
"jina",
|
||||
"gemini",
|
||||
],
|
||||
help="Embedding binding type (default: from env or ollama)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--rerank-binding",
|
||||
|
|
@ -289,12 +282,19 @@ def parse_args() -> argparse.Namespace:
|
|||
if "--embedding-binding" in sys.argv:
|
||||
try:
|
||||
idx = sys.argv.index("--embedding-binding")
|
||||
if idx + 1 < len(sys.argv) and sys.argv[idx + 1] == "ollama":
|
||||
OllamaEmbeddingOptions.add_args(parser)
|
||||
if idx + 1 < len(sys.argv):
|
||||
if sys.argv[idx + 1] == "ollama":
|
||||
OllamaEmbeddingOptions.add_args(parser)
|
||||
elif sys.argv[idx + 1] == "gemini":
|
||||
GeminiEmbeddingOptions.add_args(parser)
|
||||
except IndexError:
|
||||
pass
|
||||
elif os.environ.get("EMBEDDING_BINDING") == "ollama":
|
||||
OllamaEmbeddingOptions.add_args(parser)
|
||||
else:
|
||||
env_embedding_binding = os.environ.get("EMBEDDING_BINDING")
|
||||
if env_embedding_binding == "ollama":
|
||||
OllamaEmbeddingOptions.add_args(parser)
|
||||
elif env_embedding_binding == "gemini":
|
||||
GeminiEmbeddingOptions.add_args(parser)
|
||||
|
||||
# Add OpenAI LLM options when llm-binding is openai or azure_openai
|
||||
if "--llm-binding" in sys.argv:
|
||||
|
|
@ -310,11 +310,17 @@ def parse_args() -> argparse.Namespace:
|
|||
elif os.environ.get("LLM_BINDING") in ["openai", "azure_openai"]:
|
||||
OpenAILLMOptions.add_args(parser)
|
||||
|
||||
# Use parse_known_args under test frameworks to avoid argument conflicts
|
||||
if use_known_args:
|
||||
args, _ = parser.parse_known_args()
|
||||
else:
|
||||
args = parser.parse_args()
|
||||
if "--llm-binding" in sys.argv:
|
||||
try:
|
||||
idx = sys.argv.index("--llm-binding")
|
||||
if idx + 1 < len(sys.argv) and sys.argv[idx + 1] == "gemini":
|
||||
GeminiLLMOptions.add_args(parser)
|
||||
except IndexError:
|
||||
pass
|
||||
elif os.environ.get("LLM_BINDING") == "gemini":
|
||||
GeminiLLMOptions.add_args(parser)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# convert relative path to absolute path
|
||||
args.working_dir = os.path.abspath(args.working_dir)
|
||||
|
|
@ -361,6 +367,7 @@ def parse_args() -> argparse.Namespace:
|
|||
args.llm_model = get_env_value("LLM_MODEL", "mistral-nemo:latest")
|
||||
args.embedding_model = get_env_value("EMBEDDING_MODEL", "bge-m3:latest")
|
||||
args.embedding_dim = get_env_value("EMBEDDING_DIM", 1024, int)
|
||||
args.embedding_send_dim = get_env_value("EMBEDDING_SEND_DIM", False, bool)
|
||||
|
||||
# Inject chunk configuration
|
||||
args.chunk_size = get_env_value("CHUNK_SIZE", 1200, int)
|
||||
|
|
@ -380,6 +387,9 @@ def parse_args() -> argparse.Namespace:
|
|||
"DOCUMENT_LOADING_ENGINE", "DEFAULT"
|
||||
)
|
||||
|
||||
# PDF decryption password
|
||||
args.pdf_decrypt_password = get_env_value("PDF_DECRYPT_PASSWORD", None)
|
||||
|
||||
# Add environment variables that were previously read directly
|
||||
args.cors_origins = get_env_value("CORS_ORIGINS", "*")
|
||||
args.summary_language = get_env_value("SUMMARY_LANGUAGE", DEFAULT_SUMMARY_LANGUAGE)
|
||||
|
|
@ -388,11 +398,6 @@ def parse_args() -> argparse.Namespace:
|
|||
|
||||
# For JWT Auth
|
||||
args.auth_accounts = get_env_value("AUTH_ACCOUNTS", "")
|
||||
if not args.auth_accounts:
|
||||
auth_user = get_env_value("AUTH_USER", "")
|
||||
auth_pass = get_env_value("AUTH_PASS", "")
|
||||
if auth_user and auth_pass:
|
||||
args.auth_accounts = f"{auth_user}:{auth_pass}"
|
||||
args.token_secret = get_env_value("TOKEN_SECRET", "lightrag-jwt-default-secret")
|
||||
args.token_expire_hours = get_env_value("TOKEN_EXPIRE_HOURS", 48, int)
|
||||
args.guest_token_expire_hours = get_env_value("GUEST_TOKEN_EXPIRE_HOURS", 24, int)
|
||||
|
|
@ -457,4 +462,83 @@ def update_uvicorn_mode_config():
|
|||
)
|
||||
|
||||
|
||||
global_args = parse_args()
|
||||
# Global configuration with lazy initialization
|
||||
_global_args = None
|
||||
_initialized = False
|
||||
|
||||
|
||||
def initialize_config(args=None, force=False):
|
||||
"""Initialize global configuration
|
||||
|
||||
This function allows explicit initialization of the configuration,
|
||||
which is useful for programmatic usage, testing, or embedding LightRAG
|
||||
in other applications.
|
||||
|
||||
Args:
|
||||
args: Pre-parsed argparse.Namespace or None to parse from sys.argv
|
||||
force: Force re-initialization even if already initialized
|
||||
|
||||
Returns:
|
||||
argparse.Namespace: The configured arguments
|
||||
|
||||
Example:
|
||||
# Use parsed command line arguments (default)
|
||||
initialize_config()
|
||||
|
||||
# Use custom configuration programmatically
|
||||
custom_args = argparse.Namespace(
|
||||
host='localhost',
|
||||
port=8080,
|
||||
working_dir='./custom_rag',
|
||||
# ... other config
|
||||
)
|
||||
initialize_config(custom_args)
|
||||
"""
|
||||
global _global_args, _initialized
|
||||
|
||||
if _initialized and not force:
|
||||
return _global_args
|
||||
|
||||
_global_args = args if args is not None else parse_args()
|
||||
_initialized = True
|
||||
return _global_args
|
||||
|
||||
|
||||
def get_config():
|
||||
"""Get global configuration, auto-initializing if needed
|
||||
|
||||
Returns:
|
||||
argparse.Namespace: The configured arguments
|
||||
"""
|
||||
if not _initialized:
|
||||
initialize_config()
|
||||
return _global_args
|
||||
|
||||
|
||||
class _GlobalArgsProxy:
|
||||
"""Proxy object that auto-initializes configuration on first access
|
||||
|
||||
This maintains backward compatibility with existing code while
|
||||
allowing programmatic control over initialization timing.
|
||||
"""
|
||||
|
||||
def __getattr__(self, name):
|
||||
if not _initialized:
|
||||
initialize_config()
|
||||
return getattr(_global_args, name)
|
||||
|
||||
def __setattr__(self, name, value):
|
||||
if not _initialized:
|
||||
initialize_config()
|
||||
setattr(_global_args, name, value)
|
||||
|
||||
def __repr__(self):
|
||||
if not _initialized:
|
||||
return "<GlobalArgsProxy: Not initialized>"
|
||||
return repr(_global_args)
|
||||
|
||||
|
||||
# Create proxy instance for backward compatibility
|
||||
# Existing code like `from config import global_args` continues to work
|
||||
# The proxy will auto-initialize on first attribute access
|
||||
global_args = _GlobalArgsProxy()
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue