diff --git a/lightrag/api/config.py b/lightrag/api/config.py index 4f59d3c1..4d8ab1e1 100644 --- a/lightrag/api/config.py +++ b/lightrag/api/config.py @@ -365,8 +365,12 @@ def parse_args() -> argparse.Namespace: # Inject model configuration args.llm_model = get_env_value("LLM_MODEL", "mistral-nemo:latest") - args.embedding_model = get_env_value("EMBEDDING_MODEL", "bge-m3:latest") - args.embedding_dim = get_env_value("EMBEDDING_DIM", 1024, int) + # EMBEDDING_MODEL defaults to None - each binding will use its own default model + # e.g., OpenAI uses "text-embedding-3-small", Jina uses "jina-embeddings-v4" + args.embedding_model = get_env_value("EMBEDDING_MODEL", None, special_none=True) + # EMBEDDING_DIM defaults to None - each binding will use its own default dimension + # Value is inherited from provider defaults via wrap_embedding_func_with_attrs decorator + args.embedding_dim = get_env_value("EMBEDDING_DIM", None, int, special_none=True) args.embedding_send_dim = get_env_value("EMBEDDING_SEND_DIM", False, bool) # Inject chunk configuration diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index fc3bb67e..930358a7 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -56,7 +56,8 @@ from lightrag.api.routers.ollama_api import OllamaAPI from lightrag.utils import logger, set_verbose_debug from lightrag.kg.shared_storage import ( get_namespace_data, - initialize_pipeline_status, + get_default_workspace, + # set_default_workspace, cleanup_keyed_lock, finalize_share_data, ) @@ -158,19 +159,22 @@ def check_frontend_build(): """Check if frontend is built and optionally check if source is up-to-date Returns: - bool: True if frontend is outdated, False if up-to-date or production environment + tuple: (assets_exist: bool, is_outdated: bool) + - assets_exist: True if WebUI build files exist + - is_outdated: True if source is newer than build (only in dev environment) """ webui_dir = Path(__file__).parent / "webui" index_html = webui_dir / "index.html" - # 1. Check if build files exist (required) + # 1. Check if build files exist if not index_html.exists(): - ASCIIColors.red("\n" + "=" * 80) - ASCIIColors.red("ERROR: Frontend Not Built") - ASCIIColors.red("=" * 80) + ASCIIColors.yellow("\n" + "=" * 80) + ASCIIColors.yellow("WARNING: Frontend Not Built") + ASCIIColors.yellow("=" * 80) ASCIIColors.yellow("The WebUI frontend has not been built yet.") + ASCIIColors.yellow("The API server will start without the WebUI interface.") ASCIIColors.yellow( - "Please build the frontend code first using the following commands:\n" + "\nTo enable WebUI, build the frontend using these commands:\n" ) ASCIIColors.cyan(" cd lightrag_webui") ASCIIColors.cyan(" bun install --frozen-lockfile") @@ -180,8 +184,8 @@ def check_frontend_build(): ASCIIColors.cyan( "Note: Make sure you have Bun installed. Visit https://bun.sh for installation." ) - ASCIIColors.red("=" * 80 + "\n") - sys.exit(1) # Exit immediately + ASCIIColors.yellow("=" * 80 + "\n") + return (False, False) # Assets don't exist, not outdated # 2. Check if this is a development environment (source directory exists) try: @@ -194,7 +198,7 @@ def check_frontend_build(): logger.debug( "Production environment detected, skipping source freshness check" ) - return False + return (True, False) # Assets exist, not outdated (prod environment) # Development environment, perform source code timestamp check logger.debug("Development environment detected, checking source freshness") @@ -269,20 +273,20 @@ def check_frontend_build(): ASCIIColors.cyan(" cd ..") ASCIIColors.yellow("\nThe server will continue with the current build.") ASCIIColors.yellow("=" * 80 + "\n") - return True # Frontend is outdated + return (True, True) # Assets exist, outdated else: logger.info("Frontend build is up-to-date") - return False # Frontend is up-to-date + return (True, False) # Assets exist, up-to-date except Exception as e: # If check fails, log warning but don't affect startup logger.warning(f"Failed to check frontend source freshness: {e}") - return False # Assume up-to-date on error + return (True, False) # Assume assets exist and up-to-date on error def create_app(args): - # Check frontend build first and get outdated status - is_frontend_outdated = check_frontend_build() + # Check frontend build first and get status + webui_assets_exist, is_frontend_outdated = check_frontend_build() # Create unified API version display with warning symbol if frontend is outdated api_version_display = ( @@ -350,8 +354,8 @@ def create_app(args): try: # Initialize database connections + # Note: initialize_storages() now auto-initializes pipeline_status for rag.workspace await rag.initialize_storages() - await initialize_pipeline_status() # Data migration regardless of storage implementation await rag.check_and_migrate_data() @@ -452,7 +456,7 @@ def create_app(args): # Create combined auth dependency for all endpoints combined_auth = get_combined_auth_dependency(api_key) - def get_workspace_from_request(request: Request) -> str: + def get_workspace_from_request(request: Request) -> str | None: """ Extract workspace from HTTP request header or use default. @@ -469,9 +473,8 @@ def create_app(args): # Check custom header first workspace = request.headers.get("LIGHTRAG-WORKSPACE", "").strip() - # Fall back to server default if header not provided if not workspace: - workspace = args.workspace + workspace = None return workspace @@ -710,6 +713,7 @@ def create_app(args): ) # Step 3: Create optimized embedding function (calls underlying function directly) + # Note: When model is None, each binding will use its own default model async def optimized_embedding_function(texts, embedding_dim=None): try: if binding == "lollms": @@ -721,9 +725,9 @@ def create_app(args): if isinstance(lollms_embed, EmbeddingFunc) else lollms_embed ) - return await actual_func( - texts, embed_model=model, host=host, api_key=api_key - ) + # lollms embed_model is not used (server uses configured vectorizer) + # Only pass base_url and api_key + return await actual_func(texts, base_url=host, api_key=api_key) elif binding == "ollama": from lightrag.llm.ollama import ollama_embed @@ -742,13 +746,16 @@ def create_app(args): ollama_options = OllamaEmbeddingOptions.options_dict(args) - return await actual_func( - texts, - embed_model=model, - host=host, - api_key=api_key, - options=ollama_options, - ) + # Pass embed_model only if provided, let function use its default (bge-m3:latest) + kwargs = { + "texts": texts, + "host": host, + "api_key": api_key, + "options": ollama_options, + } + if model: + kwargs["embed_model"] = model + return await actual_func(**kwargs) elif binding == "azure_openai": from lightrag.llm.azure_openai import azure_openai_embed @@ -757,7 +764,11 @@ def create_app(args): if isinstance(azure_openai_embed, EmbeddingFunc) else azure_openai_embed ) - return await actual_func(texts, model=model, api_key=api_key) + # Pass model only if provided, let function use its default otherwise + kwargs = {"texts": texts, "api_key": api_key} + if model: + kwargs["model"] = model + return await actual_func(**kwargs) elif binding == "aws_bedrock": from lightrag.llm.bedrock import bedrock_embed @@ -766,7 +777,11 @@ def create_app(args): if isinstance(bedrock_embed, EmbeddingFunc) else bedrock_embed ) - return await actual_func(texts, model=model) + # Pass model only if provided, let function use its default otherwise + kwargs = {"texts": texts} + if model: + kwargs["model"] = model + return await actual_func(**kwargs) elif binding == "jina": from lightrag.llm.jina import jina_embed @@ -775,12 +790,16 @@ def create_app(args): if isinstance(jina_embed, EmbeddingFunc) else jina_embed ) - return await actual_func( - texts, - embedding_dim=embedding_dim, - base_url=host, - api_key=api_key, - ) + # Pass model only if provided, let function use its default (jina-embeddings-v4) + kwargs = { + "texts": texts, + "embedding_dim": embedding_dim, + "base_url": host, + "api_key": api_key, + } + if model: + kwargs["model"] = model + return await actual_func(**kwargs) elif binding == "gemini": from lightrag.llm.gemini import gemini_embed @@ -798,14 +817,19 @@ def create_app(args): gemini_options = GeminiEmbeddingOptions.options_dict(args) - return await actual_func( - texts, - model=model, - base_url=host, - api_key=api_key, - embedding_dim=embedding_dim, - task_type=gemini_options.get("task_type", "RETRIEVAL_DOCUMENT"), - ) + # Pass model only if provided, let function use its default (gemini-embedding-001) + kwargs = { + "texts": texts, + "base_url": host, + "api_key": api_key, + "embedding_dim": embedding_dim, + "task_type": gemini_options.get( + "task_type", "RETRIEVAL_DOCUMENT" + ), + } + if model: + kwargs["model"] = model + return await actual_func(**kwargs) else: # openai and compatible from lightrag.llm.openai import openai_embed @@ -814,13 +838,16 @@ def create_app(args): if isinstance(openai_embed, EmbeddingFunc) else openai_embed ) - return await actual_func( - texts, - model=model, - base_url=host, - api_key=api_key, - embedding_dim=embedding_dim, - ) + # Pass model only if provided, let function use its default (text-embedding-3-small) + kwargs = { + "texts": texts, + "base_url": host, + "api_key": api_key, + "embedding_dim": embedding_dim, + } + if model: + kwargs["model"] = model + return await actual_func(**kwargs) except ImportError as e: raise Exception(f"Failed to import {binding} embedding: {e}") @@ -929,11 +956,6 @@ def create_app(args): else: logger.info("Embedding max_token_size: not set (90% token warning disabled)") - # Set max_token_size if EMBEDDING_TOKEN_LIMIT is provided - if args.embedding_token_limit is not None: - embedding_func.max_token_size = args.embedding_token_limit - logger.info(f"Set embedding max_token_size to {args.embedding_token_limit}") - # Configure rerank function based on args.rerank_bindingparameter rerank_model_func = None if args.rerank_binding != "null": @@ -1072,8 +1094,11 @@ def create_app(args): @app.get("/") async def redirect_to_webui(): - """Redirect root path to /webui""" - return RedirectResponse(url="/webui") + """Redirect root path based on WebUI availability""" + if webui_assets_exist: + return RedirectResponse(url="/webui") + else: + return RedirectResponse(url="/docs") @app.get("/auth-status") async def get_auth_status(): @@ -1140,18 +1165,49 @@ def create_app(args): "webui_description": webui_description, } - @app.get("/health", dependencies=[Depends(combined_auth)]) + @app.get( + "/health", + dependencies=[Depends(combined_auth)], + summary="Get system health and configuration status", + description="Returns comprehensive system status including WebUI availability, configuration, and operational metrics", + response_description="System health status with configuration details", + responses={ + 200: { + "description": "Successful response with system status", + "content": { + "application/json": { + "example": { + "status": "healthy", + "webui_available": True, + "working_directory": "/path/to/working/dir", + "input_directory": "/path/to/input/dir", + "configuration": { + "llm_binding": "openai", + "llm_model": "gpt-4", + "embedding_binding": "openai", + "embedding_model": "text-embedding-ada-002", + "workspace": "default", + }, + "auth_mode": "enabled", + "pipeline_busy": False, + "core_version": "0.0.1", + "api_version": "0.0.1", + } + } + }, + } + }, + ) async def get_status(request: Request): - """Get current system status""" + """Get current system status including WebUI availability""" try: - # Extract workspace from request header or use default workspace = get_workspace_from_request(request) - - # Construct namespace (following GraphDB pattern) - namespace = f"{workspace}:pipeline" if workspace else "pipeline_status" - - # Get workspace-specific pipeline status - pipeline_status = await get_namespace_data(namespace) + default_workspace = get_default_workspace() + if workspace is None: + workspace = default_workspace + pipeline_status = await get_namespace_data( + "pipeline_status", workspace=workspace + ) if not auth_configured: auth_mode = "disabled" @@ -1163,6 +1219,7 @@ def create_app(args): return { "status": "healthy", + "webui_available": webui_assets_exist, "working_directory": str(args.working_dir), "input_directory": str(args.input_dir), "configuration": { @@ -1182,8 +1239,7 @@ def create_app(args): "vector_storage": args.vector_storage, "enable_llm_cache_for_extract": args.enable_llm_cache_for_extract, "enable_llm_cache": args.enable_llm_cache, - "workspace": workspace, - "default_workspace": args.workspace, + "workspace": default_workspace, "max_graph_nodes": args.max_graph_nodes, # Rerank configuration "enable_rerank": rerank_model_func is not None, @@ -1253,16 +1309,27 @@ def create_app(args): name="swagger-ui-static", ) - # Webui mount webui/index.html - static_dir = Path(__file__).parent / "webui" - static_dir.mkdir(exist_ok=True) - app.mount( - "/webui", - SmartStaticFiles( - directory=static_dir, html=True, check_dir=True - ), # Use SmartStaticFiles - name="webui", - ) + # Conditionally mount WebUI only if assets exist + if webui_assets_exist: + static_dir = Path(__file__).parent / "webui" + static_dir.mkdir(exist_ok=True) + app.mount( + "/webui", + SmartStaticFiles( + directory=static_dir, html=True, check_dir=True + ), # Use SmartStaticFiles + name="webui", + ) + logger.info("WebUI assets mounted at /webui") + else: + logger.info("WebUI assets not available, /webui route not mounted") + + # Add redirect for /webui when assets are not available + @app.get("/webui") + @app.get("/webui/") + async def webui_redirect_to_docs(): + """Redirect /webui to /docs when WebUI is not available""" + return RedirectResponse(url="/docs") return app diff --git a/lightrag/llm/ollama.py b/lightrag/llm/ollama.py index e35dc293..cd633e80 100644 --- a/lightrag/llm/ollama.py +++ b/lightrag/llm/ollama.py @@ -173,7 +173,9 @@ async def ollama_model_complete( @wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192) -async def ollama_embed(texts: list[str], embed_model, **kwargs) -> np.ndarray: +async def ollama_embed( + texts: list[str], embed_model: str = "bge-m3:latest", **kwargs +) -> np.ndarray: api_key = kwargs.pop("api_key", None) if not api_key: api_key = os.getenv("OLLAMA_API_KEY")