From 72b29659c94e0dc9874ce34531ec383547e98e3f Mon Sep 17 00:00:00 2001 From: yangdx Date: Wed, 29 Oct 2025 13:33:21 +0800 Subject: [PATCH] Fix worker process cleanup to prevent shared resource conflicts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit • Add worker_exit hook in gunicorn config • Add shutdown_manager parameter in finalize_share_data of shared_storage • Prevent Manager shutdown in workers • Remove custom signal handlers --- lightrag/api/gunicorn_config.py | 21 +++++++++++++++++++++ lightrag/api/lightrag_server.py | 25 ++++--------------------- lightrag/api/run_with_gunicorn.py | 24 +++++------------------- lightrag/kg/shared_storage.py | 10 +++++++--- 4 files changed, 37 insertions(+), 43 deletions(-) diff --git a/lightrag/api/gunicorn_config.py b/lightrag/api/gunicorn_config.py index 7b25b5b9..a19a0b39 100644 --- a/lightrag/api/gunicorn_config.py +++ b/lightrag/api/gunicorn_config.py @@ -162,3 +162,24 @@ def post_fork(server, worker): uvicorn_error_logger.handlers = [] uvicorn_error_logger.setLevel(logging.CRITICAL) uvicorn_error_logger.propagate = False + + +def worker_exit(server, worker): + """ + Executed when a worker is about to exit. + + This is called for each worker process when it exits. We should only + clean up worker-local resources here, NOT the shared Manager. + The Manager should only be shut down by the main process in on_exit(). 
+ """ + print("=" * 80) + print(f"GUNICORN WORKER PROCESS: Shutting down worker {worker.pid}") + print(f"Process ID: {os.getpid()}") + print("=" * 80) + + # Clean up worker-local resources without shutting down the Manager + # Pass shutdown_manager=False to prevent Manager shutdown + finalize_share_data(shutdown_manager=False) + + print(f"Worker {worker.pid} cleanup complete") + print("=" * 80) diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index 4dd5edaa..cee831d0 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -12,7 +12,6 @@ from fastapi.openapi.docs import ( import os import logging import logging.config -import signal import sys import uvicorn import pipmaster as pm @@ -82,24 +81,6 @@ config.read("config.ini") auth_configured = bool(auth_handler.accounts) -def setup_signal_handlers(): - """Setup signal handlers for graceful shutdown""" - - def signal_handler(sig, frame): - print(f"\n\nReceived signal {sig}, shutting down gracefully...") - print(f"Process ID: {os.getpid()}") - - # Release shared resources - finalize_share_data() - - # Exit with success status - sys.exit(0) - - # Register signal handlers - signal.signal(signal.SIGINT, signal_handler) # Ctrl+C - signal.signal(signal.SIGTERM, signal_handler) # kill command - - class LLMConfigCache: """Smart LLM and Embedding configuration cache class""" @@ -1108,8 +1089,10 @@ def main(): update_uvicorn_mode_config() display_splash_screen(global_args) - # Setup signal handlers for graceful shutdown - setup_signal_handlers() + # Note: Signal handlers are NOT registered here because: + # - Uvicorn has built-in signal handling that properly calls lifespan shutdown + # - Custom signal handlers can interfere with uvicorn's graceful shutdown + # - Cleanup is handled by the lifespan context manager's finally block # Create application instance directly instead of using factory function app = create_app(global_args) diff --git 
a/lightrag/api/run_with_gunicorn.py b/lightrag/api/run_with_gunicorn.py index 929db019..5ad0c5b5 100644 --- a/lightrag/api/run_with_gunicorn.py +++ b/lightrag/api/run_with_gunicorn.py @@ -5,12 +5,11 @@ Start LightRAG server with Gunicorn import os import sys -import signal import pipmaster as pm from lightrag.api.utils_api import display_splash_screen, check_env_file from lightrag.api.config import global_args from lightrag.utils import get_env_value -from lightrag.kg.shared_storage import initialize_share_data, finalize_share_data +from lightrag.kg.shared_storage import initialize_share_data from lightrag.constants import ( DEFAULT_WOKERS, @@ -34,20 +33,6 @@ def check_and_install_dependencies(): print(f"{package} installed successfully") -# Signal handler for graceful shutdown -def signal_handler(sig, frame): - print("\n\n" + "=" * 80) - print("RECEIVED TERMINATION SIGNAL") - print(f"Process ID: {os.getpid()}") - print("=" * 80 + "\n") - - # Release shared resources - finalize_share_data() - - # Exit with success status - sys.exit(0) - - def main(): # Check .env file if not check_env_file(): @@ -56,9 +41,10 @@ def main(): # Check and install dependencies check_and_install_dependencies() - # Register signal handlers for graceful shutdown - signal.signal(signal.SIGINT, signal_handler) # Ctrl+C - signal.signal(signal.SIGTERM, signal_handler) # kill command + # Note: Signal handlers are NOT registered here because: + # - Worker cleanup is handled by gunicorn_config.worker_exit() + # - Master cleanup is handled by gunicorn_config.on_exit() + # This prevents race conditions when multiple processes try to finalize shared data # Display startup information display_splash_screen(global_args) diff --git a/lightrag/kg/shared_storage.py b/lightrag/kg/shared_storage.py index 33d43bfa..e7c170d8 100644 --- a/lightrag/kg/shared_storage.py +++ b/lightrag/kg/shared_storage.py @@ -1443,7 +1443,7 @@ async def get_namespace_data( return _shared_dicts[namespace] -def 
finalize_share_data(): +def finalize_share_data(shutdown_manager: bool = True): """ Release shared resources and clean up. @@ -1452,6 +1452,10 @@ def finalize_share_data(): In multi-process mode, it shuts down the Manager and releases all shared objects. In single-process mode, it simply resets the global variables. + + Args: + shutdown_manager: If True, shut down the multiprocessing Manager. + Should be True only for the main process, False for worker processes. """ global \ _manager, \ @@ -1478,8 +1482,8 @@ def finalize_share_data(): f"Process {os.getpid()} finalizing storage data (multiprocess={_is_multiprocess})" ) - # In multi-process mode, shut down the Manager - if _is_multiprocess and _manager is not None: + # In multi-process mode, shut down the Manager only if requested + if _is_multiprocess and _manager is not None and shutdown_manager: try: # Clear shared resources before shutting down Manager if _shared_dicts is not None: