Fix worker process cleanup to prevent shared resource conflicts

• Add worker_exit hook in gunicorn config
• Add shutdown_manager parameter to finalize_share_data in shared_storage
• Prevent Manager shutdown in workers
• Remove custom signal handlers
This commit is contained in:
yangdx 2025-10-29 13:33:21 +08:00
parent 0692175c7b
commit 72b29659c9
4 changed files with 37 additions and 43 deletions

View file

@ -162,3 +162,24 @@ def post_fork(server, worker):
uvicorn_error_logger.handlers = []
uvicorn_error_logger.setLevel(logging.CRITICAL)
uvicorn_error_logger.propagate = False
def worker_exit(server, worker):
    """
    Gunicorn hook invoked as an individual worker process is exiting.

    Only worker-local state is released here. The shared Manager is left
    running on purpose: it is meant to be shut down by the main process
    in on_exit(), not by any single worker.

    Args:
        server: The server object passed in by the hook caller (unused here).
        worker: The exiting worker; its ``pid`` attribute is reported.
    """
    banner = "=" * 80

    print(banner)
    print(f"GUNICORN WORKER PROCESS: Shutting down worker {worker.pid}")
    print(f"Process ID: {os.getpid()}")
    print(banner)

    # shutdown_manager=False keeps the Manager alive while this worker
    # releases its own resources.
    finalize_share_data(shutdown_manager=False)

    print(f"Worker {worker.pid} cleanup complete")
    print(banner)

View file

@ -12,7 +12,6 @@ from fastapi.openapi.docs import (
import os
import logging
import logging.config
import signal
import sys
import uvicorn
import pipmaster as pm
@ -82,24 +81,6 @@ config.read("config.ini")
auth_configured = bool(auth_handler.accounts)
def setup_signal_handlers():
    """Install SIGINT and SIGTERM handlers that release shared data and exit."""

    def _graceful_exit(sig, frame):
        # Report which signal arrived and which process is handling it.
        print(f"\n\nReceived signal {sig}, shutting down gracefully...")
        print(f"Process ID: {os.getpid()}")

        # Release shared resources, then terminate with a success status.
        finalize_share_data()
        sys.exit(0)

    # SIGINT is Ctrl+C; SIGTERM is the default signal sent by `kill`.
    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, _graceful_exit)
class LLMConfigCache:
"""Smart LLM and Embedding configuration cache class"""
@ -1108,8 +1089,10 @@ def main():
update_uvicorn_mode_config()
display_splash_screen(global_args)
# Setup signal handlers for graceful shutdown
setup_signal_handlers()
# Note: Signal handlers are NOT registered here because:
# - Uvicorn has built-in signal handling that properly calls lifespan shutdown
# - Custom signal handlers can interfere with uvicorn's graceful shutdown
# - Cleanup is handled by the lifespan context manager's finally block
# Create application instance directly instead of using factory function
app = create_app(global_args)

View file

@ -5,12 +5,11 @@ Start LightRAG server with Gunicorn
import os
import sys
import signal
import pipmaster as pm
from lightrag.api.utils_api import display_splash_screen, check_env_file
from lightrag.api.config import global_args
from lightrag.utils import get_env_value
from lightrag.kg.shared_storage import initialize_share_data, finalize_share_data
from lightrag.kg.shared_storage import initialize_share_data
from lightrag.constants import (
DEFAULT_WOKERS,
@ -34,20 +33,6 @@ def check_and_install_dependencies():
print(f"{package} installed successfully")
# Signal handler for graceful shutdown
def signal_handler(sig, frame):
    """
    Handle a termination signal: announce it, release shared data, exit.

    Args:
        sig: Number of the signal that was received (not used in the output).
        frame: Stack frame that was interrupted (unused).
    """
    banner = "=" * 80

    print("\n\n" + banner)
    print("RECEIVED TERMINATION SIGNAL")
    print(f"Process ID: {os.getpid()}")
    print(banner + "\n")

    # Release shared resources before leaving with a success status.
    finalize_share_data()
    sys.exit(0)
def main():
# Check .env file
if not check_env_file():
@ -56,9 +41,10 @@ def main():
# Check and install dependencies
check_and_install_dependencies()
# Register signal handlers for graceful shutdown
signal.signal(signal.SIGINT, signal_handler) # Ctrl+C
signal.signal(signal.SIGTERM, signal_handler) # kill command
# Note: Signal handlers are NOT registered here because:
# - Worker cleanup is handled by gunicorn_config.worker_exit()
# - Master cleanup is handled by gunicorn_config.on_exit()
# This prevents race conditions when multiple processes try to finalize shared data
# Display startup information
display_splash_screen(global_args)

View file

@ -1443,7 +1443,7 @@ async def get_namespace_data(
return _shared_dicts[namespace]
def finalize_share_data():
def finalize_share_data(shutdown_manager: bool = True):
"""
Release shared resources and clean up.
@ -1452,6 +1452,10 @@ def finalize_share_data():
In multi-process mode, it shuts down the Manager and releases all shared objects.
In single-process mode, it simply resets the global variables.
Args:
shutdown_manager: If True, shut down the multiprocessing Manager.
Should be True only for the main process, False for worker processes.
"""
global \
_manager, \
@ -1478,8 +1482,8 @@ def finalize_share_data():
f"Process {os.getpid()} finalizing storage data (multiprocess={_is_multiprocess})"
)
# In multi-process mode, shut down the Manager
if _is_multiprocess and _manager is not None:
# In multi-process mode, shut down the Manager only if requested
if _is_multiprocess and _manager is not None and shutdown_manager:
try:
# Clear shared resources before shutting down Manager
if _shared_dicts is not None: