LightRAG/tests/test_finalization_cleanup.py
chengjie 0bd162a416 fix: ensure finalize_share_data properly cleans up workspace locks
Why this change is needed:
The finalize_share_data() function was not properly cleaning up workspace
lock-related global variables (_sync_locks, _workspace_async_locks, and
lock registry variables). This caused stale references to remain after
finalization, leading to EOFError or BrokenPipeError when trying to
re-initialize or when processes tried to use locks after the Manager
was shut down.

How it solves it:
1. Added comprehensive cleanup of all Manager.dict proxies before Manager
   shutdown (_sync_locks, _lock_registry, _lock_registry_count,
   _lock_cleanup_data)
2. Added cleanup of per-process _workspace_async_locks dictionary
3. Reset all lock-related globals to None at end of finalization:
   - _workers, _lock_registry, _lock_registry_count, _lock_cleanup_data
   - _registry_guard, _storage_keyed_lock, _sync_locks
   - _workspace_async_locks, _earliest_mp_cleanup_time,
     _last_mp_cleanup_time

Impact:
- Prevents EOFError/BrokenPipeError in production deployments
- Enables safe re-initialization after finalization
- Critical for proper resource cleanup in multi-process deployments
- Fixes memory leaks from stale lock references

Testing:
- Added 3 comprehensive tests in test_finalization_cleanup.py
- All 23 workspace lock tests pass (17 original + 3 bug fixes + 3 finalization)
- Tests verify clean re-initialization after finalization in both
  single-process and multiprocess modes
2025-11-11 00:23:42 +08:00

110 lines
3.8 KiB
Python

"""
Test finalization cleanup for workspace locks.
This test module verifies that finalize_share_data() properly cleans up
all lock-related global variables, including:
- _sync_locks (Manager.dict in multiprocess mode)
- _workspace_async_locks (per-process dict)
- _lock_registry, _lock_registry_count, _lock_cleanup_data
- _storage_keyed_lock
Bug: Previously, these weren't properly cleaned up, causing EOFError/BrokenPipeError
when re-initializing after finalization.
"""
import pytest
from lightrag.kg import shared_storage
@pytest.fixture(autouse=True)
def cleanup_shared_storage():
    """Tear down shared storage once each test has finished.

    Declared with ``autouse=True``, so every test in this module gets the
    teardown without requesting the fixture. There is no setup phase.
    """
    # Hand control to the test; everything below runs as teardown.
    yield

    # Some tests already finalize themselves, so this may be a repeat call.
    shared_storage.finalize_share_data()
def test_finalization_clears_workspace_locks():
    """Check that finalize_share_data() resets every workspace-lock global.

    Regression guard: ``_sync_locks`` and ``_workspace_async_locks`` used to
    survive finalization, leaving stale references to a Manager that had
    already been shut down.
    """
    # workers=2 puts shared storage into multiprocess mode.
    shared_storage.initialize_share_data(workers=2)

    # Request locks for two different workspaces. The returned objects are not
    # inspected; only the registry entries they leave behind are checked.
    storage_lock = shared_storage.get_storage_lock(workspace="tenant1")
    pipeline_lock = shared_storage.get_pipeline_status_lock(workspace="tenant2")

    # Both locks must be registered under "<workspace>:<lock name>" keys.
    for sync_key in ("tenant1:storage_lock", "tenant2:pipeline_status_lock"):
        assert sync_key in shared_storage._sync_locks
    assert "tenant1:storage_lock" in shared_storage._workspace_async_locks

    shared_storage.finalize_share_data()

    # After finalization every lock-related global must be reset to None.
    cleared_globals = (
        "_sync_locks",
        "_workspace_async_locks",
        "_lock_registry",
        "_lock_registry_count",
        "_lock_cleanup_data",
        "_registry_guard",
        "_storage_keyed_lock",
        "_manager",
    )
    for global_name in cleared_globals:
        assert getattr(shared_storage, global_name) is None, global_name
def test_reinitialize_after_finalization():
    """Check that shared storage can be initialized again after finalization.

    Regression guard: stale references to a shut-down Manager used to raise
    EOFError/BrokenPipeError when locks were created after re-initialization.
    """
    worker_count = 2  # multiprocess mode

    # Round one: initialize, create a lock, confirm it was registered.
    shared_storage.initialize_share_data(workers=worker_count)
    first_lock = shared_storage.get_storage_lock(workspace="tenant1")
    assert "tenant1:storage_lock" in shared_storage._sync_locks

    # Tear everything down; the Manager reference must be gone.
    shared_storage.finalize_share_data()
    assert shared_storage._manager is None

    # Round two: a fresh initialization must produce working locks again,
    # without EOFError/BrokenPipeError.
    shared_storage.initialize_share_data(workers=worker_count)
    second_lock = shared_storage.get_storage_lock(workspace="tenant2")
    assert "tenant2:storage_lock" in shared_storage._sync_locks

    # Leave shared storage finalized for whatever runs next.
    shared_storage.finalize_share_data()
def test_single_process_finalization():
    """Check finalization and re-initialization in single-process mode.

    Covers the path where no multiprocess Manager is in use.
    """
    worker_count = 1  # single-process mode

    shared_storage.initialize_share_data(workers=worker_count)

    # Create a workspace lock and confirm it shows up in the registry.
    first_lock = shared_storage.get_storage_lock(workspace="tenant1")
    assert "tenant1:storage_lock" in shared_storage._sync_locks

    shared_storage.finalize_share_data()

    # The globals must be None afterwards; _manager is expected to be None
    # even in single-process mode.
    for global_name in ("_sync_locks", "_workspace_async_locks", "_manager"):
        assert getattr(shared_storage, global_name) is None, global_name

    # A second initialization must work and register new locks.
    shared_storage.initialize_share_data(workers=worker_count)
    second_lock = shared_storage.get_storage_lock(workspace="tenant2")
    assert "tenant2:storage_lock" in shared_storage._sync_locks

    # Leave shared storage finalized for whatever runs next.
    shared_storage.finalize_share_data()