fix: prevent double-release in UnifiedLock.__aexit__ error recovery

Problem:
When UnifiedLock.__aexit__ encountered an exception during async_lock.release(),
the error recovery logic would incorrectly attempt to release async_lock again
because it only checked main_lock_released flag. This could cause:
- Double-release attempts on already-failed locks
- Masking of original exceptions
- Undefined behavior in lock state

Root Cause:
The recovery logic used only main_lock_released to determine whether to attempt
async_lock release, without tracking whether async_lock.release() had already
been attempted and failed.

Fix:
- Added async_lock_released flag to track async_lock release attempts
- Updated recovery logic condition to check both main_lock_released AND
  async_lock_released before attempting async_lock release
- This ensures async_lock.release() is only called once, even if it fails

Testing:
- Added test_aexit_no_double_release_on_async_lock_failure:
  Verifies async_lock.release() is called only once when it fails
- Added test_aexit_recovery_on_main_lock_failure:
  Verifies recovery logic still works when main lock fails
- All 5 UnifiedLock safety tests pass

Impact:
- Eliminates double-release bugs in multiprocess lock scenarios
- Preserves correct error propagation
- Maintains recovery logic for legitimate failure cases

Files Modified:
- lightrag/kg/shared_storage.py: Added async_lock_released tracking
- tests/test_unified_lock_safety.py: Added 2 new tests (5 total now pass)
This commit is contained in:
BukeLy 2025-11-23 16:34:08 +08:00
parent 49bbb3a4d7
commit 204a2535c8
2 changed files with 108 additions and 2 deletions

View file

@ -206,6 +206,7 @@ class UnifiedLock(Generic[T]):
async def __aexit__(self, exc_type, exc_val, exc_tb):
main_lock_released = False
async_lock_released = False
try:
# Release main lock first
if self._lock is not None:
@ -229,6 +230,7 @@ class UnifiedLock(Generic[T]):
level="DEBUG",
enable_output=self._enable_logging,
)
async_lock_released = True
except Exception as e:
direct_log(
@ -237,9 +239,10 @@ class UnifiedLock(Generic[T]):
enable_output=True,
)
# If main lock release failed but async lock hasn't been released, try to release it
# If main lock release failed but async lock hasn't been attempted yet, try to release it
if (
not main_lock_released
and not async_lock_released
and not self._is_async
and self._async_lock is not None
):

View file

@ -5,12 +5,16 @@ This test module verifies that UnifiedLock raises RuntimeError instead of
allowing unprotected execution when the underlying lock is None, preventing
false security and potential race conditions.
Critical Bug: When self._lock is None, __aenter__ used to log WARNING but
Critical Bug 1: When self._lock is None, __aenter__ used to log WARNING but
still return successfully, allowing critical sections to run without lock
protection, causing race conditions and data corruption.
Critical Bug 2: In __aexit__, when async_lock.release() fails, the error
recovery logic would attempt to release it again, causing double-release issues.
"""
import pytest
from unittest.mock import MagicMock, AsyncMock
from lightrag.kg.shared_storage import UnifiedLock
@ -86,3 +90,102 @@ class TestUnifiedLockSafety:
or "lock" in error_message.lower()
)
assert "initialize_share_data" in error_message or "None" in error_message
@pytest.mark.asyncio
async def test_aexit_no_double_release_on_async_lock_failure(self):
"""
Test that __aexit__ doesn't attempt to release async_lock twice when it fails.
Scenario: async_lock.release() fails during normal release.
Expected: Recovery logic should NOT attempt to release async_lock again,
preventing double-release issues.
This tests Bug 2 fix: async_lock_released tracking prevents double release.
"""
# Create mock locks
main_lock = MagicMock()
main_lock.acquire = MagicMock()
main_lock.release = MagicMock()
async_lock = AsyncMock()
async_lock.acquire = AsyncMock()
# Make async_lock.release() fail
release_call_count = 0
def mock_release_fail():
nonlocal release_call_count
release_call_count += 1
raise RuntimeError("Async lock release failed")
async_lock.release = MagicMock(side_effect=mock_release_fail)
# Create UnifiedLock with both locks (sync mode with async_lock)
lock = UnifiedLock(
lock=main_lock,
is_async=False,
name="test_double_release",
enable_logging=False,
)
lock._async_lock = async_lock
# Try to use the lock - should fail during __aexit__
try:
async with lock:
pass
except RuntimeError as e:
# Should get the async lock release error
assert "Async lock release failed" in str(e)
# Verify async_lock.release() was called only ONCE, not twice
assert release_call_count == 1, (
f"async_lock.release() should be called only once, but was called {release_call_count} times"
)
# Main lock should have been released successfully
main_lock.release.assert_called_once()
@pytest.mark.asyncio
async def test_aexit_recovery_on_main_lock_failure(self):
"""
Test that __aexit__ recovery logic works when main lock release fails.
Scenario: main_lock.release() fails before async_lock is attempted.
Expected: Recovery logic should attempt to release async_lock to prevent
resource leaks.
This verifies the recovery logic still works correctly with async_lock_released tracking.
"""
# Create mock locks
main_lock = MagicMock()
main_lock.acquire = MagicMock()
# Make main_lock.release() fail
def mock_main_release_fail():
raise RuntimeError("Main lock release failed")
main_lock.release = MagicMock(side_effect=mock_main_release_fail)
async_lock = AsyncMock()
async_lock.acquire = AsyncMock()
async_lock.release = MagicMock()
# Create UnifiedLock with both locks (sync mode with async_lock)
lock = UnifiedLock(
lock=main_lock, is_async=False, name="test_recovery", enable_logging=False
)
lock._async_lock = async_lock
# Try to use the lock - should fail during __aexit__
try:
async with lock:
pass
except RuntimeError as e:
# Should get the main lock release error
assert "Main lock release failed" in str(e)
# Main lock release should have been attempted
main_lock.release.assert_called_once()
# Recovery logic should have attempted to release async_lock
async_lock.release.assert_called_once()