"""
Test workspace isolation for lock functions in shared_storage.

This test module verifies that all lock functions support workspace-based
isolation and maintain backward compatibility with the global lock behavior.
"""

import asyncio
import time

import pytest

from lightrag.kg.shared_storage import (
    initialize_share_data,
    get_storage_lock,
    get_pipeline_status_lock,
    get_graph_db_lock,
    get_data_init_lock,
    get_internal_lock,
)


@pytest.fixture(scope="module", autouse=True)
def initialize_storage():
    """Initialize shared storage (workers=1) once before the module's tests."""
    initialize_share_data(workers=1)
    yield


# ============================================================================
# 1. Basic Functionality Tests
# ============================================================================


def test_global_lock_backward_compatibility():
    """Test that not passing workspace parameter uses global lock."""
    lock1 = get_storage_lock()
    lock2 = get_storage_lock()
    assert lock1._name == lock2._name == "storage_lock"


def test_workspace_specific_locks():
    """Test that different workspaces get differently named locks."""
    lock_ws1 = get_storage_lock(workspace="tenant1")
    lock_ws2 = get_storage_lock(workspace="tenant2")

    assert lock_ws1._name == "tenant1:storage_lock"
    assert lock_ws2._name == "tenant2:storage_lock"
    assert lock_ws1._name != lock_ws2._name


def test_same_workspace_returns_same_lock():
    """Test that the same workspace always resolves to the same lock name.

    Only the lock names are compared here; object identity of the returned
    lock wrappers is not asserted.
    """
    lock1 = get_storage_lock(workspace="tenant1")
    lock2 = get_storage_lock(workspace="tenant1")

    # Same name
    assert lock1._name == lock2._name == "tenant1:storage_lock"


def test_all_lock_functions():
    """Test that all lock functions support workspace parameter."""
    workspace = "test_ws"

    locks = [
        get_internal_lock(workspace=workspace),
        get_storage_lock(workspace=workspace),
        get_pipeline_status_lock(workspace=workspace),
        get_graph_db_lock(workspace=workspace),
        get_data_init_lock(workspace=workspace),
    ]

    # Same order as `locks` above so the pairs line up in zip().
    expected_names = [
        f"{workspace}:internal_lock",
        f"{workspace}:storage_lock",
        f"{workspace}:pipeline_status_lock",
        f"{workspace}:graph_db_lock",
        f"{workspace}:data_init_lock",
    ]

    for lock, expected_name in zip(locks, expected_names):
        assert lock._name == expected_name


def test_empty_workspace_uses_global_lock():
    """Test that empty string workspace uses global lock."""
    lock_empty = get_storage_lock(workspace="")
    lock_default = get_storage_lock()
    assert lock_empty._name == lock_default._name == "storage_lock"


# ============================================================================
# 2. Isolation Tests
# ============================================================================


@pytest.mark.asyncio
async def test_workspace_lock_isolation():
    """Test lock isolation between workspaces.

    Two tasks, each holding the pipeline-status lock of a different
    workspace, must be able to sit inside their critical sections at the
    same time.
    """
    results = []

    async def task_with_lock(workspace: str, task_id: int):
        """Record start/end markers while holding the workspace's lock."""
        lock = get_pipeline_status_lock(workspace=workspace)
        async with lock:
            results.append(f"{workspace}:{task_id}:start")
            await asyncio.sleep(0.1)  # Simulate work
            results.append(f"{workspace}:{task_id}:end")

    # Two workspaces executing concurrently
    await asyncio.gather(
        task_with_lock("ws1", 1),
        task_with_lock("ws2", 2),
    )

    # Verify both workspaces executed
    assert "ws1:1:start" in results
    assert "ws2:2:start" in results
    assert "ws1:1:end" in results
    assert "ws2:2:end" in results

    # Isolation: the critical sections must overlap, i.e. each workspace
    # entered its lock before the other one released its own. With a single
    # shared lock the second task could not start until the first had ended,
    # and the presence checks above alone would still pass.
    assert results.index("ws2:2:start") < results.index("ws1:1:end")
    assert results.index("ws1:1:start") < results.index("ws2:2:end")


@pytest.mark.asyncio
async def test_same_workspace_lock_serialization():
    """Test that operations within the same workspace are serialized."""
    results = []

    async def task_with_lock(workspace: str, task_id: int):
        """Record start/end markers while holding the workspace's lock."""
        lock = get_pipeline_status_lock(workspace=workspace)
        async with lock:
            results.append(f"{workspace}:{task_id}:start")
            await asyncio.sleep(0.05)
            results.append(f"{workspace}:{task_id}:end")

    # Two tasks in the same workspace should be serialized
    await asyncio.gather(
        task_with_lock("ws1", 1),
        task_with_lock("ws1", 2),
    )

    # Find indices
    ws1_1_start = results.index("ws1:1:start")
    ws1_1_end = results.index("ws1:1:end")
    ws1_2_start = results.index("ws1:2:start")
    ws1_2_end = results.index("ws1:2:end")

    # One task should complete before the other starts (serialization)
    # Either task1 completes before task2 starts, or vice versa
    assert (ws1_1_end < ws1_2_start) or (ws1_2_end < ws1_1_start)


# ============================================================================
# 3. Backward Compatibility Tests
# ============================================================================


@pytest.mark.asyncio
async def test_legacy_code_still_works():
    """Test that existing code without workspace parameter still works."""
    # Simulate legacy code that doesn't pass workspace
    lock = get_storage_lock()

    async with lock:
        # Should work without any issues
        await asyncio.sleep(0.01)

    assert lock._name == "storage_lock"


def test_all_lock_functions_without_workspace():
    """Test that all lock functions work without workspace parameter."""
    locks = [
        get_internal_lock(),
        get_storage_lock(),
        get_pipeline_status_lock(),
        get_graph_db_lock(),
        get_data_init_lock(),
    ]

    # Same order as `locks` above so the pairs line up in zip().
    expected_names = [
        "internal_lock",
        "storage_lock",
        "pipeline_status_lock",
        "graph_db_lock",
        "data_init_lock",
    ]

    for lock, expected_name in zip(locks, expected_names):
        assert lock._name == expected_name


# ============================================================================
# 4.
# Concurrent Scenario Tests
# ============================================================================


@pytest.mark.asyncio
async def test_concurrent_workspace_operations():
    """Test that multiple workspaces can operate concurrently without blocking.

    Each simulated upload holds its own workspace's pipeline-status lock for
    0.2s. If the three workspaces blocked each other the slowest upload would
    take about 0.6s, so a maximum below 0.35s shows they ran concurrently.
    """

    async def simulate_document_upload(workspace: str) -> float:
        """Hold the workspace lock for 0.2s and return the elapsed seconds."""
        # perf_counter() is monotonic; time.time() follows the wall clock and
        # can jump (NTP/DST adjustments), which would corrupt the duration.
        start_time = time.perf_counter()
        lock = get_pipeline_status_lock(workspace=workspace)

        async with lock:
            await asyncio.sleep(0.2)  # Simulate document processing

        return time.perf_counter() - start_time

    # Three workspaces uploading concurrently
    durations = await asyncio.gather(
        simulate_document_upload("tenant1"),
        simulate_document_upload("tenant2"),
        simulate_document_upload("tenant3"),
    )

    # If concurrent, total time should be close to 0.2s (not 0.6s)
    max_duration = max(durations)
    assert max_duration < 0.35, "Workspaces should not block each other"


@pytest.mark.asyncio
async def test_mixed_global_and_workspace_locks():
    """Test that global and workspace-specific locks don't interfere."""
    results = []

    async def task_with_global_lock():
        """Record start/end markers while holding the global storage lock."""
        lock = get_storage_lock()  # Global lock
        async with lock:
            results.append("global:start")
            await asyncio.sleep(0.1)
            results.append("global:end")

    async def task_with_workspace_lock(workspace: str):
        """Record start/end markers while holding a workspace storage lock."""
        lock = get_storage_lock(workspace=workspace)
        async with lock:
            results.append(f"{workspace}:start")
            await asyncio.sleep(0.1)
            results.append(f"{workspace}:end")

    # Run concurrently
    await asyncio.gather(
        task_with_global_lock(),
        task_with_workspace_lock("ws1"),
        task_with_workspace_lock("ws2"),
    )

    # All should have executed
    assert "global:start" in results
    assert "global:end" in results
    assert "ws1:start" in results
    assert "ws1:end" in results
    assert "ws2:start" in results
    assert "ws2:end" in results

    # Non-interference: every task must enter its critical section before any
    # task leaves one, so the first three recorded events are the three
    # starts. If one lock blocked another, an ":end" marker would appear
    # among them, while the presence checks above would still pass.
    assert set(results[:3]) == {"global:start", "ws1:start", "ws2:start"}


# ============================================================================
# 5.
# Performance Tests
# ============================================================================


def test_lock_creation_performance():
    """Test performance of creating locks for 1000 workspaces."""
    # perf_counter() is monotonic and high resolution; time.time() follows the
    # wall clock and can jump, which makes it unsuitable for measuring spans.
    start_time = time.perf_counter()

    for i in range(1000):
        workspace = f"tenant_{i}"
        get_storage_lock(workspace=workspace)
        get_pipeline_status_lock(workspace=workspace)

    duration = time.perf_counter() - start_time

    # 2000 lock creations should complete within 2 seconds
    assert duration < 2.0, f"Lock creation too slow: {duration}s"


@pytest.mark.asyncio
async def test_lock_acquisition_performance():
    """Test performance of acquiring and releasing locks."""
    workspace = "perf_test"
    lock = get_storage_lock(workspace=workspace)

    # Monotonic clock for the same reason as in the creation benchmark.
    start_time = time.perf_counter()

    for _ in range(100):
        async with lock:
            pass  # Just acquire and release

    duration = time.perf_counter() - start_time

    # 100 acquisitions should be fast
    assert duration < 1.0, f"Lock acquisition too slow: {duration}s"


# ============================================================================
# 6. Edge Cases
# ============================================================================


def test_special_characters_in_workspace():
    """Test workspace names with special characters."""
    special_workspaces = [
        "tenant-123",
        "tenant_abc",
        "tenant.xyz",
        "tenant:colon",  # Contains colon like our separator
    ]

    for workspace in special_workspaces:
        lock = get_storage_lock(workspace=workspace)
        expected_name = f"{workspace}:storage_lock"
        assert lock._name == expected_name


def test_very_long_workspace_name():
    """Test workspace with very long name."""
    long_workspace = "a" * 1000
    lock = get_storage_lock(workspace=long_workspace)
    assert lock._name == f"{long_workspace}:storage_lock"


def test_unicode_workspace_name():
    """Test workspace with unicode characters."""
    unicode_workspace = "租户_测试"
    lock = get_storage_lock(workspace=unicode_workspace)
    assert lock._name == f"{unicode_workspace}:storage_lock"


# ============================================================================
# 7.
# Multiple Lock Types
# ============================================================================


@pytest.mark.asyncio
async def test_different_lock_types_same_workspace():
    """Test that different lock types in the same workspace don't interfere."""
    workspace = "multi_lock_ws"
    results = []

    async def use_storage_lock():
        """Record start/end markers while holding the storage lock."""
        lock = get_storage_lock(workspace=workspace)
        async with lock:
            results.append("storage:start")
            await asyncio.sleep(0.1)
            results.append("storage:end")

    async def use_pipeline_lock():
        """Record start/end markers while holding the pipeline-status lock."""
        lock = get_pipeline_status_lock(workspace=workspace)
        async with lock:
            results.append("pipeline:start")
            await asyncio.sleep(0.1)
            results.append("pipeline:end")

    # Different lock types should not block each other
    await asyncio.gather(
        use_storage_lock(),
        use_pipeline_lock(),
    )

    # Both should have executed
    assert "storage:start" in results
    assert "storage:end" in results
    assert "pipeline:start" in results
    assert "pipeline:end" in results

    # Non-blocking: both tasks must be inside their critical sections at the
    # same time, so the first two recorded events are the two starts. If one
    # lock type blocked the other, an ":end" marker would come second, while
    # the presence checks above would still pass.
    assert set(results[:2]) == {"storage:start", "pipeline:start"}