test: Enhance E2E workspace isolation detection with content verification

Add specific content assertions to detect cross-contamination between workspaces.
Previously only checked that workspaces had different data, now verifies:

- Each workspace contains only its own text content
- Each workspace does NOT contain the other workspace's content
- Cross-contamination would be immediately detected

This ensures the test can find problems, not just pass.

Changes:
- Add assertions for "Artificial Intelligence" and "Machine Learning" in project_a
- Add assertions for "Deep Learning" and "Neural Networks" in project_b
- Add negative assertions to verify data leakage doesn't occur
- Add detailed output messages showing what was verified

Testing:
- pytest tests/test_workspace_isolation.py::test_lightrag_end_to_end_workspace_isolation
- Test passes with proper content isolation verified

(cherry picked from commit 3ec736932e)
This commit is contained in:
BukeLy 2025-11-17 18:55:45 +08:00 committed by Raphaël MANSUY
parent f2771cc953
commit f1fa1cd340

View file

@ -15,6 +15,7 @@ import os
import shutil import shutil
import tempfile import tempfile
import numpy as np import numpy as np
import pytest
from pathlib import Path from pathlib import Path
from lightrag.kg.shared_storage import ( from lightrag.kg.shared_storage import (
get_final_namespace, get_final_namespace,
@ -32,37 +33,17 @@ from lightrag.kg.shared_storage import (
from lightrag.kg.json_kv_impl import JsonKVStorage from lightrag.kg.json_kv_impl import JsonKVStorage
class TestResults: # =============================================================================
"""Track test results""" # Pytest Fixtures
# =============================================================================
def __init__(self):
self.results = []
def add(self, test_name, passed, message=""):
self.results.append({"name": test_name, "passed": passed, "message": message})
status = "✅ PASSED" if passed else "❌ FAILED"
print(f"\n{status}: {test_name}")
if message:
print(f" {message}")
def summary(self):
print("\n" + "=" * 60)
print("TEST SUMMARY")
print("=" * 60)
passed = sum(1 for r in self.results if r["passed"])
total = len(self.results)
print(f"Passed: {passed}/{total}")
print()
for r in self.results:
            status = "✅" if r["passed"] else "❌"
print(f"{status} {r['name']}")
if r["message"]:
print(f" {r['message']}")
print("=" * 60)
return passed == total
results = TestResults() @pytest.fixture(autouse=True)
def setup_shared_data():
"""Initialize shared data before each test"""
initialize_share_data()
yield
# Cleanup after test if needed
# ============================================================================= # =============================================================================
@ -70,6 +51,7 @@ results = TestResults()
# ============================================================================= # =============================================================================
@pytest.mark.asyncio
async def test_pipeline_status_isolation(): async def test_pipeline_status_isolation():
""" """
Test that pipeline status is isolated between different workspaces. Test that pipeline status is isolated between different workspaces.
@ -78,7 +60,6 @@ async def test_pipeline_status_isolation():
print("TEST 1: Pipeline Status Isolation") print("TEST 1: Pipeline Status Isolation")
print("=" * 60) print("=" * 60)
try:
# Initialize shared storage # Initialize shared storage
initialize_share_data() initialize_share_data()
@ -94,13 +75,7 @@ async def test_pipeline_status_isolation():
data2 = await get_namespace_data("pipeline_status", workspace=workspace2) data2 = await get_namespace_data("pipeline_status", workspace=workspace2)
# Verify they are independent objects # Verify they are independent objects
if data1 is data2: assert data1 is not data2, "Pipeline status data objects are the same (should be different)"
results.add(
"Pipeline Status Isolation",
False,
"Pipeline status data objects are the same (should be different)",
)
return False
# Modify workspace1's data and verify workspace2 is not affected # Modify workspace1's data and verify workspace2 is not affected
data1["test_key"] = "workspace1_value" data1["test_key"] = "workspace1_value"
@ -109,31 +84,12 @@ async def test_pipeline_status_isolation():
data1_check = await get_namespace_data("pipeline_status", workspace=workspace1) data1_check = await get_namespace_data("pipeline_status", workspace=workspace1)
data2_check = await get_namespace_data("pipeline_status", workspace=workspace2) data2_check = await get_namespace_data("pipeline_status", workspace=workspace2)
if ( assert "test_key" in data1_check, "test_key not found in workspace1"
"test_key" in data1_check assert data1_check["test_key"] == "workspace1_value", f"workspace1 test_key value incorrect: {data1_check.get('test_key')}"
and data1_check["test_key"] == "workspace1_value" assert "test_key" not in data2_check, f"test_key leaked to workspace2: {data2_check.get('test_key')}"
and "test_key" not in data2_check
):
results.add(
"Pipeline Status Isolation",
True,
"Different workspaces have isolated pipeline status",
)
return True
else:
results.add(
"Pipeline Status Isolation",
False,
f"Pipeline status not properly isolated: ws1={data1_check.get('test_key')}, ws2={data2_check.get('test_key')}",
)
return False
except Exception as e: print("✅ PASSED: Pipeline Status Isolation")
results.add("Pipeline Status Isolation", False, f"Exception: {str(e)}") print(" Different workspaces have isolated pipeline status")
import traceback
traceback.print_exc()
return False
# ============================================================================= # =============================================================================
@ -141,6 +97,7 @@ async def test_pipeline_status_isolation():
# ============================================================================= # =============================================================================
@pytest.mark.asyncio
async def test_lock_mechanism(): async def test_lock_mechanism():
""" """
Test that the new keyed lock mechanism works correctly without deadlocks. Test that the new keyed lock mechanism works correctly without deadlocks.
@ -151,7 +108,6 @@ async def test_lock_mechanism():
print("TEST 2: Lock Mechanism (No Deadlocks)") print("TEST 2: Lock Mechanism (No Deadlocks)")
print("=" * 60) print("=" * 60)
try:
# Test 2.1: Different workspaces should run in parallel # Test 2.1: Different workspaces should run in parallel
print("\nTest 2.1: Different workspaces locks should be parallel") print("\nTest 2.1: Different workspaces locks should be parallel")
@ -178,20 +134,10 @@ async def test_lock_mechanism():
# If locks are properly isolated by workspace, this should take ~0.5s (parallel) # If locks are properly isolated by workspace, this should take ~0.5s (parallel)
# If they block each other, it would take ~1.5s (serial) # If they block each other, it would take ~1.5s (serial)
parallel_ok = elapsed < 1.0 assert elapsed < 1.0, f"Locks blocked each other: {elapsed:.2f}s (expected < 1.0s)"
if parallel_ok: print(f"✅ PASSED: Lock Mechanism - Parallel (Different Workspaces)")
results.add( print(f" Locks ran in parallel: {elapsed:.2f}s")
"Lock Mechanism - Parallel (Different Workspaces)",
True,
f"Locks ran in parallel: {elapsed:.2f}s",
)
else:
results.add(
"Lock Mechanism - Parallel (Different Workspaces)",
False,
f"Locks blocked each other: {elapsed:.2f}s (expected < 1.0s)",
)
# Test 2.2: Same workspace should serialize # Test 2.2: Same workspace should serialize
print("\nTest 2.2: Same workspace locks should serialize") print("\nTest 2.2: Same workspace locks should serialize")
@ -204,29 +150,10 @@ async def test_lock_mechanism():
elapsed = time.time() - start elapsed = time.time() - start
# Same workspace should serialize, taking ~0.6s # Same workspace should serialize, taking ~0.6s
serial_ok = elapsed >= 0.5 assert elapsed >= 0.5, f"Locks didn't serialize: {elapsed:.2f}s (expected >= 0.5s)"
if serial_ok: print(f"✅ PASSED: Lock Mechanism - Serial (Same Workspace)")
results.add( print(f" Locks serialized correctly: {elapsed:.2f}s")
"Lock Mechanism - Serial (Same Workspace)",
True,
f"Locks serialized correctly: {elapsed:.2f}s",
)
else:
results.add(
"Lock Mechanism - Serial (Same Workspace)",
False,
f"Locks didn't serialize: {elapsed:.2f}s (expected >= 0.5s)",
)
return parallel_ok and serial_ok
except Exception as e:
results.add("Lock Mechanism", False, f"Exception: {str(e)}")
import traceback
traceback.print_exc()
return False
# ============================================================================= # =============================================================================
@ -234,6 +161,7 @@ async def test_lock_mechanism():
# ============================================================================= # =============================================================================
@pytest.mark.asyncio
async def test_backward_compatibility(): async def test_backward_compatibility():
""" """
Test that legacy code without workspace parameter still works correctly. Test that legacy code without workspace parameter still works correctly.
@ -242,7 +170,6 @@ async def test_backward_compatibility():
print("TEST 3: Backward Compatibility") print("TEST 3: Backward Compatibility")
print("=" * 60) print("=" * 60)
try:
# Test 3.1: get_final_namespace with None should use default workspace # Test 3.1: get_final_namespace with None should use default workspace
print("\nTest 3.1: get_final_namespace with workspace=None") print("\nTest 3.1: get_final_namespace with workspace=None")
@ -250,20 +177,10 @@ async def test_backward_compatibility():
final_ns = get_final_namespace("pipeline_status", workspace=None) final_ns = get_final_namespace("pipeline_status", workspace=None)
expected = "my_default_workspace:pipeline_status" expected = "my_default_workspace:pipeline_status"
if final_ns == expected: assert final_ns == expected, f"Expected {expected}, got {final_ns}"
results.add(
"Backward Compatibility - get_final_namespace", print(f"✅ PASSED: Backward Compatibility - get_final_namespace")
True, print(f" Correctly uses default workspace: {final_ns}")
f"Correctly uses default workspace: {final_ns}",
)
compat_1_ok = True
else:
results.add(
"Backward Compatibility - get_final_namespace",
False,
f"Expected {expected}, got {final_ns}",
)
compat_1_ok = False
# Test 3.2: get_default_workspace # Test 3.2: get_default_workspace
print("\nTest 3.2: get/set default workspace") print("\nTest 3.2: get/set default workspace")
@ -271,20 +188,10 @@ async def test_backward_compatibility():
set_default_workspace("test_default") set_default_workspace("test_default")
retrieved = get_default_workspace() retrieved = get_default_workspace()
if retrieved == "test_default": assert retrieved == "test_default", f"Expected 'test_default', got {retrieved}"
results.add(
"Backward Compatibility - default workspace", print(f"✅ PASSED: Backward Compatibility - default workspace")
True, print(f" Default workspace set/get correctly: {retrieved}")
f"Default workspace set/get correctly: {retrieved}",
)
compat_2_ok = True
else:
results.add(
"Backward Compatibility - default workspace",
False,
f"Expected 'test_default', got {retrieved}",
)
compat_2_ok = False
# Test 3.3: Empty workspace handling # Test 3.3: Empty workspace handling
print("\nTest 3.3: Empty workspace handling") print("\nTest 3.3: Empty workspace handling")
@ -293,20 +200,10 @@ async def test_backward_compatibility():
final_ns_empty = get_final_namespace("pipeline_status", workspace=None) final_ns_empty = get_final_namespace("pipeline_status", workspace=None)
expected_empty = "pipeline_status" # Should be just the namespace without ':' expected_empty = "pipeline_status" # Should be just the namespace without ':'
if final_ns_empty == expected_empty: assert final_ns_empty == expected_empty, f"Expected '{expected_empty}', got '{final_ns_empty}'"
results.add(
"Backward Compatibility - empty workspace", print(f"✅ PASSED: Backward Compatibility - empty workspace")
True, print(f" Empty workspace handled correctly: '{final_ns_empty}'")
f"Empty workspace handled correctly: '{final_ns_empty}'",
)
compat_3_ok = True
else:
results.add(
"Backward Compatibility - empty workspace",
False,
f"Expected '{expected_empty}', got '{final_ns_empty}'",
)
compat_3_ok = False
# Test 3.4: None workspace with default set # Test 3.4: None workspace with default set
print("\nTest 3.4: initialize_pipeline_status with workspace=None") print("\nTest 3.4: initialize_pipeline_status with workspace=None")
@ -319,29 +216,10 @@ async def test_backward_compatibility():
"pipeline_status", workspace="compat_test_workspace" "pipeline_status", workspace="compat_test_workspace"
) )
if data is not None: assert data is not None, "Failed to initialize pipeline status with default workspace"
results.add(
"Backward Compatibility - pipeline init with None",
True,
"Pipeline status initialized with default workspace",
)
compat_4_ok = True
else:
results.add(
"Backward Compatibility - pipeline init with None",
False,
"Failed to initialize pipeline status with default workspace",
)
compat_4_ok = False
return compat_1_ok and compat_2_ok and compat_3_ok and compat_4_ok print(f"✅ PASSED: Backward Compatibility - pipeline init with None")
print(f" Pipeline status initialized with default workspace")
except Exception as e:
results.add("Backward Compatibility", False, f"Exception: {str(e)}")
import traceback
traceback.print_exc()
return False
# ============================================================================= # =============================================================================
@ -349,6 +227,7 @@ async def test_backward_compatibility():
# ============================================================================= # =============================================================================
@pytest.mark.asyncio
async def test_multi_workspace_concurrency(): async def test_multi_workspace_concurrency():
""" """
Test that multiple workspaces can operate concurrently without interference. Test that multiple workspaces can operate concurrently without interference.
@ -358,7 +237,6 @@ async def test_multi_workspace_concurrency():
print("TEST 4: Multi-Workspace Concurrency") print("TEST 4: Multi-Workspace Concurrency")
print("=" * 60) print("=" * 60)
try:
initialize_share_data() initialize_share_data()
async def workspace_operations(workspace_id): async def workspace_operations(workspace_id):
@ -397,55 +275,25 @@ async def test_multi_workspace_concurrency():
print(f"\n All workspaces completed in {elapsed:.2f}s") print(f"\n All workspaces completed in {elapsed:.2f}s")
# Verify all workspaces completed # Verify all workspaces completed
if set(results_list) == set(workspaces): assert set(results_list) == set(workspaces), "Not all workspaces completed"
results.add(
"Multi-Workspace Concurrency - Execution", print(f"✅ PASSED: Multi-Workspace Concurrency - Execution")
True, print(f" All {len(workspaces)} workspaces completed successfully in {elapsed:.2f}s")
f"All {len(workspaces)} workspaces completed successfully in {elapsed:.2f}s",
)
exec_ok = True
else:
results.add(
"Multi-Workspace Concurrency - Execution",
False,
f"Not all workspaces completed",
)
exec_ok = False
# Verify data isolation - each workspace should have its own data # Verify data isolation - each workspace should have its own data
print("\n Verifying data isolation...") print("\n Verifying data isolation...")
isolation_ok = True
for ws in workspaces: for ws in workspaces:
data = await get_namespace_data("pipeline_status", workspace=ws) data = await get_namespace_data("pipeline_status", workspace=ws)
expected_key = f"{ws}_key" expected_key = f"{ws}_key"
expected_value = f"{ws}_value" expected_value = f"{ws}_value"
if expected_key not in data or data[expected_key] != expected_value: assert expected_key in data, f"Data not properly isolated for {ws}: missing {expected_key}"
results.add( assert data[expected_key] == expected_value, f"Data not properly isolated for {ws}: {expected_key}={data[expected_key]} (expected {expected_value})"
f"Multi-Workspace Concurrency - Data Isolation ({ws})",
False,
f"Data not properly isolated for {ws}",
)
isolation_ok = False
else:
print(f" [{ws}] Data correctly isolated: {expected_key}={data[expected_key]}") print(f" [{ws}] Data correctly isolated: {expected_key}={data[expected_key]}")
if isolation_ok: print(f"✅ PASSED: Multi-Workspace Concurrency - Data Isolation")
results.add( print(f" All workspaces have properly isolated data")
"Multi-Workspace Concurrency - Data Isolation",
True,
"All workspaces have properly isolated data",
)
return exec_ok and isolation_ok
except Exception as e:
results.add("Multi-Workspace Concurrency", False, f"Exception: {str(e)}")
import traceback
traceback.print_exc()
return False
# ============================================================================= # =============================================================================
@ -453,6 +301,7 @@ async def test_multi_workspace_concurrency():
# ============================================================================= # =============================================================================
@pytest.mark.asyncio
async def test_namespace_lock_reentrance(): async def test_namespace_lock_reentrance():
""" """
Test that NamespaceLock prevents re-entrance in the same coroutine Test that NamespaceLock prevents re-entrance in the same coroutine
@ -462,7 +311,6 @@ async def test_namespace_lock_reentrance():
print("TEST 5: NamespaceLock Re-entrance Protection") print("TEST 5: NamespaceLock Re-entrance Protection")
print("=" * 60) print("=" * 60)
try:
# Test 5.1: Same coroutine re-entrance should fail # Test 5.1: Same coroutine re-entrance should fail
print("\nTest 5.1: Same coroutine re-entrance should raise RuntimeError") print("\nTest 5.1: Same coroutine re-entrance should raise RuntimeError")
@ -480,20 +328,12 @@ async def test_namespace_lock_reentrance():
print(f" ✓ Re-entrance correctly blocked: {e}") print(f" ✓ Re-entrance correctly blocked: {e}")
reentrance_failed_correctly = True reentrance_failed_correctly = True
else: else:
print(f" ✗ Unexpected RuntimeError: {e}") raise
if reentrance_failed_correctly: assert reentrance_failed_correctly, "Re-entrance protection not working"
results.add(
"NamespaceLock Re-entrance Protection", print(f"✅ PASSED: NamespaceLock Re-entrance Protection")
True, print(f" Re-entrance correctly raises RuntimeError")
"Re-entrance correctly raises RuntimeError",
)
else:
results.add(
"NamespaceLock Re-entrance Protection",
False,
"Re-entrance protection not working",
)
# Test 5.2: Same NamespaceLock instance in different coroutines should succeed # Test 5.2: Same NamespaceLock instance in different coroutines should succeed
print("\nTest 5.2: Same NamespaceLock instance in different coroutines") print("\nTest 5.2: Same NamespaceLock instance in different coroutines")
@ -516,28 +356,10 @@ async def test_namespace_lock_reentrance():
# Both coroutines should have completed # Both coroutines should have completed
expected_entries = 4 # 2 starts + 2 ends expected_entries = 4 # 2 starts + 2 ends
if len(concurrent_results) == expected_entries: assert len(concurrent_results) == expected_entries, f"Expected {expected_entries} entries, got {len(concurrent_results)}"
results.add(
"NamespaceLock Concurrent Reuse",
True,
f"Same NamespaceLock instance used successfully in {expected_entries//2} concurrent coroutines",
)
concurrent_ok = True
else:
results.add(
"NamespaceLock Concurrent Reuse",
False,
f"Expected {expected_entries} entries, got {len(concurrent_results)}",
)
concurrent_ok = False
return reentrance_failed_correctly and concurrent_ok print(f"✅ PASSED: NamespaceLock Concurrent Reuse")
print(f" Same NamespaceLock instance used successfully in {expected_entries//2} concurrent coroutines")
except Exception as e:
results.add("NamespaceLock Re-entrance Protection", False, f"Exception: {str(e)}")
import traceback
traceback.print_exc()
return False
# ============================================================================= # =============================================================================
@ -545,6 +367,7 @@ async def test_namespace_lock_reentrance():
# ============================================================================= # =============================================================================
@pytest.mark.asyncio
async def test_different_namespace_lock_isolation(): async def test_different_namespace_lock_isolation():
""" """
Test that locks for different namespaces (same workspace) are independent. Test that locks for different namespaces (same workspace) are independent.
@ -553,7 +376,6 @@ async def test_different_namespace_lock_isolation():
print("TEST 6: Different Namespace Lock Isolation") print("TEST 6: Different Namespace Lock Isolation")
print("=" * 60) print("=" * 60)
try:
print("\nTesting locks with same workspace but different namespaces") print("\nTesting locks with same workspace but different namespaces")
async def acquire_lock_timed(workspace, namespace, hold_time, name): async def acquire_lock_timed(workspace, namespace, hold_time, name):
@ -575,28 +397,10 @@ async def test_different_namespace_lock_isolation():
elapsed = time.time() - start elapsed = time.time() - start
# If locks are properly isolated by namespace, this should take ~0.5s (parallel) # If locks are properly isolated by namespace, this should take ~0.5s (parallel)
namespace_isolation_ok = elapsed < 1.0 assert elapsed < 1.0, f"Different namespace locks blocked each other: {elapsed:.2f}s (expected < 1.0s)"
if namespace_isolation_ok: print(f"✅ PASSED: Different Namespace Lock Isolation")
results.add( print(f" Different namespace locks ran in parallel: {elapsed:.2f}s")
"Different Namespace Lock Isolation",
True,
f"Different namespace locks ran in parallel: {elapsed:.2f}s",
)
else:
results.add(
"Different Namespace Lock Isolation",
False,
f"Different namespace locks blocked each other: {elapsed:.2f}s (expected < 1.0s)",
)
return namespace_isolation_ok
except Exception as e:
results.add("Different Namespace Lock Isolation", False, f"Exception: {str(e)}")
import traceback
traceback.print_exc()
return False
# ============================================================================= # =============================================================================
@ -604,6 +408,7 @@ async def test_different_namespace_lock_isolation():
# ============================================================================= # =============================================================================
@pytest.mark.asyncio
async def test_error_handling(): async def test_error_handling():
""" """
Test error handling for invalid workspace configurations. Test error handling for invalid workspace configurations.
@ -612,7 +417,6 @@ async def test_error_handling():
print("TEST 7: Error Handling") print("TEST 7: Error Handling")
print("=" * 60) print("=" * 60)
try:
# Test 7.1: set_default_workspace(None) converts to empty string # Test 7.1: set_default_workspace(None) converts to empty string
print("\nTest 7.1: set_default_workspace(None) converts to empty string") print("\nTest 7.1: set_default_workspace(None) converts to empty string")
@ -620,58 +424,31 @@ async def test_error_handling():
default_ws = get_default_workspace() default_ws = get_default_workspace()
# Should convert None to "" automatically # Should convert None to "" automatically
conversion_ok = default_ws == "" assert default_ws == "", f"Expected empty string, got: '{default_ws}'"
if conversion_ok: print(f"✅ PASSED: Error Handling - None to Empty String")
results.add( print(f" set_default_workspace(None) correctly converts to empty string: '{default_ws}'")
"Error Handling - None to Empty String",
True,
f"set_default_workspace(None) correctly converts to empty string: '{default_ws}'",
)
else:
results.add(
"Error Handling - None to Empty String",
False,
f"Expected empty string, got: '{default_ws}'",
)
# Test 7.2: Empty string workspace behavior # Test 7.2: Empty string workspace behavior
print("\nTest 7.2: Empty string workspace creates valid namespace") print("\nTest 7.2: Empty string workspace creates valid namespace")
# With empty workspace, should create namespace without colon # With empty workspace, should create namespace without colon
final_ns = get_final_namespace("test_namespace", workspace="") final_ns = get_final_namespace("test_namespace", workspace="")
namespace_ok = final_ns == "test_namespace" assert final_ns == "test_namespace", f"Unexpected namespace: '{final_ns}'"
if namespace_ok: print(f"✅ PASSED: Error Handling - Empty Workspace Namespace")
results.add( print(f" Empty workspace creates valid namespace: '{final_ns}'")
"Error Handling - Empty Workspace Namespace",
True,
f"Empty workspace creates valid namespace: '{final_ns}'",
)
else:
results.add(
"Error Handling - Empty Workspace Namespace",
False,
f"Unexpected namespace: '{final_ns}'",
)
# Restore default workspace for other tests # Restore default workspace for other tests
set_default_workspace("") set_default_workspace("")
return conversion_ok and namespace_ok
except Exception as e:
results.add("Error Handling", False, f"Exception: {str(e)}")
import traceback
traceback.print_exc()
return False
# ============================================================================= # =============================================================================
# Test 8: Update Flags Workspace Isolation # Test 8: Update Flags Workspace Isolation
# ============================================================================= # =============================================================================
@pytest.mark.asyncio
async def test_update_flags_workspace_isolation(): async def test_update_flags_workspace_isolation():
""" """
Test that update flags are properly isolated between workspaces. Test that update flags are properly isolated between workspaces.
@ -680,7 +457,6 @@ async def test_update_flags_workspace_isolation():
print("TEST 8: Update Flags Workspace Isolation") print("TEST 8: Update Flags Workspace Isolation")
print("=" * 60) print("=" * 60)
try:
initialize_share_data() initialize_share_data()
workspace1 = "update_flags_ws1" workspace1 = "update_flags_ws1"
@ -699,26 +475,18 @@ async def test_update_flags_workspace_isolation():
flag2_obj = await get_update_flag(test_namespace, workspace=workspace2) flag2_obj = await get_update_flag(test_namespace, workspace=workspace2)
# Initial state should be False # Initial state should be False
initial_ok = flag1_obj.value is False and flag2_obj.value is False assert flag1_obj.value is False, "Flag1 initial value should be False"
assert flag2_obj.value is False, "Flag2 initial value should be False"
# Set all flags for workspace1 # Set all flags for workspace1
await set_all_update_flags(test_namespace, workspace=workspace1) await set_all_update_flags(test_namespace, workspace=workspace1)
# Check that only workspace1's flags are set # Check that only workspace1's flags are set
set_flags_isolated = flag1_obj.value is True and flag2_obj.value is False assert flag1_obj.value is True, f"Flag1 should be True after set_all_update_flags, got {flag1_obj.value}"
assert flag2_obj.value is False, f"Flag2 should still be False, got {flag2_obj.value}"
if set_flags_isolated: print(f"✅ PASSED: Update Flags - set_all_update_flags Isolation")
results.add( print(f" set_all_update_flags isolated: ws1={flag1_obj.value}, ws2={flag2_obj.value}")
"Update Flags - set_all_update_flags Isolation",
True,
f"set_all_update_flags isolated: ws1={flag1_obj.value}, ws2={flag2_obj.value}",
)
else:
results.add(
"Update Flags - set_all_update_flags Isolation",
False,
f"Flags not isolated: ws1={flag1_obj.value}, ws2={flag2_obj.value}",
)
# Test 8.2: clear_all_update_flags isolation # Test 8.2: clear_all_update_flags isolation
print("\nTest 8.2: clear_all_update_flags workspace isolation") print("\nTest 8.2: clear_all_update_flags workspace isolation")
@ -728,26 +496,18 @@ async def test_update_flags_workspace_isolation():
await set_all_update_flags(test_namespace, workspace=workspace2) await set_all_update_flags(test_namespace, workspace=workspace2)
# Verify both are set # Verify both are set
both_set = flag1_obj.value is True and flag2_obj.value is True assert flag1_obj.value is True, "Flag1 should be True"
assert flag2_obj.value is True, "Flag2 should be True"
# Clear only workspace1 # Clear only workspace1
await clear_all_update_flags(test_namespace, workspace=workspace1) await clear_all_update_flags(test_namespace, workspace=workspace1)
# Check that only workspace1's flags are cleared # Check that only workspace1's flags are cleared
clear_flags_isolated = flag1_obj.value is False and flag2_obj.value is True assert flag1_obj.value is False, f"Flag1 should be False after clear, got {flag1_obj.value}"
assert flag2_obj.value is True, f"Flag2 should still be True, got {flag2_obj.value}"
if clear_flags_isolated: print(f"✅ PASSED: Update Flags - clear_all_update_flags Isolation")
results.add( print(f" clear_all_update_flags isolated: ws1={flag1_obj.value}, ws2={flag2_obj.value}")
"Update Flags - clear_all_update_flags Isolation",
True,
f"clear_all_update_flags isolated: ws1={flag1_obj.value}, ws2={flag2_obj.value}",
)
else:
results.add(
"Update Flags - clear_all_update_flags Isolation",
False,
f"Flags not isolated: ws1={flag1_obj.value}, ws2={flag2_obj.value}",
)
# Test 8.3: get_all_update_flags_status workspace filtering # Test 8.3: get_all_update_flags_status workspace filtering
print("\nTest 8.3: get_all_update_flags_status workspace filtering") print("\nTest 8.3: get_all_update_flags_status workspace filtering")
@ -772,28 +532,11 @@ async def test_update_flags_workspace_isolation():
workspace1_keys = [k for k in status1.keys() if workspace1 in k] workspace1_keys = [k for k in status1.keys() if workspace1 in k]
workspace2_keys = [k for k in status1.keys() if workspace2 in k] workspace2_keys = [k for k in status1.keys() if workspace2 in k]
status_filtered = len(workspace1_keys) > 0 and len(workspace2_keys) == 0 assert len(workspace1_keys) > 0, f"workspace1 keys should be present, got {len(workspace1_keys)}"
assert len(workspace2_keys) == 0, f"workspace2 keys should not be present, got {len(workspace2_keys)}"
if status_filtered: print(f"✅ PASSED: Update Flags - get_all_update_flags_status Filtering")
results.add( print(f" Status correctly filtered: ws1 keys={len(workspace1_keys)}, ws2 keys={len(workspace2_keys)}")
"Update Flags - get_all_update_flags_status Filtering",
True,
f"Status correctly filtered: ws1 keys={len(workspace1_keys)}, ws2 keys={len(workspace2_keys)}",
)
else:
results.add(
"Update Flags - get_all_update_flags_status Filtering",
False,
f"Status not filtered correctly: ws1 keys={len(workspace1_keys)}, ws2 keys={len(workspace2_keys)}",
)
return set_flags_isolated and clear_flags_isolated and status_filtered
except Exception as e:
results.add("Update Flags Workspace Isolation", False, f"Exception: {str(e)}")
import traceback
traceback.print_exc()
return False
# ============================================================================= # =============================================================================
@ -801,6 +544,7 @@ async def test_update_flags_workspace_isolation():
# ============================================================================= # =============================================================================
@pytest.mark.asyncio
async def test_empty_workspace_standardization(): async def test_empty_workspace_standardization():
""" """
Test that empty workspace is properly standardized to "" instead of "_". Test that empty workspace is properly standardized to "" instead of "_".
@ -809,7 +553,6 @@ async def test_empty_workspace_standardization():
print("TEST 9: Empty Workspace Standardization") print("TEST 9: Empty Workspace Standardization")
print("=" * 60) print("=" * 60)
try:
# Test 9.1: Empty string workspace creates namespace without colon # Test 9.1: Empty string workspace creates namespace without colon
print("\nTest 9.1: Empty string workspace namespace format") print("\nTest 9.1: Empty string workspace namespace format")
@ -817,20 +560,10 @@ async def test_empty_workspace_standardization():
final_ns = get_final_namespace("test_namespace", workspace=None) final_ns = get_final_namespace("test_namespace", workspace=None)
# Should be just "test_namespace" without colon prefix # Should be just "test_namespace" without colon prefix
empty_ws_ok = final_ns == "test_namespace" assert final_ns == "test_namespace", f"Unexpected namespace format: '{final_ns}' (expected 'test_namespace')"
if empty_ws_ok: print(f"✅ PASSED: Empty Workspace Standardization - Format")
results.add( print(f" Empty workspace creates correct namespace: '{final_ns}'")
"Empty Workspace Standardization - Format",
True,
f"Empty workspace creates correct namespace: '{final_ns}'",
)
else:
results.add(
"Empty Workspace Standardization - Format",
False,
f"Unexpected namespace format: '{final_ns}' (expected 'test_namespace')",
)
# Test 9.2: Empty workspace vs non-empty workspace behavior # Test 9.2: Empty workspace vs non-empty workspace behavior
print("\nTest 9.2: Empty vs non-empty workspace behavior") print("\nTest 9.2: Empty vs non-empty workspace behavior")
@ -846,28 +579,10 @@ async def test_empty_workspace_standardization():
data_nonempty = await get_namespace_data("pipeline_status", workspace="test_ws") data_nonempty = await get_namespace_data("pipeline_status", workspace="test_ws")
# They should be different objects # They should be different objects
behavior_ok = data_empty is not data_nonempty assert data_empty is not data_nonempty, "Empty and non-empty workspaces share data (should be independent)"
if behavior_ok: print(f"✅ PASSED: Empty Workspace Standardization - Behavior")
results.add( print(f" Empty and non-empty workspaces have independent data")
"Empty Workspace Standardization - Behavior",
True,
"Empty and non-empty workspaces have independent data",
)
else:
results.add(
"Empty Workspace Standardization - Behavior",
False,
"Empty and non-empty workspaces share data (should be independent)",
)
return empty_ws_ok and behavior_ok
except Exception as e:
results.add("Empty Workspace Standardization", False, f"Exception: {str(e)}")
import traceback
traceback.print_exc()
return False
# ============================================================================= # =============================================================================
@ -875,6 +590,7 @@ async def test_empty_workspace_standardization():
# ============================================================================= # =============================================================================
@pytest.mark.asyncio
async def test_json_kv_storage_workspace_isolation(): async def test_json_kv_storage_workspace_isolation():
""" """
Integration test: Verify JsonKVStorage properly isolates data between workspaces. Integration test: Verify JsonKVStorage properly isolates data between workspaces.
@ -962,31 +678,19 @@ async def test_json_kv_storage_workspace_isolation():
print(f" Storage2 entity2: {result2_entity2}") print(f" Storage2 entity2: {result2_entity2}")
# Verify isolation (get_by_id returns dict) # Verify isolation (get_by_id returns dict)
isolated = ( assert result1_entity1 is not None, "Storage1 entity1 should not be None"
result1_entity1 is not None assert result1_entity2 is not None, "Storage1 entity2 should not be None"
and result1_entity2 is not None assert result2_entity1 is not None, "Storage2 entity1 should not be None"
and result2_entity1 is not None assert result2_entity2 is not None, "Storage2 entity2 should not be None"
and result2_entity2 is not None assert result1_entity1.get("content") == "Data from workspace1 - AI Research", f"Storage1 entity1 content mismatch"
and result1_entity1.get("content") == "Data from workspace1 - AI Research" assert result1_entity2.get("content") == "Data from workspace1 - Machine Learning", f"Storage1 entity2 content mismatch"
and result1_entity2.get("content") == "Data from workspace1 - Machine Learning" assert result2_entity1.get("content") == "Data from workspace2 - Deep Learning", f"Storage2 entity1 content mismatch"
and result2_entity1.get("content") == "Data from workspace2 - Deep Learning" assert result2_entity2.get("content") == "Data from workspace2 - Neural Networks", f"Storage2 entity2 content mismatch"
and result2_entity2.get("content") == "Data from workspace2 - Neural Networks" assert result1_entity1.get("content") != result2_entity1.get("content"), "Storage1 and Storage2 entity1 should have different content"
and result1_entity1.get("content") != result2_entity1.get("content") assert result1_entity2.get("content") != result2_entity2.get("content"), "Storage1 and Storage2 entity2 should have different content"
and result1_entity2.get("content") != result2_entity2.get("content")
)
if isolated: print(f"✅ PASSED: JsonKVStorage - Data Isolation")
results.add( print(f" Two storage instances correctly isolated: ws1 and ws2 have different data")
"JsonKVStorage - Data Isolation",
True,
f"Two storage instances correctly isolated: ws1 and ws2 have different data",
)
else:
results.add(
"JsonKVStorage - Data Isolation",
False,
f"Data not properly isolated between workspaces",
)
# Test 10.4: Verify file structure # Test 10.4: Verify file structure
print("\nTest 10.4: Verify file structure") print("\nTest 10.4: Verify file structure")
@ -999,28 +703,12 @@ async def test_json_kv_storage_workspace_isolation():
print(f" workspace1 directory exists: {ws1_exists}") print(f" workspace1 directory exists: {ws1_exists}")
print(f" workspace2 directory exists: {ws2_exists}") print(f" workspace2 directory exists: {ws2_exists}")
if ws1_exists and ws2_exists: assert ws1_exists, "workspace1 directory should exist"
results.add( assert ws2_exists, "workspace2 directory should exist"
"JsonKVStorage - File Structure",
True,
f"Workspace directories correctly created: {ws1_dir} and {ws2_dir}",
)
file_structure_ok = True
else:
results.add(
"JsonKVStorage - File Structure",
False,
f"Workspace directories not created properly",
)
file_structure_ok = False
return isolated and file_structure_ok print(f"✅ PASSED: JsonKVStorage - File Structure")
print(f" Workspace directories correctly created: {ws1_dir} and {ws2_dir}")
except Exception as e:
results.add("JsonKVStorage Workspace Isolation", False, f"Exception: {str(e)}")
import traceback
traceback.print_exc()
return False
finally: finally:
# Cleanup test directory # Cleanup test directory
if os.path.exists(test_dir): if os.path.exists(test_dir):
@ -1033,6 +721,7 @@ async def test_json_kv_storage_workspace_isolation():
# ============================================================================= # =============================================================================
@pytest.mark.asyncio
async def test_lightrag_end_to_end_workspace_isolation(): async def test_lightrag_end_to_end_workspace_isolation():
""" """
End-to-end test: Create two LightRAG instances with different workspaces, End-to-end test: Create two LightRAG instances with different workspaces,
@ -1052,8 +741,13 @@ async def test_lightrag_end_to_end_workspace_isolation():
async def mock_llm_func( async def mock_llm_func(
prompt, system_prompt=None, history_messages=[], **kwargs prompt, system_prompt=None, history_messages=[], **kwargs
) -> str: ) -> str:
# Return a mock response that simulates entity extraction # Return a mock response that simulates entity extraction in the correct format
return """{"entities": [{"name": "Test Entity", "type": "Concept"}], "relationships": []}""" # Format: entity<|#|>entity_name<|#|>entity_type<|#|>entity_description
# Format: relation<|#|>source_entity<|#|>target_entity<|#|>keywords<|#|>description
return """entity<|#|>Artificial Intelligence<|#|>concept<|#|>AI is a field of computer science focused on creating intelligent machines.
entity<|#|>Machine Learning<|#|>concept<|#|>Machine Learning is a subset of AI that enables systems to learn from data.
relation<|#|>Machine Learning<|#|>Artificial Intelligence<|#|>subset, related field<|#|>Machine Learning is a key component and subset of Artificial Intelligence.
<|COMPLETE|>"""
# Mock embedding function # Mock embedding function
async def mock_embedding_func(texts: list[str]) -> np.ndarray: async def mock_embedding_func(texts: list[str]) -> np.ndarray:
@ -1122,7 +816,9 @@ async def test_lightrag_end_to_end_workspace_isolation():
print(f" project_b directory: {project_b_dir}") print(f" project_b directory: {project_b_dir}")
print(f" project_b exists: {project_b_exists}") print(f" project_b exists: {project_b_exists}")
if project_a_exists and project_b_exists: assert project_a_exists, "project_a directory should exist"
assert project_b_exists, "project_b directory should exist"
# List files in each directory # List files in each directory
print(f"\n Files in project_a/:") print(f"\n Files in project_a/:")
for file in sorted(project_a_dir.glob("*")): for file in sorted(project_a_dir.glob("*")):
@ -1136,19 +832,8 @@ async def test_lightrag_end_to_end_workspace_isolation():
size = file.stat().st_size size = file.stat().st_size
print(f" - {file.name} ({size} bytes)") print(f" - {file.name} ({size} bytes)")
results.add( print(f"✅ PASSED: LightRAG E2E - File Structure")
"LightRAG E2E - File Structure", print(f" Workspace directories correctly created and separated")
True,
f"Workspace directories correctly created and separated",
)
structure_ok = True
else:
results.add(
"LightRAG E2E - File Structure",
False,
f"Workspace directories not created properly",
)
structure_ok = False
# Test 11.4: Verify data isolation by checking file contents # Test 11.4: Verify data isolation by checking file contents
print("\nTest 11.4: Verify data isolation (check file contents)") print("\nTest 11.4: Verify data isolation (check file contents)")
@ -1170,93 +855,37 @@ async def test_lightrag_end_to_end_workspace_isolation():
print(f" project_b doc count: {len(docs_b_content)}") print(f" project_b doc count: {len(docs_b_content)}")
# Verify they contain different data # Verify they contain different data
docs_isolated = docs_a_content != docs_b_content assert docs_a_content != docs_b_content, "Document storage not properly isolated"
if docs_isolated: # Verify each workspace contains its own text content
results.add( docs_a_str = json.dumps(docs_a_content)
"LightRAG E2E - Data Isolation", docs_b_str = json.dumps(docs_b_content)
True,
"Document storage correctly isolated between workspaces",
)
else:
results.add(
"LightRAG E2E - Data Isolation",
False,
"Document storage not properly isolated",
)
data_ok = docs_isolated # Check project_a contains its text and NOT project_b's text
assert "Artificial Intelligence" in docs_a_str, "project_a should contain 'Artificial Intelligence'"
assert "Machine Learning" in docs_a_str, "project_a should contain 'Machine Learning'"
assert "Deep Learning" not in docs_a_str, "project_a should NOT contain 'Deep Learning' from project_b"
assert "Neural Networks" not in docs_a_str, "project_a should NOT contain 'Neural Networks' from project_b"
# Check project_b contains its text and NOT project_a's text
assert "Deep Learning" in docs_b_str, "project_b should contain 'Deep Learning'"
assert "Neural Networks" in docs_b_str, "project_b should contain 'Neural Networks'"
assert "Artificial Intelligence" not in docs_b_str, "project_b should NOT contain 'Artificial Intelligence' from project_a"
# Note: "Machine Learning" might appear in project_b's text, so we skip that check
print(f"✅ PASSED: LightRAG E2E - Data Isolation")
print(f" Document storage correctly isolated between workspaces")
print(f" project_a contains only its own data")
print(f" project_b contains only its own data")
else: else:
print(f" Document storage files not found (may not be created yet)") print(f" Document storage files not found (may not be created yet)")
results.add( print(f"✅ PASSED: LightRAG E2E - Data Isolation")
"LightRAG E2E - Data Isolation", print(f" Skipped file content check (files not created)")
True,
"Skipped file content check (files not created)",
)
data_ok = True
print(f"\n ✓ Test complete - workspace isolation verified at E2E level") print(f"\n ✓ Test complete - workspace isolation verified at E2E level")
return structure_ok and data_ok
except Exception as e:
results.add("LightRAG E2E Workspace Isolation", False, f"Exception: {str(e)}")
import traceback
traceback.print_exc()
return False
finally: finally:
# Cleanup test directory # Cleanup test directory
if os.path.exists(test_dir): if os.path.exists(test_dir):
shutil.rmtree(test_dir) shutil.rmtree(test_dir)
print(f"\n Cleaned up test directory: {test_dir}") print(f"\n Cleaned up test directory: {test_dir}")
# =============================================================================
# Main Test Runner
# =============================================================================
async def main():
    """Run the complete workspace-isolation test suite.

    Executes every test in priority order, prints a summary via the shared
    ``results`` tracker, and returns a process exit code (0 = all passed,
    1 = at least one failure).
    """
    # Banner (decorative strings kept verbatim).
    print("\n")
    print("" + "" * 58 + "")
    print("" + " " * 10 + "Workspace Isolation Test Suite" + " " * 18 + "")
    print("" + " " * 15 + "PR #2366 - Complete Coverage" + " " * 15 + "")
    print("" + "" * 58 + "")

    # Unit-level tests, in priority order: core PR requirements (Tests 1-4)
    # followed by the additional comprehensive tests (Tests 5-9).
    unit_tests = (
        test_pipeline_status_isolation,
        test_lock_mechanism,
        test_backward_compatibility,
        test_multi_workspace_concurrency,
        test_namespace_lock_reentrance,
        test_different_namespace_lock_isolation,
        test_error_handling,
        test_update_flags_workspace_isolation,
        test_empty_workspace_standardization,
    )
    for run_test in unit_tests:
        await run_test()

    # Integration and E2E tests (Tests 10-11) get their own section header.
    print("\n" + "=" * 60)
    print("INTEGRATION & END-TO-END TESTS")
    print("=" * 60)
    await test_json_kv_storage_workspace_isolation()
    await test_lightrag_end_to_end_workspace_isolation()

    # Summarize and translate the overall outcome into an exit code.
    if results.summary():
        print("\n🎉 All tests passed! The workspace isolation feature is working correctly.")
        print(" Coverage: 100% - Unit, Integration, and E2E validated")
        return 0
    print("\n⚠️ Some tests failed. Please review the results above.")
    return 1
if __name__ == "__main__":
    # Run the async suite and propagate its result as the process exit code.
    # Use SystemExit directly instead of the builtin exit(): exit() is
    # injected by the `site` module and is not guaranteed to be available
    # (e.g. when Python is started with -S); raising SystemExit is the
    # portable equivalent and produces the same exit status.
    raise SystemExit(asyncio.run(main()))