test: Enhance E2E workspace isolation detection with content verification

Add specific content assertions to detect cross-contamination between workspaces.
Previously only checked that workspaces had different data, now verifies:

- Each workspace contains only its own text content
- Each workspace does NOT contain the other workspace's content
- Cross-contamination would be immediately detected

This ensures the test can find problems, not just pass.

Changes:
- Add assertions for "Artificial Intelligence" and "Machine Learning" in project_a
- Add assertions for "Deep Learning" and "Neural Networks" in project_b
- Add negative assertions to verify data leakage doesn't occur
- Add detailed output messages showing what was verified

Testing:
- pytest tests/test_workspace_isolation.py::test_lightrag_end_to_end_workspace_isolation
- Test passes with proper content isolation verified

(cherry picked from commit 3ec736932e)
This commit is contained in:
BukeLy 2025-11-17 18:55:45 +08:00 committed by Raphaël MANSUY
parent f2771cc953
commit f1fa1cd340

View file

@ -15,6 +15,7 @@ import os
import shutil import shutil
import tempfile import tempfile
import numpy as np import numpy as np
import pytest
from pathlib import Path from pathlib import Path
from lightrag.kg.shared_storage import ( from lightrag.kg.shared_storage import (
get_final_namespace, get_final_namespace,
@ -32,37 +33,17 @@ from lightrag.kg.shared_storage import (
from lightrag.kg.json_kv_impl import JsonKVStorage from lightrag.kg.json_kv_impl import JsonKVStorage
class TestResults: # =============================================================================
"""Track test results""" # Pytest Fixtures
# =============================================================================
def __init__(self):
self.results = []
def add(self, test_name, passed, message=""):
self.results.append({"name": test_name, "passed": passed, "message": message})
status = "✅ PASSED" if passed else "❌ FAILED"
print(f"\n{status}: {test_name}")
if message:
print(f" {message}")
def summary(self):
print("\n" + "=" * 60)
print("TEST SUMMARY")
print("=" * 60)
passed = sum(1 for r in self.results if r["passed"])
total = len(self.results)
print(f"Passed: {passed}/{total}")
print()
for r in self.results:
            status = "✅" if r["passed"] else "❌"
print(f"{status} {r['name']}")
if r["message"]:
print(f" {r['message']}")
print("=" * 60)
return passed == total
results = TestResults() @pytest.fixture(autouse=True)
def setup_shared_data():
"""Initialize shared data before each test"""
initialize_share_data()
yield
# Cleanup after test if needed
# ============================================================================= # =============================================================================
@ -70,6 +51,7 @@ results = TestResults()
# ============================================================================= # =============================================================================
@pytest.mark.asyncio
async def test_pipeline_status_isolation(): async def test_pipeline_status_isolation():
""" """
Test that pipeline status is isolated between different workspaces. Test that pipeline status is isolated between different workspaces.
@ -78,7 +60,6 @@ async def test_pipeline_status_isolation():
print("TEST 1: Pipeline Status Isolation") print("TEST 1: Pipeline Status Isolation")
print("=" * 60) print("=" * 60)
try:
# Initialize shared storage # Initialize shared storage
initialize_share_data() initialize_share_data()
@ -94,13 +75,7 @@ async def test_pipeline_status_isolation():
data2 = await get_namespace_data("pipeline_status", workspace=workspace2) data2 = await get_namespace_data("pipeline_status", workspace=workspace2)
# Verify they are independent objects # Verify they are independent objects
if data1 is data2: assert data1 is not data2, "Pipeline status data objects are the same (should be different)"
results.add(
"Pipeline Status Isolation",
False,
"Pipeline status data objects are the same (should be different)",
)
return False
# Modify workspace1's data and verify workspace2 is not affected # Modify workspace1's data and verify workspace2 is not affected
data1["test_key"] = "workspace1_value" data1["test_key"] = "workspace1_value"
@ -109,31 +84,12 @@ async def test_pipeline_status_isolation():
data1_check = await get_namespace_data("pipeline_status", workspace=workspace1) data1_check = await get_namespace_data("pipeline_status", workspace=workspace1)
data2_check = await get_namespace_data("pipeline_status", workspace=workspace2) data2_check = await get_namespace_data("pipeline_status", workspace=workspace2)
if ( assert "test_key" in data1_check, "test_key not found in workspace1"
"test_key" in data1_check assert data1_check["test_key"] == "workspace1_value", f"workspace1 test_key value incorrect: {data1_check.get('test_key')}"
and data1_check["test_key"] == "workspace1_value" assert "test_key" not in data2_check, f"test_key leaked to workspace2: {data2_check.get('test_key')}"
and "test_key" not in data2_check
):
results.add(
"Pipeline Status Isolation",
True,
"Different workspaces have isolated pipeline status",
)
return True
else:
results.add(
"Pipeline Status Isolation",
False,
f"Pipeline status not properly isolated: ws1={data1_check.get('test_key')}, ws2={data2_check.get('test_key')}",
)
return False
except Exception as e: print("✅ PASSED: Pipeline Status Isolation")
results.add("Pipeline Status Isolation", False, f"Exception: {str(e)}") print(" Different workspaces have isolated pipeline status")
import traceback
traceback.print_exc()
return False
# ============================================================================= # =============================================================================
@ -141,6 +97,7 @@ async def test_pipeline_status_isolation():
# ============================================================================= # =============================================================================
@pytest.mark.asyncio
async def test_lock_mechanism(): async def test_lock_mechanism():
""" """
Test that the new keyed lock mechanism works correctly without deadlocks. Test that the new keyed lock mechanism works correctly without deadlocks.
@ -151,7 +108,6 @@ async def test_lock_mechanism():
print("TEST 2: Lock Mechanism (No Deadlocks)") print("TEST 2: Lock Mechanism (No Deadlocks)")
print("=" * 60) print("=" * 60)
try:
# Test 2.1: Different workspaces should run in parallel # Test 2.1: Different workspaces should run in parallel
print("\nTest 2.1: Different workspaces locks should be parallel") print("\nTest 2.1: Different workspaces locks should be parallel")
@ -178,20 +134,10 @@ async def test_lock_mechanism():
# If locks are properly isolated by workspace, this should take ~0.5s (parallel) # If locks are properly isolated by workspace, this should take ~0.5s (parallel)
# If they block each other, it would take ~1.5s (serial) # If they block each other, it would take ~1.5s (serial)
parallel_ok = elapsed < 1.0 assert elapsed < 1.0, f"Locks blocked each other: {elapsed:.2f}s (expected < 1.0s)"
if parallel_ok: print(f"✅ PASSED: Lock Mechanism - Parallel (Different Workspaces)")
results.add( print(f" Locks ran in parallel: {elapsed:.2f}s")
"Lock Mechanism - Parallel (Different Workspaces)",
True,
f"Locks ran in parallel: {elapsed:.2f}s",
)
else:
results.add(
"Lock Mechanism - Parallel (Different Workspaces)",
False,
f"Locks blocked each other: {elapsed:.2f}s (expected < 1.0s)",
)
# Test 2.2: Same workspace should serialize # Test 2.2: Same workspace should serialize
print("\nTest 2.2: Same workspace locks should serialize") print("\nTest 2.2: Same workspace locks should serialize")
@ -204,29 +150,10 @@ async def test_lock_mechanism():
elapsed = time.time() - start elapsed = time.time() - start
# Same workspace should serialize, taking ~0.6s # Same workspace should serialize, taking ~0.6s
serial_ok = elapsed >= 0.5 assert elapsed >= 0.5, f"Locks didn't serialize: {elapsed:.2f}s (expected >= 0.5s)"
if serial_ok: print(f"✅ PASSED: Lock Mechanism - Serial (Same Workspace)")
results.add( print(f" Locks serialized correctly: {elapsed:.2f}s")
"Lock Mechanism - Serial (Same Workspace)",
True,
f"Locks serialized correctly: {elapsed:.2f}s",
)
else:
results.add(
"Lock Mechanism - Serial (Same Workspace)",
False,
f"Locks didn't serialize: {elapsed:.2f}s (expected >= 0.5s)",
)
return parallel_ok and serial_ok
except Exception as e:
results.add("Lock Mechanism", False, f"Exception: {str(e)}")
import traceback
traceback.print_exc()
return False
# ============================================================================= # =============================================================================
@ -234,6 +161,7 @@ async def test_lock_mechanism():
# ============================================================================= # =============================================================================
@pytest.mark.asyncio
async def test_backward_compatibility(): async def test_backward_compatibility():
""" """
Test that legacy code without workspace parameter still works correctly. Test that legacy code without workspace parameter still works correctly.
@ -242,7 +170,6 @@ async def test_backward_compatibility():
print("TEST 3: Backward Compatibility") print("TEST 3: Backward Compatibility")
print("=" * 60) print("=" * 60)
try:
# Test 3.1: get_final_namespace with None should use default workspace # Test 3.1: get_final_namespace with None should use default workspace
print("\nTest 3.1: get_final_namespace with workspace=None") print("\nTest 3.1: get_final_namespace with workspace=None")
@ -250,20 +177,10 @@ async def test_backward_compatibility():
final_ns = get_final_namespace("pipeline_status", workspace=None) final_ns = get_final_namespace("pipeline_status", workspace=None)
expected = "my_default_workspace:pipeline_status" expected = "my_default_workspace:pipeline_status"
if final_ns == expected: assert final_ns == expected, f"Expected {expected}, got {final_ns}"
results.add(
"Backward Compatibility - get_final_namespace", print(f"✅ PASSED: Backward Compatibility - get_final_namespace")
True, print(f" Correctly uses default workspace: {final_ns}")
f"Correctly uses default workspace: {final_ns}",
)
compat_1_ok = True
else:
results.add(
"Backward Compatibility - get_final_namespace",
False,
f"Expected {expected}, got {final_ns}",
)
compat_1_ok = False
# Test 3.2: get_default_workspace # Test 3.2: get_default_workspace
print("\nTest 3.2: get/set default workspace") print("\nTest 3.2: get/set default workspace")
@ -271,20 +188,10 @@ async def test_backward_compatibility():
set_default_workspace("test_default") set_default_workspace("test_default")
retrieved = get_default_workspace() retrieved = get_default_workspace()
if retrieved == "test_default": assert retrieved == "test_default", f"Expected 'test_default', got {retrieved}"
results.add(
"Backward Compatibility - default workspace", print(f"✅ PASSED: Backward Compatibility - default workspace")
True, print(f" Default workspace set/get correctly: {retrieved}")
f"Default workspace set/get correctly: {retrieved}",
)
compat_2_ok = True
else:
results.add(
"Backward Compatibility - default workspace",
False,
f"Expected 'test_default', got {retrieved}",
)
compat_2_ok = False
# Test 3.3: Empty workspace handling # Test 3.3: Empty workspace handling
print("\nTest 3.3: Empty workspace handling") print("\nTest 3.3: Empty workspace handling")
@ -293,20 +200,10 @@ async def test_backward_compatibility():
final_ns_empty = get_final_namespace("pipeline_status", workspace=None) final_ns_empty = get_final_namespace("pipeline_status", workspace=None)
expected_empty = "pipeline_status" # Should be just the namespace without ':' expected_empty = "pipeline_status" # Should be just the namespace without ':'
if final_ns_empty == expected_empty: assert final_ns_empty == expected_empty, f"Expected '{expected_empty}', got '{final_ns_empty}'"
results.add(
"Backward Compatibility - empty workspace", print(f"✅ PASSED: Backward Compatibility - empty workspace")
True, print(f" Empty workspace handled correctly: '{final_ns_empty}'")
f"Empty workspace handled correctly: '{final_ns_empty}'",
)
compat_3_ok = True
else:
results.add(
"Backward Compatibility - empty workspace",
False,
f"Expected '{expected_empty}', got '{final_ns_empty}'",
)
compat_3_ok = False
# Test 3.4: None workspace with default set # Test 3.4: None workspace with default set
print("\nTest 3.4: initialize_pipeline_status with workspace=None") print("\nTest 3.4: initialize_pipeline_status with workspace=None")
@ -319,29 +216,10 @@ async def test_backward_compatibility():
"pipeline_status", workspace="compat_test_workspace" "pipeline_status", workspace="compat_test_workspace"
) )
if data is not None: assert data is not None, "Failed to initialize pipeline status with default workspace"
results.add(
"Backward Compatibility - pipeline init with None",
True,
"Pipeline status initialized with default workspace",
)
compat_4_ok = True
else:
results.add(
"Backward Compatibility - pipeline init with None",
False,
"Failed to initialize pipeline status with default workspace",
)
compat_4_ok = False
return compat_1_ok and compat_2_ok and compat_3_ok and compat_4_ok print(f"✅ PASSED: Backward Compatibility - pipeline init with None")
print(f" Pipeline status initialized with default workspace")
except Exception as e:
results.add("Backward Compatibility", False, f"Exception: {str(e)}")
import traceback
traceback.print_exc()
return False
# ============================================================================= # =============================================================================
@ -349,6 +227,7 @@ async def test_backward_compatibility():
# ============================================================================= # =============================================================================
@pytest.mark.asyncio
async def test_multi_workspace_concurrency(): async def test_multi_workspace_concurrency():
""" """
Test that multiple workspaces can operate concurrently without interference. Test that multiple workspaces can operate concurrently without interference.
@ -358,7 +237,6 @@ async def test_multi_workspace_concurrency():
print("TEST 4: Multi-Workspace Concurrency") print("TEST 4: Multi-Workspace Concurrency")
print("=" * 60) print("=" * 60)
try:
initialize_share_data() initialize_share_data()
async def workspace_operations(workspace_id): async def workspace_operations(workspace_id):
@ -397,55 +275,25 @@ async def test_multi_workspace_concurrency():
print(f"\n All workspaces completed in {elapsed:.2f}s") print(f"\n All workspaces completed in {elapsed:.2f}s")
# Verify all workspaces completed # Verify all workspaces completed
if set(results_list) == set(workspaces): assert set(results_list) == set(workspaces), "Not all workspaces completed"
results.add(
"Multi-Workspace Concurrency - Execution", print(f"✅ PASSED: Multi-Workspace Concurrency - Execution")
True, print(f" All {len(workspaces)} workspaces completed successfully in {elapsed:.2f}s")
f"All {len(workspaces)} workspaces completed successfully in {elapsed:.2f}s",
)
exec_ok = True
else:
results.add(
"Multi-Workspace Concurrency - Execution",
False,
f"Not all workspaces completed",
)
exec_ok = False
# Verify data isolation - each workspace should have its own data # Verify data isolation - each workspace should have its own data
print("\n Verifying data isolation...") print("\n Verifying data isolation...")
isolation_ok = True
for ws in workspaces: for ws in workspaces:
data = await get_namespace_data("pipeline_status", workspace=ws) data = await get_namespace_data("pipeline_status", workspace=ws)
expected_key = f"{ws}_key" expected_key = f"{ws}_key"
expected_value = f"{ws}_value" expected_value = f"{ws}_value"
if expected_key not in data or data[expected_key] != expected_value: assert expected_key in data, f"Data not properly isolated for {ws}: missing {expected_key}"
results.add( assert data[expected_key] == expected_value, f"Data not properly isolated for {ws}: {expected_key}={data[expected_key]} (expected {expected_value})"
f"Multi-Workspace Concurrency - Data Isolation ({ws})",
False,
f"Data not properly isolated for {ws}",
)
isolation_ok = False
else:
print(f" [{ws}] Data correctly isolated: {expected_key}={data[expected_key]}") print(f" [{ws}] Data correctly isolated: {expected_key}={data[expected_key]}")
if isolation_ok: print(f"✅ PASSED: Multi-Workspace Concurrency - Data Isolation")
results.add( print(f" All workspaces have properly isolated data")
"Multi-Workspace Concurrency - Data Isolation",
True,
"All workspaces have properly isolated data",
)
return exec_ok and isolation_ok
except Exception as e:
results.add("Multi-Workspace Concurrency", False, f"Exception: {str(e)}")
import traceback
traceback.print_exc()
return False
# ============================================================================= # =============================================================================
@ -453,6 +301,7 @@ async def test_multi_workspace_concurrency():
# ============================================================================= # =============================================================================
@pytest.mark.asyncio
async def test_namespace_lock_reentrance(): async def test_namespace_lock_reentrance():
""" """
Test that NamespaceLock prevents re-entrance in the same coroutine Test that NamespaceLock prevents re-entrance in the same coroutine
@ -462,7 +311,6 @@ async def test_namespace_lock_reentrance():
print("TEST 5: NamespaceLock Re-entrance Protection") print("TEST 5: NamespaceLock Re-entrance Protection")
print("=" * 60) print("=" * 60)
try:
# Test 5.1: Same coroutine re-entrance should fail # Test 5.1: Same coroutine re-entrance should fail
print("\nTest 5.1: Same coroutine re-entrance should raise RuntimeError") print("\nTest 5.1: Same coroutine re-entrance should raise RuntimeError")
@ -480,20 +328,12 @@ async def test_namespace_lock_reentrance():
print(f" ✓ Re-entrance correctly blocked: {e}") print(f" ✓ Re-entrance correctly blocked: {e}")
reentrance_failed_correctly = True reentrance_failed_correctly = True
else: else:
print(f" ✗ Unexpected RuntimeError: {e}") raise
if reentrance_failed_correctly: assert reentrance_failed_correctly, "Re-entrance protection not working"
results.add(
"NamespaceLock Re-entrance Protection", print(f"✅ PASSED: NamespaceLock Re-entrance Protection")
True, print(f" Re-entrance correctly raises RuntimeError")
"Re-entrance correctly raises RuntimeError",
)
else:
results.add(
"NamespaceLock Re-entrance Protection",
False,
"Re-entrance protection not working",
)
# Test 5.2: Same NamespaceLock instance in different coroutines should succeed # Test 5.2: Same NamespaceLock instance in different coroutines should succeed
print("\nTest 5.2: Same NamespaceLock instance in different coroutines") print("\nTest 5.2: Same NamespaceLock instance in different coroutines")
@ -516,28 +356,10 @@ async def test_namespace_lock_reentrance():
# Both coroutines should have completed # Both coroutines should have completed
expected_entries = 4 # 2 starts + 2 ends expected_entries = 4 # 2 starts + 2 ends
if len(concurrent_results) == expected_entries: assert len(concurrent_results) == expected_entries, f"Expected {expected_entries} entries, got {len(concurrent_results)}"
results.add(
"NamespaceLock Concurrent Reuse",
True,
f"Same NamespaceLock instance used successfully in {expected_entries//2} concurrent coroutines",
)
concurrent_ok = True
else:
results.add(
"NamespaceLock Concurrent Reuse",
False,
f"Expected {expected_entries} entries, got {len(concurrent_results)}",
)
concurrent_ok = False
return reentrance_failed_correctly and concurrent_ok print(f"✅ PASSED: NamespaceLock Concurrent Reuse")
print(f" Same NamespaceLock instance used successfully in {expected_entries//2} concurrent coroutines")
except Exception as e:
results.add("NamespaceLock Re-entrance Protection", False, f"Exception: {str(e)}")
import traceback
traceback.print_exc()
return False
# ============================================================================= # =============================================================================
@ -545,6 +367,7 @@ async def test_namespace_lock_reentrance():
# ============================================================================= # =============================================================================
@pytest.mark.asyncio
async def test_different_namespace_lock_isolation(): async def test_different_namespace_lock_isolation():
""" """
Test that locks for different namespaces (same workspace) are independent. Test that locks for different namespaces (same workspace) are independent.
@ -553,7 +376,6 @@ async def test_different_namespace_lock_isolation():
print("TEST 6: Different Namespace Lock Isolation") print("TEST 6: Different Namespace Lock Isolation")
print("=" * 60) print("=" * 60)
try:
print("\nTesting locks with same workspace but different namespaces") print("\nTesting locks with same workspace but different namespaces")
async def acquire_lock_timed(workspace, namespace, hold_time, name): async def acquire_lock_timed(workspace, namespace, hold_time, name):
@ -575,28 +397,10 @@ async def test_different_namespace_lock_isolation():
elapsed = time.time() - start elapsed = time.time() - start
# If locks are properly isolated by namespace, this should take ~0.5s (parallel) # If locks are properly isolated by namespace, this should take ~0.5s (parallel)
namespace_isolation_ok = elapsed < 1.0 assert elapsed < 1.0, f"Different namespace locks blocked each other: {elapsed:.2f}s (expected < 1.0s)"
if namespace_isolation_ok: print(f"✅ PASSED: Different Namespace Lock Isolation")
results.add( print(f" Different namespace locks ran in parallel: {elapsed:.2f}s")
"Different Namespace Lock Isolation",
True,
f"Different namespace locks ran in parallel: {elapsed:.2f}s",
)
else:
results.add(
"Different Namespace Lock Isolation",
False,
f"Different namespace locks blocked each other: {elapsed:.2f}s (expected < 1.0s)",
)
return namespace_isolation_ok
except Exception as e:
results.add("Different Namespace Lock Isolation", False, f"Exception: {str(e)}")
import traceback
traceback.print_exc()
return False
# ============================================================================= # =============================================================================
@ -604,6 +408,7 @@ async def test_different_namespace_lock_isolation():
# ============================================================================= # =============================================================================
@pytest.mark.asyncio
async def test_error_handling(): async def test_error_handling():
""" """
Test error handling for invalid workspace configurations. Test error handling for invalid workspace configurations.
@ -612,7 +417,6 @@ async def test_error_handling():
print("TEST 7: Error Handling") print("TEST 7: Error Handling")
print("=" * 60) print("=" * 60)
try:
# Test 7.1: set_default_workspace(None) converts to empty string # Test 7.1: set_default_workspace(None) converts to empty string
print("\nTest 7.1: set_default_workspace(None) converts to empty string") print("\nTest 7.1: set_default_workspace(None) converts to empty string")
@ -620,58 +424,31 @@ async def test_error_handling():
default_ws = get_default_workspace() default_ws = get_default_workspace()
# Should convert None to "" automatically # Should convert None to "" automatically
conversion_ok = default_ws == "" assert default_ws == "", f"Expected empty string, got: '{default_ws}'"
if conversion_ok: print(f"✅ PASSED: Error Handling - None to Empty String")
results.add( print(f" set_default_workspace(None) correctly converts to empty string: '{default_ws}'")
"Error Handling - None to Empty String",
True,
f"set_default_workspace(None) correctly converts to empty string: '{default_ws}'",
)
else:
results.add(
"Error Handling - None to Empty String",
False,
f"Expected empty string, got: '{default_ws}'",
)
# Test 7.2: Empty string workspace behavior # Test 7.2: Empty string workspace behavior
print("\nTest 7.2: Empty string workspace creates valid namespace") print("\nTest 7.2: Empty string workspace creates valid namespace")
# With empty workspace, should create namespace without colon # With empty workspace, should create namespace without colon
final_ns = get_final_namespace("test_namespace", workspace="") final_ns = get_final_namespace("test_namespace", workspace="")
namespace_ok = final_ns == "test_namespace" assert final_ns == "test_namespace", f"Unexpected namespace: '{final_ns}'"
if namespace_ok: print(f"✅ PASSED: Error Handling - Empty Workspace Namespace")
results.add( print(f" Empty workspace creates valid namespace: '{final_ns}'")
"Error Handling - Empty Workspace Namespace",
True,
f"Empty workspace creates valid namespace: '{final_ns}'",
)
else:
results.add(
"Error Handling - Empty Workspace Namespace",
False,
f"Unexpected namespace: '{final_ns}'",
)
# Restore default workspace for other tests # Restore default workspace for other tests
set_default_workspace("") set_default_workspace("")
return conversion_ok and namespace_ok
except Exception as e:
results.add("Error Handling", False, f"Exception: {str(e)}")
import traceback
traceback.print_exc()
return False
# ============================================================================= # =============================================================================
# Test 8: Update Flags Workspace Isolation # Test 8: Update Flags Workspace Isolation
# ============================================================================= # =============================================================================
@pytest.mark.asyncio
async def test_update_flags_workspace_isolation(): async def test_update_flags_workspace_isolation():
""" """
Test that update flags are properly isolated between workspaces. Test that update flags are properly isolated between workspaces.
@ -680,7 +457,6 @@ async def test_update_flags_workspace_isolation():
print("TEST 8: Update Flags Workspace Isolation") print("TEST 8: Update Flags Workspace Isolation")
print("=" * 60) print("=" * 60)
try:
initialize_share_data() initialize_share_data()
workspace1 = "update_flags_ws1" workspace1 = "update_flags_ws1"
@ -699,26 +475,18 @@ async def test_update_flags_workspace_isolation():
flag2_obj = await get_update_flag(test_namespace, workspace=workspace2) flag2_obj = await get_update_flag(test_namespace, workspace=workspace2)
# Initial state should be False # Initial state should be False
initial_ok = flag1_obj.value is False and flag2_obj.value is False assert flag1_obj.value is False, "Flag1 initial value should be False"
assert flag2_obj.value is False, "Flag2 initial value should be False"
# Set all flags for workspace1 # Set all flags for workspace1
await set_all_update_flags(test_namespace, workspace=workspace1) await set_all_update_flags(test_namespace, workspace=workspace1)
# Check that only workspace1's flags are set # Check that only workspace1's flags are set
set_flags_isolated = flag1_obj.value is True and flag2_obj.value is False assert flag1_obj.value is True, f"Flag1 should be True after set_all_update_flags, got {flag1_obj.value}"
assert flag2_obj.value is False, f"Flag2 should still be False, got {flag2_obj.value}"
if set_flags_isolated: print(f"✅ PASSED: Update Flags - set_all_update_flags Isolation")
results.add( print(f" set_all_update_flags isolated: ws1={flag1_obj.value}, ws2={flag2_obj.value}")
"Update Flags - set_all_update_flags Isolation",
True,
f"set_all_update_flags isolated: ws1={flag1_obj.value}, ws2={flag2_obj.value}",
)
else:
results.add(
"Update Flags - set_all_update_flags Isolation",
False,
f"Flags not isolated: ws1={flag1_obj.value}, ws2={flag2_obj.value}",
)
# Test 8.2: clear_all_update_flags isolation # Test 8.2: clear_all_update_flags isolation
print("\nTest 8.2: clear_all_update_flags workspace isolation") print("\nTest 8.2: clear_all_update_flags workspace isolation")
@ -728,26 +496,18 @@ async def test_update_flags_workspace_isolation():
await set_all_update_flags(test_namespace, workspace=workspace2) await set_all_update_flags(test_namespace, workspace=workspace2)
# Verify both are set # Verify both are set
both_set = flag1_obj.value is True and flag2_obj.value is True assert flag1_obj.value is True, "Flag1 should be True"
assert flag2_obj.value is True, "Flag2 should be True"
# Clear only workspace1 # Clear only workspace1
await clear_all_update_flags(test_namespace, workspace=workspace1) await clear_all_update_flags(test_namespace, workspace=workspace1)
# Check that only workspace1's flags are cleared # Check that only workspace1's flags are cleared
clear_flags_isolated = flag1_obj.value is False and flag2_obj.value is True assert flag1_obj.value is False, f"Flag1 should be False after clear, got {flag1_obj.value}"
assert flag2_obj.value is True, f"Flag2 should still be True, got {flag2_obj.value}"
if clear_flags_isolated: print(f"✅ PASSED: Update Flags - clear_all_update_flags Isolation")
results.add( print(f" clear_all_update_flags isolated: ws1={flag1_obj.value}, ws2={flag2_obj.value}")
"Update Flags - clear_all_update_flags Isolation",
True,
f"clear_all_update_flags isolated: ws1={flag1_obj.value}, ws2={flag2_obj.value}",
)
else:
results.add(
"Update Flags - clear_all_update_flags Isolation",
False,
f"Flags not isolated: ws1={flag1_obj.value}, ws2={flag2_obj.value}",
)
# Test 8.3: get_all_update_flags_status workspace filtering # Test 8.3: get_all_update_flags_status workspace filtering
print("\nTest 8.3: get_all_update_flags_status workspace filtering") print("\nTest 8.3: get_all_update_flags_status workspace filtering")
@ -772,28 +532,11 @@ async def test_update_flags_workspace_isolation():
workspace1_keys = [k for k in status1.keys() if workspace1 in k] workspace1_keys = [k for k in status1.keys() if workspace1 in k]
workspace2_keys = [k for k in status1.keys() if workspace2 in k] workspace2_keys = [k for k in status1.keys() if workspace2 in k]
status_filtered = len(workspace1_keys) > 0 and len(workspace2_keys) == 0 assert len(workspace1_keys) > 0, f"workspace1 keys should be present, got {len(workspace1_keys)}"
assert len(workspace2_keys) == 0, f"workspace2 keys should not be present, got {len(workspace2_keys)}"
if status_filtered: print(f"✅ PASSED: Update Flags - get_all_update_flags_status Filtering")
results.add( print(f" Status correctly filtered: ws1 keys={len(workspace1_keys)}, ws2 keys={len(workspace2_keys)}")
"Update Flags - get_all_update_flags_status Filtering",
True,
f"Status correctly filtered: ws1 keys={len(workspace1_keys)}, ws2 keys={len(workspace2_keys)}",
)
else:
results.add(
"Update Flags - get_all_update_flags_status Filtering",
False,
f"Status not filtered correctly: ws1 keys={len(workspace1_keys)}, ws2 keys={len(workspace2_keys)}",
)
return set_flags_isolated and clear_flags_isolated and status_filtered
except Exception as e:
results.add("Update Flags Workspace Isolation", False, f"Exception: {str(e)}")
import traceback
traceback.print_exc()
return False
# ============================================================================= # =============================================================================
@ -801,6 +544,7 @@ async def test_update_flags_workspace_isolation():
# ============================================================================= # =============================================================================
@pytest.mark.asyncio
async def test_empty_workspace_standardization(): async def test_empty_workspace_standardization():
""" """
Test that empty workspace is properly standardized to "" instead of "_". Test that empty workspace is properly standardized to "" instead of "_".
@ -809,7 +553,6 @@ async def test_empty_workspace_standardization():
print("TEST 9: Empty Workspace Standardization") print("TEST 9: Empty Workspace Standardization")
print("=" * 60) print("=" * 60)
try:
# Test 9.1: Empty string workspace creates namespace without colon # Test 9.1: Empty string workspace creates namespace without colon
print("\nTest 9.1: Empty string workspace namespace format") print("\nTest 9.1: Empty string workspace namespace format")
@ -817,20 +560,10 @@ async def test_empty_workspace_standardization():
final_ns = get_final_namespace("test_namespace", workspace=None) final_ns = get_final_namespace("test_namespace", workspace=None)
# Should be just "test_namespace" without colon prefix # Should be just "test_namespace" without colon prefix
empty_ws_ok = final_ns == "test_namespace" assert final_ns == "test_namespace", f"Unexpected namespace format: '{final_ns}' (expected 'test_namespace')"
if empty_ws_ok: print(f"✅ PASSED: Empty Workspace Standardization - Format")
results.add( print(f" Empty workspace creates correct namespace: '{final_ns}'")
"Empty Workspace Standardization - Format",
True,
f"Empty workspace creates correct namespace: '{final_ns}'",
)
else:
results.add(
"Empty Workspace Standardization - Format",
False,
f"Unexpected namespace format: '{final_ns}' (expected 'test_namespace')",
)
# Test 9.2: Empty workspace vs non-empty workspace behavior # Test 9.2: Empty workspace vs non-empty workspace behavior
print("\nTest 9.2: Empty vs non-empty workspace behavior") print("\nTest 9.2: Empty vs non-empty workspace behavior")
@ -846,28 +579,10 @@ async def test_empty_workspace_standardization():
data_nonempty = await get_namespace_data("pipeline_status", workspace="test_ws") data_nonempty = await get_namespace_data("pipeline_status", workspace="test_ws")
# They should be different objects # They should be different objects
behavior_ok = data_empty is not data_nonempty assert data_empty is not data_nonempty, "Empty and non-empty workspaces share data (should be independent)"
if behavior_ok: print(f"✅ PASSED: Empty Workspace Standardization - Behavior")
results.add( print(f" Empty and non-empty workspaces have independent data")
"Empty Workspace Standardization - Behavior",
True,
"Empty and non-empty workspaces have independent data",
)
else:
results.add(
"Empty Workspace Standardization - Behavior",
False,
"Empty and non-empty workspaces share data (should be independent)",
)
return empty_ws_ok and behavior_ok
except Exception as e:
results.add("Empty Workspace Standardization", False, f"Exception: {str(e)}")
import traceback
traceback.print_exc()
return False
# ============================================================================= # =============================================================================
@ -875,6 +590,7 @@ async def test_empty_workspace_standardization():
# ============================================================================= # =============================================================================
@pytest.mark.asyncio
async def test_json_kv_storage_workspace_isolation(): async def test_json_kv_storage_workspace_isolation():
""" """
Integration test: Verify JsonKVStorage properly isolates data between workspaces. Integration test: Verify JsonKVStorage properly isolates data between workspaces.
@ -962,31 +678,19 @@ async def test_json_kv_storage_workspace_isolation():
print(f" Storage2 entity2: {result2_entity2}") print(f" Storage2 entity2: {result2_entity2}")
# Verify isolation (get_by_id returns dict) # Verify isolation (get_by_id returns dict)
isolated = ( assert result1_entity1 is not None, "Storage1 entity1 should not be None"
result1_entity1 is not None assert result1_entity2 is not None, "Storage1 entity2 should not be None"
and result1_entity2 is not None assert result2_entity1 is not None, "Storage2 entity1 should not be None"
and result2_entity1 is not None assert result2_entity2 is not None, "Storage2 entity2 should not be None"
and result2_entity2 is not None assert result1_entity1.get("content") == "Data from workspace1 - AI Research", f"Storage1 entity1 content mismatch"
and result1_entity1.get("content") == "Data from workspace1 - AI Research" assert result1_entity2.get("content") == "Data from workspace1 - Machine Learning", f"Storage1 entity2 content mismatch"
and result1_entity2.get("content") == "Data from workspace1 - Machine Learning" assert result2_entity1.get("content") == "Data from workspace2 - Deep Learning", f"Storage2 entity1 content mismatch"
and result2_entity1.get("content") == "Data from workspace2 - Deep Learning" assert result2_entity2.get("content") == "Data from workspace2 - Neural Networks", f"Storage2 entity2 content mismatch"
and result2_entity2.get("content") == "Data from workspace2 - Neural Networks" assert result1_entity1.get("content") != result2_entity1.get("content"), "Storage1 and Storage2 entity1 should have different content"
and result1_entity1.get("content") != result2_entity1.get("content") assert result1_entity2.get("content") != result2_entity2.get("content"), "Storage1 and Storage2 entity2 should have different content"
and result1_entity2.get("content") != result2_entity2.get("content")
)
if isolated: print(f"✅ PASSED: JsonKVStorage - Data Isolation")
results.add( print(f" Two storage instances correctly isolated: ws1 and ws2 have different data")
"JsonKVStorage - Data Isolation",
True,
f"Two storage instances correctly isolated: ws1 and ws2 have different data",
)
else:
results.add(
"JsonKVStorage - Data Isolation",
False,
f"Data not properly isolated between workspaces",
)
# Test 10.4: Verify file structure # Test 10.4: Verify file structure
print("\nTest 10.4: Verify file structure") print("\nTest 10.4: Verify file structure")
@ -999,28 +703,12 @@ async def test_json_kv_storage_workspace_isolation():
print(f" workspace1 directory exists: {ws1_exists}") print(f" workspace1 directory exists: {ws1_exists}")
print(f" workspace2 directory exists: {ws2_exists}") print(f" workspace2 directory exists: {ws2_exists}")
if ws1_exists and ws2_exists: assert ws1_exists, "workspace1 directory should exist"
results.add( assert ws2_exists, "workspace2 directory should exist"
"JsonKVStorage - File Structure",
True,
f"Workspace directories correctly created: {ws1_dir} and {ws2_dir}",
)
file_structure_ok = True
else:
results.add(
"JsonKVStorage - File Structure",
False,
f"Workspace directories not created properly",
)
file_structure_ok = False
return isolated and file_structure_ok print(f"✅ PASSED: JsonKVStorage - File Structure")
print(f" Workspace directories correctly created: {ws1_dir} and {ws2_dir}")
except Exception as e:
results.add("JsonKVStorage Workspace Isolation", False, f"Exception: {str(e)}")
import traceback
traceback.print_exc()
return False
finally: finally:
# Cleanup test directory # Cleanup test directory
if os.path.exists(test_dir): if os.path.exists(test_dir):
@ -1033,6 +721,7 @@ async def test_json_kv_storage_workspace_isolation():
# ============================================================================= # =============================================================================
@pytest.mark.asyncio
async def test_lightrag_end_to_end_workspace_isolation(): async def test_lightrag_end_to_end_workspace_isolation():
""" """
End-to-end test: Create two LightRAG instances with different workspaces, End-to-end test: Create two LightRAG instances with different workspaces,
@ -1052,8 +741,13 @@ async def test_lightrag_end_to_end_workspace_isolation():
async def mock_llm_func( async def mock_llm_func(
prompt, system_prompt=None, history_messages=[], **kwargs prompt, system_prompt=None, history_messages=[], **kwargs
) -> str: ) -> str:
# Return a mock response that simulates entity extraction # Return a mock response that simulates entity extraction in the correct format
return """{"entities": [{"name": "Test Entity", "type": "Concept"}], "relationships": []}""" # Format: entity<|#|>entity_name<|#|>entity_type<|#|>entity_description
# Format: relation<|#|>source_entity<|#|>target_entity<|#|>keywords<|#|>description
return """entity<|#|>Artificial Intelligence<|#|>concept<|#|>AI is a field of computer science focused on creating intelligent machines.
entity<|#|>Machine Learning<|#|>concept<|#|>Machine Learning is a subset of AI that enables systems to learn from data.
relation<|#|>Machine Learning<|#|>Artificial Intelligence<|#|>subset, related field<|#|>Machine Learning is a key component and subset of Artificial Intelligence.
<|COMPLETE|>"""
# Mock embedding function # Mock embedding function
async def mock_embedding_func(texts: list[str]) -> np.ndarray: async def mock_embedding_func(texts: list[str]) -> np.ndarray:
@ -1122,7 +816,9 @@ async def test_lightrag_end_to_end_workspace_isolation():
print(f" project_b directory: {project_b_dir}") print(f" project_b directory: {project_b_dir}")
print(f" project_b exists: {project_b_exists}") print(f" project_b exists: {project_b_exists}")
if project_a_exists and project_b_exists: assert project_a_exists, "project_a directory should exist"
assert project_b_exists, "project_b directory should exist"
# List files in each directory # List files in each directory
print(f"\n Files in project_a/:") print(f"\n Files in project_a/:")
for file in sorted(project_a_dir.glob("*")): for file in sorted(project_a_dir.glob("*")):
@ -1136,19 +832,8 @@ async def test_lightrag_end_to_end_workspace_isolation():
size = file.stat().st_size size = file.stat().st_size
print(f" - {file.name} ({size} bytes)") print(f" - {file.name} ({size} bytes)")
results.add( print(f"✅ PASSED: LightRAG E2E - File Structure")
"LightRAG E2E - File Structure", print(f" Workspace directories correctly created and separated")
True,
f"Workspace directories correctly created and separated",
)
structure_ok = True
else:
results.add(
"LightRAG E2E - File Structure",
False,
f"Workspace directories not created properly",
)
structure_ok = False
# Test 11.4: Verify data isolation by checking file contents # Test 11.4: Verify data isolation by checking file contents
print("\nTest 11.4: Verify data isolation (check file contents)") print("\nTest 11.4: Verify data isolation (check file contents)")
@ -1170,93 +855,37 @@ async def test_lightrag_end_to_end_workspace_isolation():
print(f" project_b doc count: {len(docs_b_content)}") print(f" project_b doc count: {len(docs_b_content)}")
# Verify they contain different data # Verify they contain different data
docs_isolated = docs_a_content != docs_b_content assert docs_a_content != docs_b_content, "Document storage not properly isolated"
if docs_isolated: # Verify each workspace contains its own text content
results.add( docs_a_str = json.dumps(docs_a_content)
"LightRAG E2E - Data Isolation", docs_b_str = json.dumps(docs_b_content)
True,
"Document storage correctly isolated between workspaces",
)
else:
results.add(
"LightRAG E2E - Data Isolation",
False,
"Document storage not properly isolated",
)
data_ok = docs_isolated # Check project_a contains its text and NOT project_b's text
assert "Artificial Intelligence" in docs_a_str, "project_a should contain 'Artificial Intelligence'"
assert "Machine Learning" in docs_a_str, "project_a should contain 'Machine Learning'"
assert "Deep Learning" not in docs_a_str, "project_a should NOT contain 'Deep Learning' from project_b"
assert "Neural Networks" not in docs_a_str, "project_a should NOT contain 'Neural Networks' from project_b"
# Check project_b contains its text and NOT project_a's text
assert "Deep Learning" in docs_b_str, "project_b should contain 'Deep Learning'"
assert "Neural Networks" in docs_b_str, "project_b should contain 'Neural Networks'"
assert "Artificial Intelligence" not in docs_b_str, "project_b should NOT contain 'Artificial Intelligence' from project_a"
# Note: "Machine Learning" might appear in project_b's text, so we skip that check
print(f"✅ PASSED: LightRAG E2E - Data Isolation")
print(f" Document storage correctly isolated between workspaces")
print(f" project_a contains only its own data")
print(f" project_b contains only its own data")
else: else:
print(f" Document storage files not found (may not be created yet)") print(f" Document storage files not found (may not be created yet)")
results.add( print(f"✅ PASSED: LightRAG E2E - Data Isolation")
"LightRAG E2E - Data Isolation", print(f" Skipped file content check (files not created)")
True,
"Skipped file content check (files not created)",
)
data_ok = True
print(f"\n ✓ Test complete - workspace isolation verified at E2E level") print(f"\n ✓ Test complete - workspace isolation verified at E2E level")
return structure_ok and data_ok
except Exception as e:
results.add("LightRAG E2E Workspace Isolation", False, f"Exception: {str(e)}")
import traceback
traceback.print_exc()
return False
finally: finally:
# Cleanup test directory # Cleanup test directory
if os.path.exists(test_dir): if os.path.exists(test_dir):
shutil.rmtree(test_dir) shutil.rmtree(test_dir)
print(f"\n Cleaned up test directory: {test_dir}") print(f"\n Cleaned up test directory: {test_dir}")
# =============================================================================
# Main Test Runner
# =============================================================================
async def main():
    """Run the complete workspace-isolation test suite.

    Executes every test in priority order, prints a summary via the shared
    ``results`` tracker, and returns a process exit code (0 = all passed,
    1 = at least one failure).
    """
    # Banner (decorative strings kept verbatim).
    print("\n")
    print("" + "" * 58 + "")
    print("" + " " * 10 + "Workspace Isolation Test Suite" + " " * 18 + "")
    print("" + " " * 15 + "PR #2366 - Complete Coverage" + " " * 15 + "")
    print("" + "" * 58 + "")

    # Unit-level tests, in priority order: core PR requirements (Tests 1-4)
    # followed by the additional comprehensive tests (Tests 5-9).
    unit_tests = (
        test_pipeline_status_isolation,
        test_lock_mechanism,
        test_backward_compatibility,
        test_multi_workspace_concurrency,
        test_namespace_lock_reentrance,
        test_different_namespace_lock_isolation,
        test_error_handling,
        test_update_flags_workspace_isolation,
        test_empty_workspace_standardization,
    )
    for run_test in unit_tests:
        await run_test()

    # Integration and E2E tests (Tests 10-11) get their own section header.
    print("\n" + "=" * 60)
    print("INTEGRATION & END-TO-END TESTS")
    print("=" * 60)
    await test_json_kv_storage_workspace_isolation()
    await test_lightrag_end_to_end_workspace_isolation()

    # Summarize and translate the overall outcome into an exit code.
    if results.summary():
        print("\n🎉 All tests passed! The workspace isolation feature is working correctly.")
        print(" Coverage: 100% - Unit, Integration, and E2E validated")
        return 0
    print("\n⚠️ Some tests failed. Please review the results above.")
    return 1
if __name__ == "__main__":
    # Run the async suite and propagate its result as the process exit code.
    # Use SystemExit directly instead of the builtin exit(): exit() is
    # injected by the `site` module and is not guaranteed to be available
    # (e.g. when Python is started with -S); raising SystemExit is the
    # portable equivalent and produces the same exit status.
    raise SystemExit(asyncio.run(main()))