Fix empty dict handling after JSON sanitization
• Replace truthy checks with `is not None` • Handle empty dict edge case properly • Prevent data reload failures • Add comprehensive test coverage • Fix JsonKVStorage and DocStatusStorage
This commit is contained in:
parent
dcf1d28681
commit
70cc2419f2
3 changed files with 56 additions and 3 deletions
|
|
@ -171,7 +171,7 @@ class JsonDocStatusStorage(DocStatusStorage):
|
||||||
f"[{self.workspace}] Reloading sanitized data into shared memory for {self.namespace}"
|
f"[{self.workspace}] Reloading sanitized data into shared memory for {self.namespace}"
|
||||||
)
|
)
|
||||||
cleaned_data = load_json(self._file_name)
|
cleaned_data = load_json(self._file_name)
|
||||||
if cleaned_data:
|
if cleaned_data is not None:
|
||||||
self._data.clear()
|
self._data.clear()
|
||||||
self._data.update(cleaned_data)
|
self._data.update(cleaned_data)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -91,7 +91,7 @@ class JsonKVStorage(BaseKVStorage):
|
||||||
f"[{self.workspace}] Reloading sanitized data into shared memory for {self.namespace}"
|
f"[{self.workspace}] Reloading sanitized data into shared memory for {self.namespace}"
|
||||||
)
|
)
|
||||||
cleaned_data = load_json(self._file_name)
|
cleaned_data = load_json(self._file_name)
|
||||||
if cleaned_data:
|
if cleaned_data is not None:
|
||||||
self._data.clear()
|
self._data.clear()
|
||||||
self._data.update(cleaned_data)
|
self._data.update(cleaned_data)
|
||||||
|
|
||||||
|
|
@ -283,7 +283,7 @@ class JsonKVStorage(BaseKVStorage):
|
||||||
f"[{self.workspace}] Reloading sanitized migration data for {self.namespace}"
|
f"[{self.workspace}] Reloading sanitized migration data for {self.namespace}"
|
||||||
)
|
)
|
||||||
cleaned_data = load_json(self._file_name)
|
cleaned_data = load_json(self._file_name)
|
||||||
if cleaned_data:
|
if cleaned_data is not None:
|
||||||
return cleaned_data # Return cleaned data to update shared memory
|
return cleaned_data # Return cleaned data to update shared memory
|
||||||
|
|
||||||
return migrated_data
|
return migrated_data
|
||||||
|
|
|
||||||
|
|
@ -290,6 +290,55 @@ class TestWriteJsonOptimization:
|
||||||
finally:
|
finally:
|
||||||
os.unlink(temp_file)
|
os.unlink(temp_file)
|
||||||
|
|
||||||
|
def test_empty_values_after_sanitization(self):
|
||||||
|
"""Test that data with empty values after sanitization is properly handled
|
||||||
|
|
||||||
|
Critical edge case: When sanitization results in data with empty string values,
|
||||||
|
we must use 'if cleaned_data is not None' instead of 'if cleaned_data' to ensure
|
||||||
|
proper reload, since truthy check on dict depends on content, not just existence.
|
||||||
|
"""
|
||||||
|
# Create data where ALL values are only surrogate characters
|
||||||
|
all_dirty_data = {
|
||||||
|
"key1": "\ud800\udc00\ud801",
|
||||||
|
"key2": "\ud802\ud803",
|
||||||
|
}
|
||||||
|
|
||||||
|
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".json") as f:
|
||||||
|
temp_file = f.name
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Write dirty data - should trigger sanitization
|
||||||
|
needs_reload = write_json(all_dirty_data, temp_file)
|
||||||
|
assert needs_reload, "All-dirty data should trigger sanitization"
|
||||||
|
|
||||||
|
# Load the sanitized data
|
||||||
|
cleaned_data = load_json(temp_file)
|
||||||
|
|
||||||
|
# Critical assertions for the edge case
|
||||||
|
assert cleaned_data is not None, "Cleaned data should not be None"
|
||||||
|
# Sanitization removes surrogates but preserves keys with empty values
|
||||||
|
assert cleaned_data == {
|
||||||
|
"key1": "",
|
||||||
|
"key2": "",
|
||||||
|
}, "Surrogates should be removed, keys preserved"
|
||||||
|
# This dict is truthy because it has keys (even with empty values)
|
||||||
|
assert cleaned_data, "Dict with keys is truthy"
|
||||||
|
|
||||||
|
# Test the actual edge case: empty dict
|
||||||
|
empty_data = {}
|
||||||
|
needs_reload2 = write_json(empty_data, temp_file)
|
||||||
|
assert not needs_reload2, "Empty dict is clean"
|
||||||
|
|
||||||
|
reloaded_empty = load_json(temp_file)
|
||||||
|
assert reloaded_empty is not None, "Empty dict should not be None"
|
||||||
|
assert reloaded_empty == {}, "Empty dict should remain empty"
|
||||||
|
assert (
|
||||||
|
not reloaded_empty
|
||||||
|
), "Empty dict evaluates to False (the critical check)"
|
||||||
|
|
||||||
|
finally:
|
||||||
|
os.unlink(temp_file)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# Run tests
|
# Run tests
|
||||||
|
|
@ -331,4 +380,8 @@ if __name__ == "__main__":
|
||||||
test.test_migration_with_surrogate_sanitization()
|
test.test_migration_with_surrogate_sanitization()
|
||||||
print("✓ Passed")
|
print("✓ Passed")
|
||||||
|
|
||||||
|
print("Running test_empty_values_after_sanitization...")
|
||||||
|
test.test_empty_values_after_sanitization()
|
||||||
|
print("✓ Passed")
|
||||||
|
|
||||||
print("\n✅ All tests passed!")
|
print("\n✅ All tests passed!")
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue