Fix: prevent source data corruption by target upsert function

• Prevent mutation bugs by using copy() when storing cache values
• Protect filtered cache data and ensure batch data isolation
This commit is contained in:
yangdx 2025-11-09 00:02:19 +08:00
parent 987bc09cab
commit a75efb06dc

View file

@ -300,7 +300,7 @@ class MigrationTool:
if key.startswith("default:extract:") or key.startswith(
"default:summary:"
):
filtered[key] = value
filtered[key] = value.copy()
return filtered
async def get_default_caches_redis(
@ -475,7 +475,7 @@ class MigrationTool:
for field_name in ["namespace", "workspace", "_id", "content"]:
doc_copy.pop(field_name, None)
cache_data[key] = doc_copy
cache_data[key] = doc_copy.copy()
# Periodically yield control (every batch_size documents)
if len(cache_data) % batch_size == 0:
@ -660,7 +660,7 @@ class MigrationTool:
# Now iterate over snapshot without holding lock
batch = {}
for key, value in matching_items:
batch[key] = value
batch[key] = value.copy()
if len(batch) >= batch_size:
yield batch
batch = {}
@ -821,7 +821,7 @@ class MigrationTool:
for field_name in ["namespace", "workspace", "_id", "content"]:
doc_copy.pop(field_name, None)
batch[key] = doc_copy
batch[key] = doc_copy.copy()
if len(batch) >= batch_size:
yield batch