From a75efb06dcb6a783bfb39e039d2f4de53e162754 Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 9 Nov 2025 00:02:19 +0800 Subject: [PATCH] Fix: prevent source data corruption by target upsert function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit • Prevent mutation bugs by using copy() when storing cache values • Protect filtered cache data and ensure batch data isolation --- lightrag/tools/migrate_llm_cache.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lightrag/tools/migrate_llm_cache.py b/lightrag/tools/migrate_llm_cache.py index 942f244c..b0d4823c 100644 --- a/lightrag/tools/migrate_llm_cache.py +++ b/lightrag/tools/migrate_llm_cache.py @@ -300,7 +300,7 @@ class MigrationTool: if key.startswith("default:extract:") or key.startswith( "default:summary:" ): - filtered[key] = value + filtered[key] = value.copy() return filtered async def get_default_caches_redis( @@ -475,7 +475,7 @@ class MigrationTool: for field_name in ["namespace", "workspace", "_id", "content"]: doc_copy.pop(field_name, None) - cache_data[key] = doc_copy + cache_data[key] = doc_copy.copy() # Periodically yield control (every batch_size documents) if len(cache_data) % batch_size == 0: @@ -660,7 +660,7 @@ class MigrationTool: # Now iterate over snapshot without holding lock batch = {} for key, value in matching_items: - batch[key] = value + batch[key] = value.copy() if len(batch) >= batch_size: yield batch batch = {} @@ -821,7 +821,7 @@ class MigrationTool: for field_name in ["namespace", "workspace", "_id", "content"]: doc_copy.pop(field_name, None) - batch[key] = doc_copy + batch[key] = doc_copy.copy() if len(batch) >= batch_size: yield batch