From abeaac84fa7fb38c6eed6cd6ce197e6886a3da92 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Wed, 12 Nov 2025 00:50:18 +0800
Subject: [PATCH] Improve JSON data sanitization to handle tuples and dict keys

- Sanitize dictionary keys
- Preserve tuple types
- Rebuild namedtuples from their fields instead of passing a single list
- Handle nested structures better
---
 lightrag/utils.py | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/lightrag/utils.py b/lightrag/utils.py
index 7232a91c..4bfd20f2 100644
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@@ -961,19 +961,37 @@ def _sanitize_string_for_json(text: str) -> str:
 def _sanitize_json_data(data: Any) -> Any:
     """Recursively sanitize all string values in data structure for safe UTF-8 encoding
 
+    Handles all JSON-serializable types including:
+    - Dictionary keys and values
+    - Lists and tuples, including namedtuples (preserves type)
+    - Nested structures
+    - Strings at any level
+
     Args:
-        data: Data to sanitize (dict, list, str, or other types)
+        data: Data to sanitize (dict, list, tuple, str, or other types)
 
     Returns:
         Sanitized data with all strings cleaned of problematic characters
     """
     if isinstance(data, dict):
-        return {k: _sanitize_json_data(v) for k, v in data.items()}
-    elif isinstance(data, list):
-        return [_sanitize_json_data(item) for item in data]
+        # Sanitize both keys and values
+        return {
+            _sanitize_string_for_json(k)
+            if isinstance(k, str)
+            else k: _sanitize_json_data(v)
+            for k, v in data.items()
+        }
+    elif isinstance(data, (list, tuple)):
+        # Handle both lists and tuples, preserve original type
+        sanitized = [_sanitize_json_data(item) for item in data]
+        if isinstance(data, tuple) and hasattr(type(data), "_make"):
+            # namedtuple constructors take one argument per field, not an iterable
+            return type(data)._make(sanitized)
+        return type(data)(sanitized)
     elif isinstance(data, str):
         return _sanitize_string_for_json(data)
     else:
+        # Numbers, booleans, None, etc. - return as-is
         return data
 
 