From 64015548dfb7d6b55d436cf8663d6f9df419266f Mon Sep 17 00:00:00 2001 From: yangdx Date: Tue, 19 Aug 2025 17:49:23 +0800 Subject: [PATCH] Refactor MD5 hash functions and consolidate Unicode error handling --- lightrag/utils.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/lightrag/utils.py b/lightrag/utils.py index bbafd9f1..87ccbea1 100644 --- a/lightrag/utils.py +++ b/lightrag/utils.py @@ -272,19 +272,26 @@ def compute_args_hash(*args: Any) -> str: Returns: str: Hash string """ - import hashlib - # Convert all arguments to strings and join them args_str = "".join([str(arg) for arg in args]) # Use 'replace' error handling to safely encode problematic Unicode characters # This replaces invalid characters with Unicode replacement character (U+FFFD) try: - return hashlib.md5(args_str.encode("utf-8")).hexdigest() + return md5(args_str.encode("utf-8")).hexdigest() except UnicodeEncodeError: # Handle surrogate characters and other encoding issues safe_bytes = args_str.encode("utf-8", errors="replace") - return hashlib.md5(safe_bytes).hexdigest() + return md5(safe_bytes).hexdigest() + + +def compute_mdhash_id(content: str, prefix: str = "") -> str: + """ + Compute a unique ID for a given content string. + + The ID is a combination of the given prefix and the MD5 hash of the content string. + """ + return prefix + compute_args_hash(content) def generate_cache_key(mode: str, cache_type: str, hash_value: str) -> str: @@ -316,15 +323,6 @@ def parse_cache_key(cache_key: str) -> tuple[str, str, str] | None: return None -def compute_mdhash_id(content: str, prefix: str = "") -> str: - """ - Compute a unique ID for a given content string. - - The ID is a combination of the given prefix and the MD5 hash of the content string. - """ - return prefix + md5(content.encode()).hexdigest() - - # Custom exception class class QueueFullError(Exception): """Raised when the queue is full and the wait times out"""