diff --git a/lightrag/utils.py b/lightrag/utils.py
index bbafd9f1..87ccbea1 100644
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@@ -272,19 +272,26 @@ def compute_args_hash(*args: Any) -> str:
     Returns:
         str: Hash string
     """
-    import hashlib
-
     # Convert all arguments to strings and join them
     args_str = "".join([str(arg) for arg in args])
 
     # Use 'replace' error handling to safely encode problematic Unicode characters
     # This replaces invalid characters with Unicode replacement character (U+FFFD)
     try:
-        return hashlib.md5(args_str.encode("utf-8")).hexdigest()
+        return md5(args_str.encode("utf-8")).hexdigest()
     except UnicodeEncodeError:
         # Handle surrogate characters and other encoding issues
         safe_bytes = args_str.encode("utf-8", errors="replace")
-        return hashlib.md5(safe_bytes).hexdigest()
+        return md5(safe_bytes).hexdigest()
+
+
+def compute_mdhash_id(content: str, prefix: str = "") -> str:
+    """
+    Compute a unique ID for a given content string.
+
+    The ID is a combination of the given prefix and the MD5 hash of the content string.
+    """
+    return prefix + compute_args_hash(content)
 
 
 def generate_cache_key(mode: str, cache_type: str, hash_value: str) -> str:
@@ -316,15 +323,6 @@ def parse_cache_key(cache_key: str) -> tuple[str, str, str] | None:
     return None
 
 
-def compute_mdhash_id(content: str, prefix: str = "") -> str:
-    """
-    Compute a unique ID for a given content string.
-
-    The ID is a combination of the given prefix and the MD5 hash of the content string.
-    """
-    return prefix + md5(content.encode()).hexdigest()
-
-
 # Custom exception class
 class QueueFullError(Exception):
     """Raised when the queue is full and the wait times out"""