From 9a8f58826d177fa99cd02218ed4191e199461f7e Mon Sep 17 00:00:00 2001 From: yangdx Date: Thu, 31 Jul 2025 18:07:53 +0800 Subject: [PATCH] fix: Add safe handling for missing file_path and metadata in PostgreSQL doc status functions - Add null-safe file_path handling with "no-file-path" fallback in get_docs_by_status and get_docs_by_track_id - Enhance metadata validation to ensure dict type after JSON parsing - Align PostgreSQL implementation with JSON implementation safety patterns - Prevent KeyError exceptions when database records have missing fields --- lightrag/kg/postgres_impl.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/lightrag/kg/postgres_impl.py b/lightrag/kg/postgres_impl.py index fb2d114d..7fb1935c 100644 --- a/lightrag/kg/postgres_impl.py +++ b/lightrag/kg/postgres_impl.py @@ -1976,6 +1976,14 @@ class PGDocStatusStorage(DocStatusStorage): metadata = json.loads(metadata) except json.JSONDecodeError: metadata = {} + # Ensure metadata is a dict + if not isinstance(metadata, dict): + metadata = {} + + # Safe handling for file_path + file_path = element.get("file_path") + if file_path is None: + file_path = "no-file-path" # Convert datetime objects to ISO format strings with timezone info created_at = self._format_datetime_with_timezone(element["created_at"]) @@ -1988,7 +1996,7 @@ class PGDocStatusStorage(DocStatusStorage): created_at=created_at, updated_at=updated_at, chunks_count=element["chunks_count"], - file_path=element["file_path"], + file_path=file_path, chunks_list=chunks_list, metadata=metadata, error_msg=element.get("error_msg"), @@ -2022,6 +2030,14 @@ class PGDocStatusStorage(DocStatusStorage): metadata = json.loads(metadata) except json.JSONDecodeError: metadata = {} + # Ensure metadata is a dict + if not isinstance(metadata, dict): + metadata = {} + + # Safe handling for file_path + file_path = element.get("file_path") + if file_path is None: + file_path = "no-file-path" # Convert datetime objects to ISO format strings with timezone info created_at = self._format_datetime_with_timezone(element["created_at"]) @@ -2034,7 +2050,7 @@ class PGDocStatusStorage(DocStatusStorage): created_at=created_at, updated_at=updated_at, chunks_count=element["chunks_count"], - file_path=element["file_path"], + file_path=file_path, chunks_list=chunks_list, track_id=element.get("track_id"), metadata=metadata,