diff --git a/lightrag/kg/postgres_impl.py b/lightrag/kg/postgres_impl.py
index 5e539b9e..128e96be 100644
--- a/lightrag/kg/postgres_impl.py
+++ b/lightrag/kg/postgres_impl.py
@@ -209,20 +209,20 @@ class PostgreSQLDB:
 
                 # Check column type
                 data_type = column_info.get("data_type")
-                if data_type == "timestamp with time zone":
-                    logger.info(
-                        f"Column {table_name}.{column_name} is already timezone-aware, no migration needed"
+                if data_type == "timestamp without time zone":
+                    logger.debug(
+                        f"Column {table_name}.{column_name} is already timezone-naive, no migration needed"
                     )
                     continue
 
                 # Execute migration, explicitly specifying UTC timezone for interpreting original data
                 logger.info(
-                    f"Migrating {table_name}.{column_name} to timezone-aware type"
+                    f"Migrating {table_name}.{column_name} from {data_type} to TIMESTAMP(0) type"
                 )
                 migration_sql = f"""
                     ALTER TABLE {table_name}
-                    ALTER COLUMN {column_name} TYPE TIMESTAMP(0) WITH TIME ZONE
-                    USING {column_name} AT TIME ZONE 'UTC'
+                    ALTER COLUMN {column_name} TYPE TIMESTAMP(0) USING {column_name} AT TIME ZONE 'UTC',
+                    ALTER COLUMN {column_name} SET DEFAULT CURRENT_TIMESTAMP
                 """
 
                 await self.execute(migration_sql)
@@ -569,7 +569,7 @@ class PostgreSQLDB:
                         f"Successfully migrated {migration['table']}.{migration['column']}"
                     )
                 else:
-                    logger.info(
+                    logger.debug(
                         f"Column {migration['table']}.{migration['column']} already has correct type, no migration needed"
                     )
 
@@ -1054,7 +1054,8 @@ class PGKVStorage(BaseKVStorage):
             return
 
         if is_namespace(self.namespace, NameSpace.KV_STORE_TEXT_CHUNKS):
-            current_time = datetime.datetime.now(timezone.utc)
+            # Get current UTC time and convert to naive datetime for database storage
+            current_time = datetime.datetime.now(timezone.utc).replace(tzinfo=None)
             for k, v in data.items():
                 upsert_sql = SQL_TEMPLATES["upsert_text_chunk"]
                 _data = {
@@ -1292,8 +1293,8 @@ class PGVectorStorage(BaseVectorStorage):
         if not data:
             return
 
-        # Get current time with UTC timezone
-        current_time = datetime.datetime.now(timezone.utc)
+        # Get current UTC time and convert to naive datetime for database storage
+        current_time = datetime.datetime.now(timezone.utc).replace(tzinfo=None)
         list_data = [
             {
                 "__id__": k,
@@ -1489,6 +1490,15 @@ class PGVectorStorage(BaseVectorStorage):
 class PGDocStatusStorage(DocStatusStorage):
     db: PostgreSQLDB = field(default=None)
 
+    def _format_datetime_with_timezone(self, dt):
+        """Convert datetime to ISO format string with timezone info"""
+        if dt is None:
+            return None
+        # If no timezone info, assume it's UTC time
+        if dt.tzinfo is None:
+            dt = dt.replace(tzinfo=timezone.utc)
+        return dt.isoformat()
+
     async def initialize(self):
         if self.db is None:
             self.db = await ClientManager.get_client()
@@ -1548,14 +1558,18 @@ class PGDocStatusStorage(DocStatusStorage):
             except json.JSONDecodeError:
                 chunks_list = []
 
+            # Convert datetime objects to ISO format strings with timezone info
+            created_at = self._format_datetime_with_timezone(result[0]["created_at"])
+            updated_at = self._format_datetime_with_timezone(result[0]["updated_at"])
+
             return dict(
                 content=result[0]["content"],
                 content_length=result[0]["content_length"],
                 content_summary=result[0]["content_summary"],
                 status=result[0]["status"],
                 chunks_count=result[0]["chunks_count"],
-                created_at=result[0]["created_at"],
-                updated_at=result[0]["updated_at"],
+                created_at=created_at,
+                updated_at=updated_at,
                 file_path=result[0]["file_path"],
                 chunks_list=chunks_list,
             )
@@ -1583,6 +1597,10 @@
             except json.JSONDecodeError:
                 chunks_list = []
 
+            # Convert datetime objects to ISO format strings with timezone info
+            created_at = self._format_datetime_with_timezone(row["created_at"])
+            updated_at = self._format_datetime_with_timezone(row["updated_at"])
+
             processed_results.append(
                 {
                     "content": row["content"],
@@ -1590,8 +1608,8 @@
                     "content_summary": row["content_summary"],
                     "status": row["status"],
                     "chunks_count": row["chunks_count"],
-                    "created_at": row["created_at"],
-                    "updated_at": row["updated_at"],
+                    "created_at": created_at,
+                    "updated_at": updated_at,
                     "file_path": row["file_path"],
                     "chunks_list": chunks_list,
                 }
@@ -1629,13 +1647,17 @@
             except json.JSONDecodeError:
                 chunks_list = []
 
+            # Convert datetime objects to ISO format strings with timezone info
+            created_at = self._format_datetime_with_timezone(element["created_at"])
+            updated_at = self._format_datetime_with_timezone(element["updated_at"])
+
             docs_by_status[element["id"]] = DocProcessingStatus(
                 content=element["content"],
                 content_summary=element["content_summary"],
                 content_length=element["content_length"],
                 status=element["status"],
-                created_at=element["created_at"],
-                updated_at=element["updated_at"],
+                created_at=created_at,
+                updated_at=updated_at,
                 chunks_count=element["chunks_count"],
                 file_path=element["file_path"],
                 chunks_list=chunks_list,
@@ -1687,19 +1709,26 @@ class PGDocStatusStorage(DocStatusStorage):
             return
 
         def parse_datetime(dt_str):
+            """Parse datetime and ensure it's stored as UTC time in database"""
             if dt_str is None:
                 return None
             if isinstance(dt_str, (datetime.date, datetime.datetime)):
-                # If it's a datetime object without timezone info, remove timezone info
+                # If it's a datetime object
                 if isinstance(dt_str, datetime.datetime):
-                    # Remove timezone info, return naive datetime object
-                    return dt_str.replace(tzinfo=None)
+                    # If no timezone info, assume it's UTC
+                    if dt_str.tzinfo is None:
+                        dt_str = dt_str.replace(tzinfo=timezone.utc)
+                    # Convert to UTC and remove timezone info for storage
+                    return dt_str.astimezone(timezone.utc).replace(tzinfo=None)
                 return dt_str
             try:
                 # Process ISO format string with timezone
                 dt = datetime.datetime.fromisoformat(dt_str)
-                # Remove timezone info, return naive datetime object
-                return dt.replace(tzinfo=None)
+                # If no timezone info, assume it's UTC
+                if dt.tzinfo is None:
+                    dt = dt.replace(tzinfo=timezone.utc)
+                # Convert to UTC and remove timezone info for storage
+                return dt.astimezone(timezone.utc).replace(tzinfo=None)
             except (ValueError, TypeError):
                 logger.warning(f"Unable to parse datetime string: {dt_str}")
                 return None
@@ -3095,8 +3124,8 @@ TABLES = {
             doc_name VARCHAR(1024),
             content TEXT,
             meta JSONB,
-            create_time TIMESTAMP(0),
-            update_time TIMESTAMP(0),
+            create_time TIMESTAMP(0) DEFAULT CURRENT_TIMESTAMP,
+            update_time TIMESTAMP(0) DEFAULT CURRENT_TIMESTAMP,
             CONSTRAINT LIGHTRAG_DOC_FULL_PK PRIMARY KEY (workspace, id)
             )"""
     },
@@ -3110,8 +3139,8 @@ TABLES = {
            content TEXT,
            file_path TEXT NULL,
            llm_cache_list JSONB NULL DEFAULT '[]'::jsonb,
-           create_time TIMESTAMP(0) WITH TIME ZONE,
-           update_time TIMESTAMP(0) WITH TIME ZONE,
+           create_time TIMESTAMP(0) DEFAULT CURRENT_TIMESTAMP,
+           update_time TIMESTAMP(0) DEFAULT CURRENT_TIMESTAMP,
            CONSTRAINT LIGHTRAG_DOC_CHUNKS_PK PRIMARY KEY (workspace, id)
            )"""
     },
@@ -3125,8 +3154,8 @@ TABLES = {
            content TEXT,
            content_vector VECTOR,
            file_path TEXT NULL,
-           create_time TIMESTAMP(0) WITH TIME ZONE,
-           update_time TIMESTAMP(0) WITH TIME ZONE,
+           create_time TIMESTAMP(0) DEFAULT CURRENT_TIMESTAMP,
+           update_time TIMESTAMP(0) DEFAULT CURRENT_TIMESTAMP,
            CONSTRAINT LIGHTRAG_VDB_CHUNKS_PK PRIMARY KEY (workspace, id)
            )"""
     },
@@ -3137,8 +3166,8 @@ TABLES = {
            entity_name VARCHAR(512),
            content TEXT,
            content_vector VECTOR,
-           create_time TIMESTAMP(0) WITH TIME ZONE,
-           update_time TIMESTAMP(0) WITH TIME ZONE,
+           create_time TIMESTAMP(0) DEFAULT CURRENT_TIMESTAMP,
+           update_time TIMESTAMP(0) DEFAULT CURRENT_TIMESTAMP,
            chunk_ids VARCHAR(255)[] NULL,
            file_path TEXT NULL,
            CONSTRAINT LIGHTRAG_VDB_ENTITY_PK PRIMARY KEY (workspace, id)
@@ -3152,8 +3181,8 @@ TABLES = {
            target_id VARCHAR(512),
            content TEXT,
            content_vector VECTOR,
-           create_time TIMESTAMP(0) WITH TIME ZONE,
-           update_time TIMESTAMP(0) WITH TIME ZONE,
+           create_time TIMESTAMP(0) DEFAULT CURRENT_TIMESTAMP,
+           update_time TIMESTAMP(0) DEFAULT CURRENT_TIMESTAMP,
            chunk_ids VARCHAR(255)[] NULL,
            file_path TEXT NULL,
            CONSTRAINT LIGHTRAG_VDB_RELATION_PK PRIMARY KEY (workspace, id)
@@ -3168,7 +3197,7 @@ TABLES = {
            return_value TEXT,
            chunk_id VARCHAR(255) NULL,
            create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-           update_time TIMESTAMP,
+           update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            CONSTRAINT LIGHTRAG_LLM_CACHE_PK PRIMARY KEY (workspace, mode, id)
            )"""
     },
@@ -3183,8 +3212,8 @@ TABLES = {
            status varchar(64) NULL,
            file_path TEXT NULL,
            chunks_list JSONB NULL DEFAULT '[]'::jsonb,
-           created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP NULL,
-           updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP NULL,
+           created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+           updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            CONSTRAINT LIGHTRAG_DOC_STATUS_PK PRIMARY KEY (workspace, id)
            )"""
     },
@@ -3199,11 +3228,13 @@ SQL_TEMPLATES = {
     "get_by_id_text_chunks": """SELECT id, tokens, COALESCE(content, '') as content,
                                 chunk_order_index, full_doc_id, file_path,
                                 COALESCE(llm_cache_list, '[]'::jsonb) as llm_cache_list,
-                                create_time, update_time
+                                EXTRACT(EPOCH FROM create_time)::BIGINT as create_time,
+                                EXTRACT(EPOCH FROM update_time)::BIGINT as update_time
                                 FROM LIGHTRAG_DOC_CHUNKS WHERE workspace=$1 AND id=$2
                              """,
     "get_by_id_llm_response_cache": """SELECT id, original_prompt, return_value, mode, chunk_id, cache_type,
-                                create_time, update_time
+                                EXTRACT(EPOCH FROM create_time)::BIGINT as create_time,
+                                EXTRACT(EPOCH FROM update_time)::BIGINT as update_time
                                 FROM LIGHTRAG_LLM_CACHE WHERE workspace=$1 AND id=$2
                              """,
     "get_by_mode_id_llm_response_cache": """SELECT id, original_prompt, return_value, mode, chunk_id
@@ -3215,11 +3246,13 @@ SQL_TEMPLATES = {
     "get_by_ids_text_chunks": """SELECT id, tokens, COALESCE(content, '') as content,
                                  chunk_order_index, full_doc_id, file_path,
                                  COALESCE(llm_cache_list, '[]'::jsonb) as llm_cache_list,
-                                 create_time, update_time
+                                 EXTRACT(EPOCH FROM create_time)::BIGINT as create_time,
+                                 EXTRACT(EPOCH FROM update_time)::BIGINT as update_time
                                  FROM LIGHTRAG_DOC_CHUNKS WHERE workspace=$1 AND id IN ({ids})
                              """,
     "get_by_ids_llm_response_cache": """SELECT id, original_prompt, return_value, mode, chunk_id, cache_type,
-                                 create_time, update_time
+                                 EXTRACT(EPOCH FROM create_time)::BIGINT as create_time,
+                                 EXTRACT(EPOCH FROM update_time)::BIGINT as update_time
                                  FROM LIGHTRAG_LLM_CACHE WHERE workspace=$1 AND id IN ({ids})
                              """,
    "filter_keys": "SELECT id FROM {table_name} WHERE workspace=$1 AND id IN ({ids})",
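
Note on the convention the patch settles on: timestamps are stored as naive UTC inside PostgreSQL (TIMESTAMP(0), written via datetime.now(timezone.utc).replace(tzinfo=None)) and UTC is re-attached on the way out. Below is a minimal, self-contained sketch of that round-trip; to_storage and from_storage are illustrative stand-ins for the patch's parse_datetime and _format_datetime_with_timezone, not part of the change itself:

    from datetime import datetime, timezone

    def to_storage(dt):
        """Normalize any datetime to naive UTC before writing
        (mirrors parse_datetime in the patch)."""
        if dt is None:
            return None
        if dt.tzinfo is None:
            # Naive input is assumed to already be UTC
            dt = dt.replace(tzinfo=timezone.utc)
        return dt.astimezone(timezone.utc).replace(tzinfo=None)

    def from_storage(dt):
        """Re-attach UTC and emit an ISO-8601 string on read
        (mirrors _format_datetime_with_timezone in the patch)."""
        if dt is None:
            return None
        return dt.replace(tzinfo=timezone.utc).isoformat()

    local = datetime.fromisoformat("2024-06-01T12:00:00+02:00")
    stored = to_storage(local)  # naive datetime(2024, 6, 1, 10, 0, 0)
    assert from_storage(stored) == "2024-06-01T10:00:00+00:00"

With this scheme the stored values are independent of the server's TimeZone setting, and the EXTRACT(EPOCH FROM ...) templates return correct Unix timestamps because PostgreSQL computes the epoch of a timestamp without time zone as if it were UTC. One caveat worth checking: DEFAULT CURRENT_TIMESTAMP on a naive column is cast through the session's TimeZone, so connections should run with TimeZone=UTC for the defaults to match the values written from Python.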