From 8294d6d1b775d6adefe41eff44a090a6cf8eca4f Mon Sep 17 00:00:00 2001 From: yangdx Date: Tue, 5 Aug 2025 23:18:54 +0800 Subject: [PATCH] Remove deprecated mode field from LLM cache schema - Drop mode column from LLM cache table - Update primary key to exclude mode - Remove mode from all SQL queries - Deprecate mode-related methods - Update schema migration logic --- lightrag/kg/postgres_impl.py | 99 ++++++++++++++++++------------------ 1 file changed, 50 insertions(+), 49 deletions(-) diff --git a/lightrag/kg/postgres_impl.py b/lightrag/kg/postgres_impl.py index 06079d71..3cc0ac9a 100644 --- a/lightrag/kg/postgres_impl.py +++ b/lightrag/kg/postgres_impl.py @@ -224,15 +224,15 @@ class PostgreSQLDB: ): pass - async def _migrate_llm_cache_add_columns(self): - """Add chunk_id, cache_type, and queryparam columns to LIGHTRAG_LLM_CACHE table if they don't exist""" + async def _migrate_llm_cache_schema(self): + """Migrate LLM cache schema: add new columns and remove deprecated mode field""" try: # Check if all columns exist check_columns_sql = """ SELECT column_name FROM information_schema.columns WHERE table_name = 'lightrag_llm_cache' - AND column_name IN ('chunk_id', 'cache_type', 'queryparam') + AND column_name IN ('chunk_id', 'cache_type', 'queryparam', 'mode') """ existing_columns = await self.query(check_columns_sql, multirows=True) @@ -305,8 +305,38 @@ class PostgreSQLDB: "queryparam column already exists in LIGHTRAG_LLM_CACHE table" ) + # Remove deprecated mode field if it exists + if "mode" in existing_column_names: + logger.info("Removing deprecated mode column from LIGHTRAG_LLM_CACHE table") + + # First, drop the primary key constraint that includes mode + drop_pk_sql = """ + ALTER TABLE LIGHTRAG_LLM_CACHE + DROP CONSTRAINT IF EXISTS LIGHTRAG_LLM_CACHE_PK + """ + await self.execute(drop_pk_sql) + logger.info("Dropped old primary key constraint") + + # Drop the mode column + drop_mode_sql = """ + ALTER TABLE LIGHTRAG_LLM_CACHE + DROP COLUMN mode + """ + await self.execute(drop_mode_sql) + logger.info("Successfully removed mode column from LIGHTRAG_LLM_CACHE table") + + # Create new primary key constraint without mode + add_pk_sql = """ + ALTER TABLE LIGHTRAG_LLM_CACHE + ADD CONSTRAINT LIGHTRAG_LLM_CACHE_PK PRIMARY KEY (workspace, id) + """ + await self.execute(add_pk_sql) + logger.info("Created new primary key constraint (workspace, id)") + else: + logger.info("mode column does not exist in LIGHTRAG_LLM_CACHE table") + except Exception as e: - logger.warning(f"Failed to add columns to LIGHTRAG_LLM_CACHE: {e}") + logger.warning(f"Failed to migrate LLM cache schema: {e}") async def _migrate_timestamp_columns(self): """Migrate timestamp columns in tables to witimezone-free types, assuming original data is in UTC time""" @@ -872,11 +902,11 @@ class PostgreSQLDB: logger.error(f"PostgreSQL, Failed to migrate timestamp columns: {e}") # Don't throw an exception, allow the initialization process to continue - # Migrate LLM cache table to add chunk_id and cache_type columns if needed + # Migrate LLM cache schema: add new columns and remove deprecated mode field try: - await self._migrate_llm_cache_add_columns() + await self._migrate_llm_cache_schema() except Exception as e: - logger.error(f"PostgreSQL, Failed to migrate LLM cache columns: {e}") + logger.error(f"PostgreSQL, Failed to migrate LLM cache schema: {e}") # Don't throw an exception, allow the initialization process to continue # Finally, attempt to migrate old doc chunks data if needed @@ -1402,14 +1432,13 @@ class PGKVStorage(BaseKVStorage): queryparam = json.loads(queryparam) except json.JSONDecodeError: queryparam = None - # Map field names and add cache_type for compatibility + # Map field names for compatibility (mode field removed) response = { **response, "return": response.get("return_value", ""), "cache_type": response.get("cache_type"), "original_prompt": response.get("original_prompt", ""), "chunk_id": response.get("chunk_id"), - "mode": response.get("mode", "default"), "queryparam": queryparam, "create_time": create_time, "update_time": create_time if update_time == 0 else update_time, @@ -1486,14 +1515,13 @@ class PGKVStorage(BaseKVStorage): queryparam = json.loads(queryparam) except json.JSONDecodeError: queryparam = None - # Map field names and add cache_type for compatibility + # Map field names for compatibility (mode field removed) processed_row = { **row, "return": row.get("return_value", ""), "cache_type": row.get("cache_type"), "original_prompt": row.get("original_prompt", ""), "chunk_id": row.get("chunk_id"), - "mode": row.get("mode", "default"), "queryparam": queryparam, "create_time": create_time, "update_time": create_time if update_time == 0 else update_time, @@ -1597,7 +1625,6 @@ class PGKVStorage(BaseKVStorage): "id": k, # Use flattened key as id "original_prompt": v["original_prompt"], "return_value": v["return"], - "mode": v.get("mode", "default"), # Get mode from data "chunk_id": v.get("chunk_id"), "cache_type": v.get( "cache_type", "extract" @@ -1671,37 +1698,16 @@ class PGKVStorage(BaseKVStorage): logger.error(f"Error while deleting records from {self.namespace}: {e}") async def drop_cache_by_modes(self, modes: list[str] | None = None) -> bool: - """Delete specific records from storage by cache mode + """Delete specific records from storage by cache mode (deprecated - mode field removed) Args: - modes (list[str]): List of cache modes to be dropped from storage + modes (list[str]): List of cache modes (deprecated, no longer used) Returns: - bool: True if successful, False otherwise + bool: False (method deprecated due to mode field removal) """ - if not modes: - return False - - try: - table_name = namespace_to_table_name(self.namespace) - if not table_name: - return False - - if table_name != "LIGHTRAG_LLM_CACHE": - return False - - sql = f""" - DELETE FROM {table_name} - WHERE workspace = $1 AND mode = ANY($2) - """ - params = {"workspace": self.db.workspace, "modes": modes} - - logger.info(f"Deleting cache by modes: {modes}") - await self.db.execute(sql, params) - return True - except Exception as e: - logger.error(f"Error deleting cache by modes {modes}: {e}") - return False + logger.warning("drop_cache_by_modes is deprecated: mode field has been removed from LLM cache") + return False async def drop(self) -> dict[str, str]: """Drop the storage""" @@ -4084,7 +4090,6 @@ TABLES = { "ddl": """CREATE TABLE LIGHTRAG_LLM_CACHE ( workspace varchar(255) NOT NULL, id varchar(255) NOT NULL, - mode varchar(32) NOT NULL, original_prompt TEXT, return_value TEXT, chunk_id VARCHAR(255) NULL, @@ -4092,7 +4097,7 @@ TABLES = { queryparam JSONB NULL, create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - CONSTRAINT LIGHTRAG_LLM_CACHE_PK PRIMARY KEY (workspace, mode, id) + CONSTRAINT LIGHTRAG_LLM_CACHE_PK PRIMARY KEY (workspace, id) )""" }, "LIGHTRAG_DOC_STATUS": { @@ -4150,14 +4155,11 @@ SQL_TEMPLATES = { EXTRACT(EPOCH FROM update_time)::BIGINT as update_time FROM LIGHTRAG_DOC_CHUNKS WHERE workspace=$1 AND id=$2 """, - "get_by_id_llm_response_cache": """SELECT id, original_prompt, return_value, mode, chunk_id, cache_type, queryparam, + "get_by_id_llm_response_cache": """SELECT id, original_prompt, return_value, chunk_id, cache_type, queryparam, EXTRACT(EPOCH FROM create_time)::BIGINT as create_time, EXTRACT(EPOCH FROM update_time)::BIGINT as update_time FROM LIGHTRAG_LLM_CACHE WHERE workspace=$1 AND id=$2 """, - "get_by_mode_id_llm_response_cache": """SELECT id, original_prompt, return_value, mode, chunk_id - FROM LIGHTRAG_LLM_CACHE WHERE workspace=$1 AND mode=$2 AND id=$3 - """, "get_by_ids_full_docs": """SELECT id, COALESCE(content, '') as content FROM LIGHTRAG_DOC_FULL WHERE workspace=$1 AND id IN ({ids}) """, @@ -4168,7 +4170,7 @@ SQL_TEMPLATES = { EXTRACT(EPOCH FROM update_time)::BIGINT as update_time FROM LIGHTRAG_DOC_CHUNKS WHERE workspace=$1 AND id IN ({ids}) """, - "get_by_ids_llm_response_cache": """SELECT id, original_prompt, return_value, mode, chunk_id, cache_type, queryparam, + "get_by_ids_llm_response_cache": """SELECT id, original_prompt, return_value, chunk_id, cache_type, queryparam, EXTRACT(EPOCH FROM create_time)::BIGINT as create_time, EXTRACT(EPOCH FROM update_time)::BIGINT as update_time FROM LIGHTRAG_LLM_CACHE WHERE workspace=$1 AND id IN ({ids}) @@ -4199,12 +4201,11 @@ SQL_TEMPLATES = { ON CONFLICT (workspace,id) DO UPDATE SET content = $2, update_time = CURRENT_TIMESTAMP """, - "upsert_llm_response_cache": """INSERT INTO LIGHTRAG_LLM_CACHE(workspace,id,original_prompt,return_value,mode,chunk_id,cache_type,queryparam) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8) - ON CONFLICT (workspace,mode,id) DO UPDATE + "upsert_llm_response_cache": """INSERT INTO LIGHTRAG_LLM_CACHE(workspace,id,original_prompt,return_value,chunk_id,cache_type,queryparam) + VALUES ($1, $2, $3, $4, $5, $6, $7) + ON CONFLICT (workspace,id) DO UPDATE SET original_prompt = EXCLUDED.original_prompt, return_value=EXCLUDED.return_value, - mode=EXCLUDED.mode, chunk_id=EXCLUDED.chunk_id, cache_type=EXCLUDED.cache_type, queryparam=EXCLUDED.queryparam,