Remove deprecated mode field from LLM cache schema

- Drop mode column from LLM cache table
- Update primary key to exclude mode
- Remove mode from all SQL queries
- Deprecate mode-related methods
- Update schema migration logic
This commit is contained in:
yangdx 2025-08-05 23:18:54 +08:00
parent 0b5c708660
commit 8294d6d1b7

View file

@ -224,15 +224,15 @@ class PostgreSQLDB:
):
pass
async def _migrate_llm_cache_add_columns(self):
"""Add chunk_id, cache_type, and queryparam columns to LIGHTRAG_LLM_CACHE table if they don't exist"""
async def _migrate_llm_cache_schema(self):
"""Migrate LLM cache schema: add new columns and remove deprecated mode field"""
try:
# Check if all columns exist
check_columns_sql = """
SELECT column_name
FROM information_schema.columns
WHERE table_name = 'lightrag_llm_cache'
AND column_name IN ('chunk_id', 'cache_type', 'queryparam')
AND column_name IN ('chunk_id', 'cache_type', 'queryparam', 'mode')
"""
existing_columns = await self.query(check_columns_sql, multirows=True)
@ -305,8 +305,38 @@ class PostgreSQLDB:
"queryparam column already exists in LIGHTRAG_LLM_CACHE table"
)
# Remove deprecated mode field if it exists
if "mode" in existing_column_names:
logger.info("Removing deprecated mode column from LIGHTRAG_LLM_CACHE table")
# First, drop the primary key constraint that includes mode
drop_pk_sql = """
ALTER TABLE LIGHTRAG_LLM_CACHE
DROP CONSTRAINT IF EXISTS LIGHTRAG_LLM_CACHE_PK
"""
await self.execute(drop_pk_sql)
logger.info("Dropped old primary key constraint")
# Drop the mode column
drop_mode_sql = """
ALTER TABLE LIGHTRAG_LLM_CACHE
DROP COLUMN mode
"""
await self.execute(drop_mode_sql)
logger.info("Successfully removed mode column from LIGHTRAG_LLM_CACHE table")
# Create new primary key constraint without mode
add_pk_sql = """
ALTER TABLE LIGHTRAG_LLM_CACHE
ADD CONSTRAINT LIGHTRAG_LLM_CACHE_PK PRIMARY KEY (workspace, id)
"""
await self.execute(add_pk_sql)
logger.info("Created new primary key constraint (workspace, id)")
else:
logger.info("mode column does not exist in LIGHTRAG_LLM_CACHE table")
except Exception as e:
logger.warning(f"Failed to add columns to LIGHTRAG_LLM_CACHE: {e}")
logger.warning(f"Failed to migrate LLM cache schema: {e}")
async def _migrate_timestamp_columns(self):
"""Migrate timestamp columns in tables to witimezone-free types, assuming original data is in UTC time"""
@ -872,11 +902,11 @@ class PostgreSQLDB:
logger.error(f"PostgreSQL, Failed to migrate timestamp columns: {e}")
# Don't throw an exception, allow the initialization process to continue
# Migrate LLM cache table to add chunk_id and cache_type columns if needed
# Migrate LLM cache schema: add new columns and remove deprecated mode field
try:
await self._migrate_llm_cache_add_columns()
await self._migrate_llm_cache_schema()
except Exception as e:
logger.error(f"PostgreSQL, Failed to migrate LLM cache columns: {e}")
logger.error(f"PostgreSQL, Failed to migrate LLM cache schema: {e}")
# Don't throw an exception, allow the initialization process to continue
# Finally, attempt to migrate old doc chunks data if needed
@ -1402,14 +1432,13 @@ class PGKVStorage(BaseKVStorage):
queryparam = json.loads(queryparam)
except json.JSONDecodeError:
queryparam = None
# Map field names and add cache_type for compatibility
# Map field names for compatibility (mode field removed)
response = {
**response,
"return": response.get("return_value", ""),
"cache_type": response.get("cache_type"),
"original_prompt": response.get("original_prompt", ""),
"chunk_id": response.get("chunk_id"),
"mode": response.get("mode", "default"),
"queryparam": queryparam,
"create_time": create_time,
"update_time": create_time if update_time == 0 else update_time,
@ -1486,14 +1515,13 @@ class PGKVStorage(BaseKVStorage):
queryparam = json.loads(queryparam)
except json.JSONDecodeError:
queryparam = None
# Map field names and add cache_type for compatibility
# Map field names for compatibility (mode field removed)
processed_row = {
**row,
"return": row.get("return_value", ""),
"cache_type": row.get("cache_type"),
"original_prompt": row.get("original_prompt", ""),
"chunk_id": row.get("chunk_id"),
"mode": row.get("mode", "default"),
"queryparam": queryparam,
"create_time": create_time,
"update_time": create_time if update_time == 0 else update_time,
@ -1597,7 +1625,6 @@ class PGKVStorage(BaseKVStorage):
"id": k, # Use flattened key as id
"original_prompt": v["original_prompt"],
"return_value": v["return"],
"mode": v.get("mode", "default"), # Get mode from data
"chunk_id": v.get("chunk_id"),
"cache_type": v.get(
"cache_type", "extract"
@ -1671,37 +1698,16 @@ class PGKVStorage(BaseKVStorage):
logger.error(f"Error while deleting records from {self.namespace}: {e}")
async def drop_cache_by_modes(self, modes: list[str] | None = None) -> bool:
"""Delete specific records from storage by cache mode
"""Delete specific records from storage by cache mode (deprecated - mode field removed)
Args:
modes (list[str]): List of cache modes to be dropped from storage
modes (list[str]): List of cache modes (deprecated, no longer used)
Returns:
bool: True if successful, False otherwise
bool: False (method deprecated due to mode field removal)
"""
if not modes:
return False
try:
table_name = namespace_to_table_name(self.namespace)
if not table_name:
return False
if table_name != "LIGHTRAG_LLM_CACHE":
return False
sql = f"""
DELETE FROM {table_name}
WHERE workspace = $1 AND mode = ANY($2)
"""
params = {"workspace": self.db.workspace, "modes": modes}
logger.info(f"Deleting cache by modes: {modes}")
await self.db.execute(sql, params)
return True
except Exception as e:
logger.error(f"Error deleting cache by modes {modes}: {e}")
return False
logger.warning("drop_cache_by_modes is deprecated: mode field has been removed from LLM cache")
return False
async def drop(self) -> dict[str, str]:
"""Drop the storage"""
@ -4084,7 +4090,6 @@ TABLES = {
"ddl": """CREATE TABLE LIGHTRAG_LLM_CACHE (
workspace varchar(255) NOT NULL,
id varchar(255) NOT NULL,
mode varchar(32) NOT NULL,
original_prompt TEXT,
return_value TEXT,
chunk_id VARCHAR(255) NULL,
@ -4092,7 +4097,7 @@ TABLES = {
queryparam JSONB NULL,
create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
CONSTRAINT LIGHTRAG_LLM_CACHE_PK PRIMARY KEY (workspace, mode, id)
CONSTRAINT LIGHTRAG_LLM_CACHE_PK PRIMARY KEY (workspace, id)
)"""
},
"LIGHTRAG_DOC_STATUS": {
@ -4150,14 +4155,11 @@ SQL_TEMPLATES = {
EXTRACT(EPOCH FROM update_time)::BIGINT as update_time
FROM LIGHTRAG_DOC_CHUNKS WHERE workspace=$1 AND id=$2
""",
"get_by_id_llm_response_cache": """SELECT id, original_prompt, return_value, mode, chunk_id, cache_type, queryparam,
"get_by_id_llm_response_cache": """SELECT id, original_prompt, return_value, chunk_id, cache_type, queryparam,
EXTRACT(EPOCH FROM create_time)::BIGINT as create_time,
EXTRACT(EPOCH FROM update_time)::BIGINT as update_time
FROM LIGHTRAG_LLM_CACHE WHERE workspace=$1 AND id=$2
""",
"get_by_mode_id_llm_response_cache": """SELECT id, original_prompt, return_value, mode, chunk_id
FROM LIGHTRAG_LLM_CACHE WHERE workspace=$1 AND mode=$2 AND id=$3
""",
"get_by_ids_full_docs": """SELECT id, COALESCE(content, '') as content
FROM LIGHTRAG_DOC_FULL WHERE workspace=$1 AND id IN ({ids})
""",
@ -4168,7 +4170,7 @@ SQL_TEMPLATES = {
EXTRACT(EPOCH FROM update_time)::BIGINT as update_time
FROM LIGHTRAG_DOC_CHUNKS WHERE workspace=$1 AND id IN ({ids})
""",
"get_by_ids_llm_response_cache": """SELECT id, original_prompt, return_value, mode, chunk_id, cache_type, queryparam,
"get_by_ids_llm_response_cache": """SELECT id, original_prompt, return_value, chunk_id, cache_type, queryparam,
EXTRACT(EPOCH FROM create_time)::BIGINT as create_time,
EXTRACT(EPOCH FROM update_time)::BIGINT as update_time
FROM LIGHTRAG_LLM_CACHE WHERE workspace=$1 AND id IN ({ids})
@ -4199,12 +4201,11 @@ SQL_TEMPLATES = {
ON CONFLICT (workspace,id) DO UPDATE
SET content = $2, update_time = CURRENT_TIMESTAMP
""",
"upsert_llm_response_cache": """INSERT INTO LIGHTRAG_LLM_CACHE(workspace,id,original_prompt,return_value,mode,chunk_id,cache_type,queryparam)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
ON CONFLICT (workspace,mode,id) DO UPDATE
"upsert_llm_response_cache": """INSERT INTO LIGHTRAG_LLM_CACHE(workspace,id,original_prompt,return_value,chunk_id,cache_type,queryparam)
VALUES ($1, $2, $3, $4, $5, $6, $7)
ON CONFLICT (workspace,id) DO UPDATE
SET original_prompt = EXCLUDED.original_prompt,
return_value=EXCLUDED.return_value,
mode=EXCLUDED.mode,
chunk_id=EXCLUDED.chunk_id,
cache_type=EXCLUDED.cache_type,
queryparam=EXCLUDED.queryparam,