diff --git a/env.example b/env.example
index 19153ecc..98e4790b 100644
--- a/env.example
+++ b/env.example
@@ -114,15 +114,6 @@ EMBEDDING_BINDING_HOST=http://localhost:11434
 # LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage
 # LIGHTRAG_GRAPH_STORAGE=Neo4JStorage
 
-### TiDB Configuration (Deprecated)
-# TIDB_HOST=localhost
-# TIDB_PORT=4000
-# TIDB_USER=your_username
-# TIDB_PASSWORD='your_password'
-# TIDB_DATABASE=your_database
-### separating all data from difference Lightrag instances(deprecating)
-# TIDB_WORKSPACE=default
-
 ### PostgreSQL Configuration
 POSTGRES_HOST=localhost
 POSTGRES_PORT=5432
@@ -130,7 +121,7 @@ POSTGRES_USER=your_username
 POSTGRES_PASSWORD='your_password'
 POSTGRES_DATABASE=your_database
 POSTGRES_MAX_CONNECTIONS=12
-### separating all data from difference Lightrag instances(deprecating)
+### separating all data from different LightRAG instances
 # POSTGRES_WORKSPACE=default
 
 ### Neo4j Configuration
@@ -146,14 +137,15 @@ NEO4J_PASSWORD='your_password'
 # AGE_POSTGRES_PORT=8529
 
 # AGE Graph Name(apply to PostgreSQL and independent AGM)
-### AGE_GRAPH_NAME is precated
+### AGE_GRAPH_NAME is deprecated
 # AGE_GRAPH_NAME=lightrag
 
 ### MongoDB Configuration
 MONGO_URI=mongodb://root:root@localhost:27017/
 MONGO_DATABASE=LightRAG
 ### separating all data from difference Lightrag instances(deprecating)
-# MONGODB_GRAPH=false
+### separating all data from different LightRAG instances
+# MONGODB_WORKSPACE=default
 
 ### Milvus Configuration
 MILVUS_URI=http://localhost:19530
diff --git a/lightrag/kg/mongo_impl.py b/lightrag/kg/mongo_impl.py
index 38baff5c..6bc5db82 100644
--- a/lightrag/kg/mongo_impl.py
+++ b/lightrag/kg/mongo_impl.py
@@ -133,6 +133,11 @@ class MongoKVStorage(BaseKVStorage):
         operations = []
         for k, v in data.items():
+            # For text_chunks namespace, ensure llm_cache_list field exists
+            if self.namespace.endswith("text_chunks"):
+                if "llm_cache_list" not in v:
+                    v["llm_cache_list"] = []
+
             v["_id"] = k  # Use flattened key as _id
             operations.append(UpdateOne({"_id": k}, {"$set": v}, upsert=True))
@@ -247,6 +252,9 @@ class MongoDocStatusStorage(DocStatusStorage):
             return
         update_tasks: list[Any] = []
         for k, v in data.items():
+            # Ensure chunks_list field exists and is an array
+            if "chunks_list" not in v:
+                v["chunks_list"] = []
             data[k]["_id"] = k
             update_tasks.append(
                 self._data.update_one({"_id": k}, {"$set": v}, upsert=True)
@@ -279,6 +287,7 @@ class MongoDocStatusStorage(DocStatusStorage):
                 updated_at=doc.get("updated_at"),
                 chunks_count=doc.get("chunks_count", -1),
                 file_path=doc.get("file_path", doc["_id"]),
+                chunks_list=doc.get("chunks_list", []),
             )
             for doc in result
         }
diff --git a/lightrag/kg/postgres_impl.py b/lightrag/kg/postgres_impl.py
index 28a86b6e..cea4c2bf 100644
--- a/lightrag/kg/postgres_impl.py
+++ b/lightrag/kg/postgres_impl.py
@@ -136,6 +136,48 @@ class PostgreSQLDB:
         except Exception as e:
             logger.warning(f"Failed to add chunk_id column to LIGHTRAG_LLM_CACHE: {e}")
 
+    async def _migrate_llm_cache_add_cache_type(self):
+        """Add cache_type column to LIGHTRAG_LLM_CACHE table if it doesn't exist"""
+        try:
+            # Check if cache_type column exists
+            check_column_sql = """
+            SELECT column_name
+            FROM information_schema.columns
+            WHERE table_name = 'lightrag_llm_cache'
+            AND column_name = 'cache_type'
+            """
+
+            column_info = await self.query(check_column_sql)
+            if not column_info:
+                logger.info("Adding cache_type column to LIGHTRAG_LLM_CACHE table")
+                add_column_sql = """
+                ALTER TABLE LIGHTRAG_LLM_CACHE
+                ADD COLUMN cache_type VARCHAR(32) NULL
+                """
+                await self.execute(add_column_sql)
+                logger.info(
+                    "Successfully added cache_type column to LIGHTRAG_LLM_CACHE table"
+                )
+
+                # Migrate existing data: extract cache_type from flattened keys
+                logger.info("Migrating existing LLM cache data to populate cache_type field")
+                update_sql = """
+                UPDATE LIGHTRAG_LLM_CACHE
+                SET cache_type = CASE
+                    WHEN id LIKE '%:%:%' THEN split_part(id, ':', 2)
+                    ELSE 'extract'
+                END
+                WHERE cache_type IS NULL
+                """
+                await self.execute(update_sql)
+                logger.info("Successfully migrated existing LLM cache data")
+            else:
+                logger.info(
+                    "cache_type column already exists in LIGHTRAG_LLM_CACHE table"
+                )
+        except Exception as e:
+            logger.warning(f"Failed to add cache_type column to LIGHTRAG_LLM_CACHE: {e}")
+
     async def _migrate_timestamp_columns(self):
         """Migrate timestamp columns in tables to timezone-aware types, assuming original data is in UTC time"""
         # Tables and columns that need migration
@@ -301,15 +343,17 @@ class PostgreSQLDB:
                         record["mode"], record["original_prompt"]
                     )
 
+                    # Determine cache_type based on mode
+                    cache_type = "extract" if record["mode"] == "default" else "unknown"
+
                     # Generate new flattened key
-                    cache_type = "extract"  # Default type
                     new_key = f"{record['mode']}:{cache_type}:{new_hash}"
 
-                    # Insert new format data
+                    # Insert new format data with cache_type field
                     insert_sql = """
                     INSERT INTO LIGHTRAG_LLM_CACHE
-                    (workspace, id, mode, original_prompt, return_value, chunk_id, create_time, update_time)
-                    VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
+                    (workspace, id, mode, original_prompt, return_value, chunk_id, cache_type, create_time, update_time)
+                    VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
                     ON CONFLICT (workspace, mode, id) DO NOTHING
                     """
@@ -322,6 +366,7 @@ class PostgreSQLDB:
                             "original_prompt": record["original_prompt"],
                             "return_value": record["return_value"],
                             "chunk_id": record["chunk_id"],
+                            "cache_type": cache_type,  # Add cache_type field
                             "create_time": record["create_time"],
                             "update_time": record["update_time"],
                         },
@@ -357,6 +402,68 @@ class PostgreSQLDB:
             logger.error(f"LLM cache migration failed: {e}")
             # Don't raise exception, allow system to continue startup
 
+    async def _migrate_doc_status_add_chunks_list(self):
+        """Add chunks_list column to LIGHTRAG_DOC_STATUS table if it doesn't exist"""
+        try:
+            # Check if chunks_list column exists
+            check_column_sql = """
+            SELECT column_name
+            FROM information_schema.columns
+            WHERE table_name = 'lightrag_doc_status'
+            AND column_name = 'chunks_list'
+            """
+
+            column_info = await self.query(check_column_sql)
+            if not column_info:
+                logger.info("Adding chunks_list column to LIGHTRAG_DOC_STATUS table")
+                add_column_sql = """
+                ALTER TABLE LIGHTRAG_DOC_STATUS
+                ADD COLUMN chunks_list JSONB NULL DEFAULT '[]'::jsonb
+                """
+                await self.execute(add_column_sql)
+                logger.info(
+                    "Successfully added chunks_list column to LIGHTRAG_DOC_STATUS table"
+                )
+            else:
+                logger.info(
+                    "chunks_list column already exists in LIGHTRAG_DOC_STATUS table"
+                )
+        except Exception as e:
+            logger.warning(
+                f"Failed to add chunks_list column to LIGHTRAG_DOC_STATUS: {e}"
+            )
+
+    async def _migrate_text_chunks_add_llm_cache_list(self):
+        """Add llm_cache_list column to LIGHTRAG_DOC_CHUNKS table if it doesn't exist"""
+        try:
+            # Check if llm_cache_list column exists
+            check_column_sql = """
+            SELECT column_name
+            FROM information_schema.columns
+            WHERE table_name = 'lightrag_doc_chunks'
+            AND column_name = 'llm_cache_list'
+            """
+
+            column_info = await self.query(check_column_sql)
+            if not column_info:
+                logger.info("Adding llm_cache_list column to LIGHTRAG_DOC_CHUNKS table")
column to LIGHTRAG_DOC_CHUNKS table") + add_column_sql = """ + ALTER TABLE LIGHTRAG_DOC_CHUNKS + ADD COLUMN llm_cache_list JSONB NULL DEFAULT '[]'::jsonb + """ + await self.execute(add_column_sql) + logger.info( + "Successfully added llm_cache_list column to LIGHTRAG_DOC_CHUNKS table" + ) + else: + logger.info( + "llm_cache_list column already exists in LIGHTRAG_DOC_CHUNKS table" + ) + except Exception as e: + logger.warning( + f"Failed to add llm_cache_list column to LIGHTRAG_DOC_CHUNKS: {e}" + ) + async def check_tables(self): # First create all tables for k, v in TABLES.items(): @@ -408,6 +515,13 @@ class PostgreSQLDB: logger.error(f"PostgreSQL, Failed to migrate LLM cache chunk_id field: {e}") # Don't throw an exception, allow the initialization process to continue + # Migrate LLM cache table to add cache_type field if needed + try: + await self._migrate_llm_cache_add_cache_type() + except Exception as e: + logger.error(f"PostgreSQL, Failed to migrate LLM cache cache_type field: {e}") + # Don't throw an exception, allow the initialization process to continue + # Finally, attempt to migrate old doc chunks data if needed try: await self._migrate_doc_chunks_to_vdb_chunks() @@ -421,6 +535,22 @@ class PostgreSQLDB: except Exception as e: logger.error(f"PostgreSQL, LLM cache migration failed: {e}") + # Migrate doc status to add chunks_list field if needed + try: + await self._migrate_doc_status_add_chunks_list() + except Exception as e: + logger.error( + f"PostgreSQL, Failed to migrate doc status chunks_list field: {e}" + ) + + # Migrate text chunks to add llm_cache_list field if needed + try: + await self._migrate_text_chunks_add_llm_cache_list() + except Exception as e: + logger.error( + f"PostgreSQL, Failed to migrate text chunks llm_cache_list field: {e}" + ) + async def query( self, sql: str, @@ -608,17 +738,11 @@ class PGKVStorage(BaseKVStorage): if is_namespace(self.namespace, NameSpace.KV_STORE_LLM_RESPONSE_CACHE): processed_results = {} for row in results: - # Parse flattened key to extract cache_type - key_parts = row["id"].split(":") - cache_type = key_parts[1] if len(key_parts) >= 3 else "unknown" - # Map field names and add cache_type for compatibility processed_row = { **row, - "return": row.get( - "return_value", "" - ), # Map return_value to return - "cache_type": cache_type, # Add cache_type from key + "return": row.get("return_value", ""), + "cache_type": row.get("original_prompt", "unknow"), "original_prompt": row.get("original_prompt", ""), "chunk_id": row.get("chunk_id"), "mode": row.get("mode", "default"), @@ -626,6 +750,20 @@ class PGKVStorage(BaseKVStorage): processed_results[row["id"]] = processed_row return processed_results + # For text_chunks namespace, parse llm_cache_list JSON string back to list + if is_namespace(self.namespace, NameSpace.KV_STORE_TEXT_CHUNKS): + processed_results = {} + for row in results: + llm_cache_list = row.get("llm_cache_list", []) + if isinstance(llm_cache_list, str): + try: + llm_cache_list = json.loads(llm_cache_list) + except json.JSONDecodeError: + llm_cache_list = [] + row["llm_cache_list"] = llm_cache_list + processed_results[row["id"]] = row + return processed_results + # For other namespaces, return as-is return {row["id"]: row for row in results} except Exception as e: @@ -637,6 +775,29 @@ class PGKVStorage(BaseKVStorage): sql = SQL_TEMPLATES["get_by_id_" + self.namespace] params = {"workspace": self.db.workspace, "id": id} response = await self.db.query(sql, params) + + if response and is_namespace(self.namespace, 
+            # Parse llm_cache_list JSON string back to list
+            llm_cache_list = response.get("llm_cache_list", [])
+            if isinstance(llm_cache_list, str):
+                try:
+                    llm_cache_list = json.loads(llm_cache_list)
+                except json.JSONDecodeError:
+                    llm_cache_list = []
+            response["llm_cache_list"] = llm_cache_list
+
+        # Special handling for LLM cache to ensure compatibility with _get_cached_extraction_results
+        if response and is_namespace(self.namespace, NameSpace.KV_STORE_LLM_RESPONSE_CACHE):
+            # Map field names and add cache_type for compatibility
+            response = {
+                **response,
+                "return": response.get("return_value", ""),
+                "cache_type": response.get("cache_type"),
+                "original_prompt": response.get("original_prompt", ""),
+                "chunk_id": response.get("chunk_id"),
+                "mode": response.get("mode", "default"),
+            }
+
         return response if response else None
 
     # Query by id
@@ -646,13 +807,36 @@ class PGKVStorage(BaseKVStorage):
             ids=",".join([f"'{id}'" for id in ids])
         )
         params = {"workspace": self.db.workspace}
-        return await self.db.query(sql, params, multirows=True)
+        results = await self.db.query(sql, params, multirows=True)
 
-    async def get_by_status(self, status: str) -> Union[list[dict[str, Any]], None]:
-        """Specifically for llm_response_cache."""
-        SQL = SQL_TEMPLATES["get_by_status_" + self.namespace]
-        params = {"workspace": self.db.workspace, "status": status}
-        return await self.db.query(SQL, params, multirows=True)
+        if results and is_namespace(self.namespace, NameSpace.KV_STORE_TEXT_CHUNKS):
+            # Parse llm_cache_list JSON string back to list for each result
+            for result in results:
+                llm_cache_list = result.get("llm_cache_list", [])
+                if isinstance(llm_cache_list, str):
+                    try:
+                        llm_cache_list = json.loads(llm_cache_list)
+                    except json.JSONDecodeError:
+                        llm_cache_list = []
+                result["llm_cache_list"] = llm_cache_list
+
+        # Special handling for LLM cache to ensure compatibility with _get_cached_extraction_results
+        if results and is_namespace(self.namespace, NameSpace.KV_STORE_LLM_RESPONSE_CACHE):
+            processed_results = []
+            for row in results:
+                # Map field names and add cache_type for compatibility
+                processed_row = {
+                    **row,
+                    "return": row.get("return_value", ""),
+                    "cache_type": row.get("cache_type"),
+                    "original_prompt": row.get("original_prompt", ""),
+                    "chunk_id": row.get("chunk_id"),
+                    "mode": row.get("mode", "default"),
+                }
+                processed_results.append(processed_row)
+            return processed_results
+
+        return results if results else []
 
     async def filter_keys(self, keys: set[str]) -> set[str]:
         """Filter out duplicated content"""
@@ -693,6 +877,7 @@ class PGKVStorage(BaseKVStorage):
                 "full_doc_id": v["full_doc_id"],
                 "content": v["content"],
                 "file_path": v["file_path"],
+                "llm_cache_list": json.dumps(v.get("llm_cache_list", [])),
                 "create_time": current_time,
                 "update_time": current_time,
             }
@@ -716,6 +901,7 @@ class PGKVStorage(BaseKVStorage):
                     "return_value": v["return"],
                     "mode": v.get("mode", "default"),  # Get mode from data
                     "chunk_id": v.get("chunk_id"),
+                    "cache_type": v.get("cache_type", "extract"),  # Get cache_type from data
                 }
 
                 await self.db.execute(upsert_sql, _data)
@@ -1140,6 +1326,14 @@ class PGDocStatusStorage(DocStatusStorage):
         if result is None or result == []:
             return None
         else:
+            # Parse chunks_list JSON string back to list
+            chunks_list = result[0].get("chunks_list", [])
+            if isinstance(chunks_list, str):
+                try:
+                    chunks_list = json.loads(chunks_list)
+                except json.JSONDecodeError:
+                    chunks_list = []
+
             return dict(
                 content=result[0]["content"],
                content_length=result[0]["content_length"],
@@ -1149,6 +1343,7 @@ class PGDocStatusStorage(DocStatusStorage):
                created_at=result[0]["created_at"],
                updated_at=result[0]["updated_at"],
                file_path=result[0]["file_path"],
+                chunks_list=chunks_list,
             )
 
     async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
@@ -1163,19 +1358,32 @@ class PGDocStatusStorage(DocStatusStorage):
         if not results:
             return []
-        return [
-            {
-                "content": row["content"],
-                "content_length": row["content_length"],
-                "content_summary": row["content_summary"],
-                "status": row["status"],
-                "chunks_count": row["chunks_count"],
-                "created_at": row["created_at"],
-                "updated_at": row["updated_at"],
-                "file_path": row["file_path"],
-            }
-            for row in results
-        ]
+
+        processed_results = []
+        for row in results:
+            # Parse chunks_list JSON string back to list
+            chunks_list = row.get("chunks_list", [])
+            if isinstance(chunks_list, str):
+                try:
+                    chunks_list = json.loads(chunks_list)
+                except json.JSONDecodeError:
+                    chunks_list = []
+
+            processed_results.append(
+                {
+                    "content": row["content"],
+                    "content_length": row["content_length"],
+                    "content_summary": row["content_summary"],
+                    "status": row["status"],
+                    "chunks_count": row["chunks_count"],
+                    "created_at": row["created_at"],
+                    "updated_at": row["updated_at"],
+                    "file_path": row["file_path"],
+                    "chunks_list": chunks_list,
+                }
+            )
+
+        return processed_results
 
     async def get_status_counts(self) -> dict[str, int]:
         """Get counts of documents in each status"""
@@ -1196,8 +1404,18 @@ class PGDocStatusStorage(DocStatusStorage):
         sql = "select * from LIGHTRAG_DOC_STATUS where workspace=$1 and status=$2"
         params = {"workspace": self.db.workspace, "status": status.value}
         result = await self.db.query(sql, params, True)
-        docs_by_status = {
-            element["id"]: DocProcessingStatus(
+
+        docs_by_status = {}
+        for element in result:
+            # Parse chunks_list JSON string back to list
+            chunks_list = element.get("chunks_list", [])
+            if isinstance(chunks_list, str):
+                try:
+                    chunks_list = json.loads(chunks_list)
+                except json.JSONDecodeError:
+                    chunks_list = []
+
+            docs_by_status[element["id"]] = DocProcessingStatus(
                 content=element["content"],
                 content_summary=element["content_summary"],
                 content_length=element["content_length"],
@@ -1206,9 +1424,9 @@ class PGDocStatusStorage(DocStatusStorage):
                 updated_at=element["updated_at"],
                 chunks_count=element["chunks_count"],
                 file_path=element["file_path"],
+                chunks_list=chunks_list,
             )
-            for element in result
-        }
+
         return docs_by_status
 
     async def index_done_callback(self) -> None:
@@ -1272,10 +1490,10 @@ class PGDocStatusStorage(DocStatusStorage):
                 logger.warning(f"Unable to parse datetime string: {dt_str}")
                 return None
 
-        # Modified SQL to include created_at and updated_at in both INSERT and UPDATE operations
-        # Both fields are updated from the input data in both INSERT and UPDATE cases
-        sql = """insert into LIGHTRAG_DOC_STATUS(workspace,id,content,content_summary,content_length,chunks_count,status,file_path,created_at,updated_at)
-                 values($1,$2,$3,$4,$5,$6,$7,$8,$9,$10)
+        # Modified SQL to include created_at, updated_at, and chunks_list in both INSERT and UPDATE operations
+        # All fields are updated from the input data in both INSERT and UPDATE cases
+        sql = """insert into LIGHTRAG_DOC_STATUS(workspace,id,content,content_summary,content_length,chunks_count,status,file_path,chunks_list,created_at,updated_at)
+                 values($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11)
                  on conflict(id,workspace) do update set
                  content = EXCLUDED.content,
                  content_summary = EXCLUDED.content_summary,
@@ -1283,6 +1501,7 @@ class PGDocStatusStorage(DocStatusStorage):
                  chunks_count = EXCLUDED.chunks_count,
                  status = EXCLUDED.status,
                  file_path = EXCLUDED.file_path,
+                 chunks_list = EXCLUDED.chunks_list,
                  created_at = EXCLUDED.created_at,
                  updated_at = EXCLUDED.updated_at"""
         for k, v in data.items():
@@ -1290,7 +1509,7 @@ class PGDocStatusStorage(DocStatusStorage):
             created_at = parse_datetime(v.get("created_at"))
             updated_at = parse_datetime(v.get("updated_at"))
 
-            # chunks_count is optional
+            # chunks_count and chunks_list are optional
             await self.db.execute(
                 sql,
                 {
@@ -1302,6 +1521,7 @@ class PGDocStatusStorage(DocStatusStorage):
                     "chunks_count": v["chunks_count"] if "chunks_count" in v else -1,
                     "status": v["status"],
                     "file_path": v["file_path"],
+                    "chunks_list": json.dumps(v.get("chunks_list", [])),
                     "created_at": created_at,  # Use the converted datetime object
                     "updated_at": updated_at,  # Use the converted datetime object
                 },
@@ -2620,6 +2840,7 @@ TABLES = {
                     tokens INTEGER,
                     content TEXT,
                     file_path VARCHAR(256),
+                    llm_cache_list JSONB NULL DEFAULT '[]'::jsonb,
                     create_time TIMESTAMP(0) WITH TIME ZONE,
                     update_time TIMESTAMP(0) WITH TIME ZONE,
                     CONSTRAINT LIGHTRAG_DOC_CHUNKS_PK PRIMARY KEY (workspace, id)
@@ -2692,6 +2913,7 @@ TABLES = {
                    chunks_count int4 NULL,
                    status varchar(64) NULL,
                    file_path TEXT NULL,
+                   chunks_list JSONB NULL DEFAULT '[]'::jsonb,
                    created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP NULL,
                    updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP NULL,
                    CONSTRAINT LIGHTRAG_DOC_STATUS_PK PRIMARY KEY (workspace, id)
@@ -2706,24 +2928,26 @@ SQL_TEMPLATES = {
                                 FROM LIGHTRAG_DOC_FULL WHERE workspace=$1 AND id=$2
                                """,
     "get_by_id_text_chunks": """SELECT id, tokens, COALESCE(content, '') as content,
-                                chunk_order_index, full_doc_id, file_path
+                                chunk_order_index, full_doc_id, file_path,
+                                COALESCE(llm_cache_list, '[]'::jsonb) as llm_cache_list
                                 FROM LIGHTRAG_DOC_CHUNKS WHERE workspace=$1 AND id=$2
                                """,
-    "get_by_id_llm_response_cache": """SELECT id, original_prompt, COALESCE(return_value, '') as "return", mode, chunk_id
+    "get_by_id_llm_response_cache": """SELECT id, original_prompt, return_value, mode, chunk_id, cache_type
                                 FROM LIGHTRAG_LLM_CACHE WHERE workspace=$1 AND id=$2
                                """,
-    "get_by_mode_id_llm_response_cache": """SELECT id, original_prompt, COALESCE(return_value, '') as "return", mode, chunk_id
+    "get_by_mode_id_llm_response_cache": """SELECT id, original_prompt, return_value, mode, chunk_id
                                 FROM LIGHTRAG_LLM_CACHE WHERE workspace=$1 AND mode=$2 AND id=$3
                                """,
     "get_by_ids_full_docs": """SELECT id, COALESCE(content, '') as content
                                 FROM LIGHTRAG_DOC_FULL WHERE workspace=$1 AND id IN ({ids})
                                """,
     "get_by_ids_text_chunks": """SELECT id, tokens, COALESCE(content, '') as content,
-                                chunk_order_index, full_doc_id, file_path
+                                chunk_order_index, full_doc_id, file_path,
+                                COALESCE(llm_cache_list, '[]'::jsonb) as llm_cache_list
                                 FROM LIGHTRAG_DOC_CHUNKS WHERE workspace=$1 AND id IN ({ids})
                                """,
-    "get_by_ids_llm_response_cache": """SELECT id, original_prompt, COALESCE(return_value, '') as "return", mode, chunk_id
-                                FROM LIGHTRAG_LLM_CACHE WHERE workspace=$1 AND mode= IN ({ids})
+    "get_by_ids_llm_response_cache": """SELECT id, original_prompt, return_value, mode, chunk_id, cache_type
+                                FROM LIGHTRAG_LLM_CACHE WHERE workspace=$1 AND id IN ({ids})
                                """,
     "filter_keys": "SELECT id FROM {table_name} WHERE workspace=$1 AND id IN ({ids})",
     "upsert_doc_full": """INSERT INTO LIGHTRAG_DOC_FULL (id, content, workspace)
@@ -2731,25 +2955,27 @@ SQL_TEMPLATES = {
                           ON CONFLICT (workspace,id) DO UPDATE
                           SET content = $2, update_time = CURRENT_TIMESTAMP
                          """,
-    "upsert_llm_response_cache": """INSERT INTO LIGHTRAG_LLM_CACHE(workspace,id,original_prompt,return_value,mode,chunk_id)
-                                      VALUES ($1, $2, $3, $4, $5, $6)
+    "upsert_llm_response_cache": """INSERT INTO LIGHTRAG_LLM_CACHE(workspace,id,original_prompt,return_value,mode,chunk_id,cache_type)
+                                      VALUES ($1, $2, $3, $4, $5, $6, $7)
                                       ON CONFLICT (workspace,mode,id) DO UPDATE
                                       SET original_prompt = EXCLUDED.original_prompt,
                                       return_value=EXCLUDED.return_value,
                                       mode=EXCLUDED.mode,
                                       chunk_id=EXCLUDED.chunk_id,
+                                      cache_type=EXCLUDED.cache_type,
                                       update_time = CURRENT_TIMESTAMP
                                      """,
     "upsert_text_chunk": """INSERT INTO LIGHTRAG_DOC_CHUNKS (workspace, id, tokens,
-                      chunk_order_index, full_doc_id, content, file_path,
+                      chunk_order_index, full_doc_id, content, file_path, llm_cache_list,
                       create_time, update_time)
-                      VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
+                      VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
                       ON CONFLICT (workspace,id) DO UPDATE
                       SET tokens=EXCLUDED.tokens,
                       chunk_order_index=EXCLUDED.chunk_order_index,
                       full_doc_id=EXCLUDED.full_doc_id,
                       content = EXCLUDED.content,
                       file_path=EXCLUDED.file_path,
+                      llm_cache_list=EXCLUDED.llm_cache_list,
                       update_time = EXCLUDED.update_time
                      """,
     # SQL for VectorStorage
diff --git a/lightrag/kg/tidb_impl.py b/lightrag/kg/tidb_impl.py
index 06ec1cd5..d60bb1f6 100644
--- a/lightrag/kg/tidb_impl.py
+++ b/lightrag/kg/tidb_impl.py
@@ -520,11 +520,6 @@ class TiDBVectorDBStorage(BaseVectorStorage):
             }
             await self.db.execute(SQL_TEMPLATES["upsert_relationship"], param)
 
-    async def get_by_status(self, status: str) -> Union[list[dict[str, Any]], None]:
-        SQL = SQL_TEMPLATES["get_by_status_" + self.namespace]
-        params = {"workspace": self.db.workspace, "status": status}
-        return await self.db.query(SQL, params, multirows=True)
-
     async def delete(self, ids: list[str]) -> None:
        """Delete vectors with specified IDs from the storage.
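
Note (editor's sketch, not part of the diff): the migrations above assume flattened LLM-cache keys of the form "{mode}:{cache_type}:{hash}". The SQL backfill uses split_part(id, ':', 2) with an 'extract' fallback for legacy two-part keys; a Python-side equivalent, for illustration only, would be:

# Illustrative sketch under the key layout assumed above.
def parse_flattened_cache_key(key: str) -> tuple[str, str, str]:
    """Split "mode:cache_type:hash" into its parts.

    Keys written before this migration may lack the cache_type segment;
    mirror the SQL fallback and default it to "extract".
    """
    parts = key.split(":")
    if len(parts) >= 3:
        return parts[0], parts[1], parts[2]
    return parts[0], "extract", parts[-1]


assert parse_flattened_cache_key("default:extract:abc123") == ("default", "extract", "abc123")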
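Note (editor's sketch, hypothetical helper not in the diff): the new chunks_list and llm_cache_list fields round-trip through JSONB as strings -- json.dumps() on upsert, and a json.loads() guard on every read, since asyncpg can hand JSONB values back as str and stored data may be malformed. The repeated inline parsing blocks above all reduce to something like:

import json
from typing import Any


def jsonb_to_list(value: Any) -> list:
    """Normalize a JSONB-backed field to a Python list.

    Accepts a decoded list, a raw JSON string, or bad/missing data;
    falls back to [] exactly as the inline blocks in the diff do.
    """
    if isinstance(value, str):
        try:
            value = json.loads(value)
        except json.JSONDecodeError:
            return []
    return value if isinstance(value, list) else []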