From 457d51952e608de0eadc7df2cdcda7542be1fb85 Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 5 Oct 2025 11:44:27 +0800 Subject: [PATCH] Add doc_name field to full docs storage - Store file_path in full_docs storage - Update PostgreSQL implementation by mapping file_path to doc_name - Other storage implementations automatically handle the new field --- lightrag/kg/postgres_impl.py | 15 ++++++++++----- lightrag/lightrag.py | 7 +++++-- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/lightrag/kg/postgres_impl.py b/lightrag/kg/postgres_impl.py index ad271b15..f22674c0 100644 --- a/lightrag/kg/postgres_impl.py +++ b/lightrag/kg/postgres_impl.py @@ -1750,6 +1750,7 @@ class PGKVStorage(BaseKVStorage): _data = { "id": k, "content": v["content"], + "doc_name": v.get("file_path", ""), # Map file_path to doc_name "workspace": self.workspace, } await self.db.execute(upsert_sql, _data) @@ -4588,7 +4589,8 @@ TABLES = { SQL_TEMPLATES = { # SQL for KVStorage - "get_by_id_full_docs": """SELECT id, COALESCE(content, '') as content + "get_by_id_full_docs": """SELECT id, COALESCE(content, '') as content, + COALESCE(doc_name, '') as file_path FROM LIGHTRAG_DOC_FULL WHERE workspace=$1 AND id=$2 """, "get_by_id_text_chunks": """SELECT id, tokens, COALESCE(content, '') as content, @@ -4603,7 +4605,8 @@ SQL_TEMPLATES = { EXTRACT(EPOCH FROM update_time)::BIGINT as update_time FROM LIGHTRAG_LLM_CACHE WHERE workspace=$1 AND id=$2 """, - "get_by_ids_full_docs": """SELECT id, COALESCE(content, '') as content + "get_by_ids_full_docs": """SELECT id, COALESCE(content, '') as content, + COALESCE(doc_name, '') as file_path FROM LIGHTRAG_DOC_FULL WHERE workspace=$1 AND id IN ({ids}) """, "get_by_ids_text_chunks": """SELECT id, tokens, COALESCE(content, '') as content, @@ -4639,10 +4642,12 @@ SQL_TEMPLATES = { FROM LIGHTRAG_FULL_RELATIONS WHERE workspace=$1 AND id IN ({ids}) """, "filter_keys": "SELECT id FROM {table_name} WHERE workspace=$1 AND id IN ({ids})", - "upsert_doc_full": 
"""INSERT INTO LIGHTRAG_DOC_FULL (id, content, workspace) - VALUES ($1, $2, $3) + "upsert_doc_full": """INSERT INTO LIGHTRAG_DOC_FULL (id, content, doc_name, workspace) + VALUES ($1, $2, $3, $4) ON CONFLICT (workspace,id) DO UPDATE - SET content = $2, update_time = CURRENT_TIMESTAMP + SET content = $2, + doc_name = $3, + update_time = CURRENT_TIMESTAMP """, "upsert_llm_response_cache": """INSERT INTO LIGHTRAG_LLM_CACHE(workspace,id,original_prompt,return_value,chunk_id,cache_type,queryparam) VALUES ($1, $2, $3, $4, $5, $6, $7) diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 60c296ac..b4345405 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -961,7 +961,7 @@ class LightRAG: doc_key = compute_mdhash_id(full_text, prefix="doc-") else: doc_key = doc_id - new_docs = {doc_key: {"content": full_text}} + new_docs = {doc_key: {"content": full_text, "file_path": file_path}} _add_doc_keys = await self.full_docs.filter_keys({doc_key}) new_docs = {k: v for k, v in new_docs.items() if k in _add_doc_keys} @@ -1139,7 +1139,10 @@ class LightRAG: # 4. Store document content in full_docs and status in doc_status # Store full document content separately full_docs_data = { - doc_id: {"content": contents[doc_id]["content"]} + doc_id: { + "content": contents[doc_id]["content"], + "file_path": contents[doc_id]["file_path"], + } for doc_id in new_docs.keys() } await self.full_docs.upsert(full_docs_data)