Add doc_name field to full docs storage

- Store file_path in full_docs storage - Update PostgreSQL implementation by map file_path to doc_name - Other storage implementation automatically handles the new field
2025-10-05 11:44:27 +08:00 · 2025-10-05 11:44:27 +08:00 · 457d51952e
commit 457d51952e
parent d550f1c58c
2 changed files with 15 additions and 7 deletions
--- a/lightrag/kg/postgres_impl.py
+++ b/lightrag/kg/postgres_impl.py
@ -1750,6 +1750,7 @@ class PGKVStorage(BaseKVStorage):
                _data = {
                    "id": k,
                    "content": v["content"],
+                    "doc_name": v.get("file_path", ""),  # Map file_path to doc_name
                    "workspace": self.workspace,
                }
                await self.db.execute(upsert_sql, _data)
@ -4588,7 +4589,8 @@ TABLES = {

 SQL_TEMPLATES = {
    # SQL for KVStorage
-    "get_by_id_full_docs": """SELECT id, COALESCE(content, '') as content
+    "get_by_id_full_docs": """SELECT id, COALESCE(content, '') as content,
+                                COALESCE(doc_name, '') as file_path
                                FROM LIGHTRAG_DOC_FULL WHERE workspace=$1 AND id=$2
                            """,
    "get_by_id_text_chunks": """SELECT id, tokens, COALESCE(content, '') as content,
@ -4603,7 +4605,8 @@ SQL_TEMPLATES = {
                                EXTRACT(EPOCH FROM update_time)::BIGINT as update_time
                                FROM LIGHTRAG_LLM_CACHE WHERE workspace=$1 AND id=$2
                               """,
-    "get_by_ids_full_docs": """SELECT id, COALESCE(content, '') as content
+    "get_by_ids_full_docs": """SELECT id, COALESCE(content, '') as content,
+                                 COALESCE(doc_name, '') as file_path
                                 FROM LIGHTRAG_DOC_FULL WHERE workspace=$1 AND id IN ({ids})
                            """,
    "get_by_ids_text_chunks": """SELECT id, tokens, COALESCE(content, '') as content,
@ -4639,10 +4642,12 @@ SQL_TEMPLATES = {
                                 FROM LIGHTRAG_FULL_RELATIONS WHERE workspace=$1 AND id IN ({ids})
                                """,
    "filter_keys": "SELECT id FROM {table_name} WHERE workspace=$1 AND id IN ({ids})",
-    "upsert_doc_full": """INSERT INTO LIGHTRAG_DOC_FULL (id, content, workspace)
-                        VALUES ($1, $2, $3)
+    "upsert_doc_full": """INSERT INTO LIGHTRAG_DOC_FULL (id, content, doc_name, workspace)
+                        VALUES ($1, $2, $3, $4)
                        ON CONFLICT (workspace,id) DO UPDATE
-                           SET content = $2, update_time = CURRENT_TIMESTAMP
+                           SET content = $2,
+                               doc_name = $3,
+                               update_time = CURRENT_TIMESTAMP
                       """,
    "upsert_llm_response_cache": """INSERT INTO LIGHTRAG_LLM_CACHE(workspace,id,original_prompt,return_value,chunk_id,cache_type,queryparam)
                                      VALUES ($1, $2, $3, $4, $5, $6, $7)
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@ -961,7 +961,7 @@ class LightRAG:
                doc_key = compute_mdhash_id(full_text, prefix="doc-")
            else:
                doc_key = doc_id
-            new_docs = {doc_key: {"content": full_text}}
+            new_docs = {doc_key: {"content": full_text, "file_path": file_path}}

            _add_doc_keys = await self.full_docs.filter_keys({doc_key})
            new_docs = {k: v for k, v in new_docs.items() if k in _add_doc_keys}
@ -1139,7 +1139,10 @@ class LightRAG:
        # 4. Store document content in full_docs and status in doc_status
        #    Store full document content separately
        full_docs_data = {
-            doc_id: {"content": contents[doc_id]["content"]}
+            doc_id: {
+                "content": contents[doc_id]["content"],
+                "file_path": contents[doc_id]["file_path"],
+            }
            for doc_id in new_docs.keys()
        }
        await self.full_docs.upsert(full_docs_data)