Merge pull request #2171 from danielaskdd/doc-name-in-full-docs
Fix: Add file_path field to full_docs storage
This commit is contained in:
commit
1b274706d8
2 changed files with 15 additions and 7 deletions
|
|
@ -1750,6 +1750,7 @@ class PGKVStorage(BaseKVStorage):
|
||||||
_data = {
|
_data = {
|
||||||
"id": k,
|
"id": k,
|
||||||
"content": v["content"],
|
"content": v["content"],
|
||||||
|
"doc_name": v.get("file_path", ""), # Map file_path to doc_name
|
||||||
"workspace": self.workspace,
|
"workspace": self.workspace,
|
||||||
}
|
}
|
||||||
await self.db.execute(upsert_sql, _data)
|
await self.db.execute(upsert_sql, _data)
|
||||||
|
|
@ -4588,7 +4589,8 @@ TABLES = {
|
||||||
|
|
||||||
SQL_TEMPLATES = {
|
SQL_TEMPLATES = {
|
||||||
# SQL for KVStorage
|
# SQL for KVStorage
|
||||||
"get_by_id_full_docs": """SELECT id, COALESCE(content, '') as content
|
"get_by_id_full_docs": """SELECT id, COALESCE(content, '') as content,
|
||||||
|
COALESCE(doc_name, '') as file_path
|
||||||
FROM LIGHTRAG_DOC_FULL WHERE workspace=$1 AND id=$2
|
FROM LIGHTRAG_DOC_FULL WHERE workspace=$1 AND id=$2
|
||||||
""",
|
""",
|
||||||
"get_by_id_text_chunks": """SELECT id, tokens, COALESCE(content, '') as content,
|
"get_by_id_text_chunks": """SELECT id, tokens, COALESCE(content, '') as content,
|
||||||
|
|
@ -4603,7 +4605,8 @@ SQL_TEMPLATES = {
|
||||||
EXTRACT(EPOCH FROM update_time)::BIGINT as update_time
|
EXTRACT(EPOCH FROM update_time)::BIGINT as update_time
|
||||||
FROM LIGHTRAG_LLM_CACHE WHERE workspace=$1 AND id=$2
|
FROM LIGHTRAG_LLM_CACHE WHERE workspace=$1 AND id=$2
|
||||||
""",
|
""",
|
||||||
"get_by_ids_full_docs": """SELECT id, COALESCE(content, '') as content
|
"get_by_ids_full_docs": """SELECT id, COALESCE(content, '') as content,
|
||||||
|
COALESCE(doc_name, '') as file_path
|
||||||
FROM LIGHTRAG_DOC_FULL WHERE workspace=$1 AND id IN ({ids})
|
FROM LIGHTRAG_DOC_FULL WHERE workspace=$1 AND id IN ({ids})
|
||||||
""",
|
""",
|
||||||
"get_by_ids_text_chunks": """SELECT id, tokens, COALESCE(content, '') as content,
|
"get_by_ids_text_chunks": """SELECT id, tokens, COALESCE(content, '') as content,
|
||||||
|
|
@ -4639,10 +4642,12 @@ SQL_TEMPLATES = {
|
||||||
FROM LIGHTRAG_FULL_RELATIONS WHERE workspace=$1 AND id IN ({ids})
|
FROM LIGHTRAG_FULL_RELATIONS WHERE workspace=$1 AND id IN ({ids})
|
||||||
""",
|
""",
|
||||||
"filter_keys": "SELECT id FROM {table_name} WHERE workspace=$1 AND id IN ({ids})",
|
"filter_keys": "SELECT id FROM {table_name} WHERE workspace=$1 AND id IN ({ids})",
|
||||||
"upsert_doc_full": """INSERT INTO LIGHTRAG_DOC_FULL (id, content, workspace)
|
"upsert_doc_full": """INSERT INTO LIGHTRAG_DOC_FULL (id, content, doc_name, workspace)
|
||||||
VALUES ($1, $2, $3)
|
VALUES ($1, $2, $3, $4)
|
||||||
ON CONFLICT (workspace,id) DO UPDATE
|
ON CONFLICT (workspace,id) DO UPDATE
|
||||||
SET content = $2, update_time = CURRENT_TIMESTAMP
|
SET content = $2,
|
||||||
|
doc_name = $3,
|
||||||
|
update_time = CURRENT_TIMESTAMP
|
||||||
""",
|
""",
|
||||||
"upsert_llm_response_cache": """INSERT INTO LIGHTRAG_LLM_CACHE(workspace,id,original_prompt,return_value,chunk_id,cache_type,queryparam)
|
"upsert_llm_response_cache": """INSERT INTO LIGHTRAG_LLM_CACHE(workspace,id,original_prompt,return_value,chunk_id,cache_type,queryparam)
|
||||||
VALUES ($1, $2, $3, $4, $5, $6, $7)
|
VALUES ($1, $2, $3, $4, $5, $6, $7)
|
||||||
|
|
|
||||||
|
|
@ -961,7 +961,7 @@ class LightRAG:
|
||||||
doc_key = compute_mdhash_id(full_text, prefix="doc-")
|
doc_key = compute_mdhash_id(full_text, prefix="doc-")
|
||||||
else:
|
else:
|
||||||
doc_key = doc_id
|
doc_key = doc_id
|
||||||
new_docs = {doc_key: {"content": full_text}}
|
new_docs = {doc_key: {"content": full_text, "file_path": file_path}}
|
||||||
|
|
||||||
_add_doc_keys = await self.full_docs.filter_keys({doc_key})
|
_add_doc_keys = await self.full_docs.filter_keys({doc_key})
|
||||||
new_docs = {k: v for k, v in new_docs.items() if k in _add_doc_keys}
|
new_docs = {k: v for k, v in new_docs.items() if k in _add_doc_keys}
|
||||||
|
|
@ -1139,7 +1139,10 @@ class LightRAG:
|
||||||
# 4. Store document content in full_docs and status in doc_status
|
# 4. Store document content in full_docs and status in doc_status
|
||||||
# Store full document content separately
|
# Store full document content separately
|
||||||
full_docs_data = {
|
full_docs_data = {
|
||||||
doc_id: {"content": contents[doc_id]["content"]}
|
doc_id: {
|
||||||
|
"content": contents[doc_id]["content"],
|
||||||
|
"file_path": contents[doc_id]["file_path"],
|
||||||
|
}
|
||||||
for doc_id in new_docs.keys()
|
for doc_id in new_docs.keys()
|
||||||
}
|
}
|
||||||
await self.full_docs.upsert(full_docs_data)
|
await self.full_docs.upsert(full_docs_data)
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue