Remove content field from DocProcessingStatus, update MongoDB and PostgreSQL implementation
parent 8274ed52d1
commit 24c36d876c

3 changed files with 8 additions and 11 deletions
@@ -629,8 +629,6 @@ class DocStatus(str, Enum):
 class DocProcessingStatus:
     """Document processing status data structure"""
 
-    content: str
-    """Original content of the document"""
     content_summary: str
     """First 100 chars of document content, used for preview"""
     content_length: int
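For orientation, here is a minimal sketch of what the dataclass looks like once this hunk is applied. It lists only the fields visible in this commit's hunks; types and defaults for everything other than content_summary and content_length are assumptions, and the final call shows why a caller still passing content= now fails.

# Illustrative reconstruction only -- not the repository's actual file.
from dataclasses import dataclass, field

@dataclass
class DocProcessingStatus:
    """Document processing status data structure"""

    content_summary: str
    """First 100 chars of document content, used for preview"""
    content_length: int
    status: str                       # assumed type; the project uses a DocStatus enum
    created_at: str | None = None     # assumed optional
    updated_at: str | None = None     # assumed optional
    chunks_count: int = -1
    file_path: str = ""
    chunks_list: list = field(default_factory=list)

# With the field gone, old call sites break loudly rather than silently:
try:
    DocProcessingStatus(
        content="full text",          # no longer accepted
        content_summary="full text"[:100],
        content_length=9,
        status="processed",
    )
except TypeError as exc:
    print(exc)  # __init__() got an unexpected keyword argument 'content'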
@@ -374,14 +374,13 @@ class MongoDocStatusStorage(DocStatusStorage):
         result = await cursor.to_list()
         return {
             doc["_id"]: DocProcessingStatus(
-                content=doc["content"],
                 content_summary=doc.get("content_summary"),
                 content_length=doc["content_length"],
-                file_path=doc.get("file_path", doc["_id"]),
                 status=doc["status"],
                 created_at=doc.get("created_at"),
                 updated_at=doc.get("updated_at"),
                 chunks_count=doc.get("chunks_count", -1),
+                file_path=doc.get("file_path", doc["_id"]),
                 chunks_list=doc.get("chunks_list", []),
             )
             for doc in result
@@ -1673,7 +1673,7 @@ class PGDocStatusStorage(DocStatusStorage):
         updated_at = self._format_datetime_with_timezone(result[0]["updated_at"])
 
         return dict(
-            content=result[0]["content"],
+            # content=result[0]["content"],
             content_length=result[0]["content_length"],
             content_summary=result[0]["content_summary"],
             status=result[0]["status"],
@@ -1713,7 +1713,7 @@ class PGDocStatusStorage(DocStatusStorage):
 
             processed_results.append(
                 {
-                    "content": row["content"],
+                    # "content": row["content"],
                     "content_length": row["content_length"],
                     "content_summary": row["content_summary"],
                     "status": row["status"],
@@ -1762,7 +1762,7 @@ class PGDocStatusStorage(DocStatusStorage):
             updated_at = self._format_datetime_with_timezone(element["updated_at"])
 
             docs_by_status[element["id"]] = DocProcessingStatus(
-                content=element["content"],
+                # content=element["content"],
                 content_summary=element["content_summary"],
                 content_length=element["content_length"],
                 status=element["status"],
@@ -1845,10 +1845,9 @@ class PGDocStatusStorage(DocStatusStorage):
 
         # Modified SQL to include created_at, updated_at, and chunks_list in both INSERT and UPDATE operations
         # All fields are updated from the input data in both INSERT and UPDATE cases
-        sql = """insert into LIGHTRAG_DOC_STATUS(workspace,id,content,content_summary,content_length,chunks_count,status,file_path,chunks_list,created_at,updated_at)
-                 values($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11)
+        sql = """insert into LIGHTRAG_DOC_STATUS(workspace,id,content_summary,content_length,chunks_count,status,file_path,chunks_list,created_at,updated_at)
+                 values($1,$2,$3,$4,$5,$6,$7,$8,$9,$10)
                  on conflict(id,workspace) do update set
-                 content = EXCLUDED.content,
                  content_summary = EXCLUDED.content_summary,
                  content_length = EXCLUDED.content_length,
                  chunks_count = EXCLUDED.chunks_count,
@@ -1868,7 +1867,7 @@ class PGDocStatusStorage(DocStatusStorage):
                 {
                     "workspace": self.db.workspace,
                     "id": k,
-                    "content": v["content"],
+                    # "content": v["content"],
                     "content_summary": v["content_summary"],
                     "content_length": v["content_length"],
                     "chunks_count": v["chunks_count"] if "chunks_count" in v else -1,
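The parameter dict in the hunk above is what has to line up with the ten $n placeholders of the rewritten upsert. Below is a minimal sketch of that binding using a plain asyncpg call; the repository routes this through its own PostgreSQL helper, so the connection handling, the json/timestamp conversions, and the column types assumed here (JSONB chunks_list, timestamptz created_at/updated_at) are illustrative rather than the project's code.

# Sketch only: ten positional values for ten placeholders, with "content" no
# longer among them. The trailing SET clauses of the real statement are elided
# here exactly where the hunk above cuts off.
import json
from datetime import datetime, timezone

import asyncpg  # assumed driver for this sketch


async def upsert_doc_status(conn: asyncpg.Connection, workspace: str, doc_id: str, v: dict) -> None:
    sql = """insert into LIGHTRAG_DOC_STATUS(workspace,id,content_summary,content_length,chunks_count,status,file_path,chunks_list,created_at,updated_at)
             values($1,$2,$3,$4,$5,$6,$7,$8,$9,$10)
             on conflict(id,workspace) do update set
             content_summary = EXCLUDED.content_summary,
             content_length = EXCLUDED.content_length,
             chunks_count = EXCLUDED.chunks_count"""
    now = datetime.now(timezone.utc)
    args = (
        workspace,
        doc_id,
        v["content_summary"],
        v["content_length"],
        v.get("chunks_count", -1),
        v["status"],
        v.get("file_path", ""),
        json.dumps(v.get("chunks_list", [])),   # assumed JSONB column
        v.get("created_at") or now,             # assumed timezone-aware datetimes
        v.get("updated_at") or now,
    )
    # Exactly 10 values for $1..$10; a leftover "content" key in v is never read.
    await conn.execute(sql, *args)

The point of the paired SQL hunks is simply that the column list, the placeholder count, and the supplied values all shrink by one together.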
@@ -3364,6 +3363,7 @@ TABLES = {
         CONSTRAINT LIGHTRAG_LLM_CACHE_PK PRIMARY KEY (workspace, mode, id)
         )"""
     },
+    # content column in LIGHTRAG_DOC_STATUS is deprecated, use the same column in LIGHTRAG_DOC_FULL instead
     "LIGHTRAG_DOC_STATUS": {
         "ddl": """CREATE TABLE LIGHTRAG_DOC_STATUS (
                     workspace varchar(255) NOT NULL,
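The comment added here points readers at LIGHTRAG_DOC_FULL for the document text. A minimal sketch of reading a status row together with its content after this change, assuming LIGHTRAG_DOC_FULL keeps (workspace, id, content) columns and again using asyncpg directly; function and variable names are illustrative, not repository code.

# Sketch only: the status table no longer carries the text, so it is joined
# back in from LIGHTRAG_DOC_FULL by the same workspace and id.
import asyncpg  # assumed driver for this sketch


async def get_status_with_content(conn: asyncpg.Connection, workspace: str, doc_id: str) -> dict | None:
    sql = """select s.id, s.status, s.content_summary, s.content_length, f.content
             from LIGHTRAG_DOC_STATUS s
             left join LIGHTRAG_DOC_FULL f
               on f.workspace = s.workspace and f.id = s.id
             where s.workspace = $1 and s.id = $2"""
    row = await conn.fetchrow(sql, workspace, doc_id)
    return dict(row) if row is not None else None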