diff --git a/lightrag/base.py b/lightrag/base.py index ac0545ce..9af3250a 100644 --- a/lightrag/base.py +++ b/lightrag/base.py @@ -629,8 +629,6 @@ class DocStatus(str, Enum): class DocProcessingStatus: """Document processing status data structure""" - content: str - """Original content of the document""" content_summary: str """First 100 chars of document content, used for preview""" content_length: int diff --git a/lightrag/kg/mongo_impl.py b/lightrag/kg/mongo_impl.py index dcf99327..5ecbedfd 100644 --- a/lightrag/kg/mongo_impl.py +++ b/lightrag/kg/mongo_impl.py @@ -374,14 +374,13 @@ class MongoDocStatusStorage(DocStatusStorage): result = await cursor.to_list() return { doc["_id"]: DocProcessingStatus( - content=doc["content"], content_summary=doc.get("content_summary"), content_length=doc["content_length"], + file_path=doc.get("file_path", doc["_id"]), status=doc["status"], created_at=doc.get("created_at"), updated_at=doc.get("updated_at"), chunks_count=doc.get("chunks_count", -1), - file_path=doc.get("file_path", doc["_id"]), chunks_list=doc.get("chunks_list", []), ) for doc in result diff --git a/lightrag/kg/postgres_impl.py b/lightrag/kg/postgres_impl.py index 0e49a67f..d2b170bf 100644 --- a/lightrag/kg/postgres_impl.py +++ b/lightrag/kg/postgres_impl.py @@ -1673,7 +1673,7 @@ class PGDocStatusStorage(DocStatusStorage): updated_at = self._format_datetime_with_timezone(result[0]["updated_at"]) return dict( - content=result[0]["content"], + # content=result[0]["content"], content_length=result[0]["content_length"], content_summary=result[0]["content_summary"], status=result[0]["status"], @@ -1713,7 +1713,7 @@ class PGDocStatusStorage(DocStatusStorage): processed_results.append( { - "content": row["content"], + # "content": row["content"], "content_length": row["content_length"], "content_summary": row["content_summary"], "status": row["status"], @@ -1762,7 +1762,7 @@ class PGDocStatusStorage(DocStatusStorage): updated_at = self._format_datetime_with_timezone(element["updated_at"]) docs_by_status[element["id"]] = DocProcessingStatus( - content=element["content"], + # content=element["content"], content_summary=element["content_summary"], content_length=element["content_length"], status=element["status"], @@ -1845,10 +1845,9 @@ class PGDocStatusStorage(DocStatusStorage): # Modified SQL to include created_at, updated_at, and chunks_list in both INSERT and UPDATE operations # All fields are updated from the input data in both INSERT and UPDATE cases - sql = """insert into LIGHTRAG_DOC_STATUS(workspace,id,content,content_summary,content_length,chunks_count,status,file_path,chunks_list,created_at,updated_at) - values($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11) + sql = """insert into LIGHTRAG_DOC_STATUS(workspace,id,content_summary,content_length,chunks_count,status,file_path,chunks_list,created_at,updated_at) + values($1,$2,$3,$4,$5,$6,$7,$8,$9,$10) on conflict(id,workspace) do update set - content = EXCLUDED.content, content_summary = EXCLUDED.content_summary, content_length = EXCLUDED.content_length, chunks_count = EXCLUDED.chunks_count, @@ -1868,7 +1867,7 @@ class PGDocStatusStorage(DocStatusStorage): { "workspace": self.db.workspace, "id": k, - "content": v["content"], + # "content": v["content"], "content_summary": v["content_summary"], "content_length": v["content_length"], "chunks_count": v["chunks_count"] if "chunks_count" in v else -1, @@ -3364,6 +3363,7 @@ TABLES = { CONSTRAINT LIGHTRAG_LLM_CACHE_PK PRIMARY KEY (workspace, mode, id) )""" }, + # content column in LIGHTRAG_DOC_STATUS is deprecated, use the same column in LIGHTRAG_DOC_FULL instead "LIGHTRAG_DOC_STATUS": { "ddl": """CREATE TABLE LIGHTRAG_DOC_STATUS ( workspace varchar(255) NOT NULL,