Remove content field from DocProcessingStatus; update MongoDB and PostgreSQL implementations
This commit is contained in:
parent
8274ed52d1
commit
24c36d876c
3 changed files with 8 additions and 11 deletions
|
|
@@ -629,8 +629,6 @@ class DocStatus(str, Enum):
|
||||||
class DocProcessingStatus:
|
class DocProcessingStatus:
|
||||||
"""Document processing status data structure"""
|
"""Document processing status data structure"""
|
||||||
|
|
||||||
content: str
|
|
||||||
"""Original content of the document"""
|
|
||||||
content_summary: str
|
content_summary: str
|
||||||
"""First 100 chars of document content, used for preview"""
|
"""First 100 chars of document content, used for preview"""
|
||||||
content_length: int
|
content_length: int
|
||||||
|
|
|
||||||
|
|
@@ -374,14 +374,13 @@ class MongoDocStatusStorage(DocStatusStorage):
|
||||||
result = await cursor.to_list()
|
result = await cursor.to_list()
|
||||||
return {
|
return {
|
||||||
doc["_id"]: DocProcessingStatus(
|
doc["_id"]: DocProcessingStatus(
|
||||||
content=doc["content"],
|
|
||||||
content_summary=doc.get("content_summary"),
|
content_summary=doc.get("content_summary"),
|
||||||
content_length=doc["content_length"],
|
content_length=doc["content_length"],
|
||||||
|
file_path=doc.get("file_path", doc["_id"]),
|
||||||
status=doc["status"],
|
status=doc["status"],
|
||||||
created_at=doc.get("created_at"),
|
created_at=doc.get("created_at"),
|
||||||
updated_at=doc.get("updated_at"),
|
updated_at=doc.get("updated_at"),
|
||||||
chunks_count=doc.get("chunks_count", -1),
|
chunks_count=doc.get("chunks_count", -1),
|
||||||
file_path=doc.get("file_path", doc["_id"]),
|
|
||||||
chunks_list=doc.get("chunks_list", []),
|
chunks_list=doc.get("chunks_list", []),
|
||||||
)
|
)
|
||||||
for doc in result
|
for doc in result
|
||||||
|
|
|
||||||
|
|
@@ -1673,7 +1673,7 @@ class PGDocStatusStorage(DocStatusStorage):
|
||||||
updated_at = self._format_datetime_with_timezone(result[0]["updated_at"])
|
updated_at = self._format_datetime_with_timezone(result[0]["updated_at"])
|
||||||
|
|
||||||
return dict(
|
return dict(
|
||||||
content=result[0]["content"],
|
# content=result[0]["content"],
|
||||||
content_length=result[0]["content_length"],
|
content_length=result[0]["content_length"],
|
||||||
content_summary=result[0]["content_summary"],
|
content_summary=result[0]["content_summary"],
|
||||||
status=result[0]["status"],
|
status=result[0]["status"],
|
||||||
|
|
@@ -1713,7 +1713,7 @@ class PGDocStatusStorage(DocStatusStorage):
|
||||||
|
|
||||||
processed_results.append(
|
processed_results.append(
|
||||||
{
|
{
|
||||||
"content": row["content"],
|
# "content": row["content"],
|
||||||
"content_length": row["content_length"],
|
"content_length": row["content_length"],
|
||||||
"content_summary": row["content_summary"],
|
"content_summary": row["content_summary"],
|
||||||
"status": row["status"],
|
"status": row["status"],
|
||||||
|
|
@@ -1762,7 +1762,7 @@ class PGDocStatusStorage(DocStatusStorage):
|
||||||
updated_at = self._format_datetime_with_timezone(element["updated_at"])
|
updated_at = self._format_datetime_with_timezone(element["updated_at"])
|
||||||
|
|
||||||
docs_by_status[element["id"]] = DocProcessingStatus(
|
docs_by_status[element["id"]] = DocProcessingStatus(
|
||||||
content=element["content"],
|
# content=element["content"],
|
||||||
content_summary=element["content_summary"],
|
content_summary=element["content_summary"],
|
||||||
content_length=element["content_length"],
|
content_length=element["content_length"],
|
||||||
status=element["status"],
|
status=element["status"],
|
||||||
|
|
@@ -1845,10 +1845,9 @@ class PGDocStatusStorage(DocStatusStorage):
|
||||||
|
|
||||||
# Modified SQL to include created_at, updated_at, and chunks_list in both INSERT and UPDATE operations
|
# Modified SQL to include created_at, updated_at, and chunks_list in both INSERT and UPDATE operations
|
||||||
# All fields are updated from the input data in both INSERT and UPDATE cases
|
# All fields are updated from the input data in both INSERT and UPDATE cases
|
||||||
sql = """insert into LIGHTRAG_DOC_STATUS(workspace,id,content,content_summary,content_length,chunks_count,status,file_path,chunks_list,created_at,updated_at)
|
sql = """insert into LIGHTRAG_DOC_STATUS(workspace,id,content_summary,content_length,chunks_count,status,file_path,chunks_list,created_at,updated_at)
|
||||||
values($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11)
|
values($1,$2,$3,$4,$5,$6,$7,$8,$9,$10)
|
||||||
on conflict(id,workspace) do update set
|
on conflict(id,workspace) do update set
|
||||||
content = EXCLUDED.content,
|
|
||||||
content_summary = EXCLUDED.content_summary,
|
content_summary = EXCLUDED.content_summary,
|
||||||
content_length = EXCLUDED.content_length,
|
content_length = EXCLUDED.content_length,
|
||||||
chunks_count = EXCLUDED.chunks_count,
|
chunks_count = EXCLUDED.chunks_count,
|
||||||
|
|
@@ -1868,7 +1867,7 @@ class PGDocStatusStorage(DocStatusStorage):
|
||||||
{
|
{
|
||||||
"workspace": self.db.workspace,
|
"workspace": self.db.workspace,
|
||||||
"id": k,
|
"id": k,
|
||||||
"content": v["content"],
|
# "content": v["content"],
|
||||||
"content_summary": v["content_summary"],
|
"content_summary": v["content_summary"],
|
||||||
"content_length": v["content_length"],
|
"content_length": v["content_length"],
|
||||||
"chunks_count": v["chunks_count"] if "chunks_count" in v else -1,
|
"chunks_count": v["chunks_count"] if "chunks_count" in v else -1,
|
||||||
|
|
@@ -3364,6 +3363,7 @@ TABLES = {
|
||||||
CONSTRAINT LIGHTRAG_LLM_CACHE_PK PRIMARY KEY (workspace, mode, id)
|
CONSTRAINT LIGHTRAG_LLM_CACHE_PK PRIMARY KEY (workspace, mode, id)
|
||||||
)"""
|
)"""
|
||||||
},
|
},
|
||||||
|
# content column in LIGHTRAG_DOC_STATUS is deprecated, use the same column in LIGHTRAG_DOC_FULL instead
|
||||||
"LIGHTRAG_DOC_STATUS": {
|
"LIGHTRAG_DOC_STATUS": {
|
||||||
"ddl": """CREATE TABLE LIGHTRAG_DOC_STATUS (
|
"ddl": """CREATE TABLE LIGHTRAG_DOC_STATUS (
|
||||||
workspace varchar(255) NOT NULL,
|
workspace varchar(255) NOT NULL,
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue