Remove content fallback and standardize doc status handling
- Remove content_summary fallback logic
- Standardize doc status processing
- Handle missing file_path consistently
This commit is contained in:
parent
24c36d876c
commit
92bbb7a1b3
2 changed files with 13 additions and 16 deletions
|
|
@ -95,9 +95,6 @@ class JsonDocStatusStorage(DocStatusStorage):
|
|||
try:
|
||||
# Make a copy of the data to avoid modifying the original
|
||||
data = v.copy()
|
||||
# If content is missing, use content_summary as content
|
||||
if "content" not in data and "content_summary" in data:
|
||||
data["content"] = data["content_summary"]
|
||||
# If file_path is not in data, fall back to the placeholder "no-file-path"
|
||||
if "file_path" not in data:
|
||||
data["file_path"] = "no-file-path"
|
||||
|
|
|
|||
|
|
@ -372,19 +372,19 @@ class MongoDocStatusStorage(DocStatusStorage):
|
|||
"""Get all documents with a specific status"""
|
||||
cursor = self._data.find({"status": status.value})
|
||||
result = await cursor.to_list()
|
||||
return {
|
||||
doc["_id"]: DocProcessingStatus(
|
||||
content_summary=doc.get("content_summary"),
|
||||
content_length=doc["content_length"],
|
||||
file_path=doc.get("file_path", doc["_id"]),
|
||||
status=doc["status"],
|
||||
created_at=doc.get("created_at"),
|
||||
updated_at=doc.get("updated_at"),
|
||||
chunks_count=doc.get("chunks_count", -1),
|
||||
chunks_list=doc.get("chunks_list", []),
|
||||
)
|
||||
for doc in result
|
||||
}
|
||||
processed_result = {}
|
||||
for doc in result:
|
||||
try:
|
||||
# Make a copy of the data to avoid modifying the original
|
||||
data = doc.copy()
|
||||
# If file_path is not in data, fall back to the placeholder "no-file-path"
|
||||
if "file_path" not in data:
|
||||
data["file_path"] = "no-file-path"
|
||||
processed_result[doc["_id"]] = DocProcessingStatus(**data)
|
||||
except KeyError as e:
|
||||
logger.error(f"Missing required field for document {doc['_id']}: {e}")
|
||||
continue
|
||||
return processed_result
|
||||
|
||||
async def index_done_callback(self) -> None:
|
||||
# Mongo handles persistence automatically
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue