diff --git a/lightrag/api/routers/document_routes.py b/lightrag/api/routers/document_routes.py index 556c7ac9..9dfaa710 100644 --- a/lightrag/api/routers/document_routes.py +++ b/lightrag/api/routers/document_routes.py @@ -492,6 +492,150 @@ class TrackStatusResponse(BaseModel): } +class DocumentsRequest(BaseModel): + """Request model for paginated document queries + + Attributes: + status_filter: Filter by document status, None for all statuses + page: Page number (1-based) + page_size: Number of documents per page (10-200) + sort_field: Field to sort by ('created_at', 'updated_at', 'id') + sort_direction: Sort direction ('asc' or 'desc') + """ + + status_filter: Optional[DocStatus] = Field( + default=None, description="Filter by document status, None for all statuses" + ) + page: int = Field(default=1, ge=1, description="Page number (1-based)") + page_size: int = Field( + default=50, ge=10, le=200, description="Number of documents per page (10-200)" + ) + sort_field: Literal["created_at", "updated_at", "id"] = Field( + default="updated_at", description="Field to sort by" + ) + sort_direction: Literal["asc", "desc"] = Field( + default="desc", description="Sort direction" + ) + + class Config: + json_schema_extra = { + "example": { + "status_filter": "PROCESSED", + "page": 1, + "page_size": 50, + "sort_field": "updated_at", + "sort_direction": "desc", + } + } + + +class PaginationInfo(BaseModel): + """Pagination information + + Attributes: + page: Current page number + page_size: Number of items per page + total_count: Total number of items + total_pages: Total number of pages + has_next: Whether there is a next page + has_prev: Whether there is a previous page + """ + + page: int = Field(description="Current page number") + page_size: int = Field(description="Number of items per page") + total_count: int = Field(description="Total number of items") + total_pages: int = Field(description="Total number of pages") + has_next: bool = Field(description="Whether there is 
a next page") + has_prev: bool = Field(description="Whether there is a previous page") + + class Config: + json_schema_extra = { + "example": { + "page": 1, + "page_size": 50, + "total_count": 150, + "total_pages": 3, + "has_next": True, + "has_prev": False, + } + } + + +class PaginatedDocsResponse(BaseModel): + """Response model for paginated document queries + + Attributes: + documents: List of documents for the current page + pagination: Pagination information + status_counts: Count of documents by status for all documents + """ + + documents: List[DocStatusResponse] = Field( + description="List of documents for the current page" + ) + pagination: PaginationInfo = Field(description="Pagination information") + status_counts: Dict[str, int] = Field( + description="Count of documents by status for all documents" + ) + + class Config: + json_schema_extra = { + "example": { + "documents": [ + { + "id": "doc_123456", + "content_summary": "Research paper on machine learning", + "content_length": 15240, + "status": "PROCESSED", + "created_at": "2025-03-31T12:34:56", + "updated_at": "2025-03-31T12:35:30", + "track_id": "upload_20250729_170612_abc123", + "chunks_count": 12, + "error_msg": None, + "metadata": {"author": "John Doe", "year": 2025}, + "file_path": "research_paper.pdf", + } + ], + "pagination": { + "page": 1, + "page_size": 50, + "total_count": 150, + "total_pages": 3, + "has_next": True, + "has_prev": False, + }, + "status_counts": { + "PENDING": 10, + "PROCESSING": 5, + "PROCESSED": 130, + "FAILED": 5, + }, + } + } + + +class StatusCountsResponse(BaseModel): + """Response model for document status counts + + Attributes: + status_counts: Count of documents by status + """ + + status_counts: Dict[str, int] = Field(description="Count of documents by status") + + class Config: + json_schema_extra = { + "example": { + "status_counts": { + "PENDING": 10, + "PROCESSING": 5, + "PROCESSED": 130, + "FAILED": 5, + } + } + } + + class PipelineStatusResponse(BaseModel): 
"""Response model for pipeline status @@ -1863,4 +2007,118 @@ def create_document_routes( logger.error(traceback.format_exc()) raise HTTPException(status_code=500, detail=str(e)) + @router.post( + "/paginated", + response_model=PaginatedDocsResponse, + dependencies=[Depends(combined_auth)], + ) + async def get_documents_paginated( + request: DocumentsRequest, + ) -> PaginatedDocsResponse: + """ + Get documents with pagination support. + + This endpoint retrieves documents with pagination, filtering, and sorting capabilities. + It provides better performance for large document collections by loading only the + requested page of data. + + Args: + request (DocumentsRequest): The request body containing pagination parameters + + Returns: + PaginatedDocsResponse: A response object containing: + - documents: List of documents for the current page + - pagination: Pagination information (page, total_count, etc.) + - status_counts: Count of documents by status for all documents + + Raises: + HTTPException: If an error occurs while retrieving documents (500). 
+ """ + try: + # Get paginated documents and status counts in parallel + docs_task = rag.doc_status.get_docs_paginated( + status_filter=request.status_filter, + page=request.page, + page_size=request.page_size, + sort_field=request.sort_field, + sort_direction=request.sort_direction, + ) + status_counts_task = rag.doc_status.get_all_status_counts() + + # Execute both queries in parallel + (documents_with_ids, total_count), status_counts = await asyncio.gather( + docs_task, status_counts_task + ) + + # Convert documents to response format + doc_responses = [] + for doc_id, doc in documents_with_ids: + doc_responses.append( + DocStatusResponse( + id=doc_id, + content_summary=doc.content_summary, + content_length=doc.content_length, + status=doc.status, + created_at=format_datetime(doc.created_at), + updated_at=format_datetime(doc.updated_at), + track_id=doc.track_id, + chunks_count=doc.chunks_count, + error_msg=doc.error_msg, + metadata=doc.metadata, + file_path=doc.file_path, + ) + ) + + # Calculate pagination info + total_pages = (total_count + request.page_size - 1) // request.page_size + has_next = request.page < total_pages + has_prev = request.page > 1 + + pagination = PaginationInfo( + page=request.page, + page_size=request.page_size, + total_count=total_count, + total_pages=total_pages, + has_next=has_next, + has_prev=has_prev, + ) + + return PaginatedDocsResponse( + documents=doc_responses, + pagination=pagination, + status_counts=status_counts, + ) + + except Exception as e: + logger.error(f"Error getting paginated documents: {str(e)}") + logger.error(traceback.format_exc()) + raise HTTPException(status_code=500, detail=str(e)) + + @router.get( + "/status_counts", + response_model=StatusCountsResponse, + dependencies=[Depends(combined_auth)], + ) + async def get_document_status_counts() -> StatusCountsResponse: + """ + Get counts of documents by status. 
+ + This endpoint retrieves the count of documents in each processing status + (PENDING, PROCESSING, PROCESSED, FAILED) for all documents in the system. + + Returns: + StatusCountsResponse: A response object containing status counts + + Raises: + HTTPException: If an error occurs while retrieving status counts (500). + """ + try: + status_counts = await rag.doc_status.get_all_status_counts() + return StatusCountsResponse(status_counts=status_counts) + + except Exception as e: + logger.error(f"Error getting document status counts: {str(e)}") + logger.error(traceback.format_exc()) + raise HTTPException(status_code=500, detail=str(e)) + return router diff --git a/lightrag/base.py b/lightrag/base.py index f98c11d4..35d5caae 100644 --- a/lightrag/base.py +++ b/lightrag/base.py @@ -673,6 +673,36 @@ class DocStatusStorage(BaseKVStorage, ABC): ) -> dict[str, DocProcessingStatus]: """Get all documents with a specific track_id""" + @abstractmethod + async def get_docs_paginated( + self, + status_filter: DocStatus | None = None, + page: int = 1, + page_size: int = 50, + sort_field: str = "updated_at", + sort_direction: str = "desc", + ) -> tuple[list[tuple[str, DocProcessingStatus]], int]: + """Get documents with pagination support + + Args: + status_filter: Filter by document status, None for all statuses + page: Page number (1-based) + page_size: Number of documents per page (10-200) + sort_field: Field to sort by ('created_at', 'updated_at', 'id') + sort_direction: Sort direction ('asc' or 'desc') + + Returns: + Tuple of (list of (doc_id, DocProcessingStatus) tuples, total_count) + """ + + @abstractmethod + async def get_all_status_counts(self) -> dict[str, int]: + """Get counts of documents in each status for all documents + + Returns: + Dictionary mapping status names to counts + """ + async def drop_cache_by_modes(self, modes: list[str] | None = None) -> bool: """Drop cache is not supported for Doc Status storage""" return False diff --git 
a/lightrag/kg/json_doc_status_impl.py b/lightrag/kg/json_doc_status_impl.py index 404dc260..f183a358 100644 --- a/lightrag/kg/json_doc_status_impl.py +++ b/lightrag/kg/json_doc_status_impl.py @@ -173,6 +173,111 @@ class JsonDocStatusStorage(DocStatusStorage): async with self._storage_lock: return self._data.get(id) + async def get_docs_paginated( + self, + status_filter: DocStatus | None = None, + page: int = 1, + page_size: int = 50, + sort_field: str = "updated_at", + sort_direction: str = "desc", + ) -> tuple[list[tuple[str, DocProcessingStatus]], int]: + """Get documents with pagination support + + Args: + status_filter: Filter by document status, None for all statuses + page: Page number (1-based) + page_size: Number of documents per page (10-200) + sort_field: Field to sort by ('created_at', 'updated_at', 'id') + sort_direction: Sort direction ('asc' or 'desc') + + Returns: + Tuple of (list of (doc_id, DocProcessingStatus) tuples, total_count) + """ + # Validate parameters + if page < 1: + page = 1 + if page_size < 10: + page_size = 10 + elif page_size > 200: + page_size = 200 + + if sort_field not in ["created_at", "updated_at", "id"]: + sort_field = "updated_at" + + if sort_direction.lower() not in ["asc", "desc"]: + sort_direction = "desc" + + # For JSON storage, we load all data and sort/filter in memory + all_docs = [] + + async with self._storage_lock: + for doc_id, doc_data in self._data.items(): + # Apply status filter + if ( + status_filter is not None + and doc_data.get("status") != status_filter.value + ): + continue + + try: + # Prepare document data + data = doc_data.copy() + data.pop("content", None) + if "file_path" not in data: + data["file_path"] = "no-file-path" + if "metadata" not in data: + data["metadata"] = {} + if "error_msg" not in data: + data["error_msg"] = None + + doc_status = DocProcessingStatus(**data) + + # Add sort key for sorting + if sort_field == "id": + doc_status._sort_key = doc_id + else: + doc_status._sort_key = 
getattr(doc_status, sort_field, "") + + all_docs.append((doc_id, doc_status)) + + except KeyError as e: + logger.error(f"Error processing document {doc_id}: {e}") + continue + + # Sort documents + reverse_sort = sort_direction.lower() == "desc" + all_docs.sort( + key=lambda x: getattr(x[1], "_sort_key", ""), reverse=reverse_sort + ) + + # Remove sort key from documents + for doc_id, doc in all_docs: + if hasattr(doc, "_sort_key"): + delattr(doc, "_sort_key") + + total_count = len(all_docs) + + # Apply pagination + start_idx = (page - 1) * page_size + end_idx = start_idx + page_size + paginated_docs = all_docs[start_idx:end_idx] + + return paginated_docs, total_count + + async def get_all_status_counts(self) -> dict[str, int]: + """Get counts of documents in each status for all documents + + Returns: + Dictionary mapping status names to counts, including 'all' field + """ + counts = await self.get_status_counts() + + # Add 'all' field with total count + total_count = sum(counts.values()) + counts["all"] = total_count + + return counts + async def delete(self, doc_ids: list[str]) -> None: """Delete specific records from storage by their IDs diff --git a/lightrag/kg/mongo_impl.py b/lightrag/kg/mongo_impl.py index e0d48e27..2cdd04ca 100644 --- a/lightrag/kg/mongo_impl.py +++ b/lightrag/kg/mongo_impl.py @@ -325,6 +325,9 @@ class MongoDocStatusStorage(DocStatusStorage): # Create track_id index for better query performance await self.create_track_id_index_if_not_exists() + # Create pagination indexes for better query performance + await self.create_pagination_indexes_if_not_exists() + logger.debug(f"Use MongoDB as DocStatus {self._collection_name}") async def finalize(self): @@ -481,6 +484,155 @@ class MongoDocStatusStorage(DocStatusStorage): f"Error creating track_id index for {self._collection_name}: {e}" ) + async def create_pagination_indexes_if_not_exists(self): + """Create indexes to optimize pagination queries""" + try: + indexes_cursor = await 
self._data.list_indexes() + existing_indexes = await indexes_cursor.to_list(length=None) + + # Define indexes needed for pagination + pagination_indexes = [ + { + "name": "status_updated_at", + "keys": [("status", 1), ("updated_at", -1)], + }, + { + "name": "status_created_at", + "keys": [("status", 1), ("created_at", -1)], + }, + {"name": "updated_at", "keys": [("updated_at", -1)]}, + {"name": "created_at", "keys": [("created_at", -1)]}, + {"name": "id", "keys": [("_id", 1)]}, + ] + + # Check which indexes already exist + existing_index_names = {idx.get("name", "") for idx in existing_indexes} + + for index_info in pagination_indexes: + index_name = index_info["name"] + if index_name not in existing_index_names: + await self._data.create_index(index_info["keys"], name=index_name) + logger.info( + f"Created pagination index '{index_name}' for collection {self._collection_name}" + ) + else: + logger.debug( + f"Pagination index '{index_name}' already exists for collection {self._collection_name}" + ) + + except PyMongoError as e: + logger.error( + f"Error creating pagination indexes for {self._collection_name}: {e}" + ) + + async def get_docs_paginated( + self, + status_filter: DocStatus | None = None, + page: int = 1, + page_size: int = 50, + sort_field: str = "updated_at", + sort_direction: str = "desc", + ) -> tuple[list[tuple[str, DocProcessingStatus]], int]: + """Get documents with pagination support + + Args: + status_filter: Filter by document status, None for all statuses + page: Page number (1-based) + page_size: Number of documents per page (10-200) + sort_field: Field to sort by ('created_at', 'updated_at', '_id') + sort_direction: Sort direction ('asc' or 'desc') + + Returns: + Tuple of (list of (doc_id, DocProcessingStatus) tuples, total_count) + """ + # Validate parameters + if page < 1: + page = 1 + if page_size < 10: + page_size = 10 + elif page_size > 200: + page_size = 200 + + if sort_field not in ["created_at", "updated_at", "_id"]: + sort_field = 
"updated_at" + + if sort_direction.lower() not in ["asc", "desc"]: + sort_direction = "desc" + + # Build query filter + query_filter = {} + if status_filter is not None: + query_filter["status"] = status_filter.value + + # Get total count + total_count = await self._data.count_documents(query_filter) + + # Calculate skip value + skip = (page - 1) * page_size + + # Build sort criteria + sort_direction_value = 1 if sort_direction.lower() == "asc" else -1 + sort_criteria = [(sort_field, sort_direction_value)] + + # Query for paginated data + cursor = ( + self._data.find(query_filter) + .sort(sort_criteria) + .skip(skip) + .limit(page_size) + ) + result = await cursor.to_list(length=page_size) + + # Convert to (doc_id, DocProcessingStatus) tuples + documents = [] + for doc in result: + try: + doc_id = doc["_id"] + + # Make a copy of the data to avoid modifying the original + data = doc.copy() + # Remove deprecated content field if it exists + data.pop("content", None) + # Remove MongoDB _id field if it exists + data.pop("_id", None) + # If file_path is not in data, use document id as file path + if "file_path" not in data: + data["file_path"] = "no-file-path" + # Ensure new fields exist with default values + if "metadata" not in data: + data["metadata"] = {} + if "error_msg" not in data: + data["error_msg"] = None + + doc_status = DocProcessingStatus(**data) + documents.append((doc_id, doc_status)) + except KeyError as e: + logger.error(f"Missing required field for document {doc['_id']}: {e}") + continue + + return documents, total_count + + async def get_all_status_counts(self) -> dict[str, int]: + """Get counts of documents in each status for all documents + + Returns: + Dictionary mapping status names to counts, including 'all' field + """ + pipeline = [{"$group": {"_id": "$status", "count": {"$sum": 1}}}] + cursor = self._data.aggregate(pipeline, allowDiskUse=True) + result = await cursor.to_list() + + counts = {} + total_count = 0 + for doc in result: + 
counts[doc["_id"]] = doc["count"] + total_count += doc["count"] + + # Add 'all' field with total count + counts["all"] = total_count + + return counts + @final @dataclass diff --git a/lightrag/kg/postgres_impl.py b/lightrag/kg/postgres_impl.py index 1b5a7078..a1cc19c9 100644 --- a/lightrag/kg/postgres_impl.py +++ b/lightrag/kg/postgres_impl.py @@ -914,6 +914,64 @@ class PostgreSQLDB: f"PostgreSQL, Failed to migrate doc status metadata/error_msg fields: {e}" ) + # Create pagination optimization indexes for LIGHTRAG_DOC_STATUS + try: + await self._create_pagination_indexes() + except Exception as e: + logger.error(f"PostgreSQL, Failed to create pagination indexes: {e}") + + async def _create_pagination_indexes(self): + """Create indexes to optimize pagination queries for LIGHTRAG_DOC_STATUS""" + indexes = [ + { + "name": "idx_lightrag_doc_status_workspace_status_updated_at", + "sql": "CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_lightrag_doc_status_workspace_status_updated_at ON LIGHTRAG_DOC_STATUS (workspace, status, updated_at DESC)", + "description": "Composite index for workspace + status + updated_at pagination", + }, + { + "name": "idx_lightrag_doc_status_workspace_status_created_at", + "sql": "CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_lightrag_doc_status_workspace_status_created_at ON LIGHTRAG_DOC_STATUS (workspace, status, created_at DESC)", + "description": "Composite index for workspace + status + created_at pagination", + }, + { + "name": "idx_lightrag_doc_status_workspace_updated_at", + "sql": "CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_lightrag_doc_status_workspace_updated_at ON LIGHTRAG_DOC_STATUS (workspace, updated_at DESC)", + "description": "Index for workspace + updated_at pagination (all statuses)", + }, + { + "name": "idx_lightrag_doc_status_workspace_created_at", + "sql": "CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_lightrag_doc_status_workspace_created_at ON LIGHTRAG_DOC_STATUS (workspace, created_at DESC)", + "description": "Index for 
workspace + created_at pagination (all statuses)", + }, + { + "name": "idx_lightrag_doc_status_workspace_id", + "sql": "CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_lightrag_doc_status_workspace_id ON LIGHTRAG_DOC_STATUS (workspace, id)", + "description": "Index for workspace + id sorting", + }, + ] + + for index in indexes: + try: + # Check if index already exists + check_sql = """ + SELECT indexname + FROM pg_indexes + WHERE tablename = 'lightrag_doc_status' + AND indexname = $1 + """ + + existing = await self.query(check_sql, {"indexname": index["name"]}) + + if not existing: + logger.info(f"Creating pagination index: {index['description']}") + await self.execute(index["sql"]) + logger.info(f"Successfully created index: {index['name']}") + else: + logger.debug(f"Index already exists: {index['name']}") + + except Exception as e: + logger.warning(f"Failed to create index {index['name']}: {e}") + async def query( self, sql: str, @@ -1980,6 +2038,141 @@ class PGDocStatusStorage(DocStatusStorage): return docs_by_track_id + async def get_docs_paginated( + self, + status_filter: DocStatus | None = None, + page: int = 1, + page_size: int = 50, + sort_field: str = "updated_at", + sort_direction: str = "desc", + ) -> tuple[list[tuple[str, DocProcessingStatus]], int]: + """Get documents with pagination support + + Args: + status_filter: Filter by document status, None for all statuses + page: Page number (1-based) + page_size: Number of documents per page (10-200) + sort_field: Field to sort by ('created_at', 'updated_at', 'id') + sort_direction: Sort direction ('asc' or 'desc') + + Returns: + Tuple of (list of (doc_id, DocProcessingStatus) tuples, total_count) + """ + # Validate parameters + if page < 1: + page = 1 + if page_size < 10: + page_size = 10 + elif page_size > 200: + page_size = 200 + + if sort_field not in ["created_at", "updated_at", "id"]: + sort_field = "updated_at" + + if sort_direction.lower() not in ["asc", "desc"]: + sort_direction = "desc" + + # 
Calculate offset + offset = (page - 1) * page_size + + # Build WHERE clause + where_clause = "WHERE workspace=$1" + params = {"workspace": self.db.workspace} + param_count = 1 + + if status_filter is not None: + param_count += 1 + where_clause += f" AND status=${param_count}" + params["status"] = status_filter.value + + # Build ORDER BY clause + order_clause = f"ORDER BY {sort_field} {sort_direction.upper()}" + + # Query for total count + count_sql = f"SELECT COUNT(*) as total FROM LIGHTRAG_DOC_STATUS {where_clause}" + count_result = await self.db.query(count_sql, params) + total_count = count_result["total"] if count_result else 0 + + # Query for paginated data + data_sql = f""" + SELECT * FROM LIGHTRAG_DOC_STATUS + {where_clause} + {order_clause} + LIMIT ${param_count + 1} OFFSET ${param_count + 2} + """ + params["limit"] = page_size + params["offset"] = offset + + result = await self.db.query(data_sql, params, True) + + # Convert to (doc_id, DocProcessingStatus) tuples + documents = [] + for element in result: + doc_id = element["id"] + + # Parse chunks_list JSON string back to list + chunks_list = element.get("chunks_list", []) + if isinstance(chunks_list, str): + try: + chunks_list = json.loads(chunks_list) + except json.JSONDecodeError: + chunks_list = [] + + # Parse metadata JSON string back to dict + metadata = element.get("metadata", {}) + if isinstance(metadata, str): + try: + metadata = json.loads(metadata) + except json.JSONDecodeError: + metadata = {} + + # Convert datetime objects to ISO format strings with timezone info + created_at = self._format_datetime_with_timezone(element["created_at"]) + updated_at = self._format_datetime_with_timezone(element["updated_at"]) + + doc_status = DocProcessingStatus( + content_summary=element["content_summary"], + content_length=element["content_length"], + status=element["status"], + created_at=created_at, + updated_at=updated_at, + chunks_count=element["chunks_count"], + file_path=element["file_path"], + 
chunks_list=chunks_list, + track_id=element.get("track_id"), + metadata=metadata, + error_msg=element.get("error_msg"), + ) + documents.append((doc_id, doc_status)) + + return documents, total_count + + async def get_all_status_counts(self) -> dict[str, int]: + """Get counts of documents in each status for all documents + + Returns: + Dictionary mapping status names to counts, including 'all' field + """ + sql = """ + SELECT status, COUNT(*) as count + FROM LIGHTRAG_DOC_STATUS + WHERE workspace=$1 + GROUP BY status + """ + params = {"workspace": self.db.workspace} + result = await self.db.query(sql, params, True) + + counts = {} + total_count = 0 + for row in result: + counts[row["status"]] = row["count"] + total_count += row["count"] + + # Add 'all' field with total count + counts["all"] = total_count + + return counts + async def index_done_callback(self) -> None: # PG handles persistence automatically pass diff --git a/lightrag/kg/redis_impl.py b/lightrag/kg/redis_impl.py index 536f1a85..2d9c4383 100644 --- a/lightrag/kg/redis_impl.py +++ b/lightrag/kg/redis_impl.py @@ -919,6 +919,142 @@ class RedisDocStatusStorage(DocStatusStorage): f"Deleted {deleted_count} of {len(doc_ids)} doc status entries from {self.namespace}" ) + async def get_docs_paginated( + self, + status_filter: DocStatus | None = None, + page: int = 1, + page_size: int = 50, + sort_field: str = "updated_at", + sort_direction: str = "desc", + ) -> tuple[list[tuple[str, DocProcessingStatus]], int]: + """Get documents with pagination support + + Args: + status_filter: Filter by document status, None for all statuses + page: Page number (1-based) + page_size: Number of documents per page (10-200) + sort_field: Field to sort by ('created_at', 'updated_at', 'id') + sort_direction: Sort direction ('asc' or 'desc') + + Returns: + Tuple of (list of (doc_id, DocProcessingStatus) tuples, total_count) + """ + # Validate parameters + if page < 1: + page = 1 + if page_size < 10: + page_size = 10 + elif 
page_size > 200: + page_size = 200 + + if sort_field not in ["created_at", "updated_at", "id"]: + sort_field = "updated_at" + + if sort_direction.lower() not in ["asc", "desc"]: + sort_direction = "desc" + + # For Redis, we need to load all data and sort/filter in memory + all_docs = [] + total_count = 0 + + async with self._get_redis_connection() as redis: + try: + # Use SCAN to iterate through all keys in the namespace + cursor = 0 + while True: + cursor, keys = await redis.scan( + cursor, match=f"{self.namespace}:*", count=1000 + ) + if keys: + # Get all values in batch + pipe = redis.pipeline() + for key in keys: + pipe.get(key) + values = await pipe.execute() + + # Process documents + for key, value in zip(keys, values): + if value: + try: + doc_data = json.loads(value) + + # Apply status filter + if ( + status_filter is not None + and doc_data.get("status") + != status_filter.value + ): + continue + + # Extract document ID from key + doc_id = key.split(":", 1)[1] + + # Prepare document data + data = doc_data.copy() + data.pop("content", None) + if "file_path" not in data: + data["file_path"] = "no-file-path" + if "metadata" not in data: + data["metadata"] = {} + if "error_msg" not in data: + data["error_msg"] = None + + # Add sort key for sorting + if sort_field == "id": + data["_sort_key"] = doc_id + else: + data["_sort_key"] = data.get(sort_field, "") + + doc_status = DocProcessingStatus(**data) + all_docs.append((doc_id, doc_status)) + + except (json.JSONDecodeError, KeyError) as e: + logger.error( + f"Error processing document {key}: {e}" + ) + continue + + if cursor == 0: + break + + except Exception as e: + logger.error(f"Error getting paginated docs: {e}") + return [], 0 + + # Sort documents + reverse_sort = sort_direction.lower() == "desc" + all_docs.sort( + key=lambda x: getattr(x[1], "_sort_key", ""), reverse=reverse_sort + ) + + # Remove sort key from documents + for doc_id, doc in all_docs: + if hasattr(doc, "_sort_key"): + delattr(doc, 
"_sort_key") + + total_count = len(all_docs) + + # Apply pagination + start_idx = (page - 1) * page_size + end_idx = start_idx + page_size + paginated_docs = all_docs[start_idx:end_idx] + + return paginated_docs, total_count + + async def get_all_status_counts(self) -> dict[str, int]: + """Get counts of documents in each status for all documents + + Returns: + Dictionary mapping status names to counts, including 'all' field + """ + counts = await self.get_status_counts() + + # Add 'all' field with total count + total_count = sum(counts.values()) + counts["all"] = total_count + + return counts + async def drop(self) -> dict[str, str]: """Drop all document status data from storage and clean up resources""" try: diff --git a/lightrag_webui/src/api/lightrag.ts b/lightrag_webui/src/api/lightrag.ts index f7ba0de0..187db25e 100644 --- a/lightrag_webui/src/api/lightrag.ts +++ b/lightrag_webui/src/api/lightrag.ts @@ -185,6 +185,33 @@ export type TrackStatusResponse = { status_summary: Record } +export type DocumentsRequest = { + status_filter?: DocStatus | null + page: number + page_size: number + sort_field: 'created_at' | 'updated_at' | 'id' + sort_direction: 'asc' | 'desc' +} + +export type PaginationInfo = { + page: number + page_size: number + total_count: number + total_pages: number + has_next: boolean + has_prev: boolean +} + +export type PaginatedDocsResponse = { + documents: DocStatusResponse[] + pagination: PaginationInfo + status_counts: Record +} + +export type StatusCountsResponse = { + status_counts: Record +} + export type AuthStatusResponse = { auth_configured: boolean access_token?: string @@ -714,3 +741,22 @@ export const getTrackStatus = async (trackId: string): Promise => { + const response = await axiosInstance.post('/documents/paginated', request) + return response.data +} + +/** + * Get counts of documents by status + * @returns Promise with status counts response + */ +export const getDocumentStatusCounts = async (): Promise => { + const response = 
await axiosInstance.get('/documents/status_counts') + return response.data +} diff --git a/lightrag_webui/src/components/ui/PaginationControls.tsx b/lightrag_webui/src/components/ui/PaginationControls.tsx new file mode 100644 index 00000000..f6ea199a --- /dev/null +++ b/lightrag_webui/src/components/ui/PaginationControls.tsx @@ -0,0 +1,259 @@ +import { useState, useEffect, useCallback } from 'react' +import { useTranslation } from 'react-i18next' +import Button from './Button' +import Input from './Input' +import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from './Select' +import { cn } from '@/lib/utils' +import { ChevronLeftIcon, ChevronRightIcon, ChevronsLeftIcon, ChevronsRightIcon } from 'lucide-react' + +export type PaginationControlsProps = { + currentPage: number + totalPages: number + pageSize: number + totalCount: number + onPageChange: (page: number) => void + onPageSizeChange: (pageSize: number) => void + isLoading?: boolean + compact?: boolean + className?: string +} + +const PAGE_SIZE_OPTIONS = [ + { value: 10, label: '10' }, + { value: 20, label: '20' }, + { value: 50, label: '50' }, + { value: 100, label: '100' }, + { value: 200, label: '200' } +] + +export default function PaginationControls({ + currentPage, + totalPages, + pageSize, + totalCount, + onPageChange, + onPageSizeChange, + isLoading = false, + compact = false, + className +}: PaginationControlsProps) { + const { t } = useTranslation() + const [inputPage, setInputPage] = useState(currentPage.toString()) + + // Update input when currentPage changes + useEffect(() => { + setInputPage(currentPage.toString()) + }, [currentPage]) + + // Handle page input change with debouncing + const handlePageInputChange = useCallback((value: string) => { + setInputPage(value) + }, []) + + // Handle page input submit + const handlePageInputSubmit = useCallback(() => { + const pageNum = parseInt(inputPage, 10) + if (!isNaN(pageNum) && pageNum >= 1 && pageNum <= totalPages) { + 
onPageChange(pageNum) + } else { + // Reset to current page if invalid + setInputPage(currentPage.toString()) + } + }, [inputPage, totalPages, onPageChange, currentPage]) + + // Handle page input key press + const handlePageInputKeyPress = useCallback((e: React.KeyboardEvent) => { + if (e.key === 'Enter') { + handlePageInputSubmit() + } + }, [handlePageInputSubmit]) + + // Handle page size change + const handlePageSizeChange = useCallback((value: string) => { + const newPageSize = parseInt(value, 10) + if (!isNaN(newPageSize)) { + onPageSizeChange(newPageSize) + } + }, [onPageSizeChange]) + + // Navigation handlers + const goToFirstPage = useCallback(() => { + if (currentPage > 1 && !isLoading) { + onPageChange(1) + } + }, [currentPage, onPageChange, isLoading]) + + const goToPrevPage = useCallback(() => { + if (currentPage > 1 && !isLoading) { + onPageChange(currentPage - 1) + } + }, [currentPage, onPageChange, isLoading]) + + const goToNextPage = useCallback(() => { + if (currentPage < totalPages && !isLoading) { + onPageChange(currentPage + 1) + } + }, [currentPage, totalPages, onPageChange, isLoading]) + + const goToLastPage = useCallback(() => { + if (currentPage < totalPages && !isLoading) { + onPageChange(totalPages) + } + }, [currentPage, totalPages, onPageChange, isLoading]) + + if (totalPages <= 1) { + return null + } + + if (compact) { + return ( +
+
+ + +
+ handlePageInputChange(e.target.value)} + onBlur={handlePageInputSubmit} + onKeyPress={handlePageInputKeyPress} + disabled={isLoading} + className="h-8 w-12 text-center text-sm" + /> + / {totalPages} +
+ + +
+ + +
+ ) + } + + return ( +
+
+ {t('pagination.showing', { + start: Math.min((currentPage - 1) * pageSize + 1, totalCount), + end: Math.min(currentPage * pageSize, totalCount), + total: totalCount + })} +
+ +
+
+ + + + +
+ {t('pagination.page')} + handlePageInputChange(e.target.value)} + onBlur={handlePageInputSubmit} + onKeyPress={handlePageInputKeyPress} + disabled={isLoading} + className="h-8 w-16 text-center text-sm" + /> + / {totalPages} +
+ + + + +
+ +
+ {t('pagination.pageSize')} + +
+
+
+ ) +} diff --git a/lightrag_webui/src/features/DocumentManager.tsx b/lightrag_webui/src/features/DocumentManager.tsx index 3d182280..5f5761ca 100644 --- a/lightrag_webui/src/features/DocumentManager.tsx +++ b/lightrag_webui/src/features/DocumentManager.tsx @@ -18,8 +18,17 @@ import UploadDocumentsDialog from '@/components/documents/UploadDocumentsDialog' import ClearDocumentsDialog from '@/components/documents/ClearDocumentsDialog' import DeleteDocumentsDialog from '@/components/documents/DeleteDocumentsDialog' import DeselectDocumentsDialog from '@/components/documents/DeselectDocumentsDialog' +import PaginationControls from '@/components/ui/PaginationControls' -import { getDocuments, scanNewDocuments, DocsStatusesResponse, DocStatus, DocStatusResponse } from '@/api/lightrag' +import { + scanNewDocuments, + getDocumentsPaginated, + DocsStatusesResponse, + DocStatus, + DocStatusResponse, + DocumentsRequest, + PaginationInfo +} from '@/api/lightrag' import { errorMessage } from '@/lib/utils' import { toast } from 'sonner' import { useBackendState } from '@/stores/state' @@ -164,7 +173,23 @@ export default function DocumentManager() { const { t, i18n } = useTranslation() const health = useBackendState.use.health() const pipelineBusy = useBackendState.use.pipelineBusy() + + // Legacy state for backward compatibility const [docs, setDocs] = useState(null) + + // New pagination state + const [, setCurrentPageDocs] = useState([]) + const [pagination, setPagination] = useState({ + page: 1, + page_size: 20, + total_count: 0, + total_pages: 0, + has_next: false, + has_prev: false + }) + const [statusCounts, setStatusCounts] = useState>({ all: 0 }) + const [isRefreshing, setIsRefreshing] = useState(false) + const currentTab = useSettingsStore.use.currentTab() const showFileName = useSettingsStore.use.showFileName() const setShowFileName = useSettingsStore.use.setShowFileName() @@ -176,6 +201,15 @@ export default function DocumentManager() { // State for document status 
filter const [statusFilter, setStatusFilter] = useState('all'); + // State to store page number for each status filter + const [pageByStatus, setPageByStatus] = useState>({ + all: 1, + processed: 1, + processing: 1, + pending: 1, + failed: 1, + }); + // State for document selection const [selectedDocIds, setSelectedDocIds] = useState([]) const isSelectionMode = selectedDocIds.length > 0 @@ -198,15 +232,23 @@ export default function DocumentManager() { // Handle sort column click const handleSort = (field: SortField) => { - if (sortField === field) { - // Toggle sort direction if clicking the same field - setSortDirection(prev => prev === 'asc' ? 'desc' : 'asc') - } else { - // Set new sort field with default desc direction - setSortField(field) - setSortDirection('desc') - } - } + const newDirection = (sortField === field && sortDirection === 'desc') ? 'asc' : 'desc'; + + setSortField(field); + setSortDirection(newDirection); + + // Reset page to 1 when sorting changes + setPagination(prev => ({ ...prev, page: 1 })); + + // Reset all status filters' page memory since sorting affects all + setPageByStatus({ + all: 1, + processed: 1, + processing: 1, + pending: 1, + failed: 1, + }); + }; // Sort documents based on current sort field and direction const sortDocuments = useCallback((documents: DocStatusResponse[]) => { @@ -373,47 +415,67 @@ export default function DocumentManager() { }; }, [docs]); - const fetchDocuments = useCallback(async () => { + // New paginated data fetching function + const fetchPaginatedDocuments = useCallback(async ( + page: number, + pageSize: number, + statusFilter: StatusFilter + ) => { try { - // Check if component is still mounted before starting the request if (!isMountedRef.current) return; - const docs = await getDocuments(); + setIsRefreshing(true); + + // Prepare request parameters + const request: DocumentsRequest = { + status_filter: statusFilter === 'all' ? 
null : statusFilter, + page, + page_size: pageSize, + sort_field: sortField, + sort_direction: sortDirection + }; + + const response = await getDocumentsPaginated(request); - // Check again if component is still mounted after the request completes if (!isMountedRef.current) return; - // Only update state if component is still mounted - if (isMountedRef.current) { - // Update docs state - if (docs && docs.statuses) { - const numDocuments = Object.values(docs.statuses).reduce( - (acc, status) => acc + status.length, - 0 - ) - if (numDocuments > 0) { - setDocs(docs) - } else { - setDocs(null) - } - } else { - setDocs(null) + // Update pagination state + setPagination(response.pagination); + setCurrentPageDocs(response.documents); + setStatusCounts(response.status_counts); + + // Update legacy docs state for backward compatibility + const legacyDocs: DocsStatusesResponse = { + statuses: { + processed: response.documents.filter(doc => doc.status === 'processed'), + processing: response.documents.filter(doc => doc.status === 'processing'), + pending: response.documents.filter(doc => doc.status === 'pending'), + failed: response.documents.filter(doc => doc.status === 'failed') } - } - } catch (err) { - // Only show error if component is still mounted - if (isMountedRef.current) { - toast.error(t('documentPanel.documentManager.errors.loadFailed', { error: errorMessage(err) })) - } - } - }, [setDocs, t]) + }; - // Fetch documents when the tab becomes visible - useEffect(() => { - if (currentTab === 'documents') { - fetchDocuments() + if (response.pagination.total_count > 0) { + setDocs(legacyDocs); + } else { + setDocs(null); + } + + } catch (err) { + if (isMountedRef.current) { + toast.error(t('documentPanel.documentManager.errors.loadFailed', { error: errorMessage(err) })); + } + } finally { + if (isMountedRef.current) { + setIsRefreshing(false); + } } - }, [currentTab, fetchDocuments]) + }, [sortField, sortDirection, t]); + + // Legacy fetchDocuments function for 
backward compatibility + const fetchDocuments = useCallback(async () => { + await fetchPaginatedDocuments(pagination.page, pagination.page_size, statusFilter); + }, [fetchPaginatedDocuments, pagination.page, pagination.page_size, statusFilter]); + const scanDocuments = useCallback(async () => { try { @@ -486,16 +548,67 @@ export default function DocumentManager() { prevStatusCounts.current = newStatusCounts }, [docs]); + // Handle page change - only update state + const handlePageChange = useCallback((newPage: number) => { + if (newPage === pagination.page) return; + + // Save the new page for current status filter + setPageByStatus(prev => ({ ...prev, [statusFilter]: newPage })); + setPagination(prev => ({ ...prev, page: newPage })); + }, [pagination.page, statusFilter]); + + // Handle page size change - only update state + const handlePageSizeChange = useCallback((newPageSize: number) => { + if (newPageSize === pagination.page_size) return; + + // Reset all status filters to page 1 when page size changes + setPageByStatus({ + all: 1, + processed: 1, + processing: 1, + pending: 1, + failed: 1, + }); + + setPagination(prev => ({ ...prev, page: 1, page_size: newPageSize })); + }, [pagination.page_size]); + + // Handle status filter change - only update state + const handleStatusFilterChange = useCallback((newStatusFilter: StatusFilter) => { + if (newStatusFilter === statusFilter) return; + + // Save current page for the current status filter + setPageByStatus(prev => ({ ...prev, [statusFilter]: pagination.page })); + + // Get the saved page for the new status filter + const newPage = pageByStatus[newStatusFilter]; + + // Update status filter and restore the saved page + setStatusFilter(newStatusFilter); + setPagination(prev => ({ ...prev, page: newPage })); + }, [statusFilter, pagination.page, pageByStatus]); + // Handle documents deleted callback const handleDocumentsDeleted = useCallback(async () => { setSelectedDocIds([]) await fetchDocuments() }, 
[fetchDocuments]) - // Add dependency on sort state to re-render when sort changes + + // Central effect to handle all data fetching useEffect(() => { - // This effect ensures the component re-renders when sort state changes - }, [sortField, sortDirection]); + if (currentTab === 'documents') { + fetchPaginatedDocuments(pagination.page, pagination.page_size, statusFilter); + } + }, [ + currentTab, + pagination.page, + pagination.page_size, + statusFilter, + sortField, + sortDirection, + fetchPaginatedDocuments + ]); return ( @@ -503,7 +616,7 @@ export default function DocumentManager() { {t('documentPanel.documentManager.title')} -
+
-
- {isSelectionMode && ( - 1 && ( + )} - {isSelectionMode ? ( - + {isSelectionMode && ( + + )} + {isSelectionMode ? ( + + ) : ( + + )} + + - ) : ( - - )} - - +
@@ -560,56 +689,61 @@ export default function DocumentManager() {
diff --git a/lightrag_webui/src/locales/en.json b/lightrag_webui/src/locales/en.json index 0dadc0d1..48765d5f 100644 --- a/lightrag_webui/src/locales/en.json +++ b/lightrag_webui/src/locales/en.json @@ -401,5 +401,14 @@ "description": "Please enter your API key to access the service", "placeholder": "Enter your API key", "save": "Save" + }, + "pagination": { + "showing": "Showing {{start}} to {{end}} of {{total}} entries", + "page": "Page", + "pageSize": "Page Size", + "firstPage": "First Page", + "prevPage": "Previous Page", + "nextPage": "Next Page", + "lastPage": "Last Page" } } diff --git a/lightrag_webui/src/locales/zh.json b/lightrag_webui/src/locales/zh.json index 495a684e..29b064f4 100644 --- a/lightrag_webui/src/locales/zh.json +++ b/lightrag_webui/src/locales/zh.json @@ -401,5 +401,14 @@ "description": "请输入您的 API Key 以访问服务", "placeholder": "请输入 API Key", "save": "保存" + }, + "pagination": { + "showing": "显示第 {{start}} 到 {{end}} 条,共 {{total}} 条记录", + "page": "页", + "pageSize": "每页显示", + "firstPage": "首页", + "prevPage": "上一页", + "nextPage": "下一页", + "lastPage": "末页" } } diff --git a/paging.md b/paging.md new file mode 100644 index 00000000..716b7297 --- /dev/null +++ b/paging.md @@ -0,0 +1,251 @@ +# 文档列表页面分页显示功能改造方案 + +## 一、改造目标 + +### 问题现状 +- 当前文档页面一次性加载所有文档,导致大量文档时界面加载慢 +- 前端内存占用过大,用户操作体验差 +- 状态过滤和排序都在前端进行,效率低下 + +### 改造目标 +- 实现后端分页查询,减少单次数据传输量 +- 添加分页控制组件,支持翻页和跳转功能 +- 允许用户设置每页显示行数(10-200条) +- 保持现有状态过滤和排序功能不变 +- 提升大数据量场景下的性能表现 + +## 二、总体架构设计 + +### 设计原则 +1. **统一分页接口**:后端提供统一的分页API,支持状态过滤和排序 +2. **智能刷新策略**:根据处理状态选择合适的刷新频率和范围 +3. **即时用户反馈**:状态切换、分页操作提供立即响应 +4. **向后兼容**:保持现有功能完整性,不影响现有操作流程 +5. 
**性能优化**:减少内存占用,优化网络请求 + +### 技术方案 +- **后端**:在现有存储层基础上添加分页查询接口 +- **前端**:改造DocumentManager组件,添加分页控制 +- **数据流**:统一分页查询 + 独立状态计数查询 + +## 三、后端改造步骤 + +### 步骤1:存储层接口扩展 + +**改动文件**:`lightrag/kg/base.py` + +**关键思路**: +- 在BaseDocStatusStorage抽象类中添加分页查询方法 +- 设计统一的分页接口,支持状态过滤、排序、分页参数 +- 返回文档列表和总数量的元组 + +**接口设计要点**: +``` +get_docs_paginated(status_filter, page, page_size, sort_field, sort_direction) -> (documents, total_count) +count_by_status(status) -> int +get_all_status_counts() -> Dict[str, int] +``` + +### 步骤2:各存储后端实现 + +**改动文件**: +- `lightrag/kg/postgres_impl.py` +- `lightrag/kg/mongo_impl.py` +- `lightrag/kg/redis_impl.py` +- `lightrag/kg/json_doc_status_impl.py` + +**PostgreSQL实现要点**: +- 使用LIMIT和OFFSET实现分页 +- 构建动态WHERE条件支持状态过滤 +- 使用COUNT查询获取总数量 +- 添加合适的数据库索引优化查询性能 + +**MongoDB实现要点**: +- 使用skip()和limit()实现分页 +- 使用聚合管道进行状态统计 +- 优化查询条件和索引 + +**Redis 与 Json实现要点:** + +* 考虑先用简单的方式实现,即把所有文件清单读到内存中后进行过滤和排序 + +**关键考虑**: + +- 确保各存储后端的分页逻辑一致性 +- 处理边界情况(空结果、超出页码范围等) +- 优化查询性能,避免全表扫描 + +### 步骤3:API路由层改造 + +**改动文件**:`lightrag/api/routers/document_routes.py` + +**新增接口**: +1. `POST /documents/paginated` - 分页查询文档 +2. 
`GET /documents/status_counts` - 获取状态计数 + +**数据模型设计**: +- DocumentsRequest:分页请求参数 +- PaginatedDocsResponse:分页响应数据 +- PaginationInfo:分页元信息 + +**关键逻辑**: +- 参数验证(页码范围、页面大小限制) +- 并行查询分页数据和状态计数 +- 错误处理和异常响应 + +### 步骤4:数据库优化 + +**索引策略**: +- 为workspace + status + updated_at创建复合索引 +- 为workspace + status + created_at创建复合索引 +- 为workspace + updated_at创建索引 +- 为workspace + created_at创建索引 + +**性能考虑**: +- 避免深度分页的性能问题 +- 考虑添加缓存层优化状态计数查询 +- 监控查询性能,必要时调整索引策略 + +## 四、前端改造步骤 + +### 步骤1:API客户端扩展 + +**改动文件**:`lightrag_webui/src/api/lightrag.ts` + +**新增函数**: +- `getDocumentsPaginated()` - 分页查询文档 +- `getDocumentStatusCounts()` - 获取状态计数 + +**类型定义**: +- 定义分页请求和响应的TypeScript类型 +- 确保类型安全和代码提示 + +### 步骤2:分页控制组件开发 + +**新增文件**:`lightrag_webui/src/components/ui/PaginationControls.tsx` + +**组件功能**: +- 支持紧凑模式和完整模式 +- 页码输入和跳转功能 +- 每页显示数量选择(10-200) +- 总数信息显示 +- 禁用状态处理 + +**设计要点**: +- 响应式设计,适配不同屏幕尺寸 +- 防抖处理,避免频繁请求 +- 错误处理和状态回滚 +- 组件摆放位置:目前状态按钮上方,与scan按钮同一层,居中摆放 + +### 步骤3:状态过滤按钮优化 + +**改动文件**:现有状态过滤相关组件 + +**优化要点**: + +- 添加加载状态指示 +- 数据不足时的智能提示 +- 定期刷新数据,状态切换时如果最新的状态数据距离上次刷新数据超过5秒应即时刷新数据 +- 防止重复点击和并发请求 + +### 步骤4:主组件DocumentManager改造 + +**改动文件**:`lightrag_webui/src/features/DocumentManager.tsx` + +**核心改动**: + +**状态管理重构**: +- 将docs状态改为currentPageDocs(仅存储当前页数据) +- 添加pagination状态管理分页信息 +- 添加statusCounts状态独立管理状态计数 +- 添加加载状态管理(isStatusChanging, isRefreshing) + +**数据获取策略**: +- 实现智能刷新:活跃期完整刷新,稳定期轻量刷新 +- 状态切换时立即刷新数据 +- 分页操作时立即更新数据 +- 定期刷新与手动操作协调 + +**布局调整**: +- 将分页控制组件放置在顶部操作栏中间位置 +- 保持状态过滤按钮在表格上方 +- 确保响应式布局适配 + +**事件处理优化**: +- 状态切换时,如果当前页码数据不足,则重置到第一页 +- 页面大小变更时智能计算新页码 +- 错误时状态回滚机制 + +## 五、用户体验优化 + +### 即时反馈机制 +- 状态切换时显示加载动画 +- 分页操作时提供视觉反馈 +- 数据不足时智能提示用户 + +### 错误处理策略 +- 网络错误时自动重试 +- 操作失败时状态回滚 +- 友好的错误提示信息 + +### 性能优化措施 +- 防抖处理频繁操作 +- 智能刷新策略减少不必要请求 +- 组件卸载时清理定时器和请求 + +## 六、兼容性保障 + +### 向后兼容 +- 保留原有的/documents接口作为备用 +- 现有功能(排序、过滤、选择)保持不变 +- 渐进式升级,支持配置开关 + +### 数据一致性 +- 确保分页数据与状态计数同步 +- 处理并发更新的数据一致性问题 +- 定期刷新保持数据最新 + +## 七、测试策略 + +### 功能测试 +- 各种分页场景测试 +- 状态过滤组合测试 +- 排序功能验证 +- 边界条件测试 + +### 性能测试 +- 大数据量场景测试 +- 
并发访问压力测试 +- 内存使用情况监控 +- 响应时间测试 + +### 兼容性测试 +- 不同存储后端测试 +- 不同浏览器兼容性 +- 移动端响应式测试 + +## 八、关键实现细节 + +### 后端分页查询设计 +- **统一接口**:所有存储后端实现相同的分页接口签名 +- **参数验证**:严格验证页码、页面大小、排序参数的合法性 +- **性能优化**:使用数据库原生分页功能,避免应用层分页 +- **错误处理**:统一的错误响应格式和异常处理机制 + +### 前端状态管理策略 +- **数据分离**:当前页数据与状态计数分别管理 +- **智能刷新**:根据文档处理状态选择刷新策略 +- **状态同步**:确保UI状态与后端数据保持一致 +- **错误恢复**:操作失败时自动回滚到之前状态 + +### 分页控制组件设计 +- **紧凑布局**:适配顶部操作栏的空间限制 +- **响应式设计**:在不同屏幕尺寸下自适应布局 +- **交互优化**:防抖处理、加载状态、禁用状态管理 +- **可访问性**:支持键盘导航和屏幕阅读器 + +### 数据库索引优化 +- **复合索引**:workspace + status + sort_field的组合索引 +- **覆盖索引**:尽可能使用覆盖索引减少回表查询 +- **索引监控**:定期监控索引使用情况和查询性能 +- **渐进优化**:根据实际使用情况调整索引策略