Remove legacy storage implementations and deprecated examples:
- Delete FAISS, JSON, Memgraph, Milvus, MongoDB, Nano Vector DB, Neo4j, NetworkX, Qdrant, Redis storage backends
- Remove Kubernetes deployment manifests and installation scripts
- Delete unofficial examples for deprecated backends and offline deployment docs

Streamline core infrastructure:
- Consolidate storage layer to PostgreSQL-only implementation
- Add full-text search caching with FTS cache module
- Implement metrics collection and monitoring pipeline
- Add explain and metrics API routes

Modernize frontend and tooling:
- Switch web UI to Bun with bun.lock, remove npm and pnpm lockfiles
- Update Dockerfile for PostgreSQL-only deployment
- Add Makefile for common development tasks
- Update environment and configuration examples

Enhance evaluation and testing capabilities:
- Add prompt optimization with DSPy and auto-tuning
- Implement ground truth regeneration and variant testing
- Add prompt debugging and response comparison utilities
- Expand test coverage with new integration scenarios

Simplify dependencies and configuration:
- Remove offline-specific requirement files
- Update pyproject.toml with streamlined dependencies
- Add Python version pinning with .python-version
- Create project guidelines in CLAUDE.md and AGENTS.md
117 lines
4.1 KiB
Python
117 lines
4.1 KiB
Python
#!/usr/bin/env python3
|
|
"""Monitor LightRAG pipeline processing status with timeouts and retries."""
|
|
|
|
import argparse
|
|
import logging
|
|
import os
|
|
import time
|
|
from typing import Any
|
|
|
|
import requests
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def _fetch_json(url: str, timeout: float) -> dict[str, Any]:
    """GET ``url`` and return the response body decoded as a JSON object.

    Args:
        url: Absolute URL to request.
        timeout: Timeout in seconds, passed straight to ``requests.get``.

    Returns:
        The decoded JSON object.

    Raises:
        requests.RequestException: On connection errors, timeouts, or an
            HTTP error status (raised by ``raise_for_status``).
        ValueError: If the body is not valid JSON, or if it is valid JSON
            whose top-level value is not an object.
    """
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    payload = resp.json()
    # The caller uses ``.get`` on the result. Reject arrays/scalars here with
    # an exception type it already handles (ValueError) rather than letting an
    # AttributeError escape later.
    if not isinstance(payload, dict):
        raise ValueError(f'Expected a JSON object from {url}, got {type(payload).__name__}')
    return payload
|
|
|
|
|
|
def monitor(
    api_url: str,
    poll_interval: float = 10.0,
    request_timeout: float = 10.0,
    timeout_seconds: float = 600.0,
    max_retries: int = 5,
) -> int:
    """Poll the pipeline status endpoint until complete or timeout.

    Args:
        api_url: Base URL of the API; a trailing slash is tolerated.
        poll_interval: Seconds to sleep between polls and between retries.
        request_timeout: Per-request timeout in seconds.
        timeout_seconds: Overall time budget in seconds, checked at the start
            of every loop iteration.
        max_retries: Number of consecutive failed status requests after which
            monitoring gives up.

    Returns:
        Exit code: 0 success, 1 timeout, 2 status retries exceeded,
        3 doc fetch failed.
    """
    logger.info('Monitoring LightRAG pipeline... api_url=%s', api_url)

    # Strip a trailing slash so the endpoint URLs never contain '//'.
    base_url = api_url.rstrip('/')
    status_url = f'{base_url}/documents/pipeline_status'
    documents_url = f'{base_url}/documents'

    # monotonic() is unaffected by wall-clock adjustments (NTP, manual changes),
    # so the overall timeout cannot fire early or be pushed back by a clock jump.
    start = time.monotonic()
    consecutive_errors = 0

    while True:
        elapsed = time.monotonic() - start
        if elapsed > timeout_seconds:
            logger.warning('Monitoring timed out after %.0fs', elapsed)
            return 1

        try:
            status = _fetch_json(status_url, timeout=request_timeout)
        except (requests.RequestException, ValueError) as e:
            consecutive_errors += 1
            # Request-level failures take precedence over plain decode errors
            # when an exception is an instance of both types.
            if isinstance(e, requests.RequestException):
                logger.error('Failed to fetch pipeline status (%d/%d): %s', consecutive_errors, max_retries, e)
            else:
                logger.error('Invalid JSON from pipeline status (%d/%d): %s', consecutive_errors, max_retries, e)
            if consecutive_errors >= max_retries:
                return 2
            time.sleep(poll_interval)
            continue

        # A successful poll resets the failure streak.
        consecutive_errors = 0
        busy = bool(status.get('busy', False))
        pending = bool(status.get('request_pending', False))
        msg = str(status.get('latest_message', ''))[:80]  # keep the log line short
        batch = f'{status.get("cur_batch", 0)}/{status.get("batchs", 0)}'
        logger.info('[%s] batch=%s busy=%s pending=%s | %s', time.strftime('%H:%M:%S'), batch, busy, pending, msg)

        if not busy and not pending:
            try:
                docs = _fetch_json(documents_url, timeout=request_timeout)
            except (requests.RequestException, ValueError) as e:
                logger.error('Pipeline finished but failed to fetch documents: %s', e)
                return 3
            # ``or []`` also covers an explicit JSON null for 'documents'.
            doc_count = len(docs.get('documents') or [])
            logger.info('Pipeline complete. Documents indexed: %d', doc_count)
            return 0

        time.sleep(poll_interval)
|
|
|
|
|
|
def _parse_args(argv: 'list[str] | None' = None) -> argparse.Namespace:
    """Parse command-line options for the pipeline monitor.

    Args:
        argv: Argument list to parse. When ``None`` (the default) argparse
            falls back to ``sys.argv[1:]``, so existing callers are unaffected.

    Returns:
        Namespace with ``api_url``, ``interval``, ``request_timeout``,
        ``timeout``, ``max_retries`` and ``log_level``.
    """
    parser = argparse.ArgumentParser(description='Monitor LightRAG pipeline processing status.')
    parser.add_argument(
        '--api-url',
        # Environment variable is read at call time, not at import time.
        default=os.getenv('API_URL', 'http://localhost:9621'),
        help='Base URL of the LightRAG API (default: env API_URL or http://localhost:9621)',
    )
    parser.add_argument('--interval', type=float, default=10.0, help='Polling interval in seconds (default: 10)')
    parser.add_argument('--request-timeout', type=float, default=10.0, help='Per-request timeout in seconds')
    parser.add_argument('--timeout', type=float, default=600.0, help='Overall timeout in seconds (default: 600)')
    parser.add_argument(
        '--max-retries',
        type=int,
        default=5,
        help='Maximum consecutive request failures before exiting (default: 5)',
    )
    parser.add_argument(
        '--log-level',
        default=os.getenv('LOG_LEVEL', 'INFO'),
        help='Logging level (DEBUG, INFO, WARNING, ERROR) (default: INFO)',
    )
    return parser.parse_args(argv)
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: parse options, configure logging, run the monitor
    # and propagate its return value as the process exit code.
    args = _parse_args()

    # Looking the name up on the logging module can resolve to a non-level
    # attribute (e.g. BASIC_FORMAT is a string), which basicConfig() rejects.
    # Accept only integer levels and fall back to INFO otherwise.
    level = getattr(logging, str(args.log_level).upper(), None)
    if not isinstance(level, int):
        level = logging.INFO

    logging.basicConfig(
        level=level,
        format='%(asctime)s %(levelname)s %(name)s: %(message)s',
    )
    exit_code = monitor(
        api_url=args.api_url,
        poll_interval=args.interval,
        request_timeout=args.request_timeout,
        timeout_seconds=args.timeout,
        max_retries=args.max_retries,
    )
    raise SystemExit(exit_code)
|