LightRAG/lightrag/api/routers/metrics_routes.py
clssck 59e89772de refactor: consolidate to PostgreSQL-only backend and modernize stack
Remove legacy storage implementations and deprecated examples:
- Delete FAISS, JSON, Memgraph, Milvus, MongoDB, Nano Vector DB, Neo4j, NetworkX, Qdrant, Redis storage backends
- Remove Kubernetes deployment manifests and installation scripts
- Delete unofficial examples for deprecated backends and offline deployment docs
Streamline core infrastructure:
- Consolidate storage layer to PostgreSQL-only implementation
- Add full-text search caching with FTS cache module
- Implement metrics collection and monitoring pipeline
- Add explain and metrics API routes
Modernize frontend and tooling:
- Switch web UI to Bun with bun.lock, remove npm and pnpm lockfiles
- Update Dockerfile for PostgreSQL-only deployment
- Add Makefile for common development tasks
- Update environment and configuration examples
Enhance evaluation and testing capabilities:
- Add prompt optimization with DSPy and auto-tuning
- Implement ground truth regeneration and variant testing
- Add prompt debugging and response comparison utilities
- Expand test coverage with new integration scenarios
Simplify dependencies and configuration:
- Remove offline-specific requirement files
- Update pyproject.toml with streamlined dependencies
- Add Python version pinning with .python-version
- Create project guidelines in CLAUDE.md and AGENTS.md
2025-12-12 16:28:49 +01:00

213 lines
7.9 KiB
Python

"""
Metrics routes for operational monitoring.
This module provides endpoints for retrieving LightRAG operational metrics
including query latency percentiles, cache hit rates, and API call counts.
Endpoints:
- GET /metrics: Aggregated operational metrics
- GET /metrics/queries: Recent query details
"""
from typing import Any, ClassVar

from fastapi import APIRouter, Depends, Query
from pydantic import BaseModel, ConfigDict, Field

from lightrag.api.utils_api import get_combined_auth_dependency
from lightrag.cache.fts_cache import get_fts_cache_stats
from lightrag.metrics import get_metrics_collector
from lightrag.utils import logger, statistic_data
# Latency summary nested inside MetricsResponse.latency_percentiles.
class LatencyPercentiles(BaseModel):
    """Query latency percentiles in milliseconds."""

    # Percentile cut points; `...` marks each field as required.
    p50: float = Field(..., description='50th percentile (median) latency')
    p95: float = Field(..., description='95th percentile latency')
    p99: float = Field(..., description='99th percentile latency')

    # Aggregate figures reported alongside the percentiles.
    avg: float = Field(..., description='Average latency')
    min: float = Field(..., description='Minimum latency')
    max: float = Field(..., description='Maximum latency')
# LLM call/cache counters nested inside MetricsResponse.cache_stats.
class CacheStats(BaseModel):
    """LLM cache statistics."""

    # Raw counters; `...` marks each field as required.
    total_llm_calls: int = Field(..., description='Total LLM API calls')
    total_cache_hits: int = Field(..., description='Total cache hits')

    # Ratio expressed as a fraction, not a percentage (see description).
    hit_rate: float = Field(..., description='Cache hit rate (0.0 to 1.0)')
# Embedding counters nested inside MetricsResponse.embed_stats.
class EmbedStats(BaseModel):
    """Embedding statistics."""

    # Single required counter.
    total_calls: int = Field(..., description='Total embedding API calls')
# Shape of the full-text-search cache report; the /metrics endpoint fills it
# by unpacking the mapping returned from get_fts_cache_stats().
class FTSCacheStats(BaseModel):
    """Full-text search cache statistics."""

    # Feature switches.
    enabled: bool = Field(..., description='Whether FTS caching is enabled')

    # Occupancy figures.
    total_entries: int = Field(..., description='Total entries in cache')
    valid_entries: int = Field(..., description='Non-expired entries')

    # Configured limits.
    max_size: int = Field(..., description='Maximum cache size')
    ttl_seconds: int = Field(..., description='Cache TTL in seconds')

    redis_enabled: bool = Field(..., description='Whether Redis caching is enabled')
# Top-level payload returned by GET /metrics.
class MetricsResponse(BaseModel):
    """Response model for metrics endpoint."""

    # Pydantic v2 style configuration. The previous nested ``class Config``
    # is the deprecated v1 form and triggers a deprecation warning under v2;
    # ``json_schema_extra`` itself is unchanged and still supplies the
    # example shown in the generated schema.
    model_config = ConfigDict(
        json_schema_extra={
            'example': {
                'uptime_seconds': 3600.5,
                'total_queries': 150,
                'queries_in_window': 45,
                'window_seconds': 3600,
                'latency_percentiles': {
                    'p50': 234.5,
                    'p95': 890.2,
                    'p99': 1200.0,
                    'avg': 345.6,
                    'min': 100.0,
                    'max': 2500.0,
                },
                'cache_stats': {
                    'total_llm_calls': 100,
                    'total_cache_hits': 50,
                    'hit_rate': 0.33,
                },
                'embed_stats': {'total_calls': 200},
                'fts_cache_stats': {
                    'enabled': True,
                    'total_entries': 150,
                    'valid_entries': 140,
                    'max_size': 5000,
                    'ttl_seconds': 300,
                    'redis_enabled': False,
                },
                'mode_distribution': {'mix': 30, 'local': 10, 'global': 5},
                'legacy_stats': {'llm_call': 100, 'llm_cache': 50, 'embed_call': 200},
            }
        }
    )

    # Scalar counters describing the server and the requested window.
    uptime_seconds: float = Field(description='Server uptime in seconds')
    total_queries: int = Field(description='Total queries processed')
    queries_in_window: int = Field(description='Queries in the time window')
    window_seconds: float = Field(description='Time window for percentile calculations')

    # Optional: stays None when the window contained no queries.
    latency_percentiles: LatencyPercentiles | None = Field(
        default=None, description='Query latency percentiles (null if no queries in window)'
    )

    # Nested, required sub-reports.
    cache_stats: CacheStats
    embed_stats: EmbedStats
    fts_cache_stats: FTSCacheStats

    mode_distribution: dict[str, int] = Field(description='Query count by mode in the time window')
    legacy_stats: dict[str, int] = Field(description='Legacy statistics from utils.statistic_data')
# One record in RecentQueriesResponse.queries.
class QueryMetricItem(BaseModel):
    """A single query metric entry."""

    # When the query ran and how long it took; `...` marks fields as required.
    timestamp: float = Field(..., description='Unix timestamp of query')
    duration_ms: float = Field(..., description='Query duration in milliseconds')

    # How the query was served.
    mode: str = Field(..., description='Query mode used')
    cache_hit: bool = Field(..., description='Whether LLM cache was hit')

    # Retrieval volume and cost.
    entities_count: int = Field(..., description='Number of entities retrieved')
    relations_count: int = Field(..., description='Number of relations retrieved')
    chunks_count: int = Field(..., description='Number of chunks retrieved')
    tokens_used: int = Field(..., description='Total tokens used')
# Payload returned by GET /metrics/queries.
class RecentQueriesResponse(BaseModel):
    """Response model for recent queries endpoint."""

    # The per-query records plus their count; both are required.
    queries: list[QueryMetricItem] = Field(..., description='Recent query metrics')
    count: int = Field(..., description='Number of queries returned')
def create_metrics_routes(api_key: str | None = None) -> APIRouter:
    """Build the router exposing operational metrics endpoints.

    Two GET routes are registered, each guarded by the combined auth
    dependency derived from ``api_key``:

    - ``/metrics``: aggregated statistics for a time window
      (``window`` query parameter, 60 to 86400 seconds, default 3600).
    - ``/metrics/queries``: the most recent per-query records
      (``limit`` query parameter, 1 to 100, default 10).

    Args:
        api_key: Optional API key passed to ``get_combined_auth_dependency``.

    Returns:
        The ``APIRouter`` (tagged ``metrics``) with both routes attached.
    """
    router = APIRouter(tags=['metrics'])
    auth_dependency = get_combined_auth_dependency(api_key)

    @router.get(
        '/metrics',
        response_model=MetricsResponse,
        dependencies=[Depends(auth_dependency)],
        summary='Get operational metrics',
        description=(
            'Returns lightweight operational metrics including latency percentiles, '
            'cache hit rates, and API call counts. Metrics are computed on-demand '
            'from a circular buffer of recent queries.'
        ),
    )
    async def get_metrics(
        window: float = Query(
            default=3600.0,
            ge=60.0,
            le=86400.0,
            description='Time window in seconds for percentile calculations (60s to 24h)',
        ),
    ) -> MetricsResponse:
        """Get operational metrics."""
        try:
            metrics_collector = await get_metrics_collector()
            snapshot = metrics_collector.compute_stats(window_seconds=window)
            fts_snapshot = get_fts_cache_stats()

            # Values are pulled out in the same order the response declares
            # them, so a missing key surfaces exactly as it did before.
            uptime = snapshot['uptime_seconds']
            total = snapshot['total_queries']
            in_window = snapshot['queries_in_window']
            window_used = snapshot['window_seconds']

            # A falsy entry means there is nothing to report for the window;
            # the response then carries None for the percentiles.
            raw_latency = snapshot['latency_percentiles']
            latency = None
            if raw_latency:
                latency = LatencyPercentiles(**raw_latency)

            llm_cache = CacheStats(**snapshot['cache_stats'])
            embeddings = EmbedStats(**snapshot['embed_stats'])
            fts_cache = FTSCacheStats(**fts_snapshot)
            modes = snapshot['mode_distribution']
            # Copy so the response does not alias the module-level counters.
            legacy = statistic_data.copy()

            return MetricsResponse(
                uptime_seconds=uptime,
                total_queries=total,
                queries_in_window=in_window,
                window_seconds=window_used,
                latency_percentiles=latency,
                cache_stats=llm_cache,
                embed_stats=embeddings,
                fts_cache_stats=fts_cache,
                mode_distribution=modes,
                legacy_stats=legacy,
            )
        except Exception as e:
            # Log with traceback, then let the framework handle the error.
            logger.error(f'Error getting metrics: {e}', exc_info=True)
            raise

    @router.get(
        '/metrics/queries',
        response_model=RecentQueriesResponse,
        dependencies=[Depends(auth_dependency)],
        summary='Get recent query metrics',
        description='Returns detailed metrics for recent queries.',
    )
    async def get_recent_queries(
        limit: int = Query(
            default=10,
            ge=1,
            le=100,
            description='Maximum number of recent queries to return',
        ),
    ) -> RecentQueriesResponse:
        """Get recent query metrics."""
        try:
            metrics_collector = await get_metrics_collector()
            recent = metrics_collector.get_recent_queries(limit=limit)
            items = [QueryMetricItem(**record) for record in recent]
            return RecentQueriesResponse(queries=items, count=len(recent))
        except Exception as e:
            # Log with traceback, then let the framework handle the error.
            logger.error(f'Error getting recent queries: {e}', exc_info=True)
            raise

    return router