Add comprehensive E2E testing infrastructure with PostgreSQL performance tuning, Gunicorn multi-worker support, and evaluation scripts for RAGAS-based quality assessment. Introduce 4 new evaluation utilities: compare_results.py for A/B test analysis, download_wikipedia.py for reproducible test datasets, e2e_test_harness.py for automated evaluation pipelines, and ingest_test_docs.py for batch document ingestion. Update docker-compose.test.yml with aggressive async settings, memory limits, and optimized chunking parameters. Parallelize entity summarization in operate.py for improved extraction performance. Fix typos in merge node/edge logs.
43 lines
1.4 KiB
Bash
Executable file
43 lines
1.4 KiB
Bash
Executable file
#!/bin/bash
# Quick script to populate LightRAG with diverse test documents
#
# This downloads Wikipedia articles across 4 domains (Medical, Finance, Climate, Sports)
# and ingests them into LightRAG. The articles are chosen to have entity overlap
# (WHO, Carbon/Emissions, Organizations) to test entity merging and summarization.
#
# Usage:
#   ./lightrag/evaluation/populate_test_data.sh
#   LIGHTRAG_API_URL=http://localhost:9622 ./lightrag/evaluation/populate_test_data.sh
#
# Environment:
#   LIGHTRAG_API_URL  Base URL of the LightRAG server
#                     (default: http://localhost:9622)
#
# Exit status:
#   0 on success; non-zero if a required tool is missing, the health check
#   fails, or either Python step exits with an error.

# -e: stop on the first failing step; -u: treat unset variables as errors;
# pipefail: a pipeline fails if any stage in it fails.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
RAG_URL="${LIGHTRAG_API_URL:-http://localhost:9622}"
# Drop a single trailing slash so "$RAG_URL/health" never becomes "//health".
RAG_URL="${RAG_URL%/}"

# Print each argument on its own line to stderr, then abort with status 1.
die() {
  printf '%s\n' "$@" >&2
  exit 1
}

# Fail early with an accurate message if a required tool is not installed.
# Without this, a missing curl would be misreported as a connection failure.
require_tool() {
  local tool=$1
  command -v "$tool" > /dev/null 2>&1 \
    || die "✗ Required command not found: $tool"
}

main() {
  echo "=== LightRAG Test Data Population ==="
  echo "RAG URL: $RAG_URL"
  echo ""

  require_tool curl
  require_tool python3

  # Check if LightRAG is running.
  # --fail makes curl return non-zero on HTTP error responses (>= 400), so an
  # unhealthy server is not mistaken for a running one; --max-time keeps the
  # probe from hanging indefinitely on an unresponsive host.
  if ! curl --silent --fail --max-time 10 "$RAG_URL/health" > /dev/null 2>&1; then
    die "✗ Cannot connect to LightRAG at $RAG_URL" \
        " Make sure LightRAG is running first"
  fi

  # 1. Download Wikipedia articles
  echo "[1/2] Downloading Wikipedia articles..."
  python3 "$SCRIPT_DIR/download_wikipedia.py"

  # 2. Ingest into LightRAG
  echo ""
  echo "[2/2] Ingesting documents..."
  python3 "$SCRIPT_DIR/ingest_test_docs.py" --rag-url "$RAG_URL"

  echo ""
  echo "=== Done! ==="
  echo "Documents ingested into LightRAG."
  echo ""
  echo "Next steps:"
  echo " - Check graph stats: curl $RAG_URL/graph/statistics"
  echo " - Query the data: curl '$RAG_URL/query?mode=global&query=What+is+climate+change'"
}

main "$@"