#!/usr/bin/env python3
"""
Integration test script for LightRAG with production setup.

This script tests:
- Document indexing with C++ code repository
- Query operations (naive, local, global, hybrid)
- API endpoints (insert, query, graph retrieval)
- Integration with Redis, Neo4j, and Milvus storage backends
"""

import asyncio
import json
import logging
import os
import sys
from pathlib import Path

import httpx

# Configure logging: root logger at INFO with a timestamped record layout,
# plus a module-level logger shared by everything in this script.
_LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
logger = logging.getLogger(__name__)


class IntegrationTestRunner:
    """Integration test runner for LightRAG.

    Every ``test_*`` coroutine sends one request (or one retried sequence of
    requests) through a shared ``httpx.AsyncClient``, records the outcome in
    ``self.test_results`` via :meth:`log_result`, and returns the same
    verdict as a bool. Use the runner as an async context manager so the
    client is closed on exit.
    """

    def __init__(self, base_url: str = "http://localhost:9621"):
        """Create the HTTP client and an empty result list.

        Args:
            base_url: Root URL of the server under test; endpoint paths are
                appended to it verbatim.
        """
        self.base_url = base_url
        self.client = httpx.AsyncClient(timeout=120.0)
        # One dict per recorded test: {"test": str, "passed": bool, "message": str}
        self.test_results = []

    async def __aenter__(self):
        """Return the runner itself for use in ``async with``."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the HTTP client; exceptions from the body are not suppressed."""
        await self.client.aclose()

    def log_result(self, test_name: str, passed: bool, message: str = ""):
        """Log one test outcome and append it to ``self.test_results``.

        Args:
            test_name: Human-readable name shown in the log and the summary.
            passed: Whether the test succeeded.
            message: Optional detail (status code, error text).
        """
        status = "✅ PASS" if passed else "❌ FAIL"
        logger.info(f"{status} - {test_name}: {message}")
        self.test_results.append(
            {"test": test_name, "passed": passed, "message": message}
        )

    async def wait_for_server(self, max_retries: int = 30, retry_delay: int = 2):
        """Poll ``GET /health`` until it answers 200 or attempts run out.

        Args:
            max_retries: Maximum number of health probes.
            retry_delay: Seconds to pause between two probes.

        Returns:
            True as soon as a probe returns HTTP 200, False otherwise.
        """
        logger.info("Waiting for LightRAG server to be ready...")

        for attempt in range(1, max_retries + 1):
            try:
                response = await self.client.get(f"{self.base_url}/health")
                if response.status_code == 200:
                    logger.info("✅ LightRAG server is ready!")
                    return True
            except Exception as e:
                logger.debug(
                    f"Attempt {attempt}/{max_retries}: Server not ready yet - {e}"
                )

            # Only pause when another probe follows; sleeping after the final
            # attempt would just delay the failure report.
            if attempt < max_retries:
                await asyncio.sleep(retry_delay)

        logger.error("❌ Server failed to become ready in time")
        return False

    async def test_health_endpoint(self):
        """Test health check endpoint: passes when ``GET /health`` returns 200."""
        test_name = "Health Check"
        try:
            response = await self.client.get(f"{self.base_url}/health")
            passed = response.status_code == 200
            self.log_result(test_name, passed, f"Status: {response.status_code}")
            return passed
        except Exception as e:
            self.log_result(test_name, False, f"Error: {e}")
            return False

    async def test_insert_text(self, text: str, description: str = ""):
        """Test document insertion via ``POST /documents/text``.

        Args:
            text: Document body sent as the ``text`` JSON field.
            description: Sent as the ``description`` JSON field and, when
                non-empty, appended to the recorded test name.

        Returns:
            True when the server answers HTTP 200, False otherwise
            (including when the request raises).
        """
        test_name = f"Insert Document{' - ' + description if description else ''}"
        try:
            response = await self.client.post(
                f"{self.base_url}/documents/text",
                json={"text": text, "description": description},
            )
            passed = response.status_code == 200
            self.log_result(test_name, passed, f"Status: {response.status_code}")
            return passed
        except Exception as e:
            self.log_result(test_name, False, f"Error: {e}")
            return False

    @staticmethod
    def _error_payload(response):
        """Return the response body parsed as a JSON object, or None if it is not one."""
        try:
            payload = response.json()
        except (json.JSONDecodeError, ValueError):
            return None
        return payload if isinstance(payload, dict) else None

    async def _post_file(self, file_path: Path, use_text_endpoint: bool):
        """Send *file_path* to the text endpoint (as UTF-8 text) or the upload endpoint."""
        if use_text_endpoint:
            content = file_path.read_text(encoding="utf-8")
            return await self.client.post(
                f"{self.base_url}/documents/text",
                json={"text": content, "file_source": file_path.name},
            )

        # The handle must stay open until the multipart request has been sent.
        with open(file_path, "rb") as f:
            files = {"file": (file_path.name, f, "text/plain")}
            return await self.client.post(
                f"{self.base_url}/documents/upload", files=files
            )

    async def test_insert_file(
        self, file_path: Path, retry_count: int = 2, retry_delay: float = 2.0
    ):
        """Test file insertion via API with retry logic and fallback to text endpoint.

        Header files (``.h``, ``.hpp``, ``.hh``) are sent to
        ``/documents/text`` directly; everything else goes to
        ``/documents/upload`` first. If the upload is rejected with HTTP 400
        and a detail containing "Unsupported file type", the file is resent
        once through the text endpoint. That switch is not counted as a
        retry, so it also works with ``retry_count=0``.

        HTTP 500 responses and raised exceptions are retried up to
        ``retry_count`` times. Any other status fails immediately. Exactly
        one result is recorded per call.

        Args:
            file_path: File to index.
            retry_count: Number of retries after the first attempt.
            retry_delay: Seconds to wait before each retry.

        Returns:
            True when the server eventually answers HTTP 200, else False.
        """
        test_name = f"Insert File - {file_path.name}"

        # Check if this is a header file that should use text endpoint
        use_text_endpoint = file_path.suffix in [".h", ".hpp", ".hh"]

        attempt = 0
        while attempt <= retry_count:
            can_retry = attempt < retry_count

            try:
                response = await self._post_file(file_path, use_text_endpoint)
            except Exception as e:
                if not can_retry:
                    self.log_result(test_name, False, f"Error: {e}")
                    return False
                logger.warning(
                    f"Attempt {attempt + 1} exception for {file_path.name}: {e}. Retrying..."
                )
                await asyncio.sleep(retry_delay)
                attempt += 1
                continue

            status = response.status_code

            if status == 200:
                self.log_result(test_name, True, f"Status: {status}")
                return True

            if status == 400 and not use_text_endpoint:
                payload = self._error_payload(response)
                detail = payload.get("detail", "") if payload else ""
                if "Unsupported file type" in str(detail):
                    # Fallback to text endpoint. `attempt` is deliberately
                    # left unchanged: choosing the other endpoint is not a
                    # retry, and it can happen at most once because
                    # use_text_endpoint is now True.
                    logger.info(
                        f"File type not supported for upload, trying text endpoint for {file_path.name}"
                    )
                    use_text_endpoint = True
                    continue

            if status == 500:
                # Try to get error details
                payload = self._error_payload(response)
                if payload is not None:
                    error_msg = payload.get("detail", "Unknown error")
                else:
                    error_msg = (
                        response.text[:200] if response.text else "No error details"
                    )

                if can_retry:
                    logger.warning(
                        f"Attempt {attempt + 1} failed for {file_path.name}: {error_msg}. Retrying..."
                    )
                    await asyncio.sleep(retry_delay)  # Wait before retry
                    attempt += 1
                    continue

                self.log_result(
                    test_name,
                    False,
                    f"Status: {status}, Error: {error_msg}",
                )
                return False

            self.log_result(test_name, False, f"Status: {status}")
            return False

        # Only reachable when retry_count is negative, i.e. no request was sent.
        self.log_result(test_name, False, "No attempts were made")
        return False

    async def test_query(self, query: str, mode: str = "hybrid"):
        """Test query endpoint (``POST /query``, non-streaming).

        Args:
            query: Question text.
            mode: Retrieval mode sent as the ``mode`` JSON field.

        Returns:
            True when the server answers HTTP 200 and the body parses as
            JSON, False otherwise.
        """
        test_name = f"Query ({mode} mode)"
        try:
            response = await self.client.post(
                f"{self.base_url}/query",
                json={"query": query, "mode": mode, "stream": False},
            )
            passed = response.status_code == 200

            if passed:
                result = response.json()
                # str() keeps the preview from raising when "response" is
                # null or otherwise not a string.
                response_text = str(result.get("response", ""))
                logger.info(f"Query response preview: {response_text[:200]}...")

            self.log_result(test_name, passed, f"Status: {response.status_code}")
            return passed
        except Exception as e:
            self.log_result(test_name, False, f"Error: {e}")
            return False

    async def test_query_with_data(self, query: str, mode: str = "hybrid"):
        """Test query/data endpoint that returns structured data.

        Posts to ``/query/data`` with ``top_k`` 10. The test passes when the
        status is 200 and the JSON body has both a ``data`` and a
        ``metadata`` key; the entity/relationship/chunk counts are logged
        for information only.

        Args:
            query: Question text.
            mode: Retrieval mode sent as the ``mode`` JSON field.

        Returns:
            True on success, False otherwise (including on exceptions).
        """
        test_name = f"Query Data ({mode} mode)"
        try:
            response = await self.client.post(
                f"{self.base_url}/query/data",
                json={"query": query, "mode": mode, "top_k": 10},
            )

            if response.status_code != 200:
                self.log_result(test_name, False, f"Status: {response.status_code}")
                return False

            result = response.json()
            # Validate response structure
            if "data" not in result or "metadata" not in result:
                self.log_result(
                    test_name, False, "Missing required fields in response"
                )
                return False

            # `or` fallbacks: null values must not break the informational
            # counts once the structural check above has passed.
            data = result.get("data") or {}
            entities_count = len(data.get("entities") or [])
            relations_count = len(data.get("relationships") or [])
            chunks_count = len(data.get("chunks") or [])
            logger.info(
                f"Retrieved: {entities_count} entities, {relations_count} relations, {chunks_count} chunks"
            )
            self.log_result(test_name, True, f"Status: {response.status_code}")
            return True
        except Exception as e:
            self.log_result(test_name, False, f"Error: {e}")
            return False

    async def test_graph_data(self):
        """Test graph data retrieval endpoint (``GET /graph/label/list``).

        Returns:
            True when the server answers HTTP 200 and the body parses as
            JSON, False otherwise.
        """
        test_name = "Graph Data Retrieval"
        try:
            response = await self.client.get(f"{self.base_url}/graph/label/list")
            passed = response.status_code == 200

            if passed:
                result = response.json()
                # Result is a list of labels
                if isinstance(result, list):
                    logger.info(f"Graph contains {len(result)} unique labels")
                else:
                    logger.info(f"Graph data: {result}")

            self.log_result(test_name, passed, f"Status: {response.status_code}")
            return passed
        except Exception as e:
            self.log_result(test_name, False, f"Error: {e}")
            return False

    async def run_all_tests(self, cpp_repo_path: Path):
        """Run all integration tests.

        Sequence: wait for the server, health check, index every ``*.cpp``
        and ``*.h`` file under *cpp_repo_path* (plus ``README.md`` if
        present), run the query tests, the query/data tests, the graph test,
        then log a summary.

        Args:
            cpp_repo_path: Root directory of the sample C++ repository.

        Returns:
            True when no recorded test failed; False if any failed or the
            server never became ready.
        """
        logger.info("=" * 80)
        logger.info("Starting LightRAG Integration Tests")
        logger.info("=" * 80)

        # Wait for server to be ready
        if not await self.wait_for_server():
            logger.error("Server not ready. Aborting tests.")
            return False

        # Test 1: Health check
        await self.test_health_endpoint()

        # Test 2: Index C++ files
        logger.info("\n--- Testing Document Indexing ---")
        cpp_files = list(cpp_repo_path.glob("**/*.cpp")) + list(
            cpp_repo_path.glob("**/*.h")
        )
        for cpp_file in cpp_files:
            if cpp_file.is_file():
                await self.test_insert_file(cpp_file)
                # Small delay between uploads to avoid overwhelming server
                await asyncio.sleep(0.5)

        # Also insert the README
        readme_file = cpp_repo_path / "README.md"
        if readme_file.exists():
            await self.test_insert_file(readme_file)

        # Wait a bit for indexing to complete
        logger.info("Waiting for indexing to complete...")
        await asyncio.sleep(5)

        # Test 3: Query operations
        logger.info("\n--- Testing Query Operations ---")
        test_queries = [
            ("What is the Calculator class?", "hybrid"),
            ("Describe the main function", "local"),
            ("What mathematical operations are supported?", "global"),
            ("How does the power function work?", "naive"),
        ]

        for query, mode in test_queries:
            await self.test_query(query, mode)
            await asyncio.sleep(1)  # Brief delay between queries

        # Test 4: Query with structured data
        logger.info("\n--- Testing Query Data Endpoint ---")
        await self.test_query_with_data(
            "What classes are defined in the code?", "hybrid"
        )
        await self.test_query_with_data("List all functions", "local")

        # Test 5: Graph data retrieval
        logger.info("\n--- Testing Graph Retrieval ---")
        await self.test_graph_data()

        # Print summary
        logger.info("\n" + "=" * 80)
        logger.info("Test Summary")
        logger.info("=" * 80)

        total = len(self.test_results)
        passed = sum(1 for r in self.test_results if r["passed"])
        failed = total - passed

        logger.info(f"Total Tests: {total}")
        logger.info(f"Passed: {passed} ✅")
        logger.info(f"Failed: {failed} ❌")

        if failed > 0:
            logger.info("\nFailed Tests:")
            for result in self.test_results:
                if not result["passed"]:
                    logger.info(f"  - {result['test']}: {result['message']}")

        return failed == 0


async def main():
    """Run the integration suite against the bundled sample repository.

    The sample C++ repository is expected in ``sample_cpp_repo`` next to
    this script. The server URL is read from the ``LIGHTRAG_API_URL``
    environment variable, defaulting to ``http://localhost:9621``.

    Returns:
        Process exit code: 0 when every test passed, 1 when the sample
        repository is missing or any test failed.
    """
    sample_repo = Path(__file__).parent / "sample_cpp_repo"
    if not sample_repo.exists():
        logger.error(f"Sample C++ repository not found at {sample_repo}")
        return 1

    # Environment override first, local default otherwise.
    server_url = os.getenv("LIGHTRAG_API_URL", "http://localhost:9621")

    # The context manager closes the HTTP client once the run is over.
    async with IntegrationTestRunner(server_url) as runner:
        all_passed = await runner.run_all_tests(sample_repo)

    if all_passed:
        return 0
    return 1


if __name__ == "__main__":
    # Propagate the suite's verdict (0 = all passed, 1 = failure) to the shell.
    sys.exit(asyncio.run(main()))