LightRAG/lightrag/tools/download_cache.py
clssck dd1413f3eb test(lightrag,examples): add prompt accuracy and quality tests
Add comprehensive test suites for prompt evaluation:
- test_prompt_accuracy.py: 365 lines testing prompt extraction accuracy
- test_prompt_quality_deep.py: 672 lines for deep quality analysis
- Refactor prompt.py to consolidate optimized variants (removed prompt_optimized.py)
- Apply ruff formatting and type hints across 30 files
- Update pyrightconfig.json for static type checking
- Modernize reproduce scripts and examples with improved type annotations
- Sync uv.lock dependencies
2025-12-05 16:39:52 +01:00

171 lines
5.5 KiB
Python

"""
Download all necessary cache files for offline deployment.
This module provides a CLI command to download tiktoken model cache files
for offline environments where internet access is not available.
"""
import os
import sys
from pathlib import Path
def download_tiktoken_cache(cache_dir: str | None = None, models: list[str] | None = None):
    """Download tiktoken models to local cache

    Args:
        cache_dir: Directory to store the cache files. If None, uses default location.
        models: List of model names to download. If None, downloads common models.

    Returns:
        Tuple of (success_count, failed_models)
    """
    # tiktoken is an optional runtime dependency; bail out with a hint if absent.
    try:
        import tiktoken
    except ImportError:
        print('Error: tiktoken is not installed.')
        print('Install with: pip install tiktoken')
        sys.exit(1)

    # Point tiktoken at the target directory *before* any encoding is loaded,
    # so the downloaded BPE files land in the right place.
    if cache_dir:
        cache_dir = os.path.abspath(cache_dir)
        os.environ['TIKTOKEN_CACHE_DIR'] = cache_dir
        Path(cache_dir).mkdir(parents=True, exist_ok=True)
        print(f'Using cache directory: {cache_dir}')
    else:
        cache_dir = os.environ.get('TIKTOKEN_CACHE_DIR', str(Path.home() / '.tiktoken_cache'))
        print(f'Using default cache directory: {cache_dir}')

    # Default to the models LightRAG and OpenAI commonly rely on.
    if models is None:
        models = [
            'gpt-4o-mini',  # Default model for LightRAG
            'gpt-4o',  # GPT-4 Omni
            'gpt-4',  # GPT-4
            'gpt-3.5-turbo',  # GPT-3.5 Turbo
            'text-embedding-ada-002',  # Legacy embedding model
            'text-embedding-3-small',  # Small embedding model
            'text-embedding-3-large',  # Large embedding model
        ]

    total = len(models)
    print(f'\nDownloading {total} tiktoken models...')
    print('=' * 70)

    ok_count = 0
    failures: list[tuple[str, str]] = []
    for idx, name in enumerate(models, 1):
        print(f'[{idx}/{total}] Downloading {name}...', end=' ', flush=True)
        try:
            # Encoding a short probe string forces tiktoken to fetch the model file.
            tiktoken.encoding_for_model(name).encode('test')
        except KeyError as exc:
            # encoding_for_model raises KeyError for names tiktoken doesn't know.
            print(f"✗ Failed: Unknown model '{name}'")
            failures.append((name, str(exc)))
        except Exception as exc:
            # Best-effort: record the failure and keep going with the remaining models.
            print(f'✗ Failed: {exc}')
            failures.append((name, str(exc)))
        else:
            print('✓ Done')
            ok_count += 1

    print('=' * 70)
    print(f'\n✓ Successfully cached {ok_count}/{total} models')
    if failures:
        print(f'\n✗ Failed to download {len(failures)} models:')
        for name, reason in failures:
            print(f' - {name}: {reason}')

    # Operator instructions for moving the cache onto an air-gapped host.
    print(f'\nCache location: {cache_dir}')
    print('\nFor offline deployment:')
    print(' 1. Copy directory to offline server:')
    print(f' tar -czf tiktoken_cache.tar.gz {cache_dir}')
    print(' scp tiktoken_cache.tar.gz user@offline-server:/path/to/')
    print('')
    print(' 2. On offline server, extract and set environment variable:')
    print(' tar -xzf tiktoken_cache.tar.gz')
    print(' export TIKTOKEN_CACHE_DIR=/path/to/tiktoken_cache')
    print('')
    print(' 3. Or copy to default location:')
    print(f' cp -r {cache_dir} ~/.tiktoken_cache/')

    return ok_count, failures
def main():
    """Main entry point for the CLI command"""
    import argparse

    parser = argparse.ArgumentParser(
        prog='lightrag-download-cache',
        description='Download cache files for LightRAG offline deployment',
        # Raw formatting keeps the epilog's example layout intact.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
 # Download to default location (~/.tiktoken_cache)
 lightrag-download-cache
 # Download to specific directory
 lightrag-download-cache --cache-dir ./offline_cache/tiktoken
 # Download specific models only
 lightrag-download-cache --models gpt-4o-mini gpt-4
For more information, visit: https://github.com/HKUDS/LightRAG
""",
    )
    parser.add_argument(
        '--cache-dir',
        help='Cache directory path (default: ~/.tiktoken_cache)',
        default=None,
    )
    parser.add_argument(
        '--models',
        nargs='+',
        help='Specific models to download (default: common models)',
        default=None,
    )
    parser.add_argument('--version', action='version', version='%(prog)s (LightRAG cache downloader)')
    opts = parser.parse_args()

    banner = '=' * 70
    print(banner)
    print('LightRAG Offline Cache Downloader')
    print(banner)
    try:
        ok_count, failed = download_tiktoken_cache(opts.cache_dir, opts.models)

        print('\n' + banner)
        print('Download Complete')
        print(banner)

        if ok_count == 0:
            # Nothing succeeded: hard failure.
            print('\n✗ All downloads failed. Please check your internet connection.')
            sys.exit(1)
        if failed:
            # Partial success: distinct exit code so scripts can detect it.
            print(f'\n⚠ Some downloads failed ({len(failed)}/{ok_count + len(failed)})')
            sys.exit(2)
        print('\n✓ All cache files downloaded successfully!')
        sys.exit(0)
    except KeyboardInterrupt:
        # Conventional 128 + SIGINT exit status.
        print('\n\n✗ Download interrupted by user')
        sys.exit(130)
    except Exception as exc:
        print(f'\n\n✗ Error: {exc}')
        import traceback

        traceback.print_exc()
        sys.exit(1)
# Allow running this module directly as a script (in addition to the
# installed `lightrag-download-cache` console entry point).
if __name__ == '__main__':
    main()