Add comprehensive test suites for prompt evaluation:
- test_prompt_accuracy.py: 365 lines testing prompt extraction accuracy
- test_prompt_quality_deep.py: 672 lines for deep quality analysis
- Refactor prompt.py to consolidate optimized variants (removed prompt_optimized.py)
- Apply ruff formatting and type hints across 30 files
- Update pyrightconfig.json for static type checking
- Modernize reproduce scripts and examples with improved type annotations
- Sync uv.lock dependencies
"""
|
|
Download all necessary cache files for offline deployment.
|
|
|
|
This module provides a CLI command to download tiktoken model cache files
|
|
for offline environments where internet access is not available.
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
|
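
# A minimal offline-usage sketch (illustrative, not part of this module's API;
# the cache path below is a placeholder). Once the cache is populated, a
# consumer only needs TIKTOKEN_CACHE_DIR pointed at it before loading an
# encoding:
#
#     import os
#     os.environ['TIKTOKEN_CACHE_DIR'] = '/srv/tiktoken_cache'
#     import tiktoken
#     enc = tiktoken.encoding_for_model('gpt-4o-mini')
#     enc.encode('hello')  # served from the local cache, no network needed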


def download_tiktoken_cache(
    cache_dir: str | None = None,
    models: list[str] | None = None,
) -> tuple[int, list[tuple[str, str]]]:
    """Download tiktoken models to local cache.

    Args:
        cache_dir: Directory to store the cache files. If None, uses default location.
        models: List of model names to download. If None, downloads common models.

    Returns:
        Tuple of (success_count, failed_models)
    """
    try:
        import tiktoken
    except ImportError:
        print('Error: tiktoken is not installed.')
        print('Install with: pip install tiktoken')
        sys.exit(1)

    # Set cache directory if provided
    if cache_dir:
        cache_dir = os.path.abspath(cache_dir)
        os.environ['TIKTOKEN_CACHE_DIR'] = cache_dir
        cache_path = Path(cache_dir)
        cache_path.mkdir(parents=True, exist_ok=True)
        print(f'Using cache directory: {cache_dir}')
    else:
        cache_dir = os.environ.get('TIKTOKEN_CACHE_DIR', str(Path.home() / '.tiktoken_cache'))
        # Without TIKTOKEN_CACHE_DIR set, tiktoken falls back to an internal
        # temporary directory, so point it at the default location explicitly.
        os.environ['TIKTOKEN_CACHE_DIR'] = cache_dir
        Path(cache_dir).mkdir(parents=True, exist_ok=True)
        print(f'Using default cache directory: {cache_dir}')

    # Common models used by LightRAG and OpenAI
    if models is None:
        models = [
            'gpt-4o-mini',  # Default model for LightRAG
            'gpt-4o',  # GPT-4 Omni
            'gpt-4',  # GPT-4
            'gpt-3.5-turbo',  # GPT-3.5 Turbo
            'text-embedding-ada-002',  # Legacy embedding model
            'text-embedding-3-small',  # Small embedding model
            'text-embedding-3-large',  # Large embedding model
        ]
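        # Note: several of these models share a vocabulary file (gpt-4,
        # gpt-3.5-turbo, and the text-embedding-* models map to cl100k_base;
        # gpt-4o and gpt-4o-mini map to o200k_base), so the default list
        # results in only two files actually being fetched.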

    print(f'\nDownloading {len(models)} tiktoken models...')
    print('=' * 70)

    success_count = 0
    failed_models: list[tuple[str, str]] = []

    for i, model in enumerate(models, 1):
        try:
            print(f'[{i}/{len(models)}] Downloading {model}...', end=' ', flush=True)
            encoding = tiktoken.encoding_for_model(model)
            # Loading the encoding fetches the vocabulary file into the cache;
            # encoding a test string verifies it is usable
            encoding.encode('test')
            print('✓ Done')
            success_count += 1
        except KeyError as e:
            print(f"✗ Failed: Unknown model '{model}'")
            failed_models.append((model, str(e)))
        except Exception as e:
            print(f'✗ Failed: {e}')
            failed_models.append((model, str(e)))

    print('=' * 70)
    print(f'\n✓ Successfully cached {success_count}/{len(models)} models')

    if failed_models:
        print(f'\n✗ Failed to download {len(failed_models)} models:')
        for model, error in failed_models:
            print(f'  - {model}: {error}')

    print(f'\nCache location: {cache_dir}')
    print('\nFor offline deployment:')
    print('  1. Copy directory to offline server:')
    print(f'     tar -czf tiktoken_cache.tar.gz {cache_dir}')
    print('     scp tiktoken_cache.tar.gz user@offline-server:/path/to/')
    print('')
    print('  2. On offline server, extract and set environment variable:')
    print('     tar -xzf tiktoken_cache.tar.gz')
    print('     export TIKTOKEN_CACHE_DIR=/path/to/tiktoken_cache')
    print('')
    print('  3. Or copy the cache contents to the default location:')
    print(f'     cp -r {cache_dir}/. ~/.tiktoken_cache/')

    return success_count, failed_models
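
# Programmatic-use sketch (hedged; the return shape is as documented above,
# and the directory is illustrative):
#
#     count, failed = download_tiktoken_cache('./offline_cache/tiktoken')
#     for model, error in failed:
#         print(f'could not cache {model}: {error}')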


def main():
    """Main entry point for the CLI command"""
    import argparse

    parser = argparse.ArgumentParser(
        prog='lightrag-download-cache',
        description='Download cache files for LightRAG offline deployment',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Download to default location (~/.tiktoken_cache)
  lightrag-download-cache

  # Download to specific directory
  lightrag-download-cache --cache-dir ./offline_cache/tiktoken

  # Download specific models only
  lightrag-download-cache --models gpt-4o-mini gpt-4

For more information, visit: https://github.com/HKUDS/LightRAG
""",
    )

    parser.add_argument(
        '--cache-dir',
        help='Cache directory path (default: ~/.tiktoken_cache)',
        default=None,
    )
    parser.add_argument(
        '--models',
        nargs='+',
        help='Specific models to download (default: common models)',
        default=None,
    )
    parser.add_argument('--version', action='version', version='%(prog)s (LightRAG cache downloader)')

    args = parser.parse_args()

    print('=' * 70)
    print('LightRAG Offline Cache Downloader')
    print('=' * 70)

    try:
        success_count, failed_models = download_tiktoken_cache(args.cache_dir, args.models)

        print('\n' + '=' * 70)
        print('Download Complete')
        print('=' * 70)

        # Exit with error code if all downloads failed
        if success_count == 0:
            print('\n✗ All downloads failed. Please check your internet connection.')
            sys.exit(1)
        # Exit with warning code if some downloads failed
        elif failed_models:
            print(f'\n⚠ Some downloads failed ({len(failed_models)}/{success_count + len(failed_models)})')
            sys.exit(2)
        else:
            print('\n✓ All cache files downloaded successfully!')
            sys.exit(0)

    except KeyboardInterrupt:
        print('\n\n✗ Download interrupted by user')
        sys.exit(130)
    except Exception as e:
        print(f'\n\n✗ Error: {e}')
        import traceback

        traceback.print_exc()
        sys.exit(1)
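

# Exit-code contract implemented by main(): 0 = all models cached, 1 = all
# downloads failed, 2 = partial failure, 130 = interrupted by the user.
# A wrapper can branch on it; a sketch (the script path is illustrative):
#
#     import subprocess, sys
#     rc = subprocess.run([sys.executable, 'download_cache.py']).returncode
#     if rc == 2:
#         print('partial cache; re-run to retry the failed models')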


if __name__ == '__main__':
    main()