LightRAG/lightrag/tools/clean_llm_query_cache.py

#!/usr/bin/env python3
"""
LLM Query Cache Cleanup Tool for LightRAG

This tool cleans up LLM query cache (mix:*, hybrid:*, local:*, global:*)
from PGKVStorage while preserving workspace isolation.

Usage:
    python -m lightrag.tools.clean_llm_query_cache
    # or
    python lightrag/tools/clean_llm_query_cache.py

Supported KV Storage Types:
    - PGKVStorage
"""

import asyncio
import contextlib
import os
import sys
import time
from dataclasses import dataclass, field
from typing import Any, cast

from dotenv import load_dotenv

# Add project root to path for imports
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))

from lightrag.kg import STORAGE_ENV_REQUIREMENTS
from lightrag.namespace import NameSpace
from lightrag.utils import setup_logger

# Load environment variables
load_dotenv(dotenv_path='.env', override=False)

# Setup logger
setup_logger('lightrag', level='INFO')

# Storage type configurations
STORAGE_TYPES = {
    '1': 'PGKVStorage',
}

# Workspace environment variable mapping
WORKSPACE_ENV_MAP = {
    'PGKVStorage': 'POSTGRES_WORKSPACE',
}

# Query cache modes
QUERY_MODES = ['mix', 'hybrid', 'local', 'global']

# Query cache types
CACHE_TYPES = ['query', 'keywords']

# Default batch size for deletion
DEFAULT_BATCH_SIZE = 1000

# ANSI color codes for terminal output
BOLD_CYAN = '\033[1;36m'
BOLD_RED = '\033[1;31m'
BOLD_GREEN = '\033[1;32m'
RESET = '\033[0m'


@dataclass
class CleanupStats:
    """Cleanup statistics and error tracking"""

    # Count by mode and cache_type before cleanup
    counts_before: dict[str, dict[str, int]] = field(default_factory=dict)

    # Deletion statistics
    total_to_delete: int = 0
    total_batches: int = 0
    successful_batches: int = 0
    failed_batches: int = 0
    successfully_deleted: int = 0
    failed_to_delete: int = 0

    # Count by mode and cache_type after cleanup
    counts_after: dict[str, dict[str, int]] = field(default_factory=dict)

    # Error tracking
    errors: list[dict[str, Any]] = field(default_factory=list)

    def add_error(self, batch_idx: int, error: Exception, batch_size: int):
        """Record batch error"""
        self.errors.append(
            {
                'batch': batch_idx,
                'error_type': type(error).__name__,
                'error_msg': str(error),
                'records_lost': batch_size,
                'timestamp': time.time(),
            }
        )
        self.failed_batches += 1
        self.failed_to_delete += batch_size

    def initialize_counts(self):
        """Initialize count dictionaries"""
        for mode in QUERY_MODES:
            self.counts_before[mode] = {'query': 0, 'keywords': 0}
            self.counts_after[mode] = {'query': 0, 'keywords': 0}


class CleanupTool:
    """LLM Query Cache Cleanup Tool"""

    def __init__(self):
        self.storage = None
        self.workspace = ''
        self.batch_size = DEFAULT_BATCH_SIZE

    def get_workspace_for_storage(self, storage_name: str) -> str:
        """Get workspace for a specific storage type

        Priority: Storage-specific env var > WORKSPACE env var > empty string

        Args:
            storage_name: Storage implementation name

        Returns:
            Workspace name
        """
        # Check storage-specific workspace
        if storage_name in WORKSPACE_ENV_MAP:
            specific_workspace = os.getenv(WORKSPACE_ENV_MAP[storage_name])
            if specific_workspace:
                return specific_workspace

        # Check generic WORKSPACE
        workspace = os.getenv('WORKSPACE', '')
        return workspace

    def check_config_ini_for_storage(self, storage_name: str) -> bool:
        """Check if config.ini has configuration for the storage type

        Args:
            storage_name: Storage implementation name

        Returns:
            True if config.ini has the necessary configuration
        """
        try:
            import configparser

            config = configparser.ConfigParser()
            config.read('config.ini', 'utf-8')

            if storage_name == 'PGKVStorage':
                return (
                    config.has_option('postgres', 'user')
                    and config.has_option('postgres', 'password')
                    and config.has_option('postgres', 'database')
                )

            return False
        except Exception:
            return False

    def check_env_vars(self, storage_name: str) -> bool:
        """Check environment variables, show warnings if missing but don't fail

        Args:
            storage_name: Storage implementation name

        Returns:
            Always returns True (warnings only, no hard failure)
        """
        required_vars = STORAGE_ENV_REQUIREMENTS.get(storage_name, [])

        if not required_vars:
            print('✓ No environment variables required')
            return True

        missing_vars = [var for var in required_vars if var not in os.environ]

        if missing_vars:
            print(f'⚠️  Warning: Missing environment variables: {", ".join(missing_vars)}')

            # Check if config.ini has configuration
            has_config = self.check_config_ini_for_storage(storage_name)
            if has_config:
                print('   ✓ Found configuration in config.ini')
            else:
                print(f'   Will attempt to use defaults for {storage_name}')

            return True

        print('✓ All required environment variables are set')
        return True

    def get_storage_class(self, storage_name: str):
        """Dynamically import and return storage class

        Args:
            storage_name: Storage implementation name

        Returns:
            Storage class
        """
        if storage_name == 'PGKVStorage':
            from lightrag.kg.postgres_impl import PGKVStorage

            return PGKVStorage
        else:
            raise ValueError(f'Unsupported storage type: {storage_name}')

    async def initialize_storage(self, storage_name: str, workspace: str):
        """Initialize storage instance with fallback to config.ini and defaults

        Args:
            storage_name: Storage implementation name
            workspace: Workspace name

        Returns:
            Initialized storage instance

        Raises:
            Exception: If initialization fails
        """
        storage_class = self.get_storage_class(storage_name)

        # Create global config
        global_config = {
            'working_dir': os.getenv('WORKING_DIR', './rag_storage'),
            'embedding_batch_num': 10,
        }

        # Initialize storage
        storage = storage_class(
            namespace=NameSpace.KV_STORE_LLM_RESPONSE_CACHE,
            workspace=workspace,
            global_config=global_config,
            embedding_func=cast(Any, None),
        )

        # Initialize the storage (may raise exception if connection fails)
        await storage.initialize()

        return storage

    async def count_query_caches_pg(self, storage) -> dict[str, dict[str, int]]:
        """Count query caches in PostgreSQL by mode and cache_type

        Args:
            storage: PGKVStorage instance

        Returns:
            Dictionary with counts for each mode and cache_type
        """
        from lightrag.kg.postgres_impl import namespace_to_table_name

        counts = {mode: {'query': 0, 'keywords': 0} for mode in QUERY_MODES}
        table_name = namespace_to_table_name(storage.namespace)

        print('Counting PostgreSQL records...', end='', flush=True)
        start_time = time.time()

        for mode in QUERY_MODES:
            for cache_type in CACHE_TYPES:
                query = f"""
                    SELECT COUNT(*) as count
                    FROM {table_name}
                    WHERE workspace = $1
                    AND id LIKE $2
                """
                pattern = f'{mode}:{cache_type}:%'
                result = await storage.db.query(query, [storage.workspace, pattern])
                counts[mode][cache_type] = result['count'] if result else 0

        elapsed = time.time() - start_time
        if elapsed > 1:
            print(f' (took {elapsed:.1f}s)', end='')
        print()  # New line

        return counts

    async def count_query_caches(self, storage, storage_name: str) -> dict[str, dict[str, int]]:
        """Count query caches from any storage type efficiently

        Args:
            storage: Storage instance
            storage_name: Storage type name

        Returns:
            Dictionary with counts for each mode and cache_type
        """
        if storage_name == 'PGKVStorage':
            return await self.count_query_caches_pg(storage)
        else:
            raise ValueError(f'Unsupported storage type: {storage_name}')

    async def delete_query_caches_pg(self, storage, cleanup_type: str, stats: CleanupStats):
        """Delete query caches from PostgreSQL

        Args:
            storage: PGKVStorage instance
            cleanup_type: 'all', 'query', or 'keywords'
            stats: CleanupStats object to track progress
        """
        from lightrag.kg.postgres_impl import namespace_to_table_name

        table_name = namespace_to_table_name(storage.namespace)

        # Build WHERE conditions
        conditions = []
        for mode in QUERY_MODES:
            if cleanup_type == 'all':
                conditions.append(f"id LIKE '{mode}:query:%'")
                conditions.append(f"id LIKE '{mode}:keywords:%'")
            elif cleanup_type == 'query':
                conditions.append(f"id LIKE '{mode}:query:%'")
            elif cleanup_type == 'keywords':
                conditions.append(f"id LIKE '{mode}:keywords:%'")

        where_clause = ' OR '.join(conditions)

        print('\n=== Starting Cleanup ===')
        print('💡 Executing PostgreSQL DELETE query\n')

        try:
            query = f"""
                DELETE FROM {table_name}
                WHERE workspace = $1
                AND ({where_clause})
            """

            start_time = time.time()
            # Fix: Pass dict instead of list for execute() method
            await storage.db.execute(query, {'workspace': storage.workspace})
            elapsed = time.time() - start_time

            # PostgreSQL returns deletion count
            stats.total_batches = 1
            stats.successful_batches = 1
            stats.successfully_deleted = stats.total_to_delete

            print(f'✓ Deleted {stats.successfully_deleted:,} records in {elapsed:.2f}s')

        except Exception as e:
            stats.add_error(1, e, stats.total_to_delete)
            print(f'✗ DELETE failed: {type(e).__name__}: {e!s}')

    async def delete_query_caches(self, storage, storage_name: str, cleanup_type: str, stats: CleanupStats):
        """Delete query caches from any storage type

        Args:
            storage: Storage instance
            storage_name: Storage type name
            cleanup_type: 'all', 'query', or 'keywords'
            stats: CleanupStats object to track progress
        """
        if storage_name == 'PGKVStorage':
            await self.delete_query_caches_pg(storage, cleanup_type, stats)
        else:
            raise ValueError(f'Unsupported storage type: {storage_name}')

    def print_header(self):
        """Print tool header"""
        print('\n' + '=' * 60)
        print('LLM Query Cache Cleanup Tool - LightRAG')
        print('=' * 60)

    def print_storage_types(self):
        """Print available storage types"""
        print('\nSupported KV Storage Types:')
        for key, value in STORAGE_TYPES.items():
            print(f'[{key}] {value}')

    def format_workspace(self, workspace: str) -> str:
        """Format workspace name with highlighting

        Args:
            workspace: Workspace name (may be empty)

        Returns:
            Formatted workspace string with ANSI color codes
        """
        if workspace:
            return f'{BOLD_CYAN}{workspace}{RESET}'
        else:
            return f'{BOLD_CYAN}(default){RESET}'

    def print_cache_statistics(self, counts: dict[str, dict[str, int]], title: str):
        """Print cache statistics in a formatted table

        Args:
            counts: Dictionary with counts for each mode and cache_type
            title: Title for the statistics display
        """
        print(f'\n{title}')
        print('┌' + '─' * 12 + '┬' + '─' * 12 + '┬' + '─' * 12 + '┬' + '─' * 12 + '┐')
        print(f'│ {"Mode":<10} │ {"Query":>10} │ {"Keywords":>10} │ {"Total":>10} │')
        print('├' + '─' * 12 + '┼' + '─' * 12 + '┼' + '─' * 12 + '┼' + '─' * 12 + '┤')

        total_query = 0
        total_keywords = 0

        for mode in QUERY_MODES:
            query_count = counts[mode]['query']
            keywords_count = counts[mode]['keywords']
            mode_total = query_count + keywords_count

            total_query += query_count
            total_keywords += keywords_count

            print(f'│ {mode:<10} │ {query_count:>10,} │ {keywords_count:>10,} │ {mode_total:>10,} │')

        print('├' + '─' * 12 + '┼' + '─' * 12 + '┼' + '─' * 12 + '┼' + '─' * 12 + '┤')
        grand_total = total_query + total_keywords
        print(f'│ {"Total":<10} │ {total_query:>10,} │ {total_keywords:>10,} │ {grand_total:>10,} │')
        print('└' + '─' * 12 + '┴' + '─' * 12 + '┴' + '─' * 12 + '┴' + '─' * 12 + '┘')

    def calculate_total_to_delete(self, counts: dict[str, dict[str, int]], cleanup_type: str) -> int:
        """Calculate total number of records to delete

        Args:
            counts: Dictionary with counts for each mode and cache_type
            cleanup_type: 'all', 'query', or 'keywords'

        Returns:
            Total number of records to delete
        """
        total = 0
        for mode in QUERY_MODES:
            if cleanup_type == 'all':
                total += counts[mode]['query'] + counts[mode]['keywords']
            elif cleanup_type == 'query':
                total += counts[mode]['query']
            elif cleanup_type == 'keywords':
                total += counts[mode]['keywords']
        return total

    def print_cleanup_report(self, stats: CleanupStats):
        """Print comprehensive cleanup report

        Args:
            stats: CleanupStats object with cleanup results
        """
        print('\n' + '=' * 60)
        print('Cleanup Complete - Final Report')
        print('=' * 60)

        # Overall statistics
        print('\n📊 Statistics:')
        print(f'  Total records to delete:  {stats.total_to_delete:,}')
        print(f'  Total batches:            {stats.total_batches:,}')
        print(f'  Successful batches:       {stats.successful_batches:,}')
        print(f'  Failed batches:           {stats.failed_batches:,}')
        print(f'  Successfully deleted:     {stats.successfully_deleted:,}')
        print(f'  Failed to delete:         {stats.failed_to_delete:,}')

        # Success rate
        success_rate = (stats.successfully_deleted / stats.total_to_delete * 100) if stats.total_to_delete > 0 else 0
        print(f'  Success rate:             {success_rate:.2f}%')

        # Before/After comparison
        print('\n📈 Before/After Comparison:')
        total_before = sum(counts['query'] + counts['keywords'] for counts in stats.counts_before.values())
        total_after = sum(counts['query'] + counts['keywords'] for counts in stats.counts_after.values())
        print(f'  Total caches before:      {total_before:,}')
        print(f'  Total caches after:       {total_after:,}')
        print(f'  Net reduction:            {total_before - total_after:,}')

        # Error details
        if stats.errors:
            print(f'\n⚠️  Errors encountered: {len(stats.errors)}')
            print('\nError Details:')
            print('-' * 60)

            # Group errors by type
            error_types = {}
            for error in stats.errors:
                err_type = error['error_type']
                error_types[err_type] = error_types.get(err_type, 0) + 1

            print('\nError Summary:')
            for err_type, count in sorted(error_types.items(), key=lambda x: -x[1]):
                print(f'  - {err_type}: {count} occurrence(s)')

            print('\nFirst 5 errors:')
            for i, error in enumerate(stats.errors[:5], 1):
                print(f'\n  {i}. Batch {error["batch"]}')
                print(f'     Type: {error["error_type"]}')
                print(f'     Message: {error["error_msg"]}')
                print(f'     Records lost: {error["records_lost"]:,}')

            if len(stats.errors) > 5:
                print(f'\n  ... and {len(stats.errors) - 5} more errors')

            print('\n' + '=' * 60)
            print(f'{BOLD_RED}⚠️  WARNING: Cleanup completed with errors!{RESET}')
            print('   Please review the error details above.')
            print('=' * 60)
        else:
            print('\n' + '=' * 60)
            print(f'{BOLD_GREEN}✓ SUCCESS: All records cleaned up successfully!{RESET}')
            print('=' * 60)

    async def setup_storage(self) -> tuple:
        """Setup and initialize storage

        Returns:
            Tuple of (storage_instance, storage_name, workspace)
            Returns (None, None, None) if user chooses to exit
        """
        print('\n=== Storage Setup ===')
        self.print_storage_types()

        # Custom input handling with exit support
        while True:
            choice = input('\nSelect storage type (1) (Press Enter to exit): ').strip()

            # Check for exit
            if choice == '' or choice == '0':
                print('\n✓ Cleanup cancelled by user')
                return None, None, None

            # Check if choice is valid
            if choice in STORAGE_TYPES:
                break

            print(f'✗ Invalid choice. Please enter one of: {", ".join(STORAGE_TYPES.keys())}')

        storage_name = STORAGE_TYPES[choice]

        # Check configuration (warnings only, doesn't block)
        print('\nChecking configuration...')
        self.check_env_vars(storage_name)

        # Get workspace
        workspace = self.get_workspace_for_storage(storage_name)

        # Initialize storage (real validation point)
        print('\nInitializing storage...')
        try:
            storage = await self.initialize_storage(storage_name, workspace)
            print(f'- Storage Type: {storage_name}')
            print(f'- Workspace: {workspace if workspace else "(default)"}')
            print('- Connection Status: ✓ Success')

        except Exception as e:
            print(f'✗ Initialization failed: {e}')
            print(f'\nFor {storage_name}, you can configure using:')
            print('  1. Environment variables (highest priority)')

            # Show specific environment variable requirements
            if storage_name in STORAGE_ENV_REQUIREMENTS:
                for var in STORAGE_ENV_REQUIREMENTS[storage_name]:
                    print(f'     - {var}')

            print('  2. config.ini file (medium priority)')
            if storage_name == 'PGKVStorage':
                print('     [postgres]')
                print('     host = localhost')
                print('     port = 5432')
                print('     user = postgres')
                print('     password = yourpassword')
                print('     database = lightrag')

            return None, None, None

        return storage, storage_name, workspace

    async def run(self):
        """Run the cleanup tool"""
        try:
            # Initialize shared storage (REQUIRED for storage classes to work)
            from lightrag.kg.shared_storage import initialize_share_data

            initialize_share_data(workers=1)

            # Print header
            self.print_header()

            # Setup storage
            self.storage, storage_name, self.workspace = await self.setup_storage()

            # Check if user cancelled
            if self.storage is None:
                return

            # Count query caches
            print('\nCounting query cache records...')
            try:
                counts = await self.count_query_caches(self.storage, storage_name)
            except Exception as e:
                print(f'✗ Counting failed: {e}')
                await self.storage.finalize()
                return

            # Initialize stats
            stats = CleanupStats()
            stats.initialize_counts()
            stats.counts_before = counts

            # Print statistics
            self.print_cache_statistics(counts, '📊 Query Cache Statistics (Before Cleanup):')

            # Calculate total
            total_caches = sum(counts[mode]['query'] + counts[mode]['keywords'] for mode in QUERY_MODES)

            if total_caches == 0:
                print('\n⚠️  No query caches found in storage')
                await self.storage.finalize()
                return

            # Select cleanup type
            print('\n=== Cleanup Options ===')
            print('[1] Delete all query caches (both query and keywords)')
            print('[2] Delete query caches only (keep keywords)')
            print('[3] Delete keywords caches only (keep query)')
            print('[0] Cancel')

            while True:
                choice = input('\nSelect cleanup option (0-3): ').strip()

                if choice == '0' or choice == '':
                    print('\n✓ Cleanup cancelled')
                    await self.storage.finalize()
                    return
                elif choice == '1':
                    cleanup_type = 'all'
                elif choice == '2':
                    cleanup_type = 'query'
                elif choice == '3':
                    cleanup_type = 'keywords'
                else:
                    print('✗ Invalid choice. Please enter 0, 1, 2, or 3')
                    continue

                # Calculate total to delete for the selected type
                stats.total_to_delete = self.calculate_total_to_delete(counts, cleanup_type)

                # Check if there are any records to delete
                if stats.total_to_delete == 0:
                    if cleanup_type == 'all':
                        print(f'\n{BOLD_RED}⚠️  No query caches found to delete!{RESET}')
                    elif cleanup_type == 'query':
                        print(f'\n{BOLD_RED}⚠️  No query caches found to delete! (Only keywords exist){RESET}')
                    elif cleanup_type == 'keywords':
                        print(f'\n{BOLD_RED}⚠️  No keywords caches found to delete! (Only query caches exist){RESET}')
                    print('   Please select a different cleanup option.\n')
                    continue

                # Valid selection with records to delete
                break

            # Confirm deletion
            print('\n' + '=' * 60)
            print('Cleanup Confirmation')
            print('=' * 60)
            print(f'Storage: {BOLD_CYAN}{storage_name}{RESET} (workspace: {self.format_workspace(self.workspace)})')
            print(f'Cleanup Type: {BOLD_CYAN}{cleanup_type}{RESET}')
            print(f'Records to Delete: {BOLD_RED}{stats.total_to_delete:,}{RESET} / {total_caches:,}')

            if cleanup_type == 'all':
                print(f'\n{BOLD_RED}⚠️  WARNING: This will delete ALL query caches across all modes!{RESET}')
            elif cleanup_type == 'query':
                print('\n⚠️  This will delete query caches only (keywords will be kept)')
            elif cleanup_type == 'keywords':
                print('\n⚠️  This will delete keywords caches only (query will be kept)')

            confirm = input('\nContinue with deletion? (y/n): ').strip().lower()
            if confirm != 'y':
                print('\n✓ Cleanup cancelled')
                await self.storage.finalize()
                return

            # Perform deletion
            await self.delete_query_caches(self.storage, storage_name, cleanup_type, stats)

            # Persist changes
            print('\nPersisting changes to storage...')
            try:
                await self.storage.index_done_callback()
                print('✓ Changes persisted successfully')
            except Exception as e:
                print(f'✗ Persist failed: {e}')
                stats.add_error(0, e, 0)

            # Count again to verify
            print('\nVerifying cleanup results...')
            try:
                stats.counts_after = await self.count_query_caches(self.storage, storage_name)
            except Exception as e:
                print(f'⚠️  Verification failed: {e}')
                # Use zero counts if verification fails
                stats.counts_after = {mode: {'query': 0, 'keywords': 0} for mode in QUERY_MODES}

            # Print final report
            self.print_cleanup_report(stats)

            # Print after statistics
            self.print_cache_statistics(stats.counts_after, '\n📊 Query Cache Statistics (After Cleanup):')

            # Cleanup
            await self.storage.finalize()

        except KeyboardInterrupt:
            print('\n\n✗ Cleanup interrupted by user')
        except Exception as e:
            print(f'\n✗ Cleanup failed: {e}')
            import traceback

            traceback.print_exc()
        finally:
            # Ensure cleanup
            if self.storage:
                with contextlib.suppress(Exception):
                    await self.storage.finalize()

            # Finalize shared storage
            try:
                from lightrag.kg.shared_storage import finalize_share_data

                finalize_share_data()
            except Exception:
                pass


async def async_main():
    """Async main entry point"""
    tool = CleanupTool()
    await tool.run()


def main():
    """Synchronous entry point for CLI command"""
    asyncio.run(async_main())


if __name__ == '__main__':
    main()