""" Tenant-aware LightRAG instance manager with caching and isolation. This module manages per-tenant and per-knowledge-base LightRAG instances, handling initialization, caching, cleanup, and proper isolation between tenants. """ from typing import Dict, Optional, Tuple from lightrag import LightRAG from lightrag.services.tenant_service import TenantService from lightrag.utils import logger from lightrag.security import validate_identifier, validate_working_directory import asyncio import os class TenantRAGManager: """ Manages LightRAG instances per tenant/KB combination with caching and isolation. Features: - Automatic instance caching to avoid repeated initialization - Per-tenant isolation through separate working directories - Configurable max cached instances (LRU eviction) - Async-safe initialization with double-check locking - Proper resource cleanup on instance removal """ def __init__( self, base_working_dir: str, tenant_service: TenantService, template_rag: Optional[LightRAG] = None, max_cached_instances: int = 100, ): """ Initialize the TenantRAGManager. Args: base_working_dir: Base directory for all tenant/KB data storage tenant_service: Service for retrieving tenant configuration template_rag: Template RAG instance to copy configuration from max_cached_instances: Maximum number of LightRAG instances to keep cached """ self.base_working_dir = base_working_dir self.tenant_service = tenant_service self.template_rag = template_rag self.max_cached_instances = max_cached_instances self._instances: Dict[Tuple[str, str], LightRAG] = {} self._lock = asyncio.Lock() self._access_order: list[Tuple[str, str]] = [] # Track access order for LRU logger.info( f"TenantRAGManager initialized with base_dir={base_working_dir}, " f"max_instances={max_cached_instances}, template_rag={template_rag is not None}" ) async def get_rag_instance( self, tenant_id: str, kb_id: str, user_id: Optional[str] = None, ) -> LightRAG: """ Get or create a LightRAG instance for a tenant/KB combination. This method implements double-check locking to avoid race conditions when multiple requests try to initialize the same instance concurrently. Instances are cached and reused across requests for the same tenant/KB. SECURITY: Validates user has access to requested tenant before returning instance. Args: tenant_id: The tenant ID (must be valid UUID) kb_id: The knowledge base ID (must be valid UUID) user_id: User identifier from JWT token (required for security validation) Returns: LightRAG: A properly initialized LightRAG instance for this tenant/KB Raises: ValueError: If the tenant does not exist or is inactive PermissionError: If user does not have access to the tenant HTTPException: If tenant_id or kb_id are invalid identifiers """ # SECURITY: Validate identifier format to prevent injection attacks tenant_id = validate_identifier(tenant_id, "tenant_id") kb_id = validate_identifier(kb_id, "kb_id") cache_key = (tenant_id, kb_id) # First check (fast path - no lock) if cache_key in self._instances: instance = self._instances[cache_key] # Update access order for LRU if cache_key in self._access_order: self._access_order.remove(cache_key) self._access_order.append(cache_key) logger.debug(f"Cache hit for tenant={tenant_id}, kb={kb_id}") return instance # Acquire lock for initialization async with self._lock: # Second check (double-check locking pattern) if cache_key in self._instances: instance = self._instances[cache_key] if cache_key in self._access_order: self._access_order.remove(cache_key) self._access_order.append(cache_key) logger.debug( f"Cache hit (after lock) for tenant={tenant_id}, kb={kb_id}" ) return instance logger.info(f"Creating new RAG instance for tenant={tenant_id}, kb={kb_id}") # Get tenant configuration tenant = await self.tenant_service.get_tenant(tenant_id) if not tenant or not tenant.is_active: raise ValueError(f"Tenant {tenant_id} not found or inactive") # SEC-003 FIX: Check if user authentication is required try: from lightrag.api.config import REQUIRE_USER_AUTH require_auth = REQUIRE_USER_AUTH except ImportError: require_auth = False # SECURITY: Verify user has access to this tenant if user_id: has_access = await self.tenant_service.verify_user_access( user_id, tenant_id ) if not has_access: logger.warning( f"Access denied: user={user_id} attempted to access tenant={tenant_id}" ) raise PermissionError(f"Access denied to tenant {tenant_id}") elif require_auth: logger.error( f"Access denied: user_id required but not provided for tenant={tenant_id}" ) raise PermissionError("User authentication required for tenant access") else: logger.warning( "No user_id provided for tenant access - allowing for backward compatibility" ) # SECURITY: Create and validate tenant-specific working directory # This prevents path traversal attacks tenant_working_dir, composite_workspace = validate_working_directory( self.base_working_dir, tenant_id, kb_id ) os.makedirs(tenant_working_dir, exist_ok=True) try: # Create LightRAG instance with tenant-specific configuration # Use template RAG configuration if available, otherwise use defaults if self.template_rag: # Copy configuration from template RAG instance = LightRAG( working_dir=tenant_working_dir, workspace=composite_workspace, llm_model_func=self.template_rag.llm_model_func, llm_model_name=self.template_rag.llm_model_name, llm_model_max_async=self.template_rag.llm_model_max_async, llm_model_kwargs=self.template_rag.llm_model_kwargs, embedding_func=self.template_rag.embedding_func, default_llm_timeout=self.template_rag.default_llm_timeout, default_embedding_timeout=self.template_rag.default_embedding_timeout, kv_storage=tenant.config.custom_metadata.get("kv_storage") or self.template_rag.kv_storage, vector_storage=tenant.config.custom_metadata.get( "vector_storage" ) or self.template_rag.vector_storage, graph_storage=tenant.config.custom_metadata.get("graph_storage") or self.template_rag.graph_storage, doc_status_storage=self.template_rag.doc_status_storage, vector_db_storage_cls_kwargs=self.template_rag.vector_db_storage_cls_kwargs, enable_llm_cache=self.template_rag.enable_llm_cache, enable_llm_cache_for_entity_extract=self.template_rag.enable_llm_cache_for_entity_extract, rerank_model_func=self.template_rag.rerank_model_func, chunk_token_size=self.template_rag.chunk_token_size, chunk_overlap_token_size=self.template_rag.chunk_overlap_token_size, max_parallel_insert=self.template_rag.max_parallel_insert, max_graph_nodes=self.template_rag.max_graph_nodes, addon_params=self.template_rag.addon_params, ollama_server_infos=getattr( self.template_rag, "ollama_server_infos", None ), # Override with tenant-specific settings top_k=tenant.config.top_k, chunk_top_k=getattr(tenant.config, "chunk_top_k", 40), cosine_threshold=tenant.config.cosine_threshold, ) else: # Fallback to basic configuration (will likely fail without embedding_func) instance = LightRAG( working_dir=tenant_working_dir, workspace=composite_workspace, kv_storage=tenant.config.custom_metadata.get( "kv_storage", "JsonKVStorage" ), vector_storage=tenant.config.custom_metadata.get( "vector_storage", "NanoVectorDBStorage" ), graph_storage=tenant.config.custom_metadata.get( "graph_storage", "NetworkXStorage" ), top_k=tenant.config.top_k, chunk_top_k=getattr(tenant.config, "chunk_top_k", 40), cosine_threshold=tenant.config.cosine_threshold, ) # Initialize the instance's storages await instance.initialize_storages() # Check if we need to evict oldest instance if len(self._instances) >= self.max_cached_instances: # Evict least recently used instance if self._access_order: oldest_key = self._access_order.pop(0) if oldest_key in self._instances: logger.info( f"Evicting LRU instance: tenant={oldest_key[0]}, kb={oldest_key[1]}" ) try: await self._instances[oldest_key].finalize_storages() except Exception as e: logger.error(f"Error finalizing evicted instance: {e}") del self._instances[oldest_key] # Cache the instance self._instances[cache_key] = instance self._access_order.append(cache_key) logger.info( f"RAG instance created and cached for tenant={tenant_id}, kb={kb_id}" ) return instance except Exception as e: logger.error( f"Error creating RAG instance for tenant={tenant_id}, kb={kb_id}: {e}" ) raise async def cleanup_instance(self, tenant_id: str, kb_id: str) -> None: """ Clean up and remove a cached instance. This method should be called when a knowledge base is deleted or a tenant is removed to ensure proper resource cleanup. Args: tenant_id: The tenant ID kb_id: The knowledge base ID """ cache_key = (tenant_id, kb_id) async with self._lock: if cache_key in self._instances: logger.info( f"Cleaning up RAG instance for tenant={tenant_id}, kb={kb_id}" ) try: await self._instances[cache_key].finalize_storages() except Exception as e: logger.error(f"Error finalizing instance during cleanup: {e}") del self._instances[cache_key] if cache_key in self._access_order: self._access_order.remove(cache_key) async def cleanup_tenant_instances(self, tenant_id: str) -> None: """ Clean up all cached instances for a specific tenant. This method should be called when a tenant is deleted to ensure all its knowledge bases are properly cleaned up. Args: tenant_id: The tenant ID """ async with self._lock: keys_to_remove = [k for k in self._instances.keys() if k[0] == tenant_id] for key in keys_to_remove: logger.info( f"Cleaning up RAG instance for tenant={key[0]}, kb={key[1]}" ) try: await self._instances[key].finalize_storages() except Exception as e: logger.error( f"Error finalizing instance during tenant cleanup: {e}" ) del self._instances[key] if key in self._access_order: self._access_order.remove(key) async def cleanup_all(self) -> None: """ Clean up all cached instances. This should be called during application shutdown to ensure all resources are properly released. """ async with self._lock: logger.info(f"Cleaning up all {len(self._instances)} cached RAG instances") for key, instance in list(self._instances.items()): try: await instance.finalize_storages() except Exception as e: logger.error(f"Error finalizing instance {key}: {e}") self._instances.clear() self._access_order.clear() def get_instance_count(self) -> int: """Get the current number of cached instances.""" return len(self._instances) def get_cached_keys(self) -> list[Tuple[str, str]]: """Get all currently cached tenant/KB combinations.""" return list(self._instances.keys()) def __repr__(self) -> str: """String representation of the manager state.""" return ( f"TenantRAGManager(instances={len(self._instances)}, " f"max_cached={self.max_cached_instances})" )