# LightRAG/lightrag/models/tenant.py

"""Data models for tenant, knowledge base, and related configurations in LightRAG."""

from dataclasses import asdict, dataclass, field
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional
from uuid import uuid4


class Role(str, Enum):
    """User roles in the multi-tenant system.

    The enum mixes in ``str``, so every member compares equal to its string
    value (for example ``Role.ADMIN == "admin"``). The permissions granted to
    each role are listed in ``ROLE_PERMISSIONS``.
    """
    ADMIN = "admin"
    EDITOR = "editor"
    VIEWER = "viewer"
    # The only role whose value contains a colon and a hyphen.
    VIEWER_READONLY = "viewer:read-only"


class Permission(str, Enum):
    """Permissions in the multi-tenant system.

    The enum mixes in ``str``; each value has the form ``"<scope>:<action>"``
    where the scope is one of ``tenant``, ``kb``, ``document`` or ``query``.
    Member definition order is significant: ``ROLE_PERMISSIONS`` builds the
    admin grant list by iterating over this enum.
    """
    # Tenant-level permissions
    MANAGE_TENANT = "tenant:manage"
    MANAGE_MEMBERS = "tenant:manage_members"
    MANAGE_BILLING = "tenant:manage_billing"

    # KB-level permissions
    CREATE_KB = "kb:create"
    DELETE_KB = "kb:delete"
    MANAGE_KB = "kb:manage"

    # Document-level permissions
    CREATE_DOCUMENT = "document:create"
    UPDATE_DOCUMENT = "document:update"
    DELETE_DOCUMENT = "document:delete"
    READ_DOCUMENT = "document:read"

    # Query permissions
    RUN_QUERY = "query:run"
    # Grouped with the query permissions here although its value uses the
    # "kb:" scope prefix.
    ACCESS_KB = "kb:access"


# Role-to-permissions mapping.
# Each value is a list of permission *strings* (``Permission.<X>.value``).
# ADMIN is derived by iterating the Permission enum, so it always contains
# every member; the remaining roles spell out their grants explicitly.
ROLE_PERMISSIONS = {
    Role.ADMIN: [perm.value for perm in Permission],
    Role.EDITOR: [
        perm.value
        for perm in (
            Permission.CREATE_KB,
            Permission.DELETE_KB,
            Permission.CREATE_DOCUMENT,
            Permission.UPDATE_DOCUMENT,
            Permission.DELETE_DOCUMENT,
            Permission.READ_DOCUMENT,
            Permission.RUN_QUERY,
            Permission.ACCESS_KB,
        )
    ],
    Role.VIEWER: [
        perm.value
        for perm in (
            Permission.READ_DOCUMENT,
            Permission.RUN_QUERY,
            Permission.ACCESS_KB,
        )
    ],
    Role.VIEWER_READONLY: [
        perm.value
        for perm in (
            Permission.RUN_QUERY,
            Permission.ACCESS_KB,
        )
    ],
}


@dataclass
class ResourceQuota:
    """Resource limits for a tenant.

    A plain value holder: it declares the limits and their default values but
    contains no enforcement logic of its own.
    """
    # Tenant-wide limits.
    max_documents: int = 10000
    # Expressed in gigabytes (per the field name), whereas the usage counters
    # Tenant.total_storage_mb and KnowledgeBase.storage_used_mb are in megabytes.
    max_storage_gb: float = 100.0
    max_concurrent_queries: int = 10
    max_monthly_api_calls: int = 100000
    max_kb_per_tenant: int = 50
    # Limits that apply to each knowledge base individually (``_per_kb``).
    max_entities_per_kb: int = 100000
    max_relationships_per_kb: int = 500000


@dataclass
class TenantConfig:
    """Per-tenant configuration for models and parameters.

    Every field has a default, so ``TenantConfig()`` is a complete
    configuration. ``KBConfig`` declares optional overrides for ``top_k``,
    ``chunk_size`` and ``cosine_threshold``; the code that merges the two is
    not part of this class.
    """
    # Model selection
    llm_model: str = "gpt-4o-mini"
    embedding_model: str = "bge-m3:latest"
    rerank_model: Optional[str] = None  # no rerank model selected by default

    # LLM parameters
    # Free-form extra keyword arguments; each instance gets its own dict.
    llm_model_kwargs: Dict[str, Any] = field(default_factory=dict)
    llm_temperature: float = 1.0
    llm_max_tokens: int = 4096

    # Embedding parameters
    # NOTE(review): presumably has to match the vector size produced by
    # ``embedding_model`` — confirm when changing either field.
    embedding_dim: int = 1024
    embedding_batch_num: int = 10

    # Query defaults
    top_k: int = 40
    chunk_top_k: int = 20
    cosine_threshold: float = 0.2
    enable_llm_cache: bool = True
    # Enabled by default even though ``rerank_model`` defaults to None.
    enable_rerank: bool = True

    # Chunking defaults
    # NOTE(review): the unit (tokens vs. characters) is not stated here — confirm.
    chunk_size: int = 1200
    chunk_overlap: int = 100

    # Custom tenant metadata (free-form; each instance gets its own dict)
    custom_metadata: Dict[str, Any] = field(default_factory=dict)


@dataclass
class KBConfig:
    """Per-knowledge-base configuration (overrides tenant defaults).

    The optional fields share their names with fields of ``TenantConfig`` and
    default to ``None``. NOTE(review): ``None`` presumably means "use the
    tenant-level value"; the fallback logic is not implemented in this class.
    """
    # Only include fields that override tenant config
    top_k: Optional[int] = None
    chunk_size: Optional[int] = None
    cosine_threshold: Optional[float] = None
    # Unlike the override fields above, this defaults to a fresh empty dict
    # rather than None.
    custom_metadata: Dict[str, Any] = field(default_factory=dict)


@dataclass
class Tenant:
    """Represents a tenant in the multi-tenant system.

    Every field has a default, so ``Tenant()`` yields an active tenant with a
    freshly generated UUID4 string as ``tenant_id``, default ``TenantConfig``
    and ``ResourceQuota`` objects, and zeroed usage statistics.
    """
    tenant_id: str = field(default_factory=lambda: str(uuid4()))
    tenant_name: str = ""
    description: Optional[str] = None

    # Configuration
    config: TenantConfig = field(default_factory=TenantConfig)
    quota: ResourceQuota = field(default_factory=ResourceQuota)

    # Lifecycle
    is_active: bool = True
    # NOTE(review): datetime.utcnow() returns naive datetimes and is deprecated
    # since Python 3.12. It is kept because switching to timezone-aware values
    # would change the isoformat() strings emitted by to_dict().
    created_at: datetime = field(default_factory=datetime.utcnow)
    updated_at: datetime = field(default_factory=datetime.utcnow)
    created_by: Optional[str] = None
    updated_by: Optional[str] = None

    # Metadata
    metadata: Dict[str, Any] = field(default_factory=dict)

    # Statistics
    kb_count: int = 0
    total_documents: int = 0
    total_storage_mb: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary representation.

        Returns:
            A dict with the tenant's scalar fields, nested ``"config"`` and
            ``"quota"`` dicts, and ``created_at`` / ``updated_at`` rendered as
            ISO 8601 strings via ``datetime.isoformat()``.

        The nested dicts carry every scalar setting of ``TenantConfig`` and
        every field of ``ResourceQuota``, so that no limit or model parameter
        is lost when the tenant is serialized. ``llm_model_kwargs`` is not
        included (it was not serialized before either).
        NOTE(review): confirm whether ``llm_model_kwargs`` should be emitted;
        it is free-form and may hold provider-specific settings or credentials.
        """
        cfg = self.config
        quota = self.quota
        return {
            "tenant_id": self.tenant_id,
            "tenant_name": self.tenant_name,
            "description": self.description,
            "config": {
                "llm_model": cfg.llm_model,
                "embedding_model": cfg.embedding_model,
                "rerank_model": cfg.rerank_model,
                "llm_temperature": cfg.llm_temperature,
                "llm_max_tokens": cfg.llm_max_tokens,
                "embedding_dim": cfg.embedding_dim,
                "embedding_batch_num": cfg.embedding_batch_num,
                "chunk_size": cfg.chunk_size,
                "chunk_overlap": cfg.chunk_overlap,
                "top_k": cfg.top_k,
                "chunk_top_k": cfg.chunk_top_k,
                "cosine_threshold": cfg.cosine_threshold,
                "enable_llm_cache": cfg.enable_llm_cache,
                "enable_rerank": cfg.enable_rerank,
                "custom_metadata": cfg.custom_metadata,
            },
            "quota": {
                "max_documents": quota.max_documents,
                "max_storage_gb": quota.max_storage_gb,
                "max_concurrent_queries": quota.max_concurrent_queries,
                "max_monthly_api_calls": quota.max_monthly_api_calls,
                "max_kb_per_tenant": quota.max_kb_per_tenant,
                "max_entities_per_kb": quota.max_entities_per_kb,
                "max_relationships_per_kb": quota.max_relationships_per_kb,
            },
            "is_active": self.is_active,
            "created_at": self.created_at.isoformat(),
            "updated_at": self.updated_at.isoformat(),
            "created_by": self.created_by,
            "updated_by": self.updated_by,
            "metadata": self.metadata,
            "kb_count": self.kb_count,
            "total_documents": self.total_documents,
            "total_storage_mb": self.total_storage_mb,
        }


@dataclass
class KnowledgeBase:
    """Represents a knowledge base within a tenant.

    Every field has a default; ``kb_id`` defaults to a freshly generated UUID4
    string, while ``tenant_id`` defaults to an empty string and therefore has
    to be supplied by the caller to link the knowledge base to a tenant.
    """
    kb_id: str = field(default_factory=lambda: str(uuid4()))
    tenant_id: str = ""
    kb_name: str = ""
    description: Optional[str] = None

    # Status and lifecycle
    is_active: bool = True
    status: str = "ready"  # ready | indexing | error

    # Statistics
    document_count: int = 0
    entity_count: int = 0
    relationship_count: int = 0
    chunk_count: int = 0
    storage_used_mb: float = 0.0

    # Indexing info
    last_indexed_at: Optional[datetime] = None  # None until a value is assigned
    index_version: int = 1

    # Configuration (can override tenant defaults)
    config: Optional[KBConfig] = None

    # Timestamps
    # NOTE(review): datetime.utcnow() returns naive datetimes and is deprecated
    # since Python 3.12. It is kept because switching to timezone-aware values
    # would change the isoformat() strings emitted by to_dict().
    created_at: datetime = field(default_factory=datetime.utcnow)
    updated_at: datetime = field(default_factory=datetime.utcnow)
    created_by: Optional[str] = None
    updated_by: Optional[str] = None

    # Metadata
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary representation.

        Returns:
            A dict with all fields of the knowledge base. Datetimes are
            rendered with ``datetime.isoformat()`` (``last_indexed_at`` is
            ``None`` when unset) and ``"config"`` is either ``None`` or a dict
            of the ``KBConfig`` fields.
        """
        # asdict() builds a fresh dict (copying nested containers) instead of
        # handing out the config instance's live ``__dict__``; otherwise a
        # caller editing the serialized output would silently modify the
        # KBConfig object itself.
        config_dict = asdict(self.config) if self.config else None
        indexed_at = self.last_indexed_at.isoformat() if self.last_indexed_at else None
        return {
            "kb_id": self.kb_id,
            "tenant_id": self.tenant_id,
            "kb_name": self.kb_name,
            "description": self.description,
            "is_active": self.is_active,
            "status": self.status,
            "document_count": self.document_count,
            "entity_count": self.entity_count,
            "relationship_count": self.relationship_count,
            "chunk_count": self.chunk_count,
            "storage_used_mb": self.storage_used_mb,
            "last_indexed_at": indexed_at,
            "index_version": self.index_version,
            "config": config_dict,
            "created_at": self.created_at.isoformat(),
            "updated_at": self.updated_at.isoformat(),
            "created_by": self.created_by,
            "updated_by": self.updated_by,
            "metadata": self.metadata,
        }


@dataclass
class TenantContext:
    """Request-scoped tenant context injected into all request handlers.

    ``tenant_id``, ``kb_id``, ``user_id`` and ``role`` are required; the
    remaining fields default to empty containers, ``None``, or (for
    ``request_id``) a freshly generated UUID4 string.
    """
    tenant_id: str
    kb_id: str
    user_id: str
    role: str  # one of: admin | editor | viewer | viewer:read-only

    # Authorization data
    permissions: Dict[str, bool] = field(default_factory=dict)
    knowledge_base_ids: List[str] = field(default_factory=list)  # KBs the user may access

    # Request tracking data
    request_id: str = field(default_factory=lambda: str(uuid4()))
    ip_address: Optional[str] = None
    user_agent: Optional[str] = None

    # Derived values
    @property
    def workspace_namespace(self) -> str:
        """Return ``"<tenant_id>_<kb_id>"``, the backward compatible workspace namespace."""
        return "{}_{}".format(self.tenant_id, self.kb_id)

    def can_access_kb(self, kb_id: str) -> bool:
        """Tell whether ``kb_id`` is accessible.

        Access is granted when ``kb_id`` is listed in ``knowledge_base_ids``
        or when that list contains the wildcard entry ``"*"``.
        """
        accessible = self.knowledge_base_ids
        if kb_id in accessible:
            return True
        return "*" in accessible

    def has_permission(self, permission: str) -> bool:
        """Look up ``permission`` in ``permissions``; missing keys yield ``False``."""
        granted = self.permissions.get(permission, False)
        return granted

    def to_dict(self) -> Dict[str, Any]:
        """Return the context fields plus ``workspace_namespace`` as a dict.

        The ``permissions`` and ``knowledge_base_ids`` entries are the same
        objects held by this context, not copies.
        """
        return dict(
            tenant_id=self.tenant_id,
            kb_id=self.kb_id,
            user_id=self.user_id,
            role=self.role,
            permissions=self.permissions,
            knowledge_base_ids=self.knowledge_base_ids,
            request_id=self.request_id,
            ip_address=self.ip_address,
            user_agent=self.user_agent,
            workspace_namespace=self.workspace_namespace,
        )