LightRAG/lightrag_webui/src/services/tenantStateManager.ts
Raphael MANSUY fe9b8ec02a
tests: stabilize integration tests + skip external services; fix multi-tenant API behavior and idempotency (#4)
* feat: Implement multi-tenant architecture with tenant and knowledge base models

- Added data models for tenants, knowledge bases, and related configurations.
- Introduced role and permission management for users in the multi-tenant system.
- Created a service layer for managing tenants and knowledge bases, including CRUD operations.
- Developed a tenant-aware instance manager for LightRAG with caching and isolation features.
- Added a migration script to transition existing workspace-based deployments to the new multi-tenant architecture.

* chore: ignore lightrag/api/webui/assets/ directory

* chore: stop tracking lightrag/api/webui/assets (ignore in .gitignore)

* feat: Initialize LightRAG Multi-Tenant Stack with PostgreSQL

- Added README.md for project overview, setup instructions, and architecture details.
- Created docker-compose.yml to define services: PostgreSQL, Redis, LightRAG API, and Web UI.
- Introduced env.example for environment variable configuration.
- Implemented init-postgres.sql for PostgreSQL schema initialization with multi-tenant support.
- Added reproduce_issue.py for testing default tenant access via API.

* feat: Enhance TenantSelector and update related components for improved multi-tenant support

* feat: Enhance testing capabilities and update documentation

- Updated Makefile to include new test commands for various modes (compatibility, isolation, multi-tenant, security, coverage, and dry-run).
- Modified API health check endpoint in Makefile to reflect new port configuration.
- Updated QUICK_START.md and README.md to reflect changes in service URLs and ports.
- Added environment variables for testing modes in env.example.
- Introduced run_all_tests.sh script to automate testing across different modes.
- Created conftest.py for pytest configuration, including database fixtures and mock services.
- Implemented database helper functions for streamlined database operations in tests.
- Added test collection hooks to skip tests based on the current MULTITENANT_MODE.

* feat: Implement multi-tenant support with demo mode enabled by default

- Added multi-tenant configuration to the environment and Docker setup.
- Created pre-configured demo tenants (acme-corp and techstart) for testing.
- Updated API endpoints to support tenant-specific data access.
- Enhanced Makefile commands for better service management and database operations.
- Introduced user-tenant membership system with role-based access control.
- Added comprehensive documentation for multi-tenant setup and usage.
- Fixed issues with document visibility in multi-tenant environments.
- Implemented necessary database migrations for user memberships and legacy support.

* feat(audit): Add final audit report for multi-tenant implementation

- Documented overall assessment, architecture overview, test results, security findings, and recommendations.
- Included detailed findings on critical security issues and architectural concerns.

fix(security): Implement security fixes based on audit findings

- Removed global RAG fallback and enforced strict tenant context.
- Configured super-admin access and required user authentication for tenant access.
- Cleared localStorage on logout and improved error handling in WebUI.

chore(logs): Create task logs for audit and security fixes implementation

- Documented actions, decisions, and next steps for both audit and security fixes.
- Summarized test results and remaining recommendations.

chore(scripts): Enhance development stack management scripts

- Added scripts for cleaning, starting, and stopping the development stack.
- Improved output messages and ensured graceful shutdown of services.

feat(starter): Initialize PostgreSQL with AGE extension support

- Created initialization scripts for PostgreSQL extensions including uuid-ossp, vector, and AGE.
- Ensured successful installation and verification of extensions.

* feat: Implement auto-select for first tenant and KB on initial load in WebUI

- Removed WEBUI_INITIAL_STATE_FIX.md as the issue is resolved.
- Added useTenantInitialization hook to automatically select the first available tenant and KB on app load.
- Integrated the new hook into the Root component of the WebUI.
- Updated RetrievalTesting component to ensure a KB is selected before allowing user interaction.
- Created end-to-end tests for multi-tenant isolation and real service interactions.
- Added scripts for starting, stopping, and cleaning the development stack.
- Enhanced API and tenant routes to support tenant-specific pipeline status initialization.
- Updated constants for backend URL to reflect the correct port.
- Improved error handling and logging in various components.

* feat: Add multi-tenant support with enhanced E2E testing scripts and client functionality

* update client

* Add integration and unit tests for multi-tenant API, models, security, and storage

- Implement integration tests for tenant and knowledge base management endpoints in `test_tenant_api_routes.py`.
- Create unit tests for tenant isolation, model validation, and role permissions in `test_tenant_models.py`.
- Add security tests to enforce role-based permissions and context validation in `test_tenant_security.py`.
- Develop tests for tenant-aware storage operations and context isolation in `test_tenant_storage_phase3.py`.

* feat(e2e): Implement OpenAI model support and database reset functionality

* Add comprehensive test suite for gpt-5-nano compatibility

- Introduced tests for parameter normalization, embeddings, and entity extraction.
- Implemented direct API testing for gpt-5-nano.
- Validated .env configuration loading and OpenAI API connectivity.
- Analyzed reasoning token overhead with various token limits.
- Documented test procedures and expected outcomes in README files.
- Ensured all tests pass for production readiness.

* kg(postgres_impl): ensure AGE extension is loaded in session and configure graph initialization

* dev: add hybrid dev helper scripts, Makefile, docker-compose.dev-db and local development docs

* feat(dev): add dev helper scripts and local development documentation for hybrid setup

* feat(multi-tenant): add detailed specifications and logs for multi-tenant improvements, including UX, backend handling, and ingestion pipeline

* feat(migration): add generated tenant/kb columns, indexes, triggers; drop unused tables; update schema and docs

* test(backward-compat): adapt tests to new StorageNameSpace/TenantService APIs (use concrete dummy storages)

* chore: multi-tenant and UX updates — docs, webui, storage, tenant service adjustments

* tests: stabilize integration tests + skip external services; fix multi-tenant API behavior and idempotency

- gpt5_nano_compatibility: add pytest-asyncio markers, skip when OPENAI key missing, prevent module-level asyncio.run collection, add conftest
- Ollama tests: add server availability check and skip markers; avoid pytest collection warnings by renaming helper classes
- Graph storage tests: rename interactive test functions to avoid pytest collection
- Document & Tenant routes: support external_ids for idempotency; ensure HTTPExceptions are re-raised
- LightRAG core: support external_ids in apipeline_enqueue_documents and idempotent logic
- Tests updated to match API changes (tenant routes & document routes)
- Add logs and scripts for inspection and audit
2025-12-04 16:04:21 +08:00

409 lines
11 KiB
TypeScript

/**
* Tenant State Manager
*
* Centralized module for managing tenant+route state handling.
* Provides state persistence across tenant switches and URL synchronization.
*
* Security: Tenant IDs are NEVER exposed in URLs. They are stored in
* sessionStorage with tenant-scoped keys and provided via X-Tenant-ID header.
*/
import { debounce } from './debounce'
// Types for route state
/**
 * UI state tracked for a single route of a single tenant.
 *
 * Every field is optional: a route only stores the fields it uses
 * (see DEFAULT_ROUTE_STATE for the per-route starting values).
 */
export interface RouteState {
// Selected knowledge base; mirrored in the URL as the `kb` parameter
currentKB?: string
// Pagination; the built-in route defaults start at page 1
page?: number
pageSize?: number
// Key/value filters; mirrored in the URL as `filters=key1:value1,key2:value2`
filters?: Record<string, string>
// Sort field name and direction
sort?: string
sortDirection?: 'asc' | 'desc'
// Presentation mode; mirrored in the URL as the `view` parameter
viewMode?: 'list' | 'card' | 'graph'
// Free-text query; mirrored in the URL as the `q` parameter
query?: string
// Tab-specific state
// NOTE(review): this open index signature disables checking of misspelled keys
// on RouteState values; narrowing it would change the exported type.
[key: string]: any
}
/**
 * Identifies one tenant+route combination.
 * NOTE(review): not referenced anywhere else in the visible part of this file;
 * presumably consumed by other modules — confirm before removing.
 */
export interface TenantRouteKey {
tenantId: string
routeName: string
}
// Route names that support state persistence.
// Each name must have an entry in DEFAULT_ROUTE_STATE (enforced by its Record<RouteName, ...> type).
export type RouteName = 'documents' | 'knowledge-graph' | 'retrieval' | 'api' | 'chat'
// Session storage key prefix; every key written by this module starts with it,
// followed by the tenant ID (e.g. `lightrag:tenant:<tenantId>:route:<routeName>`).
const STORAGE_PREFIX = 'lightrag:tenant'
// Version tag stored alongside persisted state. Stored entries whose version
// differs from this value are ignored on read and the route falls back to defaults.
const STATE_VERSION = '1.0'
/**
 * Starting state per route, used whenever nothing valid is cached or persisted
 * for a tenant+route pair and when a route is reset.
 *
 * Treat these objects as read-only templates: copy before handing them out.
 */
const DEFAULT_ROUTE_STATE: Record<RouteName, RouteState> = {
  // Document list: paged table, newest updates first
  documents: {
    page: 1,
    pageSize: 10,
    filters: {},
    sort: 'updated_at',
    sortDirection: 'desc',
    viewMode: 'list',
  },

  // Knowledge graph: graph view with an empty search
  'knowledge-graph': {
    viewMode: 'graph',
    filters: {},
    query: '',
  },

  // Retrieval: paged results with an empty query
  retrieval: {
    page: 1,
    pageSize: 20,
    query: '',
  },

  // API route: no persisted fields by default
  api: {},

  // Chat: paged history
  chat: {
    page: 1,
    pageSize: 50,
  },
}
/**
 * Build the key under which the state of one route of one tenant is stored.
 *
 * @param tenantId - Tenant the state belongs to
 * @param routeName - Route the state belongs to
 * @returns `<STORAGE_PREFIX>:<tenantId>:route:<routeName>`
 */
function getStorageKey(tenantId: string, routeName: string): string {
  const tenantScope = `${STORAGE_PREFIX}:${tenantId}`
  return `${tenantScope}:route:${routeName}`
}
/**
 * Parse the query string of the current URL into a partial RouteState.
 *
 * Recognized parameters:
 * - `kb`                      -> currentKB
 * - `page`, `pageSize`        -> positive integers only; anything else is ignored
 * - `sort`                    -> sort
 * - `sortDirection`           -> only 'asc' or 'desc'
 * - `view` (or `viewMode`)    -> only 'list', 'card' or 'graph'
 * - `query` (or `q`)          -> query
 * - `filters`                 -> `key1:value1,key2:value2`
 *
 * Parameters that are missing or invalid are simply left out of the result,
 * so the return value can be spread over stored state without clobbering it.
 *
 * @returns The subset of RouteState that is present and valid in the URL
 */
function parseURLParams(): Partial<RouteState> {
  const params = new URLSearchParams(window.location.search)
  const state: Partial<RouteState> = {}

  // Reads a positive integer parameter. Returns undefined for missing,
  // non-numeric, zero or negative values so NaN never reaches the state.
  const readPositiveInt = (name: string): number | undefined => {
    const raw = params.get(name)
    if (!raw) return undefined
    const value = parseInt(raw, 10)
    return Number.isInteger(value) && value > 0 ? value : undefined
  }

  // Parse standard params
  const kb = params.get('kb')
  if (kb) state.currentKB = kb

  const page = readPositiveInt('page')
  if (page !== undefined) state.page = page

  const pageSize = readPositiveInt('pageSize')
  if (pageSize !== undefined) state.pageSize = pageSize

  const sort = params.get('sort')
  if (sort) state.sort = sort

  const sortDirection = params.get('sortDirection')
  if (sortDirection === 'asc' || sortDirection === 'desc') {
    state.sortDirection = sortDirection
  }

  const viewMode = params.get('view') || params.get('viewMode')
  if (viewMode === 'list' || viewMode === 'card' || viewMode === 'graph') {
    state.viewMode = viewMode
  }

  const query = params.get('query') || params.get('q')
  if (query) state.query = query

  // Parse filters (format: filters=key1:value1,key2:value2)
  const filtersParam = params.get('filters')
  if (filtersParam) {
    const filters: Record<string, string> = {}
    for (const pair of filtersParam.split(',')) {
      // Split on the FIRST colon only, so values that themselves contain
      // colons (e.g. timestamps) are kept intact.
      const separator = pair.indexOf(':')
      if (separator <= 0) continue // no separator, or empty key
      const key = pair.slice(0, separator)
      const value = pair.slice(separator + 1)
      if (value) {
        filters[key] = value
      }
    }
    if (Object.keys(filters).length > 0) {
      state.filters = filters
    }
  }

  return state
}
/**
 * Turn a RouteState into URL query parameters.
 *
 * Values equal to the built-in URL defaults (page 1, pageSize 10, 'list' view)
 * and empty values are omitted to keep URLs short. Filters are emitted as a
 * single `filters=key1:value1,key2:value2` parameter, skipping empty values.
 *
 * @param state - Route state to serialize
 * @returns A fresh URLSearchParams instance (possibly empty)
 */
function serializeToURLParams(state: RouteState): URLSearchParams {
  const { currentKB, page, pageSize, sort, sortDirection, viewMode, query, filters } = state
  const out = new URLSearchParams()

  if (currentKB) {
    out.set('kb', currentKB)
  }
  if (page && page !== 1) {
    out.set('page', page.toString())
  }
  if (pageSize && pageSize !== 10) {
    out.set('pageSize', pageSize.toString())
  }
  if (sort) {
    out.set('sort', sort)
  }
  if (sortDirection) {
    out.set('sortDirection', sortDirection)
  }
  if (viewMode && viewMode !== 'list') {
    out.set('view', viewMode)
  }
  if (query) {
    out.set('q', query)
  }

  // Serialize filters
  if (filters && Object.keys(filters).length > 0) {
    const pairs: string[] = []
    for (const [name, value] of Object.entries(filters)) {
      if (value) pairs.push(`${name}:${value}`)
    }
    const joined = pairs.join(',')
    if (joined) {
      out.set('filters', joined)
    }
  }

  return out
}
/**
 * TenantStateManager class
 *
 * Manages per-tenant, per-route UI state with:
 * - URL synchronization (tenant-agnostic: the tenant ID is never written to the URL)
 * - sessionStorage persistence (tenant-scoped keys, versioned envelope)
 * - In-memory caching for fast access
 *
 * State objects returned by this class are shared with the internal cache;
 * callers should treat them as read-only and go through setState() to change them.
 */
class TenantStateManager {
  private memoryCache: Map<string, RouteState> = new Map()
  private listeners: Map<string, Set<(state: RouteState) => void>> = new Map()
  private currentTenantId: string | null = null

  constructor() {
    // Listen for popstate events to handle browser back/forward
    if (typeof window !== 'undefined') {
      window.addEventListener('popstate', () => {
        // NOTE(review): the parsed URL state is discarded here, so back/forward
        // navigation does not currently update stored state or notify listeners.
        // Left unchanged because the intended target route is not known at this point.
        this.hydrateFromURL()
      })
    }
  }

  /**
   * Record which tenant is active.
   *
   * When both the previous and the new tenant are non-null and differ,
   * onTenantSwitch() is invoked. Setting or clearing the tenant from/to null
   * does not count as a switch.
   */
  setCurrentTenant(tenantId: string | null): void {
    const oldTenantId = this.currentTenantId
    this.currentTenantId = tenantId
    if (oldTenantId && tenantId && oldTenantId !== tenantId) {
      this.onTenantSwitch(oldTenantId, tenantId)
    }
  }

  /**
   * Get the current tenant ID (null when none has been set)
   */
  getCurrentTenant(): string | null {
    return this.currentTenantId
  }

  /**
   * Get state for a specific tenant and route.
   *
   * Lookup order: memory cache, then sessionStorage (only entries with the
   * current STATE_VERSION and an object-shaped state), then route defaults.
   * Whatever is found is cached for subsequent calls.
   */
  getState(tenantId: string, routeName: RouteName): RouteState {
    const key = getStorageKey(tenantId, routeName)

    // Check memory cache first
    const cached = this.memoryCache.get(key)
    if (cached !== undefined) {
      return cached
    }

    // Try sessionStorage
    try {
      const stored = sessionStorage.getItem(key)
      if (stored) {
        const parsed: unknown = JSON.parse(stored)
        if (typeof parsed === 'object' && parsed !== null) {
          const { version, state } = parsed as { version?: unknown; state?: unknown }
          // Reject envelopes from other versions and envelopes whose payload is
          // not a plain object; caching those would hand callers a broken state.
          if (
            version === STATE_VERSION &&
            typeof state === 'object' &&
            state !== null &&
            !Array.isArray(state)
          ) {
            const restored = state as RouteState
            this.memoryCache.set(key, restored)
            return restored
          }
        }
      }
    } catch (e) {
      console.warn('[TenantStateManager] Failed to parse stored state:', e)
    }

    // Return default state
    const defaultState = this.cloneDefaultState(routeName)
    this.memoryCache.set(key, defaultState)
    return defaultState
  }

  /**
   * Set state for a specific tenant and route.
   *
   * The given fields are shallow-merged over the current state; the result is
   * cached, persisted to sessionStorage and pushed to subscribed listeners.
   */
  setState(tenantId: string, routeName: RouteName, state: Partial<RouteState>): void {
    const key = getStorageKey(tenantId, routeName)
    const newState = { ...this.getState(tenantId, routeName), ...state }
    this.commitState(key, newState)
  }

  /**
   * Hydrate state from URL parameters
   * URL is tenant-agnostic, so this only restores UI state
   */
  hydrateFromURL(): Partial<RouteState> {
    return parseURLParams()
  }

  /**
   * Sync current route state to URL (debounced by 300 ms)
   * URL remains tenant-agnostic (no tenant ID in URL)
   */
  syncToURL = debounce((routeName: RouteName, state: RouteState): void => {
    // Use replaceState to avoid polluting browser history for filter/sort changes
    window.history.replaceState({ routeName, state }, '', this.buildURL(state))
  }, 300)

  /**
   * Push state to URL (creates new history entry)
   * Use for explicit user navigation actions
   */
  pushToURL(routeName: RouteName, state: RouteState): void {
    window.history.pushState({ routeName, state }, '', this.buildURL(state))
  }

  /**
   * Handle tenant switch
   *
   * Strips the query string from the URL (keeping path and hash) and then
   * notifies every listener subscribed to a route of the new tenant with that
   * tenant's stored (or default) state.
   */
  onTenantSwitch(oldTenantId: string, newTenantId: string): void {
    console.log(`[TenantStateManager] Tenant switch: ${oldTenantId} -> ${newTenantId}`)

    // Clear URL params when switching tenants (tenant-agnostic URLs)
    // The new tenant's state will be loaded from sessionStorage
    const hash = window.location.hash
    window.history.replaceState({}, '', `${window.location.pathname}${hash}`)

    // Listener keys come from getStorageKey(), so the key for an empty route name
    // is exactly the prefix shared by all route keys of the new tenant. Matching on
    // that prefix (instead of a substring) avoids notifying other tenants' listeners.
    const routePrefix = getStorageKey(newTenantId, '')

    // Iterate over a snapshot so listeners may subscribe/unsubscribe while being notified
    for (const [key, listeners] of Array.from(this.listeners.entries())) {
      if (!key.startsWith(routePrefix)) continue
      const routeName = key.slice(routePrefix.length) as RouteName
      const state = this.getState(newTenantId, routeName)
      listeners.forEach(listener => listener(state))
    }
  }

  /**
   * Subscribe to state changes for a specific tenant+route
   *
   * @returns A function that removes the listener again
   */
  subscribe(tenantId: string, routeName: RouteName, listener: (state: RouteState) => void): () => void {
    const key = getStorageKey(tenantId, routeName)
    let registered = this.listeners.get(key)
    if (!registered) {
      registered = new Set()
      this.listeners.set(key, registered)
    }
    registered.add(listener)

    // Return unsubscribe function
    return () => {
      const listeners = this.listeners.get(key)
      if (listeners) {
        listeners.delete(listener)
        // Drop empty sets so the map does not grow with stale keys
        if (listeners.size === 0) {
          this.listeners.delete(key)
        }
      }
    }
  }

  /**
   * Notify all listeners for a specific key
   */
  private notifyListeners(key: string, state: RouteState): void {
    const listeners = this.listeners.get(key)
    if (listeners) {
      listeners.forEach(listener => listener(state))
    }
  }

  /**
   * Clear all state for a specific tenant (route state and last selected KB)
   * from the memory cache and from sessionStorage. Listeners are not notified.
   */
  clearTenantState(tenantId: string): void {
    // Only keys written by this module for exactly this tenant start with this prefix.
    // (Assumes tenant IDs do not themselves contain ':' — TODO confirm.)
    const tenantPrefix = `${STORAGE_PREFIX}:${tenantId}:`

    // Clear from memory cache
    for (const key of Array.from(this.memoryCache.keys())) {
      if (key.startsWith(tenantPrefix)) {
        this.memoryCache.delete(key)
      }
    }

    // Clear from sessionStorage; collect first, because removing while
    // indexing would shift the remaining keys.
    try {
      const keysToRemove: string[] = []
      for (let i = 0; i < sessionStorage.length; i++) {
        const key = sessionStorage.key(i)
        if (key !== null && key.startsWith(tenantPrefix)) {
          keysToRemove.push(key)
        }
      }
      keysToRemove.forEach(key => sessionStorage.removeItem(key))
    } catch (e) {
      console.warn('[TenantStateManager] Failed to clear tenant state:', e)
    }
  }

  /**
   * Get last selected KB for a tenant (null when none is stored or storage is unavailable)
   */
  getLastSelectedKB(tenantId: string): string | null {
    try {
      return sessionStorage.getItem(this.lastKBKey(tenantId))
    } catch {
      return null
    }
  }

  /**
   * Set last selected KB for a tenant
   */
  setLastSelectedKB(tenantId: string, kbId: string): void {
    try {
      sessionStorage.setItem(this.lastKBKey(tenantId), kbId)
    } catch (e) {
      console.warn('[TenantStateManager] Failed to save last KB:', e)
    }
  }

  /**
   * Merge URL state with stored state
   * URL parameters take precedence over stored state.
   * The merged result is returned only; it is not cached or persisted.
   */
  mergeWithURL(tenantId: string, routeName: RouteName): RouteState {
    const storedState = this.getState(tenantId, routeName)
    const urlState = this.hydrateFromURL()
    return { ...storedState, ...urlState }
  }

  /**
   * Reset state for a route to defaults
   *
   * The stored state is REPLACED by the route defaults (not merged), so fields
   * that have no default — e.g. a selected KB or a query — are cleared as well.
   */
  resetRouteState(tenantId: string, routeName: RouteName): void {
    this.commitState(getStorageKey(tenantId, routeName), this.cloneDefaultState(routeName))
  }

  /**
   * Store a complete state object under a key: cache, persist, notify.
   */
  private commitState(key: string, state: RouteState): void {
    // Update memory cache
    this.memoryCache.set(key, state)

    // Persist to sessionStorage
    try {
      sessionStorage.setItem(key, JSON.stringify({
        version: STATE_VERSION,
        state,
        updatedAt: new Date().toISOString(),
      }))
    } catch (e) {
      console.warn('[TenantStateManager] Failed to persist state:', e)
    }

    // Notify listeners
    this.notifyListeners(key, state)
  }

  /**
   * Copy the defaults of a route, including the nested filters object, so
   * mutations of a returned state can never leak into DEFAULT_ROUTE_STATE.
   */
  private cloneDefaultState(routeName: RouteName): RouteState {
    const defaults = DEFAULT_ROUTE_STATE[routeName]
    const copy: RouteState = { ...defaults }
    if (defaults && defaults.filters) {
      copy.filters = { ...defaults.filters }
    }
    return copy
  }

  /**
   * Build the current path + serialized state + current hash.
   */
  private buildURL(state: RouteState): string {
    const search = serializeToURLParams(state).toString()
    const { pathname, hash } = window.location
    return search ? `${pathname}?${search}${hash}` : `${pathname}${hash}`
  }

  /**
   * Storage key holding the last selected KB of a tenant.
   */
  private lastKBKey(tenantId: string): string {
    return `${STORAGE_PREFIX}:${tenantId}:lastKB`
  }
}
// Singleton instance shared by the whole application.
// Constructing it registers a `popstate` listener when `window` is available.
export const tenantStateManager = new TenantStateManager()
// Export default for convenience (same instance as the named export)
export default tenantStateManager