LightRAG/scripts/migrate_workspace_to_tenant.py
2025-12-05 14:31:13 +08:00

356 lines
11 KiB
Python

#!/usr/bin/env python
"""
Workspace-to-Tenant Migration Script
Migrates existing single-tenant workspace-based deployments to multi-tenant architecture.
This script:
1. Scans existing workspace directories
2. Creates a default tenant for each workspace
3. Creates a default knowledge base within each tenant
4. Preserves all existing data structure for backward compatibility
Usage:
python migrate_workspace_to_tenant.py --working-dir /path/to/rag_storage
python migrate_workspace_to_tenant.py --working-dir /path/to/rag_storage --dry-run
python migrate_workspace_to_tenant.py --working-dir /path/to/rag_storage --skip-backup
"""
import asyncio
import argparse
import sys
import shutil
from pathlib import Path
from datetime import datetime
from typing import List, Dict, Optional
from lightrag.services.tenant_service import TenantService
from lightrag.utils import logger
class WorkspaceToTenantMigrator:
"""
Handles migration from workspace-based to multi-tenant architecture.
"""
def __init__(self, working_dir: str, dry_run: bool = False, backup: bool = True):
"""
Initialize the migrator.
Args:
working_dir: Root directory containing workspace folders
dry_run: If True, simulate migration without making changes
backup: If True, create backup before migration
"""
self.working_dir = Path(working_dir)
self.dry_run = dry_run
self.backup = backup
self.tenant_service = TenantService()
self.migration_log: List[str] = []
self.error_log: List[str] = []
def validate_working_dir(self) -> bool:
"""Validate that working directory exists."""
if not self.working_dir.exists():
self.error_log.append(
f"Working directory does not exist: {self.working_dir}"
)
return False
if not self.working_dir.is_dir():
self.error_log.append(f"Path is not a directory: {self.working_dir}")
return False
return True
def discover_workspaces(self) -> List[str]:
"""
Discover existing workspace directories.
Workspaces are identified by common RAG storage files like:
- kv_store_*.json
- doc_status_storage.json
- rag_storage.db
Returns:
List of workspace directory names
"""
workspaces = []
# Check for common RAG storage files
for item in self.working_dir.iterdir():
if not item.is_dir():
continue
# Skip special directories
if item.name.startswith((".", "__")):
continue
# Check if directory contains RAG storage files
has_rag_files = (
any(
[
(item / f"kv_store_{name}.json").exists()
for name in [
"full_docs",
"text_chunks",
"entities",
"relations",
]
]
)
or (item / "doc_status_storage.json").exists()
)
if has_rag_files or item.name.startswith("workspace_"):
workspaces.append(item.name)
return sorted(workspaces)
def backup_working_dir(self) -> Optional[Path]:
"""
Create a backup of the working directory.
Returns:
Path to backup directory, or None if backup failed
"""
if not self.backup:
return None
backup_dir = (
self.working_dir.parent
/ f"{self.working_dir.name}_backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
)
try:
msg = f"Creating backup at {backup_dir}"
logger.info(msg)
self.migration_log.append(msg)
if not self.dry_run:
shutil.copytree(self.working_dir, backup_dir)
return backup_dir
except Exception as e:
msg = f"Failed to create backup: {e}"
logger.error(msg)
self.error_log.append(msg)
return None
async def migrate_workspace(self, workspace_name: str) -> bool:
"""
Migrate a single workspace to multi-tenant structure.
Args:
workspace_name: Name of the workspace to migrate
Returns:
True if migration successful, False otherwise
"""
try:
msg = f"\nMigrating workspace: {workspace_name}"
logger.info(msg)
self.migration_log.append(msg)
# Create tenant from workspace
tenant_name = workspace_name if workspace_name != "" else "default"
if not self.dry_run:
tenant = await self.tenant_service.create_tenant(
tenant_name=tenant_name,
config=None, # Use default config
)
msg = f" ✓ Created tenant '{tenant_name}' with ID: {tenant.tenant_id}"
logger.info(msg)
self.migration_log.append(msg)
# Create default knowledge base
kb = await self.tenant_service.create_knowledge_base(
tenant_id=tenant.tenant_id,
kb_name="default",
description="Default knowledge base (migrated from workspace)",
)
msg = f" ✓ Created default KB with ID: {kb.kb_id}"
logger.info(msg)
self.migration_log.append(msg)
else:
msg = f" [DRY RUN] Would create tenant '{tenant_name}' with default KB"
logger.info(msg)
self.migration_log.append(msg)
return True
except Exception as e:
msg = f" ✗ Failed to migrate workspace '{workspace_name}': {e}"
logger.error(msg)
self.error_log.append(msg)
return False
async def migrate_all_workspaces(self, workspaces: List[str]) -> Dict[str, bool]:
"""
Migrate all discovered workspaces.
Args:
workspaces: List of workspace names to migrate
Returns:
Dictionary mapping workspace name to migration status
"""
results = {}
for workspace in workspaces:
success = await self.migrate_workspace(workspace)
results[workspace] = success
return results
def generate_report(self, workspaces: List[str], results: Dict[str, bool]) -> str:
"""
Generate a migration report.
Args:
workspaces: List of workspaces processed
results: Migration results
Returns:
Formatted report string
"""
successful = sum(1 for v in results.values() if v)
failed = len(workspaces) - successful
report = f"""
╔══════════════════════════════════════════════════════════════╗
║ WORKSPACE-TO-TENANT MIGRATION REPORT ║
╚══════════════════════════════════════════════════════════════╝
Working Directory: {self.working_dir}
Dry Run Mode: {self.dry_run}
Workspaces Processed: {len(workspaces)}
Successfully Migrated: {successful}
Failed: {failed}
Migration Log:
"""
for line in self.migration_log:
report += f"\n{line}"
if self.error_log:
report += "\n\nErrors Encountered:"
for error in self.error_log:
report += f"\n{error}"
report += "\n"
return report
async def run(self) -> bool:
"""
Execute the migration process.
Returns:
True if migration completed successfully, False otherwise
"""
# Validate setup
if not self.validate_working_dir():
logger.error("Validation failed")
return False
# Discover workspaces
workspaces = self.discover_workspaces()
if not workspaces:
msg = "No workspaces found to migrate"
logger.warning(msg)
self.migration_log.append(msg)
return True
msg = f"Discovered {len(workspaces)} workspace(s): {', '.join(workspaces)}"
logger.info(msg)
self.migration_log.append(msg)
# Create backup if not dry-run
if not self.dry_run:
backup_path = self.backup_working_dir()
if not backup_path and self.backup:
logger.warning("Backup failed but continuing with migration")
# Migrate workspaces
results = await self.migrate_all_workspaces(workspaces)
# Generate and display report
report = self.generate_report(workspaces, results)
print(report)
# Save report to file
report_path = (
self.working_dir
/ f"migration_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"
)
try:
if not self.dry_run:
with open(report_path, "w") as f:
f.write(report)
logger.info(f"Migration report saved to {report_path}")
except Exception as e:
logger.error(f"Failed to save migration report: {e}")
# Return success if no failures
return all(results.values())
def main():
"""Main entry point for migration script."""
parser = argparse.ArgumentParser(
description="Migrate workspace-based deployment to multi-tenant architecture",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
# Perform actual migration
python migrate_workspace_to_tenant.py --working-dir /path/to/rag_storage
# Preview what would be migrated without making changes
python migrate_workspace_to_tenant.py --working-dir /path/to/rag_storage --dry-run
# Migrate without creating backup
python migrate_workspace_to_tenant.py --working-dir /path/to/rag_storage --skip-backup
""",
)
parser.add_argument(
"--working-dir",
required=True,
help="Path to the working directory containing workspaces",
)
parser.add_argument(
"--dry-run",
action="store_true",
help="Simulate migration without making actual changes",
)
parser.add_argument(
"--skip-backup",
action="store_true",
help="Skip creating a backup of the working directory",
)
args = parser.parse_args()
# Create migrator
migrator = WorkspaceToTenantMigrator(
working_dir=args.working_dir, dry_run=args.dry_run, backup=not args.skip_backup
)
# Run migration
try:
success = asyncio.run(migrator.run())
sys.exit(0 if success else 1)
except KeyboardInterrupt:
logger.warning("Migration interrupted by user")
sys.exit(1)
except Exception as e:
logger.error(f"Migration failed: {e}", exc_info=True)
sys.exit(1)
if __name__ == "__main__":
main()