430 lines
17 KiB
Python
430 lines
17 KiB
Python
"""Container lifecycle manager for OpenRAG TUI."""
|
|
|
|
import asyncio
|
|
import json
|
|
import subprocess
|
|
import time
|
|
from dataclasses import dataclass
|
|
from enum import Enum
|
|
from pathlib import Path
|
|
from typing import Dict, List, Optional, AsyncIterator
|
|
|
|
from ..utils.platform import PlatformDetector, RuntimeInfo, RuntimeType
|
|
from utils.gpu_detection import detect_gpu_devices
|
|
|
|
|
|
class ServiceStatus(Enum):
|
|
"""Container service status."""
|
|
UNKNOWN = "unknown"
|
|
RUNNING = "running"
|
|
STOPPED = "stopped"
|
|
STARTING = "starting"
|
|
STOPPING = "stopping"
|
|
ERROR = "error"
|
|
MISSING = "missing"
|
|
|
|
|
|
@dataclass
|
|
class ServiceInfo:
|
|
"""Container service information."""
|
|
name: str
|
|
status: ServiceStatus
|
|
health: Optional[str] = None
|
|
ports: List[str] = None
|
|
image: Optional[str] = None
|
|
image_digest: Optional[str] = None
|
|
created: Optional[str] = None
|
|
|
|
def __post_init__(self):
|
|
if self.ports is None:
|
|
self.ports = []
|
|
|
|
|
|
class ContainerManager:
|
|
"""Manages Docker/Podman container lifecycle for OpenRAG."""
|
|
|
|
def __init__(self, compose_file: Optional[Path] = None):
|
|
self.platform_detector = PlatformDetector()
|
|
self.runtime_info = self.platform_detector.detect_runtime()
|
|
self.compose_file = compose_file or Path("docker-compose.yml")
|
|
self.cpu_compose_file = Path("docker-compose-cpu.yml")
|
|
self.services_cache: Dict[str, ServiceInfo] = {}
|
|
self.last_status_update = 0
|
|
# Auto-select CPU compose if no GPU available
|
|
try:
|
|
has_gpu, _ = detect_gpu_devices()
|
|
self.use_cpu_compose = not has_gpu
|
|
except Exception:
|
|
self.use_cpu_compose = True
|
|
|
|
# Expected services based on compose files
|
|
self.expected_services = [
|
|
"openrag-backend",
|
|
"openrag-frontend",
|
|
"opensearch",
|
|
"dashboards",
|
|
"langflow"
|
|
]
|
|
|
|
# Map container names to service names
|
|
self.container_name_map = {
|
|
"openrag-backend": "openrag-backend",
|
|
"openrag-frontend": "openrag-frontend",
|
|
"os": "opensearch",
|
|
"osdash": "dashboards",
|
|
"langflow": "langflow"
|
|
}
|
|
|
|
def is_available(self) -> bool:
|
|
"""Check if container runtime is available."""
|
|
return self.runtime_info.runtime_type != RuntimeType.NONE
|
|
|
|
def get_runtime_info(self) -> RuntimeInfo:
|
|
"""Get container runtime information."""
|
|
return self.runtime_info
|
|
|
|
def get_installation_help(self) -> str:
|
|
"""Get installation instructions if runtime is not available."""
|
|
return self.platform_detector.get_installation_instructions()
|
|
|
|
async def _run_compose_command(self, args: List[str], cpu_mode: Optional[bool] = None) -> tuple[bool, str, str]:
|
|
"""Run a compose command and return (success, stdout, stderr)."""
|
|
if not self.is_available():
|
|
return False, "", "No container runtime available"
|
|
|
|
if cpu_mode is None:
|
|
cpu_mode = self.use_cpu_compose
|
|
compose_file = self.cpu_compose_file if cpu_mode else self.compose_file
|
|
cmd = self.runtime_info.compose_command + ["-f", str(compose_file)] + args
|
|
|
|
try:
|
|
process = await asyncio.create_subprocess_exec(
|
|
*cmd,
|
|
stdout=asyncio.subprocess.PIPE,
|
|
stderr=asyncio.subprocess.PIPE,
|
|
cwd=Path.cwd()
|
|
)
|
|
|
|
stdout, stderr = await process.communicate()
|
|
stdout_text = stdout.decode() if stdout else ""
|
|
stderr_text = stderr.decode() if stderr else ""
|
|
|
|
success = process.returncode == 0
|
|
return success, stdout_text, stderr_text
|
|
|
|
except Exception as e:
|
|
return False, "", f"Command execution failed: {e}"
|
|
|
|
async def _run_runtime_command(self, args: List[str]) -> tuple[bool, str, str]:
|
|
"""Run a runtime command (docker/podman) and return (success, stdout, stderr)."""
|
|
if not self.is_available():
|
|
return False, "", "No container runtime available"
|
|
|
|
cmd = self.runtime_info.runtime_command + args
|
|
|
|
try:
|
|
process = await asyncio.create_subprocess_exec(
|
|
*cmd,
|
|
stdout=asyncio.subprocess.PIPE,
|
|
stderr=asyncio.subprocess.PIPE
|
|
)
|
|
|
|
stdout, stderr = await process.communicate()
|
|
stdout_text = stdout.decode() if stdout else ""
|
|
stderr_text = stderr.decode() if stderr else ""
|
|
|
|
success = process.returncode == 0
|
|
return success, stdout_text, stderr_text
|
|
|
|
except Exception as e:
|
|
return False, "", f"Command execution failed: {e}"
|
|
|
|
async def get_service_status(self, force_refresh: bool = False) -> Dict[str, ServiceInfo]:
|
|
"""Get current status of all services."""
|
|
current_time = time.time()
|
|
|
|
# Use cache if recent and not forcing refresh
|
|
if not force_refresh and current_time - self.last_status_update < 5:
|
|
return self.services_cache
|
|
|
|
services = {}
|
|
|
|
# Get compose service status
|
|
success, stdout, stderr = await self._run_compose_command(["ps", "--format", "json"])
|
|
|
|
if success and stdout.strip():
|
|
try:
|
|
# Parse JSON output - each line is a separate JSON object
|
|
for line in stdout.strip().split('\n'):
|
|
if line.strip() and line.startswith('{'):
|
|
service = json.loads(line)
|
|
container_name = service.get("Name", "")
|
|
|
|
# Map container name to service name
|
|
service_name = self.container_name_map.get(container_name)
|
|
if not service_name:
|
|
continue
|
|
|
|
state = service.get("State", "").lower()
|
|
|
|
# Map compose states to our status enum
|
|
if "running" in state:
|
|
status = ServiceStatus.RUNNING
|
|
elif "exited" in state or "stopped" in state:
|
|
status = ServiceStatus.STOPPED
|
|
elif "starting" in state:
|
|
status = ServiceStatus.STARTING
|
|
else:
|
|
status = ServiceStatus.UNKNOWN
|
|
|
|
# Extract health - use Status if Health is empty
|
|
health = service.get("Health", "") or service.get("Status", "N/A")
|
|
|
|
# Extract ports
|
|
ports_str = service.get("Ports", "")
|
|
ports = [p.strip() for p in ports_str.split(",") if p.strip()] if ports_str else []
|
|
|
|
# Extract image
|
|
image = service.get("Image", "N/A")
|
|
|
|
services[service_name] = ServiceInfo(
|
|
name=service_name,
|
|
status=status,
|
|
health=health,
|
|
ports=ports,
|
|
image=image,
|
|
)
|
|
|
|
except json.JSONDecodeError:
|
|
# Fallback to parsing text output
|
|
lines = stdout.strip().split('\n')
|
|
for line in lines[1:]: # Skip header
|
|
if line.strip():
|
|
parts = line.split()
|
|
if len(parts) >= 3:
|
|
name = parts[0]
|
|
|
|
# Only include our expected services
|
|
if name not in self.expected_services:
|
|
continue
|
|
|
|
state = parts[2].lower()
|
|
|
|
if "up" in state:
|
|
status = ServiceStatus.RUNNING
|
|
elif "exit" in state:
|
|
status = ServiceStatus.STOPPED
|
|
else:
|
|
status = ServiceStatus.UNKNOWN
|
|
|
|
services[name] = ServiceInfo(name=name, status=status)
|
|
|
|
# Add expected services that weren't found
|
|
for expected in self.expected_services:
|
|
if expected not in services:
|
|
services[expected] = ServiceInfo(name=expected, status=ServiceStatus.MISSING)
|
|
|
|
self.services_cache = services
|
|
self.last_status_update = current_time
|
|
|
|
return services
|
|
|
|
async def get_images_digests(self, images: List[str]) -> Dict[str, str]:
|
|
"""Return a map of image -> digest/ID (sha256:...)."""
|
|
digests: Dict[str, str] = {}
|
|
for image in images:
|
|
if not image or image in digests:
|
|
continue
|
|
success, stdout, _ = await self._run_runtime_command([
|
|
"image", "inspect", image, "--format", "{{.Id}}"
|
|
])
|
|
if success and stdout.strip():
|
|
digests[image] = stdout.strip().splitlines()[0]
|
|
return digests
|
|
|
|
def _parse_compose_images(self) -> list[str]:
|
|
"""Best-effort parse of image names from compose files without YAML dependency."""
|
|
images: set[str] = set()
|
|
for compose in [self.compose_file, self.cpu_compose_file]:
|
|
try:
|
|
if not compose.exists():
|
|
continue
|
|
for line in compose.read_text().splitlines():
|
|
line = line.strip()
|
|
if not line or line.startswith('#'):
|
|
continue
|
|
if line.startswith('image:'):
|
|
# image: repo/name:tag
|
|
val = line.split(':', 1)[1].strip()
|
|
# Remove quotes if present
|
|
if (val.startswith('"') and val.endswith('"')) or (val.startswith("'") and val.endswith("'")):
|
|
val = val[1:-1]
|
|
images.add(val)
|
|
except Exception:
|
|
continue
|
|
return list(images)
|
|
|
|
async def get_project_images_info(self) -> list[tuple[str, str]]:
|
|
"""
|
|
Return list of (image, digest_or_id) for images referenced by compose files.
|
|
If an image isn't present locally, returns '-' for its digest.
|
|
"""
|
|
expected = self._parse_compose_images()
|
|
results: list[tuple[str, str]] = []
|
|
for image in expected:
|
|
digest = '-'
|
|
success, stdout, _ = await self._run_runtime_command([
|
|
'image', 'inspect', image, '--format', '{{.Id}}'
|
|
])
|
|
if success and stdout.strip():
|
|
digest = stdout.strip().splitlines()[0]
|
|
results.append((image, digest))
|
|
results.sort(key=lambda x: x[0])
|
|
return results
|
|
|
|
async def start_services(self, cpu_mode: bool = False) -> AsyncIterator[tuple[bool, str]]:
|
|
"""Start all services and yield progress updates."""
|
|
yield False, "Starting OpenRAG services..."
|
|
|
|
success, stdout, stderr = await self._run_compose_command(["up", "-d"], cpu_mode)
|
|
|
|
if success:
|
|
yield True, "Services started successfully"
|
|
else:
|
|
yield False, f"Failed to start services: {stderr}"
|
|
|
|
async def stop_services(self) -> AsyncIterator[tuple[bool, str]]:
|
|
"""Stop all services and yield progress updates."""
|
|
yield False, "Stopping OpenRAG services..."
|
|
|
|
success, stdout, stderr = await self._run_compose_command(["down"])
|
|
|
|
if success:
|
|
yield True, "Services stopped successfully"
|
|
else:
|
|
yield False, f"Failed to stop services: {stderr}"
|
|
|
|
async def restart_services(self, cpu_mode: bool = False) -> AsyncIterator[tuple[bool, str]]:
|
|
"""Restart all services and yield progress updates."""
|
|
yield False, "Restarting OpenRAG services..."
|
|
|
|
success, stdout, stderr = await self._run_compose_command(["restart"], cpu_mode)
|
|
|
|
if success:
|
|
yield True, "Services restarted successfully"
|
|
else:
|
|
yield False, f"Failed to restart services: {stderr}"
|
|
|
|
async def upgrade_services(self, cpu_mode: bool = False) -> AsyncIterator[tuple[bool, str]]:
|
|
"""Upgrade services (pull latest images and restart) and yield progress updates."""
|
|
yield False, "Pulling latest images..."
|
|
|
|
# Pull latest images
|
|
success, stdout, stderr = await self._run_compose_command(["pull"], cpu_mode)
|
|
|
|
if not success:
|
|
yield False, f"Failed to pull images: {stderr}"
|
|
return
|
|
|
|
yield False, "Images updated, restarting services..."
|
|
|
|
# Restart with new images
|
|
success, stdout, stderr = await self._run_compose_command(["up", "-d", "--force-recreate"], cpu_mode)
|
|
|
|
if success:
|
|
yield True, "Services upgraded and restarted successfully"
|
|
else:
|
|
yield False, f"Failed to restart services after upgrade: {stderr}"
|
|
|
|
async def reset_services(self) -> AsyncIterator[tuple[bool, str]]:
|
|
"""Reset all services (stop, remove containers/volumes, clear data) and yield progress updates."""
|
|
yield False, "Stopping all services..."
|
|
|
|
# Stop and remove everything
|
|
success, stdout, stderr = await self._run_compose_command([
|
|
"down",
|
|
"--volumes",
|
|
"--remove-orphans",
|
|
"--rmi", "local"
|
|
])
|
|
|
|
if not success:
|
|
yield False, f"Failed to stop services: {stderr}"
|
|
return
|
|
|
|
yield False, "Cleaning up container data..."
|
|
|
|
# Additional cleanup - remove any remaining containers/volumes
|
|
# This is more thorough than just compose down
|
|
await self._run_runtime_command(["system", "prune", "-f"])
|
|
|
|
yield True, "System reset completed - all containers, volumes, and local images removed"
|
|
|
|
async def get_service_logs(self, service_name: str, lines: int = 100) -> tuple[bool, str]:
|
|
"""Get logs for a specific service."""
|
|
success, stdout, stderr = await self._run_compose_command(["logs", "--tail", str(lines), service_name])
|
|
|
|
if success:
|
|
return True, stdout
|
|
else:
|
|
return False, f"Failed to get logs: {stderr}"
|
|
|
|
async def follow_service_logs(self, service_name: str) -> AsyncIterator[str]:
|
|
"""Follow logs for a specific service."""
|
|
if not self.is_available():
|
|
yield "No container runtime available"
|
|
return
|
|
|
|
compose_file = self.cpu_compose_file if self.use_cpu_compose else self.compose_file
|
|
cmd = self.runtime_info.compose_command + ["-f", str(compose_file), "logs", "-f", service_name]
|
|
|
|
try:
|
|
process = await asyncio.create_subprocess_exec(
|
|
*cmd,
|
|
stdout=asyncio.subprocess.PIPE,
|
|
stderr=asyncio.subprocess.STDOUT,
|
|
cwd=Path.cwd()
|
|
)
|
|
|
|
while True:
|
|
line = await process.stdout.readline()
|
|
if line:
|
|
yield line.decode().rstrip()
|
|
else:
|
|
break
|
|
|
|
except Exception as e:
|
|
yield f"Error following logs: {e}"
|
|
|
|
async def get_system_stats(self) -> Dict[str, Dict[str, str]]:
|
|
"""Get system resource usage statistics."""
|
|
stats = {}
|
|
|
|
# Get container stats
|
|
success, stdout, stderr = await self._run_runtime_command(["stats", "--no-stream", "--format", "json"])
|
|
|
|
if success and stdout.strip():
|
|
try:
|
|
for line in stdout.strip().split('\n'):
|
|
if line.strip():
|
|
data = json.loads(line)
|
|
name = data.get("Name", data.get("Container", ""))
|
|
if name:
|
|
stats[name] = {
|
|
"cpu": data.get("CPUPerc", "0%"),
|
|
"memory": data.get("MemUsage", "0B / 0B"),
|
|
"memory_percent": data.get("MemPerc", "0%"),
|
|
"network": data.get("NetIO", "0B / 0B"),
|
|
"disk": data.get("BlockIO", "0B / 0B"),
|
|
}
|
|
except json.JSONDecodeError:
|
|
pass
|
|
|
|
return stats
|
|
|
|
def check_podman_macos_memory(self) -> tuple[bool, str]:
|
|
"""Check if Podman VM has sufficient memory on macOS."""
|
|
if self.runtime_info.runtime_type != RuntimeType.PODMAN:
|
|
return True, "Not using Podman"
|
|
|
|
return self.platform_detector.check_podman_macos_memory()[:2] # Return is_sufficient, message
|