diff --git a/.github/workflows/test-integration.yml b/.github/workflows/test-integration.yml index 92c9a976..a70dd24d 100644 --- a/.github/workflows/test-integration.yml +++ b/.github/workflows/test-integration.yml @@ -36,6 +36,7 @@ jobs: run: | docker system prune -af || true docker builder prune -af || true + docker-compose -f docker-compose.yml down -v --remove-orphans || true - run: df -h diff --git a/docker-compose.gpu.yml b/docker-compose.gpu.yml new file mode 100644 index 00000000..4496c4ac --- /dev/null +++ b/docker-compose.gpu.yml @@ -0,0 +1,7 @@ +services: + openrag-backend: + environment: + - NVIDIA_DRIVER_CAPABILITIES=compute,utility + - NVIDIA_VISIBLE_DEVICES=all + gpus: all + diff --git a/docker-compose.yml b/docker-compose.yml index fbba580f..a74d3c12 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -72,8 +72,6 @@ services: - WATSONX_ENDPOINT=${WATSONX_ENDPOINT} - WATSONX_PROJECT_ID=${WATSONX_PROJECT_ID} - OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT} - - NVIDIA_DRIVER_CAPABILITIES=compute,utility - - NVIDIA_VISIBLE_DEVICES=all - GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID} - GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET} - MICROSOFT_GRAPH_OAUTH_CLIENT_ID=${MICROSOFT_GRAPH_OAUTH_CLIENT_ID} @@ -85,7 +83,6 @@ services: - ./documents:/app/documents:Z - ./keys:/app/keys:Z - ./flows:/app/flows:U,z - gpus: all openrag-frontend: image: langflowai/openrag-frontend:${OPENRAG_VERSION:-latest} @@ -127,10 +124,10 @@ services: - CONNECTOR_TYPE=system - CONNECTOR_TYPE_URL=url - OPENRAG-QUERY-FILTER="{}" + - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD} - FILENAME=None - MIMETYPE=None - FILESIZE=0 - - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD} - LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT,OPENRAG-QUERY-FILTER,OPENSEARCH_PASSWORD,OWNER,OWNER_NAME,OWNER_EMAIL,CONNECTOR_TYPE,FILENAME,MIMETYPE,FILESIZE - LANGFLOW_LOG_LEVEL=DEBUG - LANGFLOW_AUTO_LOGIN=${LANGFLOW_AUTO_LOGIN} diff --git a/src/tui/_assets/docker-compose-cpu.yml b/src/tui/_assets/docker-compose-cpu.yml deleted file mode 120000 index 5ad7a663..00000000 --- a/src/tui/_assets/docker-compose-cpu.yml +++ /dev/null @@ -1 +0,0 @@ -../../../docker-compose-cpu.yml \ No newline at end of file diff --git a/src/tui/_assets/docker-compose.gpu.yml b/src/tui/_assets/docker-compose.gpu.yml new file mode 120000 index 00000000..bfebbedd --- /dev/null +++ b/src/tui/_assets/docker-compose.gpu.yml @@ -0,0 +1 @@ +../../../docker-compose.gpu.yml \ No newline at end of file diff --git a/src/tui/main.py b/src/tui/main.py index 19468473..d27db184 100644 --- a/src/tui/main.py +++ b/src/tui/main.py @@ -485,7 +485,7 @@ def copy_compose_files(*, force: bool = False) -> None: logger.debug(f"Could not access compose assets: {e}") return - for filename in ("docker-compose.yml", "docker-compose-cpu.yml"): + for filename in ("docker-compose.yml", "docker-compose.gpu.yml"): destination = Path(filename) if destination.exists() and not force: continue diff --git a/src/tui/managers/container_manager.py b/src/tui/managers/container_manager.py index 7e75f4a0..2da92322 100644 --- a/src/tui/managers/container_manager.py +++ b/src/tui/managers/container_manager.py @@ -57,15 +57,15 @@ class ContainerManager: self.platform_detector = PlatformDetector() self.runtime_info = self.platform_detector.detect_runtime() self.compose_file = compose_file or self._find_compose_file("docker-compose.yml") - self.cpu_compose_file = self._find_compose_file("docker-compose-cpu.yml") + self.gpu_compose_file = self._find_compose_file("docker-compose.gpu.yml") self.services_cache: Dict[str, ServiceInfo] = {} self.last_status_update = 0 - # Auto-select CPU compose if no GPU available + # Auto-select GPU override if GPU is available try: has_gpu, _ = detect_gpu_devices() - self.use_cpu_compose = not has_gpu + self.use_gpu_compose = has_gpu except Exception: - self.use_cpu_compose = True + self.use_gpu_compose = False # Expected services based on compose files self.expected_services = [ @@ -234,9 +234,15 @@ class ContainerManager: return False, "", "No container runtime available" if cpu_mode is None: - cpu_mode = self.use_cpu_compose - compose_file = self.cpu_compose_file if cpu_mode else self.compose_file - cmd = self.runtime_info.compose_command + ["-f", str(compose_file)] + args + use_gpu = self.use_gpu_compose + else: + use_gpu = not cpu_mode + + # Build compose command with override pattern + cmd = self.runtime_info.compose_command + ["-f", str(self.compose_file)] + if use_gpu and self.gpu_compose_file.exists(): + cmd.extend(["-f", str(self.gpu_compose_file)]) + cmd.extend(args) try: process = await asyncio.create_subprocess_exec( @@ -270,9 +276,15 @@ class ContainerManager: return if cpu_mode is None: - cpu_mode = self.use_cpu_compose - compose_file = self.cpu_compose_file if cpu_mode else self.compose_file - cmd = self.runtime_info.compose_command + ["-f", str(compose_file)] + args + use_gpu = self.use_gpu_compose + else: + use_gpu = not cpu_mode + + # Build compose command with override pattern + cmd = self.runtime_info.compose_command + ["-f", str(self.compose_file)] + if use_gpu and self.gpu_compose_file.exists(): + cmd.extend(["-f", str(self.gpu_compose_file)]) + cmd.extend(args) try: process = await asyncio.create_subprocess_exec( @@ -333,9 +345,15 @@ class ContainerManager: return if cpu_mode is None: - cpu_mode = self.use_cpu_compose - compose_file = self.cpu_compose_file if cpu_mode else self.compose_file - cmd = self.runtime_info.compose_command + ["-f", str(compose_file)] + args + use_gpu = self.use_gpu_compose + else: + use_gpu = not cpu_mode + + # Build compose command with override pattern + cmd = self.runtime_info.compose_command + ["-f", str(self.compose_file)] + if use_gpu and self.gpu_compose_file.exists(): + cmd.extend(["-f", str(self.gpu_compose_file)]) + cmd.extend(args) try: process = await asyncio.create_subprocess_exec( @@ -642,44 +660,61 @@ class ContainerManager: """Get resolved image names from compose files using docker/podman compose, with robust fallbacks.""" images: set[str] = set() - compose_files = [self.compose_file, self.cpu_compose_file] - for compose_file in compose_files: + # Try both GPU and CPU modes to get all images + for use_gpu in [True, False]: try: - if not compose_file or not compose_file.exists(): - continue + # Build compose command with override pattern + cmd = self.runtime_info.compose_command + ["-f", str(self.compose_file)] + if use_gpu and self.gpu_compose_file.exists(): + cmd.extend(["-f", str(self.gpu_compose_file)]) + cmd.extend(["config", "--format", "json"]) - cpu_mode = (compose_file == self.cpu_compose_file) - - # Try JSON format first - success, stdout, _ = await self._run_compose_command( - ["config", "--format", "json"], - cpu_mode=cpu_mode + process = await asyncio.create_subprocess_exec( + *cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + cwd=Path.cwd(), ) + stdout, stderr = await process.communicate() + stdout_text = stdout.decode() if stdout else "" - if success and stdout.strip(): - from_cfg = self._extract_images_from_compose_config(stdout, tried_json=True) + if process.returncode == 0 and stdout_text.strip(): + from_cfg = self._extract_images_from_compose_config(stdout_text, tried_json=True) if from_cfg: images.update(from_cfg) - continue # this compose file succeeded; move to next file + continue # Fallback to YAML output (for older compose versions) - success, stdout, _ = await self._run_compose_command( - ["config"], - cpu_mode=cpu_mode - ) + cmd = self.runtime_info.compose_command + ["-f", str(self.compose_file)] + if use_gpu and self.gpu_compose_file.exists(): + cmd.extend(["-f", str(self.gpu_compose_file)]) + cmd.append("config") - if success and stdout.strip(): - from_cfg = self._extract_images_from_compose_config(stdout, tried_json=False) + process = await asyncio.create_subprocess_exec( + *cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + cwd=Path.cwd(), + ) + stdout, stderr = await process.communicate() + stdout_text = stdout.decode() if stdout else "" + + if process.returncode == 0 and stdout_text.strip(): + from_cfg = self._extract_images_from_compose_config(stdout_text, tried_json=False) if from_cfg: images.update(from_cfg) continue except Exception: - # Keep behavior resilient—just continue to next file + # Keep behavior resilient—just continue to next mode continue # Fallback: manual parsing if compose config didn't work if not images: + compose_files = [self.compose_file] + if self.gpu_compose_file.exists(): + compose_files.append(self.gpu_compose_file) + for compose in compose_files: try: if not compose.exists(): @@ -729,8 +764,11 @@ class ContainerManager: yield False, "No container runtime available" return - # Diagnostic info about compose files - compose_file = self.cpu_compose_file if (cpu_mode if cpu_mode is not None else self.use_cpu_compose) else self.compose_file + # Determine GPU mode + if cpu_mode is None: + use_gpu = self.use_gpu_compose + else: + use_gpu = not cpu_mode # Show the search process for debugging if hasattr(self, '_compose_search_log'): @@ -741,9 +779,12 @@ class ContainerManager: # Show runtime detection info runtime_cmd_str = " ".join(self.runtime_info.compose_command) yield False, f"Using compose command: {runtime_cmd_str}", False - yield False, f"Final compose file: {compose_file.absolute()}", False - if not compose_file.exists(): - yield False, f"ERROR: Compose file not found at {compose_file.absolute()}", False + compose_files_str = str(self.compose_file.absolute()) + if use_gpu and self.gpu_compose_file.exists(): + compose_files_str += f" + {self.gpu_compose_file.absolute()}" + yield False, f"Compose files: {compose_files_str}", False + if not self.compose_file.exists(): + yield False, f"ERROR: Base compose file not found at {self.compose_file.absolute()}", False return # Check for port conflicts before starting @@ -912,16 +953,11 @@ class ContainerManager: yield "No container runtime available" return - compose_file = ( - self.cpu_compose_file if self.use_cpu_compose else self.compose_file - ) - cmd = self.runtime_info.compose_command + [ - "-f", - str(compose_file), - "logs", - "-f", - service_name, - ] + # Build compose command with override pattern + cmd = self.runtime_info.compose_command + ["-f", str(self.compose_file)] + if self.use_gpu_compose and self.gpu_compose_file.exists(): + cmd.extend(["-f", str(self.gpu_compose_file)]) + cmd.extend(["logs", "-f", service_name]) try: process = await asyncio.create_subprocess_exec( diff --git a/src/tui/screens/monitor.py b/src/tui/screens/monitor.py index d72ba619..cc6ea18c 100644 --- a/src/tui/screens/monitor.py +++ b/src/tui/screens/monitor.py @@ -33,13 +33,14 @@ class MonitorScreen(Screen): ("u", "upgrade", "Upgrade"), ("x", "reset", "Reset"), ("l", "logs", "View Logs"), + ("g", "toggle_mode", "Toggle GPU/CPU"), ("j", "cursor_down", "Move Down"), ("k", "cursor_up", "Move Up"), ] def __init__(self): super().__init__() - self.container_manager = ContainerManager() + self._container_manager = None # Use app's shared instance self.docling_manager = DoclingManager() self.services_table = None self.docling_table = None @@ -52,6 +53,13 @@ class MonitorScreen(Screen): # Track which table was last selected for mutual exclusion self._last_selected_table = None + @property + def container_manager(self) -> ContainerManager: + """Get the shared container manager from the app.""" + if self._container_manager is None: + self._container_manager = self.app.container_manager + return self._container_manager + def on_unmount(self) -> None: """Clean up when the screen is unmounted.""" if hasattr(self, 'docling_manager'): @@ -69,10 +77,10 @@ class MonitorScreen(Screen): def _create_services_tab(self) -> ComposeResult: """Create the services monitoring tab.""" - # Current mode indicator + toggle + # GPU/CPU mode section + yield Static("GPU Mode", id="mode-indicator", classes="tab-header") yield Horizontal( - Static("", id="mode-indicator"), - Button("Toggle Mode", id="toggle-mode-btn"), + Button("Switch to CPU Mode", id="toggle-mode-btn"), classes="button-row", id="mode-row", ) @@ -623,22 +631,21 @@ class MonitorScreen(Screen): def _update_mode_row(self) -> None: """Update the mode indicator and toggle button label.""" try: - use_cpu = getattr(self.container_manager, "use_cpu_compose", True) + use_gpu = getattr(self.container_manager, "use_gpu_compose", False) indicator = self.query_one("#mode-indicator", Static) - mode_text = "Mode: CPU (no GPU detected)" if use_cpu else "Mode: GPU" - indicator.update(mode_text) + indicator.update("GPU Mode" if use_gpu else "CPU Mode") toggle_btn = self.query_one("#toggle-mode-btn", Button) - toggle_btn.label = "Switch to GPU Mode" if use_cpu else "Switch to CPU Mode" + toggle_btn.label = "Switch to CPU Mode" if use_gpu else "Switch to GPU Mode" except Exception: pass def action_toggle_mode(self) -> None: """Toggle between CPU/GPU compose files and refresh view.""" try: - current = getattr(self.container_manager, "use_cpu_compose", True) - self.container_manager.use_cpu_compose = not current + current = getattr(self.container_manager, "use_gpu_compose", False) + self.container_manager.use_gpu_compose = not current self.notify( - "Switched to GPU compose" if not current else "Switched to CPU compose", + "Switched to GPU mode" if not current else "Switched to CPU mode", severity="information", ) self._update_mode_row()