Merge pull request #511 from langflow-ai/fix/docker-compose
fix: change docker compose gpu to be an override
commit 59b88a7020
8 changed files with 113 additions and 65 deletions
.github/workflows/test-integration.yml (vendored): 1 addition
@@ -36,6 +36,7 @@ jobs:
        run: |
          docker system prune -af || true
          docker builder prune -af || true
          docker-compose -f docker-compose.yml down -v --remove-orphans || true
      - run: df -h
docker-compose.gpu.yml (new file): 7 additions
@@ -0,0 +1,7 @@
+services:
+  openrag-backend:
+    environment:
+      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
+      - NVIDIA_VISIBLE_DEVICES=all
+    gpus: all
+
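The new docker-compose.gpu.yml holds only the GPU-specific settings, so GPU support is layered onto the base compose file as an override rather than baked into it. As a rough illustration (assuming both files sit in the project root, and using docker compose syntax here; the TUI substitutes whichever compose command its runtime detector finds), the two modes correspond to invocations like:

    docker compose -f docker-compose.yml up -d                            # CPU-only base
    docker compose -f docker-compose.yml -f docker-compose.gpu.yml up -d  # GPU override layered on top

Compose merges later -f files over earlier ones, so the override only needs to restate the keys it changes.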
@@ -72,8 +72,6 @@ services:
       - WATSONX_ENDPOINT=${WATSONX_ENDPOINT}
       - WATSONX_PROJECT_ID=${WATSONX_PROJECT_ID}
       - OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT}
-      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
-      - NVIDIA_VISIBLE_DEVICES=all
       - GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID}
       - GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET}
       - MICROSOFT_GRAPH_OAUTH_CLIENT_ID=${MICROSOFT_GRAPH_OAUTH_CLIENT_ID}
@@ -85,7 +83,6 @@ services:
       - ./documents:/app/documents:Z
       - ./keys:/app/keys:Z
       - ./flows:/app/flows:U,z
-    gpus: all
 
   openrag-frontend:
     image: langflowai/openrag-frontend:${OPENRAG_VERSION:-latest}
@@ -127,10 +124,10 @@ services:
      - CONNECTOR_TYPE=system
      - CONNECTOR_TYPE_URL=url
      - OPENRAG-QUERY-FILTER="{}"
      - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
      - FILENAME=None
      - MIMETYPE=None
      - FILESIZE=0
      - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
      - LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT,OPENRAG-QUERY-FILTER,OPENSEARCH_PASSWORD,OWNER,OWNER_NAME,OWNER_EMAIL,CONNECTOR_TYPE,FILENAME,MIMETYPE,FILESIZE
      - LANGFLOW_LOG_LEVEL=DEBUG
      - LANGFLOW_AUTO_LOGIN=${LANGFLOW_AUTO_LOGIN}
@@ -1 +0,0 @@
-../../../docker-compose-cpu.yml

src/tui/_assets/docker-compose.gpu.yml (new symbolic link): 1 addition

@@ -0,0 +1 @@
+../../../docker-compose.gpu.yml
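The TUI's bundled assets change to match: the symlink that pointed at docker-compose-cpu.yml is removed, and a new one points at the GPU override. Recreating the new link by hand from the repository root would look roughly like:

    ln -s ../../../docker-compose.gpu.yml src/tui/_assets/docker-compose.gpu.yml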
@@ -485,7 +485,7 @@ def copy_compose_files(*, force: bool = False) -> None:
         logger.debug(f"Could not access compose assets: {e}")
         return
 
-    for filename in ("docker-compose.yml", "docker-compose-cpu.yml"):
+    for filename in ("docker-compose.yml", "docker-compose.gpu.yml"):
         destination = Path(filename)
         if destination.exists() and not force:
             continue
@@ -57,15 +57,15 @@ class ContainerManager:
         self.platform_detector = PlatformDetector()
         self.runtime_info = self.platform_detector.detect_runtime()
         self.compose_file = compose_file or self._find_compose_file("docker-compose.yml")
-        self.cpu_compose_file = self._find_compose_file("docker-compose-cpu.yml")
+        self.gpu_compose_file = self._find_compose_file("docker-compose.gpu.yml")
         self.services_cache: Dict[str, ServiceInfo] = {}
         self.last_status_update = 0
-        # Auto-select CPU compose if no GPU available
+        # Auto-select GPU override if GPU is available
         try:
             has_gpu, _ = detect_gpu_devices()
-            self.use_cpu_compose = not has_gpu
+            self.use_gpu_compose = has_gpu
         except Exception:
-            self.use_cpu_compose = True
+            self.use_gpu_compose = False
 
         # Expected services based on compose files
         self.expected_services = [
@@ -234,9 +234,15 @@ class ContainerManager:
             return False, "", "No container runtime available"
 
         if cpu_mode is None:
-            cpu_mode = self.use_cpu_compose
-        compose_file = self.cpu_compose_file if cpu_mode else self.compose_file
-        cmd = self.runtime_info.compose_command + ["-f", str(compose_file)] + args
+            use_gpu = self.use_gpu_compose
+        else:
+            use_gpu = not cpu_mode
+
+        # Build compose command with override pattern
+        cmd = self.runtime_info.compose_command + ["-f", str(self.compose_file)]
+        if use_gpu and self.gpu_compose_file.exists():
+            cmd.extend(["-f", str(self.gpu_compose_file)])
+        cmd.extend(args)
 
         try:
             process = await asyncio.create_subprocess_exec(
@@ -270,9 +276,15 @@
             return
 
         if cpu_mode is None:
-            cpu_mode = self.use_cpu_compose
-        compose_file = self.cpu_compose_file if cpu_mode else self.compose_file
-        cmd = self.runtime_info.compose_command + ["-f", str(compose_file)] + args
+            use_gpu = self.use_gpu_compose
+        else:
+            use_gpu = not cpu_mode
+
+        # Build compose command with override pattern
+        cmd = self.runtime_info.compose_command + ["-f", str(self.compose_file)]
+        if use_gpu and self.gpu_compose_file.exists():
+            cmd.extend(["-f", str(self.gpu_compose_file)])
+        cmd.extend(args)
 
         try:
             process = await asyncio.create_subprocess_exec(
@@ -333,9 +345,15 @@
             return
 
         if cpu_mode is None:
-            cpu_mode = self.use_cpu_compose
-        compose_file = self.cpu_compose_file if cpu_mode else self.compose_file
-        cmd = self.runtime_info.compose_command + ["-f", str(compose_file)] + args
+            use_gpu = self.use_gpu_compose
+        else:
+            use_gpu = not cpu_mode
+
+        # Build compose command with override pattern
+        cmd = self.runtime_info.compose_command + ["-f", str(self.compose_file)]
+        if use_gpu and self.gpu_compose_file.exists():
+            cmd.extend(["-f", str(self.gpu_compose_file)])
+        cmd.extend(args)
 
         try:
             process = await asyncio.create_subprocess_exec(
@@ -642,44 +660,61 @@
         """Get resolved image names from compose files using docker/podman compose, with robust fallbacks."""
         images: set[str] = set()
 
-        compose_files = [self.compose_file, self.cpu_compose_file]
-        for compose_file in compose_files:
+        # Try both GPU and CPU modes to get all images
+        for use_gpu in [True, False]:
             try:
-                if not compose_file or not compose_file.exists():
-                    continue
+                # Build compose command with override pattern
+                cmd = self.runtime_info.compose_command + ["-f", str(self.compose_file)]
+                if use_gpu and self.gpu_compose_file.exists():
+                    cmd.extend(["-f", str(self.gpu_compose_file)])
+                cmd.extend(["config", "--format", "json"])
 
-                cpu_mode = (compose_file == self.cpu_compose_file)
-
-                # Try JSON format first
-                success, stdout, _ = await self._run_compose_command(
-                    ["config", "--format", "json"],
-                    cpu_mode=cpu_mode
+                process = await asyncio.create_subprocess_exec(
+                    *cmd,
+                    stdout=asyncio.subprocess.PIPE,
+                    stderr=asyncio.subprocess.PIPE,
+                    cwd=Path.cwd(),
                 )
+                stdout, stderr = await process.communicate()
+                stdout_text = stdout.decode() if stdout else ""
 
-                if success and stdout.strip():
-                    from_cfg = self._extract_images_from_compose_config(stdout, tried_json=True)
+                if process.returncode == 0 and stdout_text.strip():
+                    from_cfg = self._extract_images_from_compose_config(stdout_text, tried_json=True)
                     if from_cfg:
                         images.update(from_cfg)
-                        continue  # this compose file succeeded; move to next file
+                        continue
 
                 # Fallback to YAML output (for older compose versions)
-                success, stdout, _ = await self._run_compose_command(
-                    ["config"],
-                    cpu_mode=cpu_mode
-                )
+                cmd = self.runtime_info.compose_command + ["-f", str(self.compose_file)]
+                if use_gpu and self.gpu_compose_file.exists():
+                    cmd.extend(["-f", str(self.gpu_compose_file)])
+                cmd.append("config")
 
-                if success and stdout.strip():
-                    from_cfg = self._extract_images_from_compose_config(stdout, tried_json=False)
+                process = await asyncio.create_subprocess_exec(
+                    *cmd,
+                    stdout=asyncio.subprocess.PIPE,
+                    stderr=asyncio.subprocess.PIPE,
+                    cwd=Path.cwd(),
+                )
+                stdout, stderr = await process.communicate()
+                stdout_text = stdout.decode() if stdout else ""
+
+                if process.returncode == 0 and stdout_text.strip():
+                    from_cfg = self._extract_images_from_compose_config(stdout_text, tried_json=False)
                     if from_cfg:
                         images.update(from_cfg)
                         continue
 
             except Exception:
-                # Keep behavior resilient—just continue to next file
+                # Keep behavior resilient—just continue to next mode
                 continue
 
         # Fallback: manual parsing if compose config didn't work
         if not images:
+            compose_files = [self.compose_file]
+            if self.gpu_compose_file.exists():
+                compose_files.append(self.gpu_compose_file)
+
             for compose in compose_files:
                 try:
                     if not compose.exists():
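For image discovery, the rewritten loop runs compose's own config resolver once per mode (GPU pass, then CPU pass) instead of once per compose file. For the GPU pass it effectively executes something like the following (docker compose here standing in for whatever docker/podman compose command was detected):

    docker compose -f docker-compose.yml -f docker-compose.gpu.yml config --format json

It falls back to plain config output for older compose versions that do not support --format json, and finally to manual file parsing if neither form yields any images.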
@@ -729,8 +764,11 @@
             yield False, "No container runtime available"
             return
 
-        # Diagnostic info about compose files
-        compose_file = self.cpu_compose_file if (cpu_mode if cpu_mode is not None else self.use_cpu_compose) else self.compose_file
+        # Determine GPU mode
+        if cpu_mode is None:
+            use_gpu = self.use_gpu_compose
+        else:
+            use_gpu = not cpu_mode
 
         # Show the search process for debugging
         if hasattr(self, '_compose_search_log'):
@@ -741,9 +779,12 @@
         # Show runtime detection info
         runtime_cmd_str = " ".join(self.runtime_info.compose_command)
         yield False, f"Using compose command: {runtime_cmd_str}", False
-        yield False, f"Final compose file: {compose_file.absolute()}", False
-        if not compose_file.exists():
-            yield False, f"ERROR: Compose file not found at {compose_file.absolute()}", False
+        compose_files_str = str(self.compose_file.absolute())
+        if use_gpu and self.gpu_compose_file.exists():
+            compose_files_str += f" + {self.gpu_compose_file.absolute()}"
+        yield False, f"Compose files: {compose_files_str}", False
+        if not self.compose_file.exists():
+            yield False, f"ERROR: Base compose file not found at {self.compose_file.absolute()}", False
             return
 
         # Check for port conflicts before starting
@@ -912,16 +953,11 @@
             yield "No container runtime available"
             return
 
-        compose_file = (
-            self.cpu_compose_file if self.use_cpu_compose else self.compose_file
-        )
-        cmd = self.runtime_info.compose_command + [
-            "-f",
-            str(compose_file),
-            "logs",
-            "-f",
-            service_name,
-        ]
+        # Build compose command with override pattern
+        cmd = self.runtime_info.compose_command + ["-f", str(self.compose_file)]
+        if self.use_gpu_compose and self.gpu_compose_file.exists():
+            cmd.extend(["-f", str(self.gpu_compose_file)])
+        cmd.extend(["logs", "-f", service_name])
 
         try:
             process = await asyncio.create_subprocess_exec(
@@ -33,13 +33,14 @@ class MonitorScreen(Screen):
         ("u", "upgrade", "Upgrade"),
         ("x", "reset", "Reset"),
         ("l", "logs", "View Logs"),
+        ("g", "toggle_mode", "Toggle GPU/CPU"),
         ("j", "cursor_down", "Move Down"),
         ("k", "cursor_up", "Move Up"),
     ]
 
     def __init__(self):
         super().__init__()
-        self.container_manager = ContainerManager()
+        self._container_manager = None  # Use app's shared instance
         self.docling_manager = DoclingManager()
         self.services_table = None
         self.docling_table = None
@@ -52,6 +53,13 @@
         # Track which table was last selected for mutual exclusion
         self._last_selected_table = None
 
+    @property
+    def container_manager(self) -> ContainerManager:
+        """Get the shared container manager from the app."""
+        if self._container_manager is None:
+            self._container_manager = self.app.container_manager
+        return self._container_manager
+
     def on_unmount(self) -> None:
         """Clean up when the screen is unmounted."""
         if hasattr(self, 'docling_manager'):
@@ -69,10 +77,10 @@
 
     def _create_services_tab(self) -> ComposeResult:
         """Create the services monitoring tab."""
-        # Current mode indicator + toggle
+        # GPU/CPU mode section
+        yield Static("GPU Mode", id="mode-indicator", classes="tab-header")
         yield Horizontal(
-            Static("", id="mode-indicator"),
-            Button("Toggle Mode", id="toggle-mode-btn"),
+            Button("Switch to CPU Mode", id="toggle-mode-btn"),
             classes="button-row",
             id="mode-row",
         )
@@ -623,22 +631,21 @@
     def _update_mode_row(self) -> None:
         """Update the mode indicator and toggle button label."""
         try:
-            use_cpu = getattr(self.container_manager, "use_cpu_compose", True)
+            use_gpu = getattr(self.container_manager, "use_gpu_compose", False)
             indicator = self.query_one("#mode-indicator", Static)
-            mode_text = "Mode: CPU (no GPU detected)" if use_cpu else "Mode: GPU"
-            indicator.update(mode_text)
+            indicator.update("GPU Mode" if use_gpu else "CPU Mode")
             toggle_btn = self.query_one("#toggle-mode-btn", Button)
-            toggle_btn.label = "Switch to GPU Mode" if use_cpu else "Switch to CPU Mode"
+            toggle_btn.label = "Switch to CPU Mode" if use_gpu else "Switch to GPU Mode"
         except Exception:
             pass
 
     def action_toggle_mode(self) -> None:
         """Toggle between CPU/GPU compose files and refresh view."""
         try:
-            current = getattr(self.container_manager, "use_cpu_compose", True)
-            self.container_manager.use_cpu_compose = not current
+            current = getattr(self.container_manager, "use_gpu_compose", False)
+            self.container_manager.use_gpu_compose = not current
             self.notify(
-                "Switched to GPU compose" if not current else "Switched to CPU compose",
+                "Switched to GPU mode" if not current else "Switched to CPU mode",
                 severity="information",
             )
             self._update_mode_row()