tui: docling pid and host detection

This commit is contained in:
phact 2025-10-06 14:46:51 -04:00
parent 14c3a8f3d1
commit c450ecc50a
3 changed files with 298 additions and 77 deletions

2
.gitignore vendored
View file

@ -21,3 +21,5 @@ wheels/
.DS_Store
config/
.docling.pid

View file

@ -31,10 +31,14 @@ class DoclingManager:
self._process: Optional[subprocess.Popen] = None
self._port = 5001
self._host = "127.0.0.1"
self._host = self._get_host_for_containers() # Get appropriate host IP based on runtime
self._running = False
self._external_process = False
# PID file to track docling-serve across sessions (in current working directory)
from pathlib import Path
self._pid_file = Path.cwd() / ".docling.pid"
# Log storage - simplified, no queue
self._log_buffer: List[str] = []
self._max_log_lines = 1000
@ -42,22 +46,198 @@ class DoclingManager:
self._initialized = True
def cleanup(self):
"""Cleanup resources and stop any running processes."""
if self._process and self._process.poll() is None:
self._add_log_entry("Cleaning up docling-serve process on exit")
try:
self._process.terminate()
self._process.wait(timeout=5)
except subprocess.TimeoutExpired:
self._process.kill()
self._process.wait()
except Exception as e:
self._add_log_entry(f"Error during cleanup: {e}")
# Try to recover existing process from PID file
self._recover_from_pid_file()
self._running = False
self._process = None
def _get_host_for_containers(self) -> str:
"""
Return a host IP that containers can reach (a bridge/CNI gateway).
Prefers Docker/Podman network gateways; falls back to bridge interfaces.
"""
import subprocess, json, shutil, re, logging
logger = logging.getLogger(__name__)
def run(cmd, timeout=2, text=True):
return subprocess.run(cmd, capture_output=True, text=text, timeout=timeout)
gateways = []
compose_gateways = [] # Highest priority - compose project networks
active_gateways = [] # Medium priority - networks with containers
# ---- Docker: enumerate networks and collect gateways
if shutil.which("docker"):
try:
ls = run(["docker", "network", "ls", "--format", "{{.Name}}"])
if ls.returncode == 0:
for name in filter(None, ls.stdout.splitlines()):
try:
insp = run(["docker", "network", "inspect", name, "--format", "{{json .}}"])
if insp.returncode == 0 and insp.stdout.strip():
nw = json.loads(insp.stdout)[0] if insp.stdout.strip().startswith("[") else json.loads(insp.stdout)
ipam = nw.get("IPAM", {})
containers = nw.get("Containers", {})
for cfg in ipam.get("Config", []) or []:
gw = cfg.get("Gateway")
if gw:
# Highest priority: compose networks (ending in _default)
if name.endswith("_default"):
compose_gateways.append(gw)
# Medium priority: networks with active containers
elif len(containers) > 0:
active_gateways.append(gw)
# Low priority: empty networks
else:
gateways.append(gw)
except Exception:
pass
except Exception:
pass
# ---- Podman: enumerate networks and collect gateways (netavark)
if shutil.which("podman"):
try:
# modern podman supports JSON format
ls = run(["podman", "network", "ls", "--format", "json"])
if ls.returncode == 0 and ls.stdout.strip():
for net in json.loads(ls.stdout):
name = net.get("name") or net.get("Name")
if not name:
continue
try:
insp = run(["podman", "network", "inspect", name, "--format", "json"])
if insp.returncode == 0 and insp.stdout.strip():
arr = json.loads(insp.stdout)
for item in (arr if isinstance(arr, list) else [arr]):
for sn in item.get("subnets", []) or []:
gw = sn.get("gateway")
if gw:
# Prioritize compose/project networks
if name.endswith("_default") or "_" in name:
compose_gateways.append(gw)
else:
gateways.append(gw)
except Exception:
pass
except Exception:
pass
# ---- Fallback: parse host interfaces for common bridges
if not gateways:
try:
if shutil.which("ip"):
show = run(["ip", "-o", "-4", "addr", "show"])
if show.returncode == 0:
for line in show.stdout.splitlines():
# e.g. "12: br-3f0f... inet 172.18.0.1/16 ..."
m = re.search(r"^\d+:\s+([a-zA-Z0-9_.:-]+)\s+.*\binet\s+(\d+\.\d+\.\d+\.\d+)/", line)
if not m:
continue
ifname, ip = m.group(1), m.group(2)
if ifname == "docker0" or ifname.startswith(("br-", "cni")):
gateways.append(ip)
else:
# As a last resort, try net-tools ifconfig output
if shutil.which("ifconfig"):
show = run(["ifconfig"])
for block in show.stdout.split("\n\n"):
if any(block.strip().startswith(n) for n in ("docker0", "cni", "br-")):
m = re.search(r"inet (?:addr:)?(\d+\.\d+\.\d+\.\d+)", block)
if m:
gateways.append(m.group(1))
except Exception:
pass
# Dedup, prioritizing: 1) compose networks, 2) active networks, 3) all others
seen, uniq = set(), []
# First: compose project networks (_default suffix)
for ip in compose_gateways:
if ip not in seen:
uniq.append(ip)
seen.add(ip)
# Second: networks with active containers
for ip in active_gateways:
if ip not in seen:
uniq.append(ip)
seen.add(ip)
# Third: all other gateways
for ip in gateways:
if ip not in seen:
uniq.append(ip)
seen.add(ip)
if uniq:
if len(uniq) > 1:
logger.info("Container-reachable host IP candidates: %s", ", ".join(uniq))
else:
logger.info("Container-reachable host IP: %s", uniq[0])
return uniq[0]
# Nothing found: warn clearly
logger.warning(
"No container bridge IP found. If using rootless Podman (slirp4netns), there is no host bridge; publish ports or use 10.0.2.2 from the container."
)
# Returning localhost is honest only for same-namespace; keep it explicit:
return "127.0.0.1"
def cleanup(self):
"""Cleanup resources but keep docling-serve running across sessions."""
# Don't stop the process on exit - let it persist
# Just clean up our references
self._add_log_entry("TUI exiting - docling-serve will continue running")
# Note: We keep the PID file so we can reconnect in future sessions
def _save_pid(self, pid: int) -> None:
"""Save the process PID to a file for persistence across sessions."""
try:
self._pid_file.write_text(str(pid))
self._add_log_entry(f"Saved PID {pid} to {self._pid_file}")
except Exception as e:
self._add_log_entry(f"Failed to save PID file: {e}")
def _load_pid(self) -> Optional[int]:
"""Load the process PID from file."""
try:
if self._pid_file.exists():
pid_str = self._pid_file.read_text().strip()
if pid_str.isdigit():
return int(pid_str)
except Exception as e:
self._add_log_entry(f"Failed to load PID file: {e}")
return None
def _clear_pid_file(self) -> None:
"""Remove the PID file."""
try:
if self._pid_file.exists():
self._pid_file.unlink()
self._add_log_entry("Cleared PID file")
except Exception as e:
self._add_log_entry(f"Failed to clear PID file: {e}")
def _is_process_running(self, pid: int) -> bool:
"""Check if a process with the given PID is running."""
try:
# Send signal 0 to check if process exists (doesn't actually send a signal)
os.kill(pid, 0)
return True
except OSError:
return False
def _recover_from_pid_file(self) -> None:
"""Try to recover connection to existing docling-serve process from PID file."""
pid = self._load_pid()
if pid is not None:
if self._is_process_running(pid):
self._add_log_entry(f"Recovered existing docling-serve process (PID: {pid})")
# Mark as external process since we didn't start it in this session
self._external_process = True
self._running = True
# Note: We don't have a Popen object for this process, but that's OK
# We'll detect it's running via port check
else:
self._add_log_entry(f"Stale PID file found (PID: {pid} not running)")
self._clear_pid_file()
def _add_log_entry(self, message: str) -> None:
"""Add a log entry to the buffer (thread-safe)."""
timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
@ -70,43 +250,35 @@ class DoclingManager:
self._log_buffer = self._log_buffer[-self._max_log_lines:]
def is_running(self) -> bool:
"""Check if docling serve is running."""
# First check our internal state
internal_running = self._running and self._process is not None and self._process.poll() is None
# If we think it's not running, check if something is listening on the port
# This handles cases where docling-serve was started outside the TUI
if not internal_running:
try:
import socket
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.settimeout(0.5)
result = s.connect_ex((self._host, self._port))
s.close()
# If port is in use, something is running there
if result == 0:
# Only log this once when we first detect external process
if not self._external_process:
self._add_log_entry(f"Detected external docling-serve running on {self._host}:{self._port}")
# Set a flag to indicate this is an external process
self._external_process = True
return True
except Exception as e:
# Only log errors occasionally to avoid spam
if not hasattr(self, '_last_port_error') or self._last_port_error != str(e):
self._add_log_entry(f"Error checking port: {e}")
self._last_port_error = str(e)
else:
# If we started it, it's not external
"""Check if docling serve is running (by PID only)."""
# Check if we have a direct process handle
if self._process is not None and self._process.poll() is None:
self._running = True
self._external_process = False
return True
return internal_running
# Check if we have a PID from file
pid = self._load_pid()
if pid is not None and self._is_process_running(pid):
self._running = True
self._external_process = True
return True
# No running process found
self._running = False
self._external_process = False
return False
def get_status(self) -> Dict[str, Any]:
"""Get current status of docling serve."""
if self.is_running():
pid = self._process.pid if self._process else None
# Try to get PID from process handle first, then from PID file
pid = None
if self._process:
pid = self._process.pid
else:
pid = self._load_pid()
return {
"status": "running",
"port": self._port,
@ -127,13 +299,28 @@ class DoclingManager:
"pid": None
}
async def start(self, port: int = 5001, host: str = "127.0.0.1", enable_ui: bool = False) -> Tuple[bool, str]:
async def start(self, port: int = 5001, host: Optional[str] = None, enable_ui: bool = False) -> Tuple[bool, str]:
"""Start docling serve as external process."""
if self.is_running():
return False, "Docling serve is already running"
self._port = port
self._host = host
# Use provided host or the bridge IP we detected in __init__
if host is not None:
self._host = host
# else: keep self._host as already set in __init__
# Check if port is already in use before trying to start
import socket
try:
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.settimeout(0.5)
result = s.connect_ex((self._host, self._port))
s.close()
if result == 0:
return False, f"Port {self._port} on {self._host} is already in use by another process. Please stop it first."
except Exception as e:
self._add_log_entry(f"Error checking port availability: {e}")
# Clear log buffer when starting
self._log_buffer = []
@ -146,14 +333,14 @@ class DoclingManager:
if shutil.which("uv") and (os.path.exists("pyproject.toml") or os.getenv("VIRTUAL_ENV")):
cmd = [
"uv", "run", "python", "-m", "docling_serve", "run",
"--host", host,
"--port", str(port),
"--host", self._host,
"--port", str(self._port),
]
else:
cmd = [
sys.executable, "-m", "docling_serve", "run",
"--host", host,
"--port", str(port),
"--host", self._host,
"--port", str(self._port),
]
if enable_ui:
@ -173,6 +360,9 @@ class DoclingManager:
self._running = True
self._add_log_entry("External process started")
# Save the PID to file for persistence
self._save_pid(self._process.pid)
# Start a thread to capture output
self._start_output_capture()
@ -192,11 +382,11 @@ class DoclingManager:
import socket
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.settimeout(0.5)
result = s.connect_ex((host, port))
result = s.connect_ex((self._host, self._port))
s.close()
if result == 0:
self._add_log_entry(f"Docling-serve is now listening on {host}:{port}")
self._add_log_entry(f"Docling-serve is now listening on {self._host}:{self._port}")
break
except:
pass
@ -298,9 +488,12 @@ class DoclingManager:
try:
self._add_log_entry("Stopping docling-serve process")
pid_to_stop = None
if self._process:
# We started this process, so we can stop it directly
self._add_log_entry(f"Terminating our process (PID: {self._process.pid})")
# We have a direct process handle
pid_to_stop = self._process.pid
self._add_log_entry(f"Terminating our process (PID: {pid_to_stop})")
self._process.terminate()
# Wait for it to stop
@ -315,16 +508,32 @@ class DoclingManager:
self._add_log_entry("Process force killed")
elif self._external_process:
# This is an external process, we can't stop it directly
self._add_log_entry("Cannot stop external docling-serve process - it was started outside the TUI")
self._running = False
self._external_process = False
return False, "Cannot stop external docling-serve process. Please stop it manually."
# This is a process we recovered from PID file
pid_to_stop = self._load_pid()
if pid_to_stop and self._is_process_running(pid_to_stop):
self._add_log_entry(f"Stopping process from PID file (PID: {pid_to_stop})")
try:
os.kill(pid_to_stop, 15) # SIGTERM
# Wait a bit for graceful shutdown
await asyncio.sleep(2)
if self._is_process_running(pid_to_stop):
# Still running, force kill
self._add_log_entry(f"Force killing process (PID: {pid_to_stop})")
os.kill(pid_to_stop, 9) # SIGKILL
except Exception as e:
self._add_log_entry(f"Error stopping external process: {e}")
return False, f"Error stopping external process: {str(e)}"
else:
self._add_log_entry("External process not found")
return False, "Process not found"
self._running = False
self._process = None
self._external_process = False
# Clear the PID file since we intentionally stopped the service
self._clear_pid_file()
self._add_log_entry("Docling serve stopped successfully")
return True, "Docling serve stopped successfully"

View file

@ -336,8 +336,31 @@ class WelcomeScreen(Screen):
self.call_after_refresh(self._focus_appropriate_button)
async def _start_all_services(self) -> None:
"""Start all services: native first, then containers."""
# Step 1: Start native services (docling-serve)
"""Start all services: containers first, then native services."""
# Step 1: Start container services first (to create the network)
if self.container_manager.is_available():
command_generator = self.container_manager.start_services()
modal = CommandOutputModal(
"Starting Container Services",
command_generator,
on_complete=self._on_containers_started_start_native,
)
self.app.push_screen(modal)
else:
self.notify("No container runtime available", severity="warning")
# Still try to start native services
await self._start_native_services_after_containers()
async def _on_containers_started_start_native(self) -> None:
"""Called after containers start successfully, now start native services."""
# Update container state
self._detect_services_sync()
# Now start native services (docling-serve can now detect the compose network)
await self._start_native_services_after_containers()
async def _start_native_services_after_containers(self) -> None:
"""Start native services after containers have been started."""
if not self.docling_manager.is_running():
self.notify("Starting native services...", severity="information")
success, message = await self.docling_manager.start()
@ -345,25 +368,12 @@ class WelcomeScreen(Screen):
self.notify(message, severity="information")
else:
self.notify(f"Failed to start native services: {message}", severity="error")
# Continue anyway - user might want containers even if native fails
else:
self.notify("Native services already running", severity="information")
# Update state
self.docling_running = self.docling_manager.is_running()
# Step 2: Start container services
if self.container_manager.is_available():
command_generator = self.container_manager.start_services()
modal = CommandOutputModal(
"Starting Container Services",
command_generator,
on_complete=self._on_services_operation_complete,
)
self.app.push_screen(modal)
else:
self.notify("No container runtime available", severity="warning")
await self._refresh_welcome_content()
await self._refresh_welcome_content()
async def _stop_all_services(self) -> None:
"""Stop all services: containers first, then native."""