From 3a6a05d0437a88084a58fe476c65930eabd923fc Mon Sep 17 00:00:00 2001
From: Lucas Oliveira <62335616+lucaseduoli@users.noreply.github.com>
Date: Fri, 14 Nov 2025 17:25:22 -0300
Subject: [PATCH 1/3] Fix: reduce docling and provider banner refresh intervals, implement Starting state on docling TUI (#404)

* Fixed refetch interval to be 3 seconds when Docling is unhealthy; fixed query to refetch on window focus
* Changed time to refetch provider health
* Added starting state to Docling on the TUI
---
 .../app/api/queries/useDoclingHealthQuery.ts  |  9 +++-
 .../app/api/queries/useProviderHealthQuery.ts |  7 +++
 src/tui/managers/docling_manager.py           | 29 ++++++++++++
 src/tui/screens/monitor.py                    | 44 ++++++++++++++++---
 4 files changed, 80 insertions(+), 9 deletions(-)

diff --git a/frontend/src/app/api/queries/useDoclingHealthQuery.ts b/frontend/src/app/api/queries/useDoclingHealthQuery.ts
index 88c0a39b..01441f4b 100644
--- a/frontend/src/app/api/queries/useDoclingHealthQuery.ts
+++ b/frontend/src/app/api/queries/useDoclingHealthQuery.ts
@@ -56,8 +56,13 @@ export const useDoclingHealthQuery = (
       queryKey: ["docling-health"],
       queryFn: checkDoclingHealth,
       retry: 1,
-      refetchInterval: 30000, // Check every 30 seconds
-      staleTime: 25000, // Consider data stale after 25 seconds
+      refetchInterval: (query) => {
+        // If healthy, check every 30 seconds; otherwise check every 3 seconds
+        return query.state.data?.status === "healthy" ? 30000 : 3000;
+      },
+      refetchOnWindowFocus: true,
+      refetchOnMount: true,
+      staleTime: 30000, // Consider data stale after 30 seconds
       ...options,
     },
     queryClient,
diff --git a/frontend/src/app/api/queries/useProviderHealthQuery.ts b/frontend/src/app/api/queries/useProviderHealthQuery.ts
index d4038cfc..82ca2db2 100644
--- a/frontend/src/app/api/queries/useProviderHealthQuery.ts
+++ b/frontend/src/app/api/queries/useProviderHealthQuery.ts
@@ -92,6 +92,13 @@ export const useProviderHealthQuery = (
       queryKey: ["provider", "health"],
       queryFn: checkProviderHealth,
       retry: false, // Don't retry health checks automatically
+      refetchInterval: (query) => {
+        // If healthy, check every 30 seconds; otherwise check every 3 seconds
+        return query.state.data?.status === "healthy" ? 30000 : 3000;
+      },
+      refetchOnWindowFocus: true,
+      refetchOnMount: true,
+      staleTime: 30000, // Consider data stale after 30 seconds
       enabled: !!settings?.edited && options?.enabled !== false, // Only run after onboarding is complete
       ...options,
     },
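
The adaptive cadence both hooks now share is easiest to see outside of TanStack Query. A minimal sketch of the same policy in Python (the function and constant names here are illustrative, not part of the patch):

    import time
    from typing import Any, Callable, Dict

    HEALTHY_INTERVAL = 30.0    # seconds between checks once the service reports healthy
    UNHEALTHY_INTERVAL = 3.0   # seconds between checks while it is down or starting

    def next_poll_delay(last_result: Dict[str, Any]) -> float:
        """Mirror the refetchInterval callback: back off when healthy, poll fast otherwise."""
        return HEALTHY_INTERVAL if last_result.get("status") == "healthy" else UNHEALTHY_INTERVAL

    def poll_health(check: Callable[[], Dict[str, Any]]) -> None:
        """Poll indefinitely, adjusting the delay after every check."""
        while True:
            result = check()
            time.sleep(next_poll_delay(result))

The 3-second unhealthy interval keeps the banner responsive while docling-serve boots; once healthy, the 30-second interval avoids needless requests.
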
diff --git a/src/tui/managers/docling_manager.py b/src/tui/managers/docling_manager.py
index e58a5b1e..109cb7c1 100644
--- a/src/tui/managers/docling_manager.py
+++ b/src/tui/managers/docling_manager.py
@@ -34,6 +34,7 @@ class DoclingManager:
         # Bind to all interfaces by default (can be overridden with DOCLING_BIND_HOST env var)
         self._host = os.getenv('DOCLING_BIND_HOST', '0.0.0.0')
         self._running = False
+        self._starting = False
         self._external_process = False
 
         # PID file to track docling-serve across sessions (in current working directory)
@@ -126,6 +127,7 @@ class DoclingManager:
         if self._process is not None and self._process.poll() is None:
             self._running = True
             self._external_process = False
+            self._starting = False  # Clear starting flag if service is running
             return True
 
         # Check if we have a PID from file
@@ -133,6 +135,7 @@
         if pid is not None and self._is_process_running(pid):
             self._running = True
             self._external_process = True
+            self._starting = False  # Clear starting flag if service is running
             return True
 
         # No running process found
@@ -142,6 +145,19 @@
 
     def get_status(self) -> Dict[str, Any]:
         """Get current status of docling serve."""
+        # Check for starting state first
+        if self._starting:
+            display_host = "localhost" if self._host == "0.0.0.0" else self._host
+            return {
+                "status": "starting",
+                "port": self._port,
+                "host": self._host,
+                "endpoint": None,
+                "docs_url": None,
+                "ui_url": None,
+                "pid": None
+            }
+
         if self.is_running():
             # Try to get PID from process handle first, then from PID file
             pid = None
@@ -196,6 +212,9 @@
         except Exception as e:
             self._add_log_entry(f"Error checking port availability: {e}")
 
+        # Set starting flag to show "Starting" status in UI
+        self._starting = True
+
         # Clear log buffer when starting
         self._log_buffer = []
         self._add_log_entry("Starting docling serve as external process...")
@@ -261,6 +280,8 @@
 
                 if result == 0:
                     self._add_log_entry(f"Docling-serve is now listening on {self._host}:{self._port}")
+                    # Service is now running, clear starting flag
+                    self._starting = False
                     break
             except:
                 pass
@@ -294,16 +315,24 @@
                     self._add_log_entry(f"Error reading final output: {e}")
 
                 self._running = False
+                self._starting = False
                 return False, f"Docling serve process exited immediately (code: {return_code})"
 
+            # If we get here and the process is still running but not listening yet,
+            # clear the starting flag anyway (it's running, just not ready)
+            if self._process.poll() is None:
+                self._starting = False
+
             display_host = "localhost" if self._host == "0.0.0.0" else self._host
             return True, f"Docling serve starting on http://{display_host}:{port}"
 
         except FileNotFoundError:
+            self._starting = False
            return False, "docling-serve not available. Please install: uv add docling-serve"
         except Exception as e:
             self._running = False
             self._process = None
+            self._starting = False
             return False, f"Error starting docling serve: {str(e)}"
 
     def _start_output_capture(self):
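
The start-up loop above decides that docling-serve has left the "starting" state when a plain TCP connect to the bound host and port succeeds (the `result == 0` check). A self-contained sketch of that readiness probe, assuming only the standard library:

    import socket

    def is_port_listening(host: str, port: int, timeout: float = 0.5) -> bool:
        """Return True once something accepts TCP connections on host:port."""
        try:
            with socket.create_connection((host, port), timeout=timeout):
                return True
        except OSError:
            return False

In lifecycle terms, `_starting` models the middle leg of stopped -> starting -> running: `start()` sets it, and it is cleared when the port answers, when `is_running()` later finds a live process, or on any failure path.
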
diff --git a/src/tui/screens/monitor.py b/src/tui/screens/monitor.py
index 91df51f6..01c243c6 100644
--- a/src/tui/screens/monitor.py
+++ b/src/tui/screens/monitor.py
@@ -206,10 +206,21 @@ class MonitorScreen(Screen):
 
         # Add docling serve to its own table
         docling_status = self.docling_manager.get_status()
-        docling_running = docling_status["status"] == "running"
-        docling_status_text = "running" if docling_running else "stopped"
-        docling_style = "bold green" if docling_running else "bold red"
-        docling_port = f"{docling_status['host']}:{docling_status['port']}" if docling_running else "N/A"
+        docling_status_value = docling_status["status"]
+        docling_running = docling_status_value == "running"
+        docling_starting = docling_status_value == "starting"
+
+        if docling_running:
+            docling_status_text = "running"
+            docling_style = "bold green"
+        elif docling_starting:
+            docling_status_text = "starting"
+            docling_style = "bold yellow"
+        else:
+            docling_status_text = "stopped"
+            docling_style = "bold red"
+
+        docling_port = f"{docling_status['host']}:{docling_status['port']}" if (docling_running or docling_starting) else "N/A"
         docling_pid = str(docling_status.get("pid")) if docling_status.get("pid") else "N/A"
 
         if self.docling_table:
@@ -375,15 +386,25 @@
         """Start docling serve."""
         self.operation_in_progress = True
         try:
-            success, message = await self.docling_manager.start()
+            # Start the service (this sets _starting = True internally at the start)
+            # Create task and let it begin executing (which sets the flag)
+            start_task = asyncio.create_task(self.docling_manager.start())
+            # Give it a tiny moment to set the _starting flag
+            await asyncio.sleep(0.1)
+            # Refresh immediately to show "Starting" status
+            await self._refresh_services()
+            # Now wait for start to complete
+            success, message = await start_task
             if success:
                 self.notify(message, severity="information")
             else:
                 self.notify(f"Failed to start docling serve: {message}", severity="error")
-            # Refresh the services table to show updated status
+            # Refresh again to show final status (running or stopped)
             await self._refresh_services()
         except Exception as e:
             self.notify(f"Error starting docling serve: {str(e)}", severity="error")
+            # Refresh on error to clear starting status
+            await self._refresh_services()
         finally:
             self.operation_in_progress = False
 
@@ -646,7 +667,11 @@
             suffix = f"-{random.randint(10000, 99999)}"
 
         # Add docling serve controls
-        docling_running = self.docling_manager.is_running()
+        docling_status = self.docling_manager.get_status()
+        docling_status_value = docling_status["status"]
+        docling_running = docling_status_value == "running"
+        docling_starting = docling_status_value == "starting"
+
         if docling_running:
             docling_controls.mount(
                 Button("Stop", variant="error", id=f"docling-stop-btn{suffix}")
@@ -654,6 +679,11 @@
             docling_controls.mount(
                 Button("Restart", variant="primary", id=f"docling-restart-btn{suffix}")
             )
+        elif docling_starting:
+            # Show disabled button or no button when starting
+            start_btn = Button("Starting...", variant="warning", id=f"docling-start-btn{suffix}")
+            start_btn.disabled = True
+            docling_controls.mount(start_btn)
         else:
             docling_controls.mount(
                 Button("Start", variant="success", id=f"docling-start-btn{suffix}")
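
On the screen side, the start handler leans on task scheduling to surface the new state: the start coroutine is wrapped in a task, a short sleep yields control so the task can run far enough to set `_starting`, and only then is the table refreshed. The shape of that pattern, reduced to a runnable sketch (the parameter names are placeholders):

    import asyncio

    async def start_and_show_progress(start, refresh):
        """Run start() in the background, refresh the UI once it flags itself as starting."""
        task = asyncio.create_task(start())  # begins executing at the next await
        await asyncio.sleep(0.1)             # give start() a moment to set its flag
        await refresh()                      # UI can now render the "starting" row
        return await task                    # finally wait for (success, message)

The 0.1-second sleep is a heuristic handoff rather than a guarantee; an asyncio.Event set by the manager would make the handshake deterministic, at the cost of a slightly wider interface.
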
From 4b9d7599fce16a91a48cb7feda9b89d10f1b2f09 Mon Sep 17 00:00:00 2001
From: Lucas Oliveira <62335616+lucaseduoli@users.noreply.github.com>
Date: Fri, 14 Nov 2025 17:42:47 -0300
Subject: [PATCH 2/3] fix: add Thinking state to agent response, fix ollama and watsonx overwriting values during onboarding (#405)

* Added thinking message to assistant message
* Fixed IBM and Ollama overwriting values
---
 .../app/chat/components/assistant-message.tsx |  6 ++++--
 frontend/src/app/globals.css                  | 18 ++++++++++++++++++
 .../onboarding/components/ibm-onboarding.tsx  |  2 +-
 .../components/ollama-onboarding.tsx          |  2 +-
 4 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/frontend/src/app/chat/components/assistant-message.tsx b/frontend/src/app/chat/components/assistant-message.tsx
index 9b109813..0f24dd8c 100644
--- a/frontend/src/app/chat/components/assistant-message.tsx
+++ b/frontend/src/app/chat/components/assistant-message.tsx
@@ -83,8 +83,10 @@ export function AssistantMessage({
         )}
         chatMessage={
           isStreaming
-            ? content +
-              ' '
+            ? (content.trim()
+              ? content +
+                ' '
+              : 'Thinking')
             : content
         }
       />
diff --git a/frontend/src/app/globals.css b/frontend/src/app/globals.css
index 7ffab80e..4765ef8c 100644
--- a/frontend/src/app/globals.css
+++ b/frontend/src/app/globals.css
@@ -365,4 +365,22 @@
     width: 100%;
     height: 30px;
   }
+
+  .thinking-dots::after {
+    content: ".";
+    animation: thinking-dots 1.4s steps(3, end) infinite;
+  }
+
+  @keyframes thinking-dots {
+    0% {
+      content: ".";
+    }
+    33.33% {
+      content: "..";
+    }
+    66.66%,
+    100% {
+      content: "...";
+    }
+  }
 }
diff --git a/frontend/src/app/onboarding/components/ibm-onboarding.tsx b/frontend/src/app/onboarding/components/ibm-onboarding.tsx
index d3540977..3d480248 100644
--- a/frontend/src/app/onboarding/components/ibm-onboarding.tsx
+++ b/frontend/src/app/onboarding/components/ibm-onboarding.tsx
@@ -26,7 +26,7 @@ export function IBMOnboarding({
   setIsLoadingModels?: (isLoading: boolean) => void;
   alreadyConfigured?: boolean;
 }) {
-  const [endpoint, setEndpoint] = useState("https://us-south.ml.cloud.ibm.com");
+  const [endpoint, setEndpoint] = useState(alreadyConfigured ? "" : "https://us-south.ml.cloud.ibm.com");
   const [apiKey, setApiKey] = useState("");
   const [projectId, setProjectId] = useState("");
 
diff --git a/frontend/src/app/onboarding/components/ollama-onboarding.tsx b/frontend/src/app/onboarding/components/ollama-onboarding.tsx
index e9d2fa1b..99c26d2a 100644
--- a/frontend/src/app/onboarding/components/ollama-onboarding.tsx
+++ b/frontend/src/app/onboarding/components/ollama-onboarding.tsx
@@ -25,7 +25,7 @@ export function OllamaOnboarding({
   isEmbedding?: boolean;
   alreadyConfigured?: boolean;
 }) {
-  const [endpoint, setEndpoint] = useState(`http://localhost:11434`);
+  const [endpoint, setEndpoint] = useState(alreadyConfigured ? undefined : `http://localhost:11434`);
   const [showConnecting, setShowConnecting] = useState(false);
   const debouncedEndpoint = useDebouncedValue(endpoint, 500);
 
From c295431484005b385ff611d9e481bd4514bc4f96 Mon Sep 17 00:00:00 2001
From: Lucas Oliveira <62335616+lucaseduoli@users.noreply.github.com>
Date: Fri, 14 Nov 2025 18:09:47 -0300
Subject: [PATCH 3/3] fix: refactor models validation to fix bugs related to ollama, watsonx and openai (#406)

* Fixed models service to try the API key with the first available model
* Fixed IBM onboarding to not disable the query when no data is available
* Made the IBM query disabled when not configured
* Enabled the Ollama query only when configured or an endpoint is present
* Enabled the get-OpenAI-models query when already configured
* Only enabled get-from-env when not configured
* Simplified Ollama models validation
* Fixed max_tokens error on gpt-4o
---
 .../onboarding/components/ibm-onboarding.tsx  | 10 +--
 .../components/ollama-onboarding.tsx          |  1 +
 .../components/openai-onboarding.tsx          |  4 +-
 src/api/provider_validation.py                | 16 +++-
 src/services/models_service.py                | 80 +++++++++----------
 5 files changed, 60 insertions(+), 51 deletions(-)

diff --git a/frontend/src/app/onboarding/components/ibm-onboarding.tsx b/frontend/src/app/onboarding/components/ibm-onboarding.tsx
index 3d480248..3bb830b6 100644
--- a/frontend/src/app/onboarding/components/ibm-onboarding.tsx
+++ b/frontend/src/app/onboarding/components/ibm-onboarding.tsx
@@ -73,13 +73,11 @@ export function IBMOnboarding({
     error: modelsError,
   } = useGetIBMModelsQuery(
     {
-      endpoint: debouncedEndpoint,
-      apiKey: debouncedApiKey,
-      projectId: debouncedProjectId,
-    },
-    {
-      enabled: !!debouncedEndpoint && !!debouncedApiKey && !!debouncedProjectId,
+      endpoint: debouncedEndpoint ? debouncedEndpoint : undefined,
+      apiKey: debouncedApiKey ? debouncedApiKey : undefined,
+      projectId: debouncedProjectId ? debouncedProjectId : undefined,
     },
+    { enabled: !!debouncedEndpoint || !!debouncedApiKey || !!debouncedProjectId || alreadyConfigured },
   );
 
   // Use custom hook for model selection logic
diff --git a/frontend/src/app/onboarding/components/ollama-onboarding.tsx b/frontend/src/app/onboarding/components/ollama-onboarding.tsx
index 99c26d2a..e85366ba 100644
--- a/frontend/src/app/onboarding/components/ollama-onboarding.tsx
+++ b/frontend/src/app/onboarding/components/ollama-onboarding.tsx
@@ -36,6 +36,7 @@
     error: modelsError,
   } = useGetOllamaModelsQuery(
     debouncedEndpoint ? { endpoint: debouncedEndpoint } : undefined,
+    { enabled: !!debouncedEndpoint || alreadyConfigured },
   );
 
   // Use custom hook for model selection logic
diff --git a/frontend/src/app/onboarding/components/openai-onboarding.tsx b/frontend/src/app/onboarding/components/openai-onboarding.tsx
index d4fc73a4..47c427a9 100644
--- a/frontend/src/app/onboarding/components/openai-onboarding.tsx
+++ b/frontend/src/app/onboarding/components/openai-onboarding.tsx
@@ -34,7 +34,7 @@ export function OpenAIOnboarding({
   alreadyConfigured?: boolean;
 }) {
   const [apiKey, setApiKey] = useState("");
-  const [getFromEnv, setGetFromEnv] = useState(hasEnvApiKey);
+  const [getFromEnv, setGetFromEnv] = useState(hasEnvApiKey && !alreadyConfigured);
   const debouncedApiKey = useDebouncedValue(apiKey, 500);
 
   // Fetch models from API when API key is provided
@@ -48,7 +48,7 @@
       : debouncedApiKey
         ? { apiKey: debouncedApiKey }
         : undefined,
-    { enabled: debouncedApiKey !== "" || getFromEnv },
+    { enabled: debouncedApiKey !== "" || getFromEnv || alreadyConfigured },
   );
   // Use custom hook for model selection logic
   const {
diff --git a/src/api/provider_validation.py b/src/api/provider_validation.py
index e51cc3bc..2fcc1e65 100644
--- a/src/api/provider_validation.py
+++ b/src/api/provider_validation.py
@@ -112,7 +112,7 @@ async def _test_openai_completion_with_tools(api_key: str, llm_model: str) -> No
     }
 
     # Simple tool calling test
-    payload = {
+    base_payload = {
         "model": llm_model,
         "messages": [
             {"role": "user", "content": "What tools do you have available?"}
@@ -136,10 +136,11 @@
                 }
             }
         ],
-        "max_tokens": 50,
     }
 
     async with httpx.AsyncClient() as client:
+        # Try with max_tokens first
+        payload = {**base_payload, "max_tokens": 50}
         response = await client.post(
             "https://api.openai.com/v1/chat/completions",
             headers=headers,
@@ -147,6 +148,17 @@
             timeout=30.0,
         )
 
+        # If max_tokens doesn't work, try with max_completion_tokens
+        if response.status_code != 200:
+            logger.info("max_tokens parameter failed, trying max_completion_tokens instead")
+            payload = {**base_payload, "max_completion_tokens": 50}
+            response = await client.post(
+                "https://api.openai.com/v1/chat/completions",
+                headers=headers,
+                json=payload,
+                timeout=30.0,
+            )
+
         if response.status_code != 200:
             logger.error(f"OpenAI completion test failed: {response.status_code} - {response.text}")
             raise Exception(f"OpenAI API error: {response.status_code}")
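
The retry above works because the two parameters are competing spellings of the same limit: older chat-completions models accept max_tokens, while newer ones reject it in favor of max_completion_tokens. A condensed sketch of the fallback, assuming an httpx client and a prepared payload (the helper name is illustrative):

    import httpx

    async def post_with_token_fallback(
        client: httpx.AsyncClient, headers: dict, base_payload: dict
    ) -> httpx.Response:
        """Try max_tokens first; on a non-200, retry once with max_completion_tokens."""
        response = await client.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json={**base_payload, "max_tokens": 50},
            timeout=30.0,
        )
        if response.status_code != 200:
            response = await client.post(
                "https://api.openai.com/v1/chat/completions",
                headers=headers,
                json={**base_payload, "max_completion_tokens": 50},
                timeout=30.0,
            )
        return response

One caveat the patch shares with this sketch: any non-200 (for example a bad API key) triggers the second request, not just the parameter error, so a genuinely failing key costs two round trips.
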
"input": "test", - "model_id": "ibm/granite-3-2b-instruct", - "project_id": project_id, - "parameters": { - "max_new_tokens": 1, - }, - } - - validation_response = await client.post( - validation_url, - headers=headers, - params=validation_params, - json=validation_payload, - timeout=10.0, - ) - - if validation_response.status_code != 200: - raise Exception( - f"Invalid credentials or endpoint: {validation_response.status_code} - {validation_response.text}" - ) - - logger.info("IBM Watson credentials validated successfully") - # Fetch foundation models using the correct endpoint models_url = f"{watson_endpoint}/ml/v1/foundation_model_specs" @@ -424,6 +389,39 @@ class ModelsService: } ) + # Validate credentials with the first available LLM model + if language_models: + first_llm_model = language_models[0]["value"] + + async with httpx.AsyncClient() as client: + validation_url = f"{watson_endpoint}/ml/v1/text/generation" + validation_params = {"version": "2024-09-16"} + validation_payload = { + "input": "test", + "model_id": first_llm_model, + "project_id": project_id, + "parameters": { + "max_new_tokens": 1, + }, + } + + validation_response = await client.post( + validation_url, + headers=headers, + params=validation_params, + json=validation_payload, + timeout=10.0, + ) + + if validation_response.status_code != 200: + raise Exception( + f"Invalid credentials or endpoint: {validation_response.status_code} - {validation_response.text}" + ) + + logger.info(f"IBM Watson credentials validated successfully using model: {first_llm_model}") + else: + logger.warning("No language models available to validate credentials") + if not language_models and not embedding_models: raise Exception("No IBM models retrieved from API")