From 3a6a05d0437a88084a58fe476c65930eabd923fc Mon Sep 17 00:00:00 2001
From: Lucas Oliveira <62335616+lucaseduoli@users.noreply.github.com>
Date: Fri, 14 Nov 2025 17:25:22 -0300
Subject: [PATCH 1/3] fix: reduce docling and provider banner refresh interval,
 implement Starting state in docling TUI (#404)
* Reduced the refetch interval to 3 seconds while Docling is unhealthy, and made the query refetch on window focus
* Applied the same refetch behavior to the provider health query
* Added a starting state for Docling in the TUI
---
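For reviewers: the adaptive polling now shared by both health hooks boils down to the sketch below. Illustrative only — `checkHealth` and the `HealthStatus` shape are placeholders for the app's real fetchers and types; the intervals mirror the values in the diff.

```ts
import { useQuery } from "@tanstack/react-query";

// Placeholder result shape; the real hooks return richer health objects.
type HealthStatus = { status: "healthy" | "unhealthy" };

// Placeholder fetcher standing in for checkDoclingHealth / checkProviderHealth.
declare function checkHealth(): Promise<HealthStatus>;

export const useHealthQuery = () =>
  useQuery({
    queryKey: ["health"],
    queryFn: checkHealth,
    // Poll slowly while healthy and quickly while degraded, so the
    // banner recovers within ~3 seconds instead of up to 30.
    refetchInterval: (query) =>
      query.state.data?.status === "healthy" ? 30_000 : 3_000,
    refetchOnWindowFocus: true,
    refetchOnMount: true,
    staleTime: 30_000,
  });
```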
.../app/api/queries/useDoclingHealthQuery.ts | 9 +++-
.../app/api/queries/useProviderHealthQuery.ts | 7 +++
 src/tui/managers/docling_manager.py | 28 ++++++++++++
src/tui/screens/monitor.py | 44 ++++++++++++++++---
 4 files changed, 79 insertions(+), 9 deletions(-)
diff --git a/frontend/src/app/api/queries/useDoclingHealthQuery.ts b/frontend/src/app/api/queries/useDoclingHealthQuery.ts
index 88c0a39b..01441f4b 100644
--- a/frontend/src/app/api/queries/useDoclingHealthQuery.ts
+++ b/frontend/src/app/api/queries/useDoclingHealthQuery.ts
@@ -56,8 +56,13 @@ export const useDoclingHealthQuery = (
queryKey: ["docling-health"],
queryFn: checkDoclingHealth,
retry: 1,
- refetchInterval: 30000, // Check every 30 seconds
- staleTime: 25000, // Consider data stale after 25 seconds
+ refetchInterval: (query) => {
+ // If healthy, check every 30 seconds; otherwise check every 3 seconds
+ return query.state.data?.status === "healthy" ? 30000 : 3000;
+ },
+ refetchOnWindowFocus: true,
+ refetchOnMount: true,
+ staleTime: 30000, // Consider data stale after 30 seconds
...options,
},
queryClient,
diff --git a/frontend/src/app/api/queries/useProviderHealthQuery.ts b/frontend/src/app/api/queries/useProviderHealthQuery.ts
index d4038cfc..82ca2db2 100644
--- a/frontend/src/app/api/queries/useProviderHealthQuery.ts
+++ b/frontend/src/app/api/queries/useProviderHealthQuery.ts
@@ -92,6 +92,13 @@ export const useProviderHealthQuery = (
queryKey: ["provider", "health"],
queryFn: checkProviderHealth,
retry: false, // Don't retry health checks automatically
+ refetchInterval: (query) => {
+ // If healthy, check every 30 seconds; otherwise check every 3 seconds
+ return query.state.data?.status === "healthy" ? 30000 : 3000;
+ },
+ refetchOnWindowFocus: true,
+ refetchOnMount: true,
+ staleTime: 30000, // Consider data stale after 30 seconds
enabled: !!settings?.edited && options?.enabled !== false, // Only run after onboarding is complete
...options,
},
diff --git a/src/tui/managers/docling_manager.py b/src/tui/managers/docling_manager.py
index e58a5b1e..109cb7c1 100644
--- a/src/tui/managers/docling_manager.py
+++ b/src/tui/managers/docling_manager.py
@@ -34,6 +34,7 @@ class DoclingManager:
# Bind to all interfaces by default (can be overridden with DOCLING_BIND_HOST env var)
self._host = os.getenv('DOCLING_BIND_HOST', '0.0.0.0')
self._running = False
+ self._starting = False
self._external_process = False
# PID file to track docling-serve across sessions (in current working directory)
@@ -126,6 +127,7 @@ class DoclingManager:
if self._process is not None and self._process.poll() is None:
self._running = True
self._external_process = False
+ self._starting = False # Clear starting flag if service is running
return True
# Check if we have a PID from file
@@ -133,6 +135,7 @@ class DoclingManager:
if pid is not None and self._is_process_running(pid):
self._running = True
self._external_process = True
+ self._starting = False # Clear starting flag if service is running
return True
# No running process found
@@ -142,6 +145,18 @@
def get_status(self) -> Dict[str, Any]:
"""Get current status of docling serve."""
+ # Check for starting state first
+ if self._starting:
+ display_host = "localhost" if self._host == "0.0.0.0" else self._host
+ return {
+ "status": "starting",
+ "port": self._port,
+ "host": self._host,
+ "endpoint": None,
+ "docs_url": None,
+ "ui_url": None,
+ "pid": None
+ }
+
if self.is_running():
# Try to get PID from process handle first, then from PID file
pid = None
@@ -196,6 +212,9 @@ class DoclingManager:
except Exception as e:
self._add_log_entry(f"Error checking port availability: {e}")
+ # Set starting flag to show "Starting" status in UI
+ self._starting = True
+
# Clear log buffer when starting
self._log_buffer = []
self._add_log_entry("Starting docling serve as external process...")
@@ -261,6 +280,8 @@ class DoclingManager:
if result == 0:
self._add_log_entry(f"Docling-serve is now listening on {self._host}:{self._port}")
+ # Service is now running, clear starting flag
+ self._starting = False
break
except:
pass
@@ -294,16 +315,24 @@ class DoclingManager:
self._add_log_entry(f"Error reading final output: {e}")
self._running = False
+ self._starting = False
return False, f"Docling serve process exited immediately (code: {return_code})"
+ # If we get here and the process is still running but not listening yet,
+ # clear the starting flag anyway (it's running, just not ready)
+ if self._process.poll() is None:
+ self._starting = False
+
display_host = "localhost" if self._host == "0.0.0.0" else self._host
return True, f"Docling serve starting on http://{display_host}:{port}"
except FileNotFoundError:
+ self._starting = False
return False, "docling-serve not available. Please install: uv add docling-serve"
except Exception as e:
self._running = False
self._process = None
+ self._starting = False
return False, f"Error starting docling serve: {str(e)}"
def _start_output_capture(self):
diff --git a/src/tui/screens/monitor.py b/src/tui/screens/monitor.py
index 91df51f6..01c243c6 100644
--- a/src/tui/screens/monitor.py
+++ b/src/tui/screens/monitor.py
@@ -206,10 +206,21 @@ class MonitorScreen(Screen):
# Add docling serve to its own table
docling_status = self.docling_manager.get_status()
- docling_running = docling_status["status"] == "running"
- docling_status_text = "running" if docling_running else "stopped"
- docling_style = "bold green" if docling_running else "bold red"
- docling_port = f"{docling_status['host']}:{docling_status['port']}" if docling_running else "N/A"
+ docling_status_value = docling_status["status"]
+ docling_running = docling_status_value == "running"
+ docling_starting = docling_status_value == "starting"
+
+ if docling_running:
+ docling_status_text = "running"
+ docling_style = "bold green"
+ elif docling_starting:
+ docling_status_text = "starting"
+ docling_style = "bold yellow"
+ else:
+ docling_status_text = "stopped"
+ docling_style = "bold red"
+
+ docling_port = f"{docling_status['host']}:{docling_status['port']}" if (docling_running or docling_starting) else "N/A"
docling_pid = str(docling_status.get("pid")) if docling_status.get("pid") else "N/A"
if self.docling_table:
@@ -375,15 +386,25 @@ class MonitorScreen(Screen):
"""Start docling serve."""
self.operation_in_progress = True
try:
- success, message = await self.docling_manager.start()
+ # Start the service (this sets _starting = True internally at the start)
+ # Create task and let it begin executing (which sets the flag)
+ start_task = asyncio.create_task(self.docling_manager.start())
+ # Give it a tiny moment to set the _starting flag
+ await asyncio.sleep(0.1)
+ # Refresh immediately to show "Starting" status
+ await self._refresh_services()
+ # Now wait for start to complete
+ success, message = await start_task
if success:
self.notify(message, severity="information")
else:
self.notify(f"Failed to start docling serve: {message}", severity="error")
- # Refresh the services table to show updated status
+ # Refresh again to show final status (running or stopped)
await self._refresh_services()
except Exception as e:
self.notify(f"Error starting docling serve: {str(e)}", severity="error")
+ # Refresh on error to clear starting status
+ await self._refresh_services()
finally:
self.operation_in_progress = False
@@ -646,7 +667,11 @@ class MonitorScreen(Screen):
suffix = f"-{random.randint(10000, 99999)}"
# Add docling serve controls
- docling_running = self.docling_manager.is_running()
+ docling_status = self.docling_manager.get_status()
+ docling_status_value = docling_status["status"]
+ docling_running = docling_status_value == "running"
+ docling_starting = docling_status_value == "starting"
+
if docling_running:
docling_controls.mount(
Button("Stop", variant="error", id=f"docling-stop-btn{suffix}")
@@ -654,6 +679,11 @@ class MonitorScreen(Screen):
docling_controls.mount(
Button("Restart", variant="primary", id=f"docling-restart-btn{suffix}")
)
+ elif docling_starting:
+ # Show a disabled placeholder button while the service is starting
+ start_btn = Button("Starting...", variant="warning", id=f"docling-start-btn{suffix}")
+ start_btn.disabled = True
+ docling_controls.mount(start_btn)
else:
docling_controls.mount(
Button("Start", variant="success", id=f"docling-start-btn{suffix}")
From 4b9d7599fce16a91a48cb7feda9b89d10f1b2f09 Mon Sep 17 00:00:00 2001
From: Lucas Oliveira <62335616+lucaseduoli@users.noreply.github.com>
Date: Fri, 14 Nov 2025 17:42:47 -0300
Subject: [PATCH 2/3] fix: add Thinking state to agent response, fix Ollama and
 watsonx overwriting values during onboarding (#405)
* Added a Thinking placeholder to the assistant message while streaming
* Fixed IBM and Ollama onboarding overwriting saved values
---
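For reviewers: the assistant-message change shows a placeholder while the response is streaming but no visible text has arrived yet. A minimal sketch of the idea — the component, its props, and where the `.thinking-dots` class is applied are illustrative, since the diff does not show the app's actual renderer wiring:

```tsx
import * as React from "react";

// Minimal sketch, not the app's real component. While streaming with no
// visible text yet, show "Thinking" (animated by the .thinking-dots rule
// added to globals.css); once content arrives, render it as before.
function StreamingText({
  content,
  isStreaming,
}: {
  content: string;
  isStreaming: boolean;
}) {
  if (!isStreaming) return <>{content}</>;
  return content.trim() ? (
    <>{content + " "}</>
  ) : (
    <span className="thinking-dots">Thinking</span>
  );
}
```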
.../app/chat/components/assistant-message.tsx | 6 ++++--
frontend/src/app/globals.css | 18 ++++++++++++++++++
.../onboarding/components/ibm-onboarding.tsx | 2 +-
.../components/ollama-onboarding.tsx | 2 +-
4 files changed, 24 insertions(+), 4 deletions(-)
diff --git a/frontend/src/app/chat/components/assistant-message.tsx b/frontend/src/app/chat/components/assistant-message.tsx
index 9b109813..0f24dd8c 100644
--- a/frontend/src/app/chat/components/assistant-message.tsx
+++ b/frontend/src/app/chat/components/assistant-message.tsx
@@ -83,8 +83,10 @@ export function AssistantMessage({
)}
chatMessage={
isStreaming
- ? content +
- ' '
+ ? (content.trim()
+ ? content +
+ ' '
+ : 'Thinking')
: content
}
/>
diff --git a/frontend/src/app/globals.css b/frontend/src/app/globals.css
index 7ffab80e..4765ef8c 100644
--- a/frontend/src/app/globals.css
+++ b/frontend/src/app/globals.css
@@ -365,4 +365,22 @@
width: 100%;
height: 30px;
}
+
+ .thinking-dots::after {
+ content: ".";
+ animation: thinking-dots 1.4s steps(3, end) infinite;
+ }
+
+ @keyframes thinking-dots {
+ 0% {
+ content: ".";
+ }
+ 33.33% {
+ content: "..";
+ }
+ 66.66%,
+ 100% {
+ content: "...";
+ }
+ }
}
diff --git a/frontend/src/app/onboarding/components/ibm-onboarding.tsx b/frontend/src/app/onboarding/components/ibm-onboarding.tsx
index d3540977..3d480248 100644
--- a/frontend/src/app/onboarding/components/ibm-onboarding.tsx
+++ b/frontend/src/app/onboarding/components/ibm-onboarding.tsx
@@ -26,7 +26,7 @@ export function IBMOnboarding({
setIsLoadingModels?: (isLoading: boolean) => void;
alreadyConfigured?: boolean;
}) {
- const [endpoint, setEndpoint] = useState("https://us-south.ml.cloud.ibm.com");
+ const [endpoint, setEndpoint] = useState(alreadyConfigured ? "" : "https://us-south.ml.cloud.ibm.com");
const [apiKey, setApiKey] = useState("");
const [projectId, setProjectId] = useState("");
diff --git a/frontend/src/app/onboarding/components/ollama-onboarding.tsx b/frontend/src/app/onboarding/components/ollama-onboarding.tsx
index e9d2fa1b..99c26d2a 100644
--- a/frontend/src/app/onboarding/components/ollama-onboarding.tsx
+++ b/frontend/src/app/onboarding/components/ollama-onboarding.tsx
@@ -25,7 +25,7 @@ export function OllamaOnboarding({
isEmbedding?: boolean;
alreadyConfigured?: boolean;
}) {
- const [endpoint, setEndpoint] = useState(`http://localhost:11434`);
+ const [endpoint, setEndpoint] = useState(alreadyConfigured ? undefined : `http://localhost:11434`);
const [showConnecting, setShowConnecting] = useState(false);
const debouncedEndpoint = useDebouncedValue(endpoint, 500);
From c295431484005b385ff611d9e481bd4514bc4f96 Mon Sep 17 00:00:00 2001
From: Lucas Oliveira <62335616+lucaseduoli@users.noreply.github.com>
Date: Fri, 14 Nov 2025 18:09:47 -0300
Subject: [PATCH 3/3] fix: refactor model validation to fix bugs related to
 Ollama, watsonx, and OpenAI (#406)
* Fixed models service to validate the API key against the first available model
* Fixed IBM onboarding so the models query is not disabled when no data is available
* Made the IBM models query disabled when the provider is not configured
* Enabled the Ollama query only when configured or an endpoint is present
* Enabled the get-OpenAI-models query when already configured
* Enabled get-from-env only when not configured
* Simplified Ollama models validation
* Fixed the max_tokens error on GPT-4o
---
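For reviewers: the provider_validation.py change retries the tool-calling probe with `max_completion_tokens` when the API rejects `max_tokens`, since some newer OpenAI models accept only the newer parameter. The same strategy, transposed to TypeScript for consistency with the other sketches in this series — a sketch, not the shipped implementation:

```ts
// Sketch of the fallback strategy from provider_validation.py. Some newer
// OpenAI models reject `max_tokens` in favor of `max_completion_tokens`,
// so retry once with the newer parameter before giving up.
async function testCompletion(apiKey: string, model: string): Promise<void> {
  const basePayload = {
    model,
    messages: [{ role: "user", content: "What tools do you have available?" }],
  };
  const post = (payload: object) =>
    fetch("https://api.openai.com/v1/chat/completions", {
      method: "POST",
      headers: {
        Authorization: `Bearer ${apiKey}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify(payload),
    });

  // Try the legacy parameter first...
  let response = await post({ ...basePayload, max_tokens: 50 });
  // ...and fall back to the newer one if the API rejects it.
  if (!response.ok) {
    response = await post({ ...basePayload, max_completion_tokens: 50 });
  }
  if (!response.ok) {
    throw new Error(`OpenAI API error: ${response.status}`);
  }
}
```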
.../onboarding/components/ibm-onboarding.tsx | 10 +--
.../components/ollama-onboarding.tsx | 1 +
.../components/openai-onboarding.tsx | 4 +-
src/api/provider_validation.py | 16 +++-
src/services/models_service.py | 80 +++++++++----------
5 files changed, 60 insertions(+), 51 deletions(-)
diff --git a/frontend/src/app/onboarding/components/ibm-onboarding.tsx b/frontend/src/app/onboarding/components/ibm-onboarding.tsx
index 3d480248..3bb830b6 100644
--- a/frontend/src/app/onboarding/components/ibm-onboarding.tsx
+++ b/frontend/src/app/onboarding/components/ibm-onboarding.tsx
@@ -73,13 +73,11 @@ export function IBMOnboarding({
error: modelsError,
} = useGetIBMModelsQuery(
{
- endpoint: debouncedEndpoint,
- apiKey: debouncedApiKey,
- projectId: debouncedProjectId,
- },
- {
- enabled: !!debouncedEndpoint && !!debouncedApiKey && !!debouncedProjectId,
+ endpoint: debouncedEndpoint ? debouncedEndpoint : undefined,
+ apiKey: debouncedApiKey ? debouncedApiKey : undefined,
+ projectId: debouncedProjectId ? debouncedProjectId : undefined,
},
+ { enabled: !!debouncedEndpoint || !!debouncedApiKey || !!debouncedProjectId || alreadyConfigured },
);
// Use custom hook for model selection logic
diff --git a/frontend/src/app/onboarding/components/ollama-onboarding.tsx b/frontend/src/app/onboarding/components/ollama-onboarding.tsx
index 99c26d2a..e85366ba 100644
--- a/frontend/src/app/onboarding/components/ollama-onboarding.tsx
+++ b/frontend/src/app/onboarding/components/ollama-onboarding.tsx
@@ -36,6 +36,7 @@ export function OllamaOnboarding({
error: modelsError,
} = useGetOllamaModelsQuery(
debouncedEndpoint ? { endpoint: debouncedEndpoint } : undefined,
+ { enabled: !!debouncedEndpoint || alreadyConfigured },
);
// Use custom hook for model selection logic
diff --git a/frontend/src/app/onboarding/components/openai-onboarding.tsx b/frontend/src/app/onboarding/components/openai-onboarding.tsx
index d4fc73a4..47c427a9 100644
--- a/frontend/src/app/onboarding/components/openai-onboarding.tsx
+++ b/frontend/src/app/onboarding/components/openai-onboarding.tsx
@@ -34,7 +34,7 @@ export function OpenAIOnboarding({
alreadyConfigured?: boolean;
}) {
const [apiKey, setApiKey] = useState("");
- const [getFromEnv, setGetFromEnv] = useState(hasEnvApiKey);
+ const [getFromEnv, setGetFromEnv] = useState(hasEnvApiKey && !alreadyConfigured);
const debouncedApiKey = useDebouncedValue(apiKey, 500);
// Fetch models from API when API key is provided
@@ -48,7 +48,7 @@ export function OpenAIOnboarding({
: debouncedApiKey
? { apiKey: debouncedApiKey }
: undefined,
- { enabled: debouncedApiKey !== "" || getFromEnv },
+ { enabled: debouncedApiKey !== "" || getFromEnv || alreadyConfigured },
);
// Use custom hook for model selection logic
const {
diff --git a/src/api/provider_validation.py b/src/api/provider_validation.py
index e51cc3bc..2fcc1e65 100644
--- a/src/api/provider_validation.py
+++ b/src/api/provider_validation.py
@@ -112,7 +112,7 @@ async def _test_openai_completion_with_tools(api_key: str, llm_model: str) -> No
}
# Simple tool calling test
- payload = {
+ base_payload = {
"model": llm_model,
"messages": [
{"role": "user", "content": "What tools do you have available?"}
@@ -136,10 +136,11 @@ async def _test_openai_completion_with_tools(api_key: str, llm_model: str) -> No
}
}
],
- "max_tokens": 50,
}
async with httpx.AsyncClient() as client:
+ # Try with max_tokens first
+ payload = {**base_payload, "max_tokens": 50}
response = await client.post(
"https://api.openai.com/v1/chat/completions",
headers=headers,
@@ -147,6 +148,17 @@ async def _test_openai_completion_with_tools(api_key: str, llm_model: str) -> No
timeout=30.0,
)
+ # If max_tokens doesn't work, try with max_completion_tokens
+ if response.status_code != 200:
+ logger.info("max_tokens parameter failed, trying max_completion_tokens instead")
+ payload = {**base_payload, "max_completion_tokens": 50}
+ response = await client.post(
+ "https://api.openai.com/v1/chat/completions",
+ headers=headers,
+ json=payload,
+ timeout=30.0,
+ )
+
if response.status_code != 200:
logger.error(f"OpenAI completion test failed: {response.status_code} - {response.text}")
raise Exception(f"OpenAI API error: {response.status_code}")
diff --git a/src/services/models_service.py b/src/services/models_service.py
index 28dee73a..f26d0594 100644
--- a/src/services/models_service.py
+++ b/src/services/models_service.py
@@ -1,6 +1,5 @@
import httpx
from typing import Dict, List
-from api.provider_validation import test_embedding
from utils.container_utils import transform_localhost_url
from utils.logging_config import get_logger
@@ -229,20 +228,14 @@ class ModelsService:
f"Model: {model_name}, Capabilities: {capabilities}"
)
- # Check if model has required capabilities
+ # Check if model has embedding capability
+ has_embedding = "embedding" in capabilities
+ # Check if model has required capabilities for language models
has_completion = DESIRED_CAPABILITY in capabilities
has_tools = TOOL_CALLING_CAPABILITY in capabilities
- # Check if it's an embedding model
- try:
- await test_embedding("ollama", endpoint=endpoint, embedding_model=model_name)
- is_embedding = True
- except Exception as e:
- logger.warning(f"Failed to test embedding for model {model_name}: {str(e)}")
- is_embedding = False
-
- if is_embedding:
- # Embedding models only need completion capability
+ if has_embedding:
+ # Models that advertise the embedding capability go in the embedding list
embedding_models.append(
{
"value": model_name,
@@ -250,7 +243,7 @@ class ModelsService:
"default": "nomic-embed-text" in model_name.lower(),
}
)
- elif not is_embedding and has_completion and has_tools:
+ if has_completion and has_tools:
# Language models need both completion and tool calling
language_models.append(
{
@@ -333,34 +326,6 @@ class ModelsService:
if project_id:
headers["Project-ID"] = project_id
- # Validate credentials with a minimal completion request
- async with httpx.AsyncClient() as client:
- validation_url = f"{watson_endpoint}/ml/v1/text/generation"
- validation_params = {"version": "2024-09-16"}
- validation_payload = {
- "input": "test",
- "model_id": "ibm/granite-3-2b-instruct",
- "project_id": project_id,
- "parameters": {
- "max_new_tokens": 1,
- },
- }
-
- validation_response = await client.post(
- validation_url,
- headers=headers,
- params=validation_params,
- json=validation_payload,
- timeout=10.0,
- )
-
- if validation_response.status_code != 200:
- raise Exception(
- f"Invalid credentials or endpoint: {validation_response.status_code} - {validation_response.text}"
- )
-
- logger.info("IBM Watson credentials validated successfully")
-
# Fetch foundation models using the correct endpoint
models_url = f"{watson_endpoint}/ml/v1/foundation_model_specs"
@@ -424,6 +389,39 @@ class ModelsService:
}
)
+ # Validate credentials with the first available LLM model
+ if language_models:
+ first_llm_model = language_models[0]["value"]
+
+ async with httpx.AsyncClient() as client:
+ validation_url = f"{watson_endpoint}/ml/v1/text/generation"
+ validation_params = {"version": "2024-09-16"}
+ validation_payload = {
+ "input": "test",
+ "model_id": first_llm_model,
+ "project_id": project_id,
+ "parameters": {
+ "max_new_tokens": 1,
+ },
+ }
+
+ validation_response = await client.post(
+ validation_url,
+ headers=headers,
+ params=validation_params,
+ json=validation_payload,
+ timeout=10.0,
+ )
+
+ if validation_response.status_code != 200:
+ raise Exception(
+ f"Invalid credentials or endpoint: {validation_response.status_code} - {validation_response.text}"
+ )
+
+ logger.info(f"IBM Watson credentials validated successfully using model: {first_llm_model}")
+ else:
+ logger.warning("No language models available to validate credentials")
+
if not language_models and not embedding_models:
raise Exception("No IBM models retrieved from API")