diff --git a/README.md b/README.md
index a7abbbe6..38beff99 100644
--- a/README.md
+++ b/README.md
@@ -7,14 +7,13 @@
    OpenSearch    - Starlette + Langflow    - Next.js -    - Ask DeepWiki
-OpenRAG is a comprehensive Retrieval-Augmented Generation platform that enables intelligent document search and AI-powered conversations. Users can upload, process, and query documents through a chat interface backed by large language models and semantic search capabilities. The system utilizes Langflow for document ingestion, retrieval workflows, and intelligent nudges, providing a seamless RAG experience. Built with Starlette, Next.js, OpenSearch, and Langflow integration.
+OpenRAG is a comprehensive Retrieval-Augmented Generation platform that enables intelligent document search and AI-powered conversations. Users can upload, process, and query documents through a chat interface backed by large language models and semantic search capabilities. The system utilizes Langflow for document ingestion, retrieval workflows, and intelligent nudges, providing a seamless RAG experience. Built with [Starlette](https://github.com/Kludex/starlette) and [Next.js](https://github.com/vercel/next.js). Powered by [OpenSearch](https://github.com/opensearch-project/OpenSearch), [Langflow](https://github.com/langflow-ai/langflow), and [Docling](https://github.com/docling-project/docling).
+
+Ask DeepWiki
@@ -48,7 +47,7 @@ To launch OpenRAG with the TUI, do the following:
 
 The TUI opens and guides you through OpenRAG setup.
 
-For the full TUI guide, see [TUI](docs/docs/get-started/tui.mdx).
+For the full TUI guide, see [TUI](https://docs.openr.ag/get-started/tui).
 
 ## Docker Deployment
 
@@ -114,7 +113,7 @@ To deploy OpenRAG with Docker:
    | OpenSearch | http://localhost:9200 | Vector database for document storage. |
    | OpenSearch Dashboards | http://localhost:5601 | Database administration interface. |
 
-6. Access the OpenRAG application at `http://localhost:3000` and continue with the [Quickstart](docs/docs/get-started/quickstart.mdx).
+6. Access the OpenRAG application at `http://localhost:3000` and continue with the [Quickstart](https://docs.openr.ag/quickstart).
 
   To stop `docling serve`, run:
 
@@ -122,11 +121,11 @@ To deploy OpenRAG with Docker:
   uv run python scripts/docling_ctl.py stop
   ```
 
-For more information, see [Deploy with Docker](docs/docs/get-started/docker.mdx).
+For more information, see [Deploy with Docker](https://docs.openr.ag/get-started/docker).
 
 ## Troubleshooting
 
-For common issues and fixes, see [Troubleshoot](docs/docs/support/troubleshoot.mdx).
+For common issues and fixes, see [Troubleshoot](https://docs.openr.ag/support/troubleshoot).
 
 ## Development
diff --git a/frontend/components/docling-health-banner.tsx b/frontend/components/docling-health-banner.tsx
index afa79deb..673d952d 100644
--- a/frontend/components/docling-health-banner.tsx
+++ b/frontend/components/docling-health-banner.tsx
@@ -86,13 +86,26 @@ function DoclingSetupDialog({
   );
 }
 
-export function DoclingHealthBanner({ className }: DoclingHealthBannerProps) {
+// Custom hook to check docling health status
+export function useDoclingHealth() {
   const { data: health, isLoading, isError } = useDoclingHealthQuery();
-  const [showDialog, setShowDialog] = useState(false);
 
   const isHealthy = health?.status === "healthy" && !isError;
   const isUnhealthy = health?.status === "unhealthy" || isError;
 
+  return {
+    health,
+    isLoading,
+    isError,
+    isHealthy,
+    isUnhealthy,
+  };
+}
+
+export function DoclingHealthBanner({ className }: DoclingHealthBannerProps) {
+  const { isLoading, isHealthy, isUnhealthy } = useDoclingHealth();
+  const [showDialog, setShowDialog] = useState(false);
+
+  // Only show banner when service is unhealthy
   if (isLoading || isHealthy) {
     return null;
diff --git a/frontend/src/app/onboarding/page.tsx b/frontend/src/app/onboarding/page.tsx
index ca938eeb..094349c7 100644
--- a/frontend/src/app/onboarding/page.tsx
+++ b/frontend/src/app/onboarding/page.tsx
@@ -7,6 +7,7 @@ import {
   type OnboardingVariables,
   useOnboardingMutation,
 } from "@/app/api/mutations/useOnboardingMutation";
+import { DoclingHealthBanner, useDoclingHealth } from "@/components/docling-health-banner";
 import IBMLogo from "@/components/logo/ibm-logo";
 import OllamaLogo from "@/components/logo/ollama-logo";
 import OpenAILogo from "@/components/logo/openai-logo";
@@ -34,6 +35,7 @@ import { OpenAIOnboarding } from "./components/openai-onboarding";
 function OnboardingPage() {
   const { data: settingsDb, isLoading: isSettingsLoading } = useGetSettingsQuery();
+  const { isHealthy: isDoclingHealthy } = useDoclingHealth();
 
   const redirect = "/";
 
@@ -114,7 +116,7 @@ function OnboardingPage() {
     onboardingMutation.mutate(onboardingData);
   };
 
-  const isComplete = !!settings.llm_model && !!settings.embedding_model;
+  const isComplete = !!settings.llm_model && !!settings.embedding_model && isDoclingHealthy;
 
   return (
@@ -130,6 +132,8 @@ function OnboardingPage() {
             )}
           />
+
+
 
@@ -197,7 +201,9 @@ function OnboardingPage() { {!isComplete && ( - Please fill in all required fields + {!!settings.llm_model && !!settings.embedding_model && !isDoclingHealthy + ? "docling-serve must be running to continue" + : "Please fill in all required fields"} )} diff --git a/src/api/settings.py b/src/api/settings.py index 879988d5..17c57a60 100644 --- a/src/api/settings.py +++ b/src/api/settings.py @@ -536,7 +536,142 @@ async def onboarding(request, flows_service): {"error": "No valid fields provided for update"}, status_code=400 ) + # Initialize the OpenSearch index now that we have the embedding model configured + try: + # Import here to avoid circular imports + from main import init_index + + logger.info( + "Initializing OpenSearch index after onboarding configuration" + ) + await init_index() + logger.info("OpenSearch index initialization completed successfully") + except Exception as e: + if isinstance(e, ValueError): + logger.error( + "Failed to initialize OpenSearch index after onboarding", + error=str(e), + ) + return JSONResponse( + { + "error": str(e), + "edited": True, + }, + status_code=400, + ) + logger.error( + "Failed to initialize OpenSearch index after onboarding", + error=str(e), + ) + # Don't fail the entire onboarding process if index creation fails + # The application can still work, but document operations may fail + # Save the updated configuration (this will mark it as edited) + + # If model_provider was updated, assign the new provider to flows + if "model_provider" in body: + provider = body["model_provider"].strip().lower() + try: + flow_result = await flows_service.assign_model_provider(provider) + + if flow_result.get("success"): + logger.info( + f"Successfully assigned {provider} to flows", + flow_result=flow_result, + ) + else: + logger.warning( + f"Failed to assign {provider} to flows", + flow_result=flow_result, + ) + # Continue even if flow assignment fails - configuration was still saved + + except Exception as e: + logger.error( + "Error assigning model provider to flows", + provider=provider, + error=str(e), + ) + raise + + # Set Langflow global variables based on provider + try: + # Set API key for IBM/Watson providers + if (provider == "watsonx") and "api_key" in body: + api_key = body["api_key"] + await clients._create_langflow_global_variable( + "WATSONX_API_KEY", api_key, modify=True + ) + logger.info("Set WATSONX_API_KEY global variable in Langflow") + + # Set project ID for IBM/Watson providers + if (provider == "watsonx") and "project_id" in body: + project_id = body["project_id"] + await clients._create_langflow_global_variable( + "WATSONX_PROJECT_ID", project_id, modify=True + ) + logger.info( + "Set WATSONX_PROJECT_ID global variable in Langflow" + ) + + # Set API key for OpenAI provider + if provider == "openai" and "api_key" in body: + api_key = body["api_key"] + await clients._create_langflow_global_variable( + "OPENAI_API_KEY", api_key, modify=True + ) + logger.info("Set OPENAI_API_KEY global variable in Langflow") + + # Set base URL for Ollama provider + if provider == "ollama" and "endpoint" in body: + endpoint = transform_localhost_url(body["endpoint"]) + + await clients._create_langflow_global_variable( + "OLLAMA_BASE_URL", endpoint, modify=True + ) + logger.info("Set OLLAMA_BASE_URL global variable in Langflow") + + await flows_service.change_langflow_model_value( + provider, + body["embedding_model"], + body["llm_model"], + body["endpoint"], + ) + + except Exception as e: + logger.error( + "Failed to set Langflow global 
variables", + provider=provider, + error=str(e), + ) + raise + + # Handle sample data ingestion if requested + if should_ingest_sample_data: + try: + # Import the function here to avoid circular imports + from main import ingest_default_documents_when_ready + + # Get services from the current app state + # We need to access the app instance to get services + app = request.scope.get("app") + if app and hasattr(app.state, "services"): + services = app.state.services + logger.info( + "Starting sample data ingestion as requested in onboarding" + ) + await ingest_default_documents_when_ready(services) + logger.info("Sample data ingestion completed successfully") + else: + logger.error( + "Could not access services for sample data ingestion" + ) + + except Exception as e: + logger.error( + "Failed to complete sample data ingestion", error=str(e) + ) + # Don't fail the entire onboarding process if sample data fails if config_manager.save_config_file(current_config): updated_fields = [ k for k in body.keys() if k != "sample_data" @@ -546,144 +681,19 @@ async def onboarding(request, flows_service): updated_fields=updated_fields, ) - # If model_provider was updated, assign the new provider to flows - if "model_provider" in body: - provider = body["model_provider"].strip().lower() - try: - flow_result = await flows_service.assign_model_provider(provider) - - if flow_result.get("success"): - logger.info( - f"Successfully assigned {provider} to flows", - flow_result=flow_result, - ) - else: - logger.warning( - f"Failed to assign {provider} to flows", - flow_result=flow_result, - ) - # Continue even if flow assignment fails - configuration was still saved - - except Exception as e: - logger.error( - "Error assigning model provider to flows", - provider=provider, - error=str(e), - ) - # Continue even if flow assignment fails - configuration was still saved - - # Set Langflow global variables based on provider - if "model_provider" in body: - provider = body["model_provider"].strip().lower() - - try: - # Set API key for IBM/Watson providers - if (provider == "watsonx") and "api_key" in body: - api_key = body["api_key"] - await clients._create_langflow_global_variable( - "WATSONX_API_KEY", api_key, modify=True - ) - logger.info("Set WATSONX_API_KEY global variable in Langflow") - - # Set project ID for IBM/Watson providers - if (provider == "watsonx") and "project_id" in body: - project_id = body["project_id"] - await clients._create_langflow_global_variable( - "WATSONX_PROJECT_ID", project_id, modify=True - ) - logger.info( - "Set WATSONX_PROJECT_ID global variable in Langflow" - ) - - # Set API key for OpenAI provider - if provider == "openai" and "api_key" in body: - api_key = body["api_key"] - await clients._create_langflow_global_variable( - "OPENAI_API_KEY", api_key, modify=True - ) - logger.info("Set OPENAI_API_KEY global variable in Langflow") - - # Set base URL for Ollama provider - if provider == "ollama" and "endpoint" in body: - endpoint = transform_localhost_url(body["endpoint"]) - - await clients._create_langflow_global_variable( - "OLLAMA_BASE_URL", endpoint, modify=True - ) - logger.info("Set OLLAMA_BASE_URL global variable in Langflow") - - await flows_service.change_langflow_model_value( - provider, - body["embedding_model"], - body["llm_model"], - body["endpoint"], - ) - - except Exception as e: - logger.error( - "Failed to set Langflow global variables", - provider=provider, - error=str(e), - ) - # Continue even if setting global variables fails - - # Initialize the OpenSearch index now 
that we have the embedding model configured - try: - # Import here to avoid circular imports - from main import init_index - - logger.info( - "Initializing OpenSearch index after onboarding configuration" - ) - await init_index() - logger.info("OpenSearch index initialization completed successfully") - except Exception as e: - logger.error( - "Failed to initialize OpenSearch index after onboarding", - error=str(e), - ) - # Don't fail the entire onboarding process if index creation fails - # The application can still work, but document operations may fail - - # Handle sample data ingestion if requested - if should_ingest_sample_data: - try: - # Import the function here to avoid circular imports - from main import ingest_default_documents_when_ready - - # Get services from the current app state - # We need to access the app instance to get services - app = request.scope.get("app") - if app and hasattr(app.state, "services"): - services = app.state.services - logger.info( - "Starting sample data ingestion as requested in onboarding" - ) - await ingest_default_documents_when_ready(services) - logger.info("Sample data ingestion completed successfully") - else: - logger.error( - "Could not access services for sample data ingestion" - ) - - except Exception as e: - logger.error( - "Failed to complete sample data ingestion", error=str(e) - ) - # Don't fail the entire onboarding process if sample data fails - - return JSONResponse( - { - "message": "Onboarding configuration updated successfully", - "edited": True, # Confirm that config is now marked as edited - "sample_data_ingested": should_ingest_sample_data, - } - ) else: return JSONResponse( {"error": "Failed to save configuration"}, status_code=500 ) + return JSONResponse( + { + "message": "Onboarding configuration updated successfully", + "edited": True, # Confirm that config is now marked as edited + "sample_data_ingested": should_ingest_sample_data, + } + ) + except Exception as e: logger.error("Failed to update onboarding settings", error=str(e)) return JSONResponse( diff --git a/src/config/settings.py b/src/config/settings.py index b1ffba79..2edb0d69 100644 --- a/src/config/settings.py +++ b/src/config/settings.py @@ -81,12 +81,6 @@ OPENAI_EMBEDDING_DIMENSIONS = { "text-embedding-ada-002": 1536, } -OLLAMA_EMBEDDING_DIMENSIONS = { - "nomic-embed-text": 768, - "all-minilm": 384, - "mxbai-embed-large": 1024, -} - WATSONX_EMBEDDING_DIMENSIONS = { # IBM Models "ibm/granite-embedding-107m-multilingual": 384, diff --git a/src/main.py b/src/main.py index 1094f8b5..b16a50d5 100644 --- a/src/main.py +++ b/src/main.py @@ -168,7 +168,12 @@ async def init_index(): embedding_model = config.knowledge.embedding_model # Create dynamic index body based on the configured embedding model - dynamic_index_body = create_dynamic_index_body(embedding_model) + # Pass provider and endpoint for dynamic dimension resolution (Ollama probing) + dynamic_index_body = await create_dynamic_index_body( + embedding_model, + provider=config.provider.model_provider, + endpoint=config.provider.endpoint + ) # Create documents index if not await clients.opensearch.indices.exists(index=INDEX_NAME): diff --git a/src/utils/embeddings.py b/src/utils/embeddings.py index 26029a7f..20acda9e 100644 --- a/src/utils/embeddings.py +++ b/src/utils/embeddings.py @@ -1,14 +1,128 @@ -from config.settings import OLLAMA_EMBEDDING_DIMENSIONS, OPENAI_EMBEDDING_DIMENSIONS, VECTOR_DIM, WATSONX_EMBEDDING_DIMENSIONS +import httpx +from config.settings import OPENAI_EMBEDDING_DIMENSIONS, VECTOR_DIM, 
WATSONX_EMBEDDING_DIMENSIONS
+from utils.container_utils import transform_localhost_url
 from utils.logging_config import get_logger
 
 logger = get_logger(__name__)
 
-def get_embedding_dimensions(model_name: str) -> int:
+
+async def _probe_ollama_embedding_dimension(endpoint: str, model_name: str) -> int:
+    """Probe Ollama server to get embedding dimension for a model.
+
+    Args:
+        endpoint: Ollama server endpoint (e.g., "http://localhost:11434")
+        model_name: Name of the embedding model
+
+    Returns:
+        The embedding dimension.
+
+    Raises:
+        ValueError: If the dimension cannot be determined.
+    """
+    transformed_endpoint = transform_localhost_url(endpoint)
+    url = f"{transformed_endpoint}/api/embeddings"
+    test_input = "test"
+
+    async with httpx.AsyncClient() as client:
+        errors: list[str] = []
+
+        # Try the newer payload shape first: "input" plus "prompt", so servers
+        # that ignore one of the two fields can still answer with the other.
+        modern_payload = {
+            "model": model_name,
+            "input": test_input,
+            "prompt": test_input,
+        }
+
+        try:
+            response = await client.post(url, json=modern_payload, timeout=10.0)
+            response.raise_for_status()
+            data = response.json()
+
+            # Check for embedding in response
+            if "embedding" in data:
+                dimension = len(data["embedding"])
+                if dimension > 0:
+                    logger.info(
+                        f"Probed Ollama model '{model_name}': dimension={dimension}"
+                    )
+                    return dimension
+            elif "embeddings" in data and len(data["embeddings"]) > 0:
+                dimension = len(data["embeddings"][0])
+                if dimension > 0:
+                    logger.info(
+                        f"Probed Ollama model '{model_name}': dimension={dimension}"
+                    )
+                    return dimension
+
+            errors.append("response did not include non-zero embedding vector")
+        except Exception as modern_error:  # noqa: BLE001 - log and fall back to legacy payload
+            logger.debug(
+                "Modern Ollama embeddings API probe failed",
+                model=model_name,
+                endpoint=transformed_endpoint,
+                error=str(modern_error),
+            )
+            errors.append(str(modern_error))
+
+        # Try legacy API format (prompt parameter)
+        legacy_payload = {
+            "model": model_name,
+            "prompt": test_input,
+        }
+
+        try:
+            response = await client.post(url, json=legacy_payload, timeout=10.0)
+            response.raise_for_status()
+            data = response.json()
+
+            if "embedding" in data:
+                dimension = len(data["embedding"])
+                if dimension > 0:
+                    logger.info(
+                        f"Probed Ollama model '{model_name}' (legacy): dimension={dimension}"
+                    )
+                    return dimension
+            elif "embeddings" in data and len(data["embeddings"]) > 0:
+                dimension = len(data["embeddings"][0])
+                if dimension > 0:
+                    logger.info(
+                        f"Probed Ollama model '{model_name}' (legacy): dimension={dimension}"
+                    )
+                    return dimension
+
+            errors.append("legacy response did not include non-zero embedding vector")
+        except Exception as legacy_error:  # noqa: BLE001 - collect and raise a helpful error later
+            logger.warning(
+                "Legacy Ollama embeddings API probe failed",
+                model=model_name,
+                endpoint=transformed_endpoint,
+                error=str(legacy_error),
+            )
+            errors.append(str(legacy_error))
+
+        # Drop one generic "connection failed" message, if present, so the error
+        # surfaced below is the most specific one collected from either attempt.
+        if "All connection attempts failed" in errors:
+            errors.remove("All connection attempts failed")
+
+        raise ValueError(
+            f"Failed to determine embedding dimensions for Ollama model '{model_name}'. "
+            f"Verify the Ollama server at '{endpoint}' is reachable and the model is available. "
+            f"Error: {errors[0]}"
+        )
+
+
+async def get_embedding_dimensions(model_name: str, provider: str = None, endpoint: str = None) -> int:
     """Get the embedding dimensions for a given model name."""
+    if provider and provider.lower() == "ollama":
+        if not endpoint:
+            raise ValueError(
+                "Ollama endpoint is required to determine embedding dimensions. Please provide a valid endpoint."
+            )
+        return await _probe_ollama_embedding_dimension(endpoint, model_name)
+
     # Check all model dictionaries
-    all_models = {**OPENAI_EMBEDDING_DIMENSIONS, **OLLAMA_EMBEDDING_DIMENSIONS, **WATSONX_EMBEDDING_DIMENSIONS}
+    all_models = {**OPENAI_EMBEDDING_DIMENSIONS, **WATSONX_EMBEDDING_DIMENSIONS}
 
     model_name = model_name.lower().strip().split(":")[0]
 
@@ -23,9 +137,22 @@ def get_embedding_dimensions(model_name: str) -> int:
     return VECTOR_DIM
 
 
-def create_dynamic_index_body(embedding_model: str) -> dict:
-    """Create a dynamic index body configuration based on the embedding model."""
-    dimensions = get_embedding_dimensions(embedding_model)
+async def create_dynamic_index_body(
+    embedding_model: str,
+    provider: str = None,
+    endpoint: str = None
+) -> dict:
+    """Create a dynamic index body configuration based on the embedding model.
+
+    Args:
+        embedding_model: Name of the embedding model
+        provider: Provider name (e.g., "ollama", "openai", "watsonx")
+        endpoint: Endpoint URL for the provider (used for Ollama probing)
+
+    Returns:
+        OpenSearch index body configuration
+    """
+    dimensions = await get_embedding_dimensions(embedding_model, provider, endpoint)
 
     return {
         "settings": {