Merge branch 'main' into multi-embedding-support

Sebastián Estévez 2025-10-11 02:44:16 -04:00 committed by GitHub
commit b9f109ea7d
7 changed files with 313 additions and 159 deletions


@@ -7,14 +7,13 @@
  
<a href="https://github.com/opensearch-project/OpenSearch"><img src="https://img.shields.io/badge/OpenSearch-005EB8?style=flat&logo=opensearch&logoColor=white" alt="OpenSearch"></a>
&nbsp;&nbsp;
<a href="https://github.com/encode/starlette"><img src="https://img.shields.io/badge/Starlette-009639?style=flat&logo=fastapi&logoColor=white" alt="Starlette"></a>
<a href="https://github.com/docling-project/docling"><img src="https://img.shields.io/badge/Docling-000000?style=flat" alt="Langflow"></a>
&nbsp;&nbsp;
<a href="https://github.com/vercel/next.js"><img src="https://img.shields.io/badge/Next.js-000000?style=flat&logo=next.js&logoColor=white" alt="Next.js"></a>
&nbsp;&nbsp;
<a href="https://deepwiki.com/phact/openrag"><img src="https://deepwiki.com/badge.svg" alt="Ask DeepWiki"></a>
</div>
OpenRAG is a comprehensive Retrieval-Augmented Generation platform that enables intelligent document search and AI-powered conversations. Users can upload, process, and query documents through a chat interface backed by large language models and semantic search capabilities. The system utilizes Langflow for document ingestion, retrieval workflows, and intelligent nudges, providing a seamless RAG experience. Built with Starlette, Next.js, OpenSearch, and Langflow integration.
OpenRAG is a comprehensive Retrieval-Augmented Generation platform that enables intelligent document search and AI-powered conversations. Users can upload, process, and query documents through a chat interface backed by large language models and semantic search capabilities. The system utilizes Langflow for document ingestion, retrieval workflows, and intelligent nudges, providing a seamless RAG experience. Built with [Starlette](https://github.com/Kludex/starlette) and [Next.js](https://github.com/vercel/next.js). Powered by [OpenSearch](https://github.com/opensearch-project/OpenSearch), [Langflow](https://github.com/langflow-ai/langflow), and [Docling](https://github.com/docling-project/docling).
<a href="https://deepwiki.com/phact/openrag"><img src="https://deepwiki.com/badge.svg" alt="Ask DeepWiki"></a>
</div>
<div align="center">
@@ -48,7 +47,7 @@ To launch OpenRAG with the TUI, do the following:
The TUI opens and guides you through OpenRAG setup.
For the full TUI guide, see [TUI](docs/docs/get-started/tui.mdx).
For the full TUI guide, see [TUI](https://docs.openr.ag/get-started/tui).
## Docker Deployment
@@ -114,7 +113,7 @@ To deploy OpenRAG with Docker:
| OpenSearch | http://localhost:9200 | Vector database for document storage. |
| OpenSearch Dashboards | http://localhost:5601 | Database administration interface. |
6. Access the OpenRAG application at `http://localhost:3000` and continue with the [Quickstart](docs/docs/get-started/quickstart.mdx).
6. Access the OpenRAG application at `http://localhost:3000` and continue with the [Quickstart](https://docs.openr.ag/quickstart).
To stop `docling serve`, run:
@@ -122,11 +121,11 @@ To deploy OpenRAG with Docker:
uv run python scripts/docling_ctl.py stop
```
For more information, see [Deploy with Docker](docs/docs/get-started/docker.mdx).
For more information, see [Deploy with Docker](https://docs.openr.ag/get-started/docker).
## Troubleshooting
For common issues and fixes, see [Troubleshoot](docs/docs/support/troubleshoot.mdx).
For common issues and fixes, see [Troubleshoot](https://docs.openr.ag/support/troubleshoot).
## Development


@@ -86,13 +86,26 @@ function DoclingSetupDialog({
);
}
export function DoclingHealthBanner({ className }: DoclingHealthBannerProps) {
// Custom hook to check docling health status
export function useDoclingHealth() {
const { data: health, isLoading, isError } = useDoclingHealthQuery();
const [showDialog, setShowDialog] = useState(false);
const isHealthy = health?.status === "healthy" && !isError;
const isUnhealthy = health?.status === "unhealthy" || isError;
return {
health,
isLoading,
isError,
isHealthy,
isUnhealthy,
};
}
export function DoclingHealthBanner({ className }: DoclingHealthBannerProps) {
const { isLoading, isHealthy, isUnhealthy } = useDoclingHealth();
const [showDialog, setShowDialog] = useState(false);
// Only show banner when service is unhealthy
if (isLoading || isHealthy) {
return null;


@@ -7,6 +7,7 @@ import {
type OnboardingVariables,
useOnboardingMutation,
} from "@/app/api/mutations/useOnboardingMutation";
import { DoclingHealthBanner, useDoclingHealth } from "@/components/docling-health-banner";
import IBMLogo from "@/components/logo/ibm-logo";
import OllamaLogo from "@/components/logo/ollama-logo";
import OpenAILogo from "@/components/logo/openai-logo";
@@ -34,6 +35,7 @@ import { OpenAIOnboarding } from "./components/openai-onboarding";
function OnboardingPage() {
const { data: settingsDb, isLoading: isSettingsLoading } =
useGetSettingsQuery();
const { isHealthy: isDoclingHealthy } = useDoclingHealth();
const redirect = "/";
@@ -114,7 +116,7 @@ function OnboardingPage() {
onboardingMutation.mutate(onboardingData);
};
const isComplete = !!settings.llm_model && !!settings.embedding_model;
const isComplete = !!settings.llm_model && !!settings.embedding_model && isDoclingHealthy;
return (
<div className="min-h-dvh w-full flex gap-5 flex-col items-center justify-center bg-background relative p-4">
@@ -130,6 +132,8 @@ function OnboardingPage() {
)}
/>
<DoclingHealthBanner className="absolute top-0 left-0 right-0 w-full z-20" />
<div className="flex flex-col items-center gap-5 min-h-[550px] w-full z-10">
<div className="flex flex-col items-center justify-center gap-4">
<h1 className="text-2xl font-medium font-chivo">
@@ -197,7 +201,9 @@ function OnboardingPage() {
</TooltipTrigger>
{!isComplete && (
<TooltipContent>
Please fill in all required fields
{!!settings.llm_model && !!settings.embedding_model && !isDoclingHealthy
? "docling-serve must be running to continue"
: "Please fill in all required fields"}
</TooltipContent>
)}
</Tooltip>


@@ -536,7 +536,142 @@ async def onboarding(request, flows_service):
{"error": "No valid fields provided for update"}, status_code=400
)
# Initialize the OpenSearch index now that we have the embedding model configured
try:
# Import here to avoid circular imports
from main import init_index
logger.info(
"Initializing OpenSearch index after onboarding configuration"
)
await init_index()
logger.info("OpenSearch index initialization completed successfully")
except Exception as e:
if isinstance(e, ValueError):
logger.error(
"Failed to initialize OpenSearch index after onboarding",
error=str(e),
)
return JSONResponse(
{
"error": str(e),
"edited": True,
},
status_code=400,
)
logger.error(
"Failed to initialize OpenSearch index after onboarding",
error=str(e),
)
# Don't fail the entire onboarding process if index creation fails
# The application can still work, but document operations may fail
# Save the updated configuration (this will mark it as edited)
# If model_provider was updated, assign the new provider to flows
if "model_provider" in body:
provider = body["model_provider"].strip().lower()
try:
flow_result = await flows_service.assign_model_provider(provider)
if flow_result.get("success"):
logger.info(
f"Successfully assigned {provider} to flows",
flow_result=flow_result,
)
else:
logger.warning(
f"Failed to assign {provider} to flows",
flow_result=flow_result,
)
# Continue even if flow assignment fails - configuration was still saved
except Exception as e:
logger.error(
"Error assigning model provider to flows",
provider=provider,
error=str(e),
)
raise
# Set Langflow global variables based on provider
try:
# Set API key for IBM/Watson providers
if (provider == "watsonx") and "api_key" in body:
api_key = body["api_key"]
await clients._create_langflow_global_variable(
"WATSONX_API_KEY", api_key, modify=True
)
logger.info("Set WATSONX_API_KEY global variable in Langflow")
# Set project ID for IBM/Watson providers
if (provider == "watsonx") and "project_id" in body:
project_id = body["project_id"]
await clients._create_langflow_global_variable(
"WATSONX_PROJECT_ID", project_id, modify=True
)
logger.info(
"Set WATSONX_PROJECT_ID global variable in Langflow"
)
# Set API key for OpenAI provider
if provider == "openai" and "api_key" in body:
api_key = body["api_key"]
await clients._create_langflow_global_variable(
"OPENAI_API_KEY", api_key, modify=True
)
logger.info("Set OPENAI_API_KEY global variable in Langflow")
# Set base URL for Ollama provider
if provider == "ollama" and "endpoint" in body:
endpoint = transform_localhost_url(body["endpoint"])
await clients._create_langflow_global_variable(
"OLLAMA_BASE_URL", endpoint, modify=True
)
logger.info("Set OLLAMA_BASE_URL global variable in Langflow")
await flows_service.change_langflow_model_value(
provider,
body["embedding_model"],
body["llm_model"],
body["endpoint"],
)
except Exception as e:
logger.error(
"Failed to set Langflow global variables",
provider=provider,
error=str(e),
)
raise
# Handle sample data ingestion if requested
if should_ingest_sample_data:
try:
# Import the function here to avoid circular imports
from main import ingest_default_documents_when_ready
# Get services from the current app state
# We need to access the app instance to get services
app = request.scope.get("app")
if app and hasattr(app.state, "services"):
services = app.state.services
logger.info(
"Starting sample data ingestion as requested in onboarding"
)
await ingest_default_documents_when_ready(services)
logger.info("Sample data ingestion completed successfully")
else:
logger.error(
"Could not access services for sample data ingestion"
)
except Exception as e:
logger.error(
"Failed to complete sample data ingestion", error=str(e)
)
# Don't fail the entire onboarding process if sample data fails
if config_manager.save_config_file(current_config):
updated_fields = [
k for k in body.keys() if k != "sample_data"
@@ -546,144 +681,19 @@
updated_fields=updated_fields,
)
# If model_provider was updated, assign the new provider to flows
if "model_provider" in body:
provider = body["model_provider"].strip().lower()
try:
flow_result = await flows_service.assign_model_provider(provider)
if flow_result.get("success"):
logger.info(
f"Successfully assigned {provider} to flows",
flow_result=flow_result,
)
else:
logger.warning(
f"Failed to assign {provider} to flows",
flow_result=flow_result,
)
# Continue even if flow assignment fails - configuration was still saved
except Exception as e:
logger.error(
"Error assigning model provider to flows",
provider=provider,
error=str(e),
)
# Continue even if flow assignment fails - configuration was still saved
# Set Langflow global variables based on provider
if "model_provider" in body:
provider = body["model_provider"].strip().lower()
try:
# Set API key for IBM/Watson providers
if (provider == "watsonx") and "api_key" in body:
api_key = body["api_key"]
await clients._create_langflow_global_variable(
"WATSONX_API_KEY", api_key, modify=True
)
logger.info("Set WATSONX_API_KEY global variable in Langflow")
# Set project ID for IBM/Watson providers
if (provider == "watsonx") and "project_id" in body:
project_id = body["project_id"]
await clients._create_langflow_global_variable(
"WATSONX_PROJECT_ID", project_id, modify=True
)
logger.info(
"Set WATSONX_PROJECT_ID global variable in Langflow"
)
# Set API key for OpenAI provider
if provider == "openai" and "api_key" in body:
api_key = body["api_key"]
await clients._create_langflow_global_variable(
"OPENAI_API_KEY", api_key, modify=True
)
logger.info("Set OPENAI_API_KEY global variable in Langflow")
# Set base URL for Ollama provider
if provider == "ollama" and "endpoint" in body:
endpoint = transform_localhost_url(body["endpoint"])
await clients._create_langflow_global_variable(
"OLLAMA_BASE_URL", endpoint, modify=True
)
logger.info("Set OLLAMA_BASE_URL global variable in Langflow")
await flows_service.change_langflow_model_value(
provider,
body["embedding_model"],
body["llm_model"],
body["endpoint"],
)
except Exception as e:
logger.error(
"Failed to set Langflow global variables",
provider=provider,
error=str(e),
)
# Continue even if setting global variables fails
# Initialize the OpenSearch index now that we have the embedding model configured
try:
# Import here to avoid circular imports
from main import init_index
logger.info(
"Initializing OpenSearch index after onboarding configuration"
)
await init_index()
logger.info("OpenSearch index initialization completed successfully")
except Exception as e:
logger.error(
"Failed to initialize OpenSearch index after onboarding",
error=str(e),
)
# Don't fail the entire onboarding process if index creation fails
# The application can still work, but document operations may fail
# Handle sample data ingestion if requested
if should_ingest_sample_data:
try:
# Import the function here to avoid circular imports
from main import ingest_default_documents_when_ready
# Get services from the current app state
# We need to access the app instance to get services
app = request.scope.get("app")
if app and hasattr(app.state, "services"):
services = app.state.services
logger.info(
"Starting sample data ingestion as requested in onboarding"
)
await ingest_default_documents_when_ready(services)
logger.info("Sample data ingestion completed successfully")
else:
logger.error(
"Could not access services for sample data ingestion"
)
except Exception as e:
logger.error(
"Failed to complete sample data ingestion", error=str(e)
)
# Don't fail the entire onboarding process if sample data fails
return JSONResponse(
{
"message": "Onboarding configuration updated successfully",
"edited": True, # Confirm that config is now marked as edited
"sample_data_ingested": should_ingest_sample_data,
}
)
else:
return JSONResponse(
{"error": "Failed to save configuration"}, status_code=500
)
return JSONResponse(
{
"message": "Onboarding configuration updated successfully",
"edited": True, # Confirm that config is now marked as edited
"sample_data_ingested": should_ingest_sample_data,
}
)
except Exception as e:
logger.error("Failed to update onboarding settings", error=str(e))
return JSONResponse(


@@ -81,12 +81,6 @@ OPENAI_EMBEDDING_DIMENSIONS = {
"text-embedding-ada-002": 1536,
}
OLLAMA_EMBEDDING_DIMENSIONS = {
"nomic-embed-text": 768,
"all-minilm": 384,
"mxbai-embed-large": 1024,
}
WATSONX_EMBEDDING_DIMENSIONS = {
# IBM Models
"ibm/granite-embedding-107m-multilingual": 384,


@@ -168,7 +168,12 @@ async def init_index():
embedding_model = config.knowledge.embedding_model
# Create dynamic index body based on the configured embedding model
dynamic_index_body = create_dynamic_index_body(embedding_model)
# Pass provider and endpoint for dynamic dimension resolution (Ollama probing)
dynamic_index_body = await create_dynamic_index_body(
embedding_model,
provider=config.provider.model_provider,
endpoint=config.provider.endpoint
)
# Create documents index
if not await clients.opensearch.indices.exists(index=INDEX_NAME):


@@ -1,14 +1,128 @@
from config.settings import OLLAMA_EMBEDDING_DIMENSIONS, OPENAI_EMBEDDING_DIMENSIONS, VECTOR_DIM, WATSONX_EMBEDDING_DIMENSIONS
import httpx
from config.settings import OPENAI_EMBEDDING_DIMENSIONS, VECTOR_DIM, WATSONX_EMBEDDING_DIMENSIONS
from utils.container_utils import transform_localhost_url
from utils.logging_config import get_logger
logger = get_logger(__name__)
def get_embedding_dimensions(model_name: str) -> int:
async def _probe_ollama_embedding_dimension(endpoint: str, model_name: str) -> int:
"""Probe Ollama server to get embedding dimension for a model.
Args:
endpoint: Ollama server endpoint (e.g., "http://localhost:11434")
model_name: Name of the embedding model
Returns:
The embedding dimension.
Raises:
ValueError: If the dimension cannot be determined.
"""
transformed_endpoint = transform_localhost_url(endpoint)
url = f"{transformed_endpoint}/api/embeddings"
test_input = "test"
async with httpx.AsyncClient() as client:
errors: list[str] = []
# Try the newer request shape first (sends both "input" and "prompt" for compatibility)
modern_payload = {
"model": model_name,
"input": test_input,
"prompt": test_input,
}
try:
response = await client.post(url, json=modern_payload, timeout=10.0)
response.raise_for_status()
data = response.json()
# Check for embedding in response
if "embedding" in data:
dimension = len(data["embedding"])
if dimension > 0:
logger.info(
f"Probed Ollama model '{model_name}': dimension={dimension}"
)
return dimension
elif "embeddings" in data and len(data["embeddings"]) > 0:
dimension = len(data["embeddings"][0])
if dimension > 0:
logger.info(
f"Probed Ollama model '{model_name}': dimension={dimension}"
)
return dimension
errors.append("response did not include non-zero embedding vector")
except Exception as modern_error: # noqa: BLE001 - log and fall back to legacy payload
logger.debug(
"Modern Ollama embeddings API probe failed",
model=model_name,
endpoint=transformed_endpoint,
error=str(modern_error),
)
errors.append(str(modern_error))
# Try legacy API format (prompt parameter)
legacy_payload = {
"model": model_name,
"prompt": test_input,
}
try:
response = await client.post(url, json=legacy_payload, timeout=10.0)
response.raise_for_status()
data = response.json()
if "embedding" in data:
dimension = len(data["embedding"])
if dimension > 0:
logger.info(
f"Probed Ollama model '{model_name}' (legacy): dimension={dimension}"
)
return dimension
elif "embeddings" in data and len(data["embeddings"]) > 0:
dimension = len(data["embeddings"][0])
if dimension > 0:
logger.info(
f"Probed Ollama model '{model_name}' (legacy): dimension={dimension}"
)
return dimension
errors.append("legacy response did not include non-zero embedding vector")
except Exception as legacy_error: # noqa: BLE001 - collect and raise a helpful error later
logger.warning(
"Legacy Ollama embeddings API probe failed",
model=model_name,
endpoint=transformed_endpoint,
error=str(legacy_error),
)
errors.append(str(legacy_error))
# Drop the generic connection error so the more specific failure (if any) is reported first
if "All connection attempts failed" in errors:
errors.remove("All connection attempts failed")
raise ValueError(
f"Failed to determine embedding dimensions for Ollama model '{model_name}'. "
f"Verify the Ollama server at '{endpoint}' is reachable and the model is available. "
f"Error: {errors[0]}"
)
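A quick way to exercise the probe in isolation; the endpoint and model name below are illustrative placeholders, not values taken from this change:

```python
import asyncio

async def _demo() -> None:
    # Assumes a reachable Ollama server with the embedding model already pulled.
    dim = await _probe_ollama_embedding_dimension(
        "http://localhost:11434", "nomic-embed-text"
    )
    print(f"nomic-embed-text -> {dim}-dimensional embeddings")

if __name__ == "__main__":
    asyncio.run(_demo())
```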
async def get_embedding_dimensions(model_name: str, provider: str = None, endpoint: str = None) -> int:
"""Get the embedding dimensions for a given model name."""
if provider and provider.lower() == "ollama":
if not endpoint:
raise ValueError(
"Ollama endpoint is required to determine embedding dimensions. Please provide a valid endpoint."
)
return await _probe_ollama_embedding_dimension(endpoint, model_name)
# Check all model dictionaries
all_models = {**OPENAI_EMBEDDING_DIMENSIONS, **OLLAMA_EMBEDDING_DIMENSIONS, **WATSONX_EMBEDDING_DIMENSIONS}
all_models = {**OPENAI_EMBEDDING_DIMENSIONS, **WATSONX_EMBEDDING_DIMENSIONS}
model_name = model_name.lower().strip().split(":")[0]
@@ -23,9 +137,22 @@ def get_embedding_dimensions(model_name: str) -> int:
return VECTOR_DIM
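Both lookup paths can then be called like this from an async context (model names and endpoint are illustrative):

```python
# Static table lookup for providers with known models (OpenAI, watsonx).
dims = await get_embedding_dimensions("text-embedding-ada-002")  # 1536 per the table above

# Ollama models are probed live, so provider and endpoint must be supplied.
dims = await get_embedding_dimensions(
    "nomic-embed-text", provider="ollama", endpoint="http://localhost:11434"
)
```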
def create_dynamic_index_body(embedding_model: str) -> dict:
"""Create a dynamic index body configuration based on the embedding model."""
dimensions = get_embedding_dimensions(embedding_model)
async def create_dynamic_index_body(
embedding_model: str,
provider: str = None,
endpoint: str = None
) -> dict:
"""Create a dynamic index body configuration based on the embedding model.
Args:
embedding_model: Name of the embedding model
provider: Provider name (e.g., "ollama", "openai", "watsonx")
endpoint: Endpoint URL for the provider (used for Ollama probing)
Returns:
OpenSearch index body configuration
"""
dimensions = await get_embedding_dimensions(embedding_model, provider, endpoint)
return {
"settings": {