From 9ad4d89cb38afbad9b17df7ebfa9de512fa2253b Mon Sep 17 00:00:00 2001
From: Mendon Kissling <59585235+mendonk@users.noreply.github.com>
Date: Wed, 12 Nov 2025 13:03:18 -0500
Subject: [PATCH] docs: use ollama with openrag (#380)

* initial-content

* initial-content

* clarify-server-language

* rec-nomic-text-embeddings

* remove-page-make-single-partial

* Apply suggestions from code review

Co-authored-by: April I. Murphy <36110273+aimurphy@users.noreply.github.com>

* ollama-partial-summary-and-steps

* add-space

---------

Co-authored-by: April I. Murphy <36110273+aimurphy@users.noreply.github.com>
---
 docs/docs/_partial-ollama.mdx     | 24 ++++++++++++++++++++++++
 docs/docs/_partial-onboarding.mdx | 18 ++++++++++--------
 2 files changed, 34 insertions(+), 8 deletions(-)
 create mode 100644 docs/docs/_partial-ollama.mdx

diff --git a/docs/docs/_partial-ollama.mdx b/docs/docs/_partial-ollama.mdx
new file mode 100644
index 00000000..a5164e97
--- /dev/null
+++ b/docs/docs/_partial-ollama.mdx
@@ -0,0 +1,24 @@
+import Icon from "@site/src/components/icon/icon";
+
+Using Ollama as your OpenRAG language model provider offers greater flexibility and more configuration options, but it can also be overwhelming to start.
+These recommendations are a reasonable starting point for users with at least one GPU and experience running LLMs locally.
+
+For best performance, OpenRAG recommends OpenAI's `gpt-oss:20b` language model. However, this model uses 16GB of RAM, so consider using Ollama Cloud or running Ollama on a remote machine.
+
+For generating embeddings, OpenRAG recommends the [`nomic-embed-text`](https://ollama.com/library/nomic-embed-text) embedding model, which provides high-quality embeddings optimized for retrieval tasks.
+
+To run models in [**Ollama Cloud**](https://docs.ollama.com/cloud), follow these steps; a terminal sketch of the command-line steps follows the list:
+
+ 1. Sign in to Ollama Cloud.
+    In a terminal, enter `ollama signin` to connect your local environment with Ollama Cloud.
+ 2. To run the model, select the `gpt-oss:20b-cloud` model in Ollama, or run `ollama run gpt-oss:20b-cloud` in a terminal.
+    Ollama Cloud models are served at the same URL as your local Ollama server, `http://localhost:11434`, and are automatically offloaded to Ollama's cloud service.
+ 3. During onboarding, connect OpenRAG to the same local Ollama server as you would for local models, using the default address of `http://localhost:11434`.
+ 4. In the **Language model** field, select the `gpt-oss:20b-cloud` model.
+
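+As a quick reference, the command-line portion of the Ollama Cloud steps looks like the following. This is a minimal sketch: the model name and default server address come from the steps above, and the `curl` call to Ollama's `/api/tags` endpoint is just one way to confirm the model appears in your local server's model list.
+
+```bash
+# Connect your local Ollama environment to Ollama Cloud.
+ollama signin
+
+# Run the cloud model. It is served from your local Ollama URL,
+# but execution is offloaded to Ollama's cloud service.
+ollama run gpt-oss:20b-cloud
+
+# Optional: confirm the model is listed by your local server.
+curl http://localhost:11434/api/tags
+```
+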

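+If you run the recommended models fully locally instead, you can pull them before onboarding. A minimal sketch, assuming a local Ollama installation with enough RAM for `gpt-oss:20b`:
+
+```bash
+# Download the recommended language and embedding models.
+ollama pull gpt-oss:20b
+ollama pull nomic-embed-text
+```
+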
+To run models on a **remote Ollama server**, follow these steps:
+
+ 1. Ensure your remote Ollama server is accessible from your OpenRAG instance.
+ 2. In the **Ollama Base URL** field, enter your remote Ollama server's base URL, such as `http://your-remote-server:11434`.
+    OpenRAG connects to the remote Ollama server and populates the model lists with the server's available models.
+ 3. Select your **Embedding model** and **Language model** from the available options.
\ No newline at end of file

diff --git a/docs/docs/_partial-onboarding.mdx b/docs/docs/_partial-onboarding.mdx
index fda43f6e..8c3d508a 100644
--- a/docs/docs/_partial-onboarding.mdx
+++ b/docs/docs/_partial-onboarding.mdx
@@ -1,6 +1,7 @@
 import Icon from "@site/src/components/icon/icon";
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
+import PartialOllama from '@site/docs/_partial-ollama.mdx';

 ## Application onboarding

@@ -58,15 +59,16 @@ Choose one LLM provider and complete these steps:
     :::tip
     Ollama is not included with OpenRAG. To install Ollama, see the [Ollama documentation](https://docs.ollama.com/).
     :::
-    1. Enter your Ollama server's base URL address.
+    1. To connect to an Ollama server running on your local machine, enter your Ollama server's base URL address.
     The default Ollama server address is `http://localhost:11434`.
-    OpenRAG automatically transforms `localhost` to access services outside of the container, and sends a test connection to your Ollama server to confirm connectivity.
-    2. Under **Advanced settings**, select your **Language Model** from the models available on your Ollama server.
+    OpenRAG connects to the Ollama server and populates the model lists with the server's available models.
+    2. Select the **Embedding Model** and **Language Model** your Ollama server is running.
+    [Image: Ollama model selection and external server configuration]
     3. Click **Complete**.
-    4. In the second onboarding panel, select your **Embedding Model** from the models available on your Ollama server.
-    5. To complete the onboarding tasks, click **What is OpenRAG**, and then click **Add a Document**.
-    Alternatively, click