From 30d94c114068e3dcaa7a8912c11a1d20ab005512 Mon Sep 17 00:00:00 2001
From: April M <36110273+aimurphy@users.noreply.github.com>
Date: Tue, 16 Dec 2025 16:24:20 -0800
Subject: [PATCH] recommended ollama models

---
 docs/docs/_partial-ollama-models.mdx  | 13 +++++++++++++
 docs/docs/_partial-onboarding.mdx     | 13 ++++++-------
 docs/docs/_partial-prereq-common.mdx  |  8 +++++++-
 docs/docs/get-started/docker.mdx      |  1 +
 docs/docs/get-started/install-uv.mdx  |  1 +
 docs/docs/get-started/install-uvx.mdx |  1 +
 docs/docs/get-started/install.mdx     |  1 +
 docs/docs/support/troubleshoot.mdx    |  8 +++++++-
 8 files changed, 37 insertions(+), 9 deletions(-)
 create mode 100644 docs/docs/_partial-ollama-models.mdx

diff --git a/docs/docs/_partial-ollama-models.mdx b/docs/docs/_partial-ollama-models.mdx
new file mode 100644
index 00000000..d9f90918
--- /dev/null
+++ b/docs/docs/_partial-ollama-models.mdx
@@ -0,0 +1,13 @@
+OpenRAG isn't guaranteed to be compatible with all models that are available through Ollama.
+For example, some models might produce unexpected results, such as JSON-formatted output instead of natural language responses, and some models, such as those that generate media, aren't appropriate for the types of tasks that OpenRAG performs.
+
+The OpenRAG team recommends the following models when using Ollama as your model provider:
+
+* **Language models**: `gpt-oss:20b` or `mistral-nemo:12b`.
+
+  If you choose `gpt-oss:20b`, consider using Ollama Cloud or running Ollama on a remote machine because this model requires at least 16GB of RAM.
+
+* **Embedding models**: [`nomic-embed-text:latest`](https://ollama.com/library/nomic-embed-text), `mxbai-embed-large:latest`, or `embeddinggemma:latest`.
+
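+For example, assuming a local Ollama installation, the following commands download one recommended language model and one recommended embedding model, and then list the installed models to confirm that the downloads succeeded:
+
+```bash
+# Download a recommended language model and a recommended embedding model.
+ollama pull mistral-nemo:12b
+ollama pull nomic-embed-text:latest
+
+# List the installed models to confirm that both are available.
+ollama list
+```
+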
+You can experiment with other models, but if you encounter issues that you can't resolve through other RAG best practices (like context filters and prompt engineering), try switching to one of the recommended models.
+You can submit an [OpenRAG GitHub issue](https://github.com/langflow-ai/openrag/issues) to request support for specific models.
\ No newline at end of file
diff --git a/docs/docs/_partial-onboarding.mdx b/docs/docs/_partial-onboarding.mdx
index ce1f1939..c4eee73e 100644
--- a/docs/docs/_partial-onboarding.mdx
+++ b/docs/docs/_partial-onboarding.mdx
@@ -1,6 +1,7 @@
 import Icon from "@site/src/components/icon/icon";
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
+import PartialOllamaModels from '@site/docs/_partial-ollama-models.mdx';
 
 ## Complete the application onboarding process {#application-onboarding}
 
@@ -68,16 +69,14 @@ The overview demonstrates some basic functionality that is covered in the [quick
 
+Using Ollama as your language and embedding model provider offers greater flexibility and configuration options for hosting models.
+However, it requires additional setup because Ollama isn't included with OpenRAG.
+You must deploy Ollama separately before you can select it as a model provider.
+
 :::info
-Ollama isn't installed with OpenRAG. You must install it separately if you want to use Ollama as a model provider.
+<PartialOllamaModels />
 :::
 
-Using Ollama as your language and embedding model provider offers greater flexibility and configuration options for hosting models, but it can be advanced for new users.
-The recommendations given here are a reasonable starting point for users with at least one GPU and experience running LLMs locally.
-
-The OpenRAG team recommends the OpenAI `gpt-oss:20b` lanuage model and the [`nomic-embed-text`](https://ollama.com/library/nomic-embed-text) embedding model.
-However, `gpt-oss:20b` uses 16GB of RAM, so consider using Ollama Cloud or running Ollama on a remote machine.
-
 1. [Install Ollama locally or on a remote server](https://docs.ollama.com/), or [run models in Ollama Cloud](https://docs.ollama.com/cloud).
    If you are running a remote server, it must be accessible from your OpenRAG deployment.
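+
+   For example, you can verify that a remote Ollama server is reachable from your OpenRAG host (this check assumes Ollama's default port `11434`; replace `OLLAMA_HOST` with your server's hostname or IP address):
+
+   ```bash
+   # A successful response returns a JSON list of the models installed on the server.
+   curl http://OLLAMA_HOST:11434/api/tags
+   ```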
diff --git a/docs/docs/_partial-prereq-common.mdx b/docs/docs/_partial-prereq-common.mdx
index 66374fcc..4682d8cc 100644
--- a/docs/docs/_partial-prereq-common.mdx
+++ b/docs/docs/_partial-prereq-common.mdx
@@ -1,3 +1,5 @@
+import PartialOllamaModels from '@site/docs/_partial-ollama-models.mdx';
+
 * Gather the credentials and connection details for your preferred model providers.
 You must have access to at least one language model and one embedding model.
 If a provider offers both types, you can use the same provider for both models.
@@ -7,6 +9,10 @@ If a provider offers only one type, you must select two providers.
   * **Anthropic**: Create an [Anthropic API key](https://www.anthropic.com/docs/api/reference). Anthropic provides language models only; you must select an additional provider for embeddings.
   * **IBM watsonx.ai**: Get your watsonx.ai API endpoint, IBM project ID, and IBM API key from your watsonx deployment.
-  * **Ollama**: Deploy an [Ollama instance and models](https://docs.ollama.com/) locally, in the cloud, or on a remote server, and then get your Ollama server's base URL and the names of the models that you want to use.
+  * **Ollama**: Deploy an [Ollama instance and models](https://docs.ollama.com/) locally, in the cloud, or on a remote server. Then, get your Ollama server's base URL and the names of the models that you want to use.
+
+    :::info
+    <PartialOllamaModels />
+    :::
 
 * Optional: Install GPU support with an NVIDIA GPU, [CUDA](https://docs.nvidia.com/cuda/) support, and compatible NVIDIA drivers on the OpenRAG host machine.
 If you don't have GPU capabilities, OpenRAG provides an alternate CPU-only deployment.
\ No newline at end of file
diff --git a/docs/docs/get-started/docker.mdx b/docs/docs/get-started/docker.mdx
index 9eca74f1..575410f2 100644
--- a/docs/docs/get-started/docker.mdx
+++ b/docs/docs/get-started/docker.mdx
@@ -11,6 +11,7 @@ import PartialPrereqNoScript from '@site/docs/_partial-prereq-no-script.mdx';
 import PartialPrereqWindows from '@site/docs/_partial-prereq-windows.mdx';
 import PartialPrereqPython from '@site/docs/_partial-prereq-python.mdx';
 import PartialInstallNextSteps from '@site/docs/_partial-install-next-steps.mdx';
+import PartialOllamaModels from '@site/docs/_partial-ollama-models.mdx';
 
 To manage your own OpenRAG services, deploy OpenRAG with Docker or Podman.
 
diff --git a/docs/docs/get-started/install-uv.mdx b/docs/docs/get-started/install-uv.mdx
index 9a73546e..d99169b6 100644
--- a/docs/docs/get-started/install-uv.mdx
+++ b/docs/docs/get-started/install-uv.mdx
@@ -13,6 +13,7 @@ import PartialPrereqWindows from '@site/docs/_partial-prereq-windows.mdx';
 import PartialPrereqPython from '@site/docs/_partial-prereq-python.mdx';
 import PartialInstallNextSteps from '@site/docs/_partial-install-next-steps.mdx';
 import PartialOpenSearchAuthMode from '@site/docs/_partial-opensearch-auth-mode.mdx';
+import PartialOllamaModels from '@site/docs/_partial-ollama-models.mdx';
 
 Use [`uv`](https://docs.astral.sh/uv/getting-started/installation/) to install OpenRAG as a managed or unmanaged dependency in a new or existing Python project.
 
diff --git a/docs/docs/get-started/install-uvx.mdx b/docs/docs/get-started/install-uvx.mdx
index a165191f..a592de25 100644
--- a/docs/docs/get-started/install-uvx.mdx
+++ b/docs/docs/get-started/install-uvx.mdx
@@ -13,6 +13,7 @@ import PartialPrereqWindows from '@site/docs/_partial-prereq-windows.mdx';
 import PartialPrereqPython from '@site/docs/_partial-prereq-python.mdx';
 import PartialInstallNextSteps from '@site/docs/_partial-install-next-steps.mdx';
 import PartialOpenSearchAuthMode from '@site/docs/_partial-opensearch-auth-mode.mdx';
+import PartialOllamaModels from '@site/docs/_partial-ollama-models.mdx';
 
 Use [`uvx`](https://docs.astral.sh/uv/guides/tools/#running-tools) to invoke OpenRAG outside of a Python project or without modifying your project's dependencies.
 
diff --git a/docs/docs/get-started/install.mdx b/docs/docs/get-started/install.mdx
index d044e48e..2fe97b30 100644
--- a/docs/docs/get-started/install.mdx
+++ b/docs/docs/get-started/install.mdx
@@ -12,6 +12,7 @@ import PartialPrereqWindows from '@site/docs/_partial-prereq-windows.mdx';
 import PartialPrereqPython from '@site/docs/_partial-prereq-python.mdx';
 import PartialInstallNextSteps from '@site/docs/_partial-install-next-steps.mdx';
 import PartialOpenSearchAuthMode from '@site/docs/_partial-opensearch-auth-mode.mdx';
+import PartialOllamaModels from '@site/docs/_partial-ollama-models.mdx';
 
 :::tip
 To quickly install and test OpenRAG's core features, try the [quickstart](/quickstart).
diff --git a/docs/docs/support/troubleshoot.mdx b/docs/docs/support/troubleshoot.mdx
index 958fa102..c0fc6c3d 100644
--- a/docs/docs/support/troubleshoot.mdx
+++ b/docs/docs/support/troubleshoot.mdx
@@ -3,6 +3,8 @@ title: Troubleshoot OpenRAG
 slug: /support/troubleshoot
 ---
 
+import PartialOllamaModels from '@site/docs/_partial-ollama-models.mdx';
+
 This page provides troubleshooting advice for issues you might encounter when using OpenRAG or contributing to OpenRAG.
 
 ## OpenSearch fails to start
@@ -137,4 +139,8 @@ To resolve this issue, do the following:
 
 ## Document ingestion or similarity search issues
 
-See [Troubleshoot ingestion](/ingestion#troubleshoot-ingestion).
\ No newline at end of file
+See [Troubleshoot ingestion](/ingestion#troubleshoot-ingestion).
+
+## Ollama model issues
+
+<PartialOllamaModels />
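+
+If a recommended model still produces poor results, confirm that the model is installed and can be loaded on your Ollama server, for example:
+
+```bash
+# List the models installed on this Ollama server.
+ollama list
+
+# Show which models are currently loaded into memory.
+ollama ps
+```
\ No newline at end of file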