From ae8638b0719e0ee178104e20bca780651f74b553 Mon Sep 17 00:00:00 2001
From: April M <36110273+aimurphy@users.noreply.github.com>
Date: Wed, 26 Nov 2025 09:10:32 -0800
Subject: [PATCH] working on knowledge pages still
---
docs/docs/_partial-temp-knowledge.mdx | 5 ++
docs/docs/core-components/agents.mdx | 2 +-
docs/docs/core-components/chat.mdx | 5 ++
docs/docs/core-components/ingestion.mdx | 108 ++++++++++++------------
docs/docs/core-components/knowledge.mdx | 8 +-
5 files changed, 68 insertions(+), 60 deletions(-)
create mode 100644 docs/docs/_partial-temp-knowledge.mdx
diff --git a/docs/docs/_partial-temp-knowledge.mdx b/docs/docs/_partial-temp-knowledge.mdx
new file mode 100644
index 00000000..7ecdf99c
--- /dev/null
+++ b/docs/docs/_partial-temp-knowledge.mdx
@@ -0,0 +1,5 @@
+import Icon from "@site/src/components/icon/icon";
+
+When using the OpenRAG **Chat**, click in the chat input field to upload a file to the current chat session.
+Files added this way are processed and made available to the agent for the current conversation only.
+These files aren't stored in the knowledge base permanently.
\ No newline at end of file
diff --git a/docs/docs/core-components/agents.mdx b/docs/docs/core-components/agents.mdx
index 97f48158..8def9af5 100644
--- a/docs/docs/core-components/agents.mdx
+++ b/docs/docs/core-components/agents.mdx
@@ -13,7 +13,7 @@ In a flow, the individual workflow steps are represented by [_components_](https
OpenRAG includes several built-in flows:
* The [**OpenRAG OpenSearch Agent** flow](/chat#flow) powers the **Chat** feature in OpenRAG.
-* The [**OpenSearch Ingestion** and **OpenSearch URL Ingestion** flows](/ingestion#knowledge-ingestion-flows) process documents and web content for storage in your OpenSearch knowledge base.
+* The [**OpenSearch Ingestion** and **OpenSearch URL Ingestion** flows](/ingestion) process documents and web content for storage in your OpenSearch knowledge base.
* The [**OpenRAG OpenSearch Nudges** flow](/chat#nudges) provides optional contextual suggestions in the OpenRAG **Chat**.
You can customize these flows and create your own flows using OpenRAG's embedded Langflow visual editor.
diff --git a/docs/docs/core-components/chat.mdx b/docs/docs/core-components/chat.mdx
index def7d8ba..728f3fb6 100644
--- a/docs/docs/core-components/chat.mdx
+++ b/docs/docs/core-components/chat.mdx
@@ -7,6 +7,7 @@ import Icon from "@site/src/components/icon/icon";
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import PartialIntegrateChat from '@site/docs/_partial-integrate-chat.mdx';
+import PartialTempKnowledge from '@site/docs/_partial-temp-knowledge.mdx';
After you [upload documents to your knowledge base](/ingestion), you can use the OpenRAG **Chat** feature to interact with your knowledge through natural language queries.
@@ -71,6 +72,10 @@ Click a nudge to accept it and provide the nudge's context to the OpenRAG **Chat
Like OpenRAG's other built-in flows, you can [inspect the flow in Langflow](/agents#inspect-and-modify-flows), and you can customize it if you want to change the nudge behavior.
+## Upload documents to the chat
+
+<PartialTempKnowledge />
+
## Inspect tool calls and knowledge
During the chat, you'll see information about the agent's process. For more detail, you can inspect individual tool calls. This is helpful for troubleshooting because it shows you how the agent used particular tools. For example, click **Function Call: search_documents (tool_call)** to view the log of tool calls made by the agent to the **OpenSearch** component.
diff --git a/docs/docs/core-components/ingestion.mdx b/docs/docs/core-components/ingestion.mdx
index 4f00be00..cb63a02f 100644
--- a/docs/docs/core-components/ingestion.mdx
+++ b/docs/docs/core-components/ingestion.mdx
@@ -6,57 +6,34 @@ slug: /ingestion
import Icon from "@site/src/components/icon/icon";
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
+import PartialTempKnowledge from '@site/docs/_partial-temp-knowledge.mdx';
-The documents in your OpenRAG [OpenSearch knowledge base](/knowledge) provide specialized context in addition to the general knowledge available to the language model that you select when you [install OpenRAG](/install).
-Upload documents to populate your knowledge base with unique content, such as your own company documents, research papers, or websites.
-Then, the [OpenRAG **Chat**](/chat) can retrieve relevant content from your knowledge base to provide context-aware responses.
+Upload documents to your [OpenRAG OpenSearch instance](/knowledge) to populate your knowledge base with unique content, such as your own company documents, research papers, or websites.
+Documents are processed through OpenRAG's knowledge ingestion flows with Docling.
-
-
-
- To verify the agent's response, click **Knowledge** to view the documents stored in the OpenRAG OpenSearch vector database.
-You can click a document to view the chunks of the document as they are stored in the database.
-
-4. Click **Add Knowledge** to add your own documents to your OpenRAG knowledge base.
-
- For this quickstart, use either the **File** or **Folder** upload options to load documents from your local machine.
- **Folder** uploads an entire directory.
- The default directory is the `/documents` subdirectory in your OpenRAG installation directory.
-
- For information about the cloud storage provider options, see [Ingest files through OAuth connectors](/ingestion#oauth-ingestion).
-
-
-
-
-
-
-
-## ingest
-
-OpenRAG can ingest knowledge from direct file uploads, URLs, and OAuth connectors.
-
-Knowledge ingestion is powered by OpenRAG's built-in [knowledge ingestion flows](/ingestion#knowledge-ingestion-flows) that use Docling Serve to process documents before storing the documents in your OpenSearch database.
+OpenRAG can ingest knowledge from direct file uploads, URLs, and OAuth-authenticated connections.
+Knowledge ingestion is powered by OpenRAG's built-in knowledge ingestion flows that use Docling to process documents before storing the documents in your OpenSearch database.
During ingestion, documents are broken into smaller chunks of content that are then embedded using your selected [embedding model](/knowledge#set-the-embedding-model-and-dimensions).
-The chunks, embeddings, and associated metadata (which connects chunks of the same document) are stored in your OpenSearch database.
+Then, the chunks, embeddings, and associated metadata (which connects chunks of the same document) are stored in your OpenSearch database.
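+
+As a rough illustration of these steps, the following sketch chunks a document with overlap, embeds each chunk, and stores it in OpenSearch with metadata that links the chunks back to their source file. This is a minimal sketch only: the `embed` callable and the fixed-size splitter are hypothetical stand-ins, not OpenRAG's actual Docling- and Langflow-based implementation.
+
+```python
+# Illustrative only -- not OpenRAG's actual ingestion code.
+from opensearchpy import OpenSearch  # official OpenSearch Python client
+
+def chunk_text(text: str, chunk_size: int = 1000, overlap: int = 200) -> list[str]:
+    """Split text into fixed-size chunks that overlap to preserve context."""
+    step = chunk_size - overlap
+    return [text[i:i + chunk_size] for i in range(0, len(text), step)]
+
+def ingest(client: OpenSearch, filename: str, text: str, embed) -> None:
+    """Embed each chunk and store it with metadata linking it to its document."""
+    for position, chunk in enumerate(chunk_text(text)):
+        client.index(index="documents", body={
+            "text": chunk,
+            "embedding": embed(chunk),  # vector from your selected embedding model
+            "metadata": {"filename": filename, "chunk_index": position},
+        })
+```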
-Like all [OpenRAG flows](/agents), you can [inspect the flows in Langflow](/agents#inspect-and-modify-flows), and you can customize them if you want to change the [ingestion settings](/ingestion#knowledge-ingestion-settings).
+Like all [OpenRAG flows](/agents), you can [inspect the flows in Langflow](/agents#inspect-and-modify-flows), and you can customize them if you want to change the knowledge ingestion settings.
-## Ingest local files and folders {#knowledge-ingestion-flows}
+## Ingest local files and folders
-
+You can upload files and folders from your local machine to your knowledge base. When you do this, the **OpenSearch Ingestion** flow runs in the background.
-The **OpenSearch Ingestion** flow uses Langflow's [**File** component](https://docs.langflow.org/components-data#file) to split and embed files loaded from your local machine into the OpenSearch database.
+1. Click **Knowledge** to view your OpenSearch knowledge base.
-The default path to your local folder is mounted from the `./documents` folder in your OpenRAG project directory to the `/app/documents/` directory inside the Docker container. Files added to the host or the container will be visible in both locations. To configure this location, modify the **Documents Paths** variable in either the TUI's [Advanced Setup](/install#setup) menu or in the `.env` used by Docker Compose.
+2. Click **Add Knowledge** to add your own documents to your OpenRAG knowledge base.
-To load and process a single file from the mapped location, click **Add Knowledge**, and then click **File**.
-The file is loaded into your OpenSearch database, and appears in the Knowledge page.
+3. To upload one file, click **File**. To upload all documents in a folder, click **Folder**.
-To load and process a directory from the mapped location, click **Add Knowledge**, and then click **Folder**.
-The files are loaded into your OpenSearch database, and appear in the Knowledge page.
+ The default path for either **File** or **Folder** uploads is the `/documents` subdirectory in your OpenRAG installation directory.
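+
+   For example, with a default installation you can copy a file into this directory on the host to make it available for upload (the filename here is hypothetical):
+
+   ```bash
+   # ./documents inside your OpenRAG installation directory
+   cp ~/reports/q3-summary.pdf ./documents/
+   ```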
-To add files directly to a chat session, click in the chat input and select the files you want to include. Files added this way are processed and made available to the agent for the current conversation, and are not permanently added to the knowledge base.
+### Ingest local files temporarily
+
+<PartialTempKnowledge />
### OpenSearch Ingestion flow
@@ -66,6 +43,8 @@ The **OpenSearch Ingestion** flow is the default knowledge ingestion flow in Ope
If you [inspect the flow in Langflow](/agents#inspect-and-modify-flows), you'll see that it is composed of ten components that work together to process and store documents in your knowledge base:
+* Langflow's [**File** component](https://docs.langflow.org/components-data#file) loads files from your local machine so that the rest of the flow can split, embed, and store them.
+  The default path to your local folder is mounted from the `./documents` folder in your OpenRAG project directory to the `/app/documents/` directory inside the Docker container. Files added on the host or in the container are visible in both locations. To change this location, modify the **Documents Paths** variable in either the TUI's [Advanced Setup](/install#setup) menu or in the `.env` file used by Docker Compose.
* The [**Docling Serve** component](https://docs.langflow.org/bundles-docling) processes input documents by connecting to your instance of Docling Serve.
* The [**Export DoclingDocument** component](https://docs.langflow.org/components-docling) exports the processed DoclingDocument to markdown format with image export mode set to placeholder. This conversion makes the structured document data into a standardized format for further processing.
* Three [**DataFrame Operations** components](https://docs.langflow.org/components-processing#dataframe-operations) sequentially add `filename`, `file_size`, and `mimetype` metadata columns to the document data.
@@ -152,10 +131,11 @@ For each active task, depending on its state, you can find the task ID, start ti
* **Processing**: The task is performing ingestion operations.
* **Failed**: Something went wrong during ingestion, or the task was manually canceled.
+For troubleshooting advice, see [Troubleshoot ingestion](#troubleshoot-ingestion).
To stop an active task, click **Cancel**. Canceling a task stops processing immediately and marks the task as **Failed**.
-## Troubleshoot ingestion (#troubleshoot-ingestion)
+## Troubleshoot ingestion {#troubleshoot-ingestion}
If an ingestion task fails, do the following:
@@ -176,20 +156,24 @@ For example:
* Make sure scanned documents are legible enough for extraction, and enable the **OCR** option. Poorly scanned documents might require additional preparation or rescanning before ingestion.
* Adjust the **Chunk Size** and **Chunk Overlap** settings to better suit your documents. Larger chunks provide more context but can include irrelevant information, while smaller chunks yield more precise semantic search but can lack context.
-For more information about modifying ingestion parameters and flows, see [Docling Serve for knowledge ingestion](/knowledge#docling-serve-for-knowledge-ingestion).
+For more information about modifying ingestion parameters and flows, see [Knowledge ingestion settings](#knowledge-ingestion-settings).
-## Docling Serve for knowledge ingestion {#docling-serve-for-knowledge-ingestion}
-
-
+## Knowledge ingestion settings {#knowledge-ingestion-settings}
OpenRAG uses [Docling](https://docling-project.github.io/docling/) for document ingestion.
-More specifically, OpenRAG uses [Docling Serve](https://github.com/docling-project/docling-serve), which starts a `docling serve` process on your local machine and runs Docling ingestion through an API service.
+
+You can use either Docling Serve or OpenRAG's built-in Docling ingestion pipeline to process documents.
+
+When OpenRAG uses [Docling Serve](https://github.com/docling-project/docling-serve), it starts a `docling serve` process on your local machine and runs Docling ingestion through an API service.
Docling ingests documents from your local machine or OAuth connectors, splits them into chunks, and stores them as separate, structured documents in the OpenSearch `documents` index.
OpenRAG chose Docling for its support for a wide variety of file formats, high performance, and advanced understanding of tables and images.
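+
+To see what this processing looks like outside OpenRAG, here is a minimal standalone sketch, assuming a recent `docling` release (`pip install docling`); OpenRAG drives the same library through its flows rather than through this direct API:
+
+```python
+from docling.document_converter import DocumentConverter
+from docling.chunking import HybridChunker
+
+converter = DocumentConverter()
+result = converter.convert("report.pdf")  # parses layout, tables, and images
+doc = result.document
+
+print(doc.export_to_markdown()[:500])  # standardized text representation
+
+chunker = HybridChunker()
+for chunk in chunker.chunk(dl_doc=doc):  # structure-aware chunks for embedding
+    print(chunk.text[:80])
+```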
-### Knowledge ingestion settings {#knowledge-ingestion-settings}
+The following knowledge ingestion settings apply only to the Docling Serve option:
To modify OpenRAG's ingestion settings, including the Docling settings and ingestion flows, go to **Settings**.
@@ -219,23 +203,29 @@ If OpenRAG detects that the local machine is running on macOS, OpenRAG uses the
**Picture descriptions** adds image descriptions generated by the [SmolVLM-256M-Instruct](https://huggingface.co/HuggingFaceTB/SmolVLM-Instruct) model to OCR processing. Enabling picture descriptions can slow ingestion performance.
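+
+As a rough sketch of how these settings map onto the underlying library, the equivalent Docling pipeline options look like the following. The option names assume a recent `docling` release and are not OpenRAG configuration:
+
+```python
+from docling.datamodel.base_models import InputFormat
+from docling.datamodel.pipeline_options import (
+    PdfPipelineOptions,
+    smolvlm_picture_description,
+)
+from docling.document_converter import DocumentConverter, PdfFormatOption
+
+opts = PdfPipelineOptions()
+opts.do_ocr = True  # extract text from scanned pages
+opts.do_picture_description = True  # caption images with a VLM
+opts.picture_description_options = smolvlm_picture_description  # SmolVLM preset
+
+converter = DocumentConverter(
+    format_options={InputFormat.PDF: PdfFormatOption(pipeline_options=opts)}
+)
+```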
-### Use OpenRAG default ingestion instead of Docling serve
+
+### Use the built-in Docling pipeline instead of Docling Serve
+
-If you want to use OpenRAG's built-in pipeline instead of Docling serve, set `DISABLE_INGEST_WITH_LANGFLOW=true` in [Environment variables](/reference/configuration#document-processing).
+If you want to use OpenRAG's built-in Docling ingestion pipeline instead of the separate Docling Serve service, set `DISABLE_INGEST_WITH_LANGFLOW=true` in your [OpenRAG environment variables](/reference/configuration#document-processing).
-The built-in pipeline still uses the Docling processor, but uses it directly without the Docling Serve API.
+The built-in pipeline uses the Docling processor directly instead of through the Docling Serve API.
-For more information, see [`processors.py` in the OpenRAG repository](https://github.com/langflow-ai/openrag/blob/main/src/models/processors.py#L58).
+For the underlying functionality, see [`processors.py`](https://github.com/langflow-ai/openrag/blob/main/src/models/processors.py#L58) in the OpenRAG repository.
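+
+For example, a minimal entry in the `.env` file used by Docker Compose:
+
+```bash
+# Use the built-in Docling pipeline instead of the Docling Serve API
+DISABLE_INGEST_WITH_LANGFLOW=true
+```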
+
## Ingestion performance expectations
+The following performance test was conducted with Docling Serve.
+
On a local VM with 7 vCPUs and 8 GiB RAM, OpenRAG ingested approximately 5.03 GB across 1,083 files in about 42 minutes.
This equates to approximately 0.43 documents per second, or about 2.4 seconds per document (2,535 seconds ÷ 1,083 files).
-You can generally expect equal or better performance on developer laptops and significantly faster on servers.
-Throughput scales with CPU cores, memory, storage speed, and configuration choices such as embedding model, chunk size and overlap, and concurrency.
+You can generally expect equal or better performance on developer laptops, and significantly faster performance on servers.
+Throughput scales with CPU cores, memory, storage speed, and configuration choices, such as the embedding model, chunk size, overlap, and concurrency.
-This test returned 12 errors (approximately 1.1 percent).
+This test returned 12 errors, approximately 1.1 percent of the total files ingested.
All errors were file-specific, and they didn't stop the pipeline.
* Ingestion dataset:
@@ -247,8 +237,8 @@ All errors were file-specific, and they didn't stop the pipeline.
* Machine: Apple M4 Pro
* Podman VM:
- * Name: `podman-machine-default`
- * Type: `applehv`
+ * Name: podman-machine-default
+ * Type: applehv
* vCPUs: 7
* Memory: 8 GiB
* Disk size: 100 GiB
@@ -264,4 +254,10 @@ All errors were file-specific, and they didn't stop the pipeline.
* Elapsed time: Approximately 42 minutes 15 seconds (2,535 seconds)
-* Throughput: Approximately 2.4 documents/second
\ No newline at end of file
+* Throughput: Approximately 0.43 documents per second (about 2.4 seconds per document)
+
+## See also
+
+* [Configure knowledge](/knowledge)
+* [Filter knowledge](/knowledge-filters)
+* [Chat with knowledge](/chat)
\ No newline at end of file
diff --git a/docs/docs/core-components/knowledge.mdx b/docs/docs/core-components/knowledge.mdx
index ddca2a8e..1fe9503a 100644
--- a/docs/docs/core-components/knowledge.mdx
+++ b/docs/docs/core-components/knowledge.mdx
@@ -10,10 +10,12 @@ import TabItem from '@theme/TabItem';
OpenRAG includes a built-in [OpenSearch](https://docs.opensearch.org/latest/) instance that serves as the underlying datastore for your _knowledge_ (documents).
This specialized database is used to store and retrieve your documents and the associated vector data (embeddings).
-You can [upload documents](/ingestion) from a variety of sources.
+The documents in your OpenSearch knowledge base provide specialized context in addition to the general knowledge available to the language model that you select when you [install OpenRAG](/install) or [edit a flow](/agents).
+
+You can [upload documents](/ingestion) from a variety of sources to populate your knowledge base with unique content, such as your own company documents, research papers, or websites.
Documents are processed through OpenRAG's knowledge ingestion flows with Docling.
-The [OpenRAG **Chat**](/chat) runs [similarity searches](https://www.ibm.com/think/topics/vector-search) against your OpenSearch database to retrieve relevant information and generate context-aware responses.
+Then, the [OpenRAG **Chat**](/chat) can run [similarity searches](https://www.ibm.com/think/topics/vector-search) against your OpenSearch database to retrieve relevant information and generate context-aware responses.
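+
+Conceptually, such a similarity search is a k-NN query against the vector field in the `documents` index. The following is a hedged sketch using the `opensearch-py` client; the `embedding` field name and the placeholder query vector are assumptions for illustration:
+
+```python
+from opensearchpy import OpenSearch
+
+client = OpenSearch(hosts=[{"host": "localhost", "port": 9200}])
+query_vector = [0.1] * 768  # placeholder; use your embedding model's output
+
+response = client.search(index="documents", body={
+    "size": 5,
+    "query": {"knn": {"embedding": {"vector": query_vector, "k": 5}}},
+})
+for hit in response["hits"]["hits"]:
+    print(hit["_score"], hit["_source"]["metadata"]["filename"])
+```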
You can configure how documents are ingested and how the **Chat** interacts with your knowledge base.
@@ -61,7 +63,7 @@ To change the embedding model, you must [reinstall OpenRAG](/install#reinstall).
## Set ingestion parameters
-For information about modifying ingestion parameters and flows, see [Knowledge ingestion settings](/ingestion#knowledge-ingestion-settings) and [Knowledge ingestion flows](/ingestion#knowledge-ingestion-flows).
+For information about modifying ingestion parameters and flows, see [Ingest knowledge](/ingestion).
## Delete knowledge