openrag/knowledge/index.html
2025-12-02 22:07:28 +00:00

126 lines
No EOL
33 KiB
HTML
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!doctype html>
<html lang="en" dir="ltr" class="docs-wrapper plugin-docs plugin-id-default docs-version-current docs-doc-page docs-doc-id-core-components/knowledge" data-has-hydrated="false">
<head>
<meta charset="UTF-8">
<meta name="generator" content="Docusaurus v3.9.2">
<title data-rh="true">Configure knowledge | OpenRAG</title><meta data-rh="true" name="viewport" content="width=device-width,initial-scale=1"><meta data-rh="true" name="robots" content="noindex, nofollow"><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:url" content="https://docs.openr.ag/knowledge"><meta data-rh="true" property="og:locale" content="en"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docusaurus_version" content="current"><meta data-rh="true" name="docusaurus_tag" content="docs-default-current"><meta data-rh="true" name="docsearch:version" content="current"><meta data-rh="true" name="docsearch:docusaurus_tag" content="docs-default-current"><meta data-rh="true" property="og:title" content="Configure knowledge | OpenRAG"><meta data-rh="true" name="description" content="OpenRAG includes a built-in OpenSearch instance that serves as the underlying datastore for your knowledge (documents)."><meta data-rh="true" property="og:description" content="OpenRAG includes a built-in OpenSearch instance that serves as the underlying datastore for your knowledge (documents)."><link data-rh="true" rel="icon" href="/img/favicon.ico"><link data-rh="true" rel="canonical" href="https://docs.openr.ag/knowledge"><link data-rh="true" rel="alternate" href="https://docs.openr.ag/knowledge" hreflang="en"><link data-rh="true" rel="alternate" href="https://docs.openr.ag/knowledge" hreflang="x-default"><script data-rh="true" type="application/ld+json">{"@context":"https://schema.org","@type":"BreadcrumbList","itemListElement":[{"@type":"ListItem","position":1,"name":"Configure knowledge","item":"https://docs.openr.ag/knowledge"}]}</script><link rel="stylesheet" href="/assets/css/styles.66669ecc.css">
<script src="/assets/js/runtime~main.f46e5be3.js" defer="defer"></script>
<script src="/assets/js/main.86159c47.js" defer="defer"></script>
</head>
<body class="navigation-with-keyboard">
<svg style="display: none;"><defs>
<symbol id="theme-svg-external-link" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"/></symbol>
</defs></svg>
<script>!function(){var t=function(){try{return new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}}()||function(){try{return window.localStorage.getItem("theme")}catch(t){}}();document.documentElement.setAttribute("data-theme",t||"light"),document.documentElement.setAttribute("data-theme-choice",t||"light")}(),function(){try{const c=new URLSearchParams(window.location.search).entries();for(var[t,e]of c)if(t.startsWith("docusaurus-data-")){var a=t.replace("docusaurus-data-","data-");document.documentElement.setAttribute(a,e)}}catch(t){}}()</script><div id="__docusaurus"><link rel="preload" as="image" href="/img/logo-openrag-light.svg"><link rel="preload" as="image" href="/img/logo-openrag-dark.svg"><div role="region" aria-label="Skip to main content"><a class="skipToContent_fXgn" href="#__docusaurus_skipToContent_fallback">Skip to main content</a></div><nav aria-label="Main" class="theme-layout-navbar navbar navbar--fixed-top"><div class="navbar__inner"><div class="theme-layout-navbar-left navbar__items"><button aria-label="Toggle navigation bar" aria-expanded="false" class="navbar__toggle clean-btn" type="button"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/"><div class="navbar__logo"><img src="/img/logo-openrag-light.svg" alt="OpenRAG Logo" class="themedComponent_mlkZ themedComponent--light_NVdE"><img src="/img/logo-openrag-dark.svg" alt="OpenRAG Logo" class="themedComponent_mlkZ themedComponent--dark_xIcU"></div></a></div><div class="theme-layout-navbar-right navbar__items navbar__items--right"><a href="https://github.com/langflow-ai/openrag" target="_blank" class="navbar__item navbar__link header-github-link" aria-label="GitHub repository"></a><div class="toggle_vylO colorModeToggle_DEke"><button class="clean-btn toggleButton_gllP 
toggleButtonDisabled_aARS" type="button" disabled="" title="system mode" aria-label="Switch between dark and light mode (currently system mode)"><svg viewBox="0 0 24 24" width="24" height="24" aria-hidden="true" class="toggleIcon_g3eP lightToggleIcon_pyhR"><path fill="currentColor" d="M12,9c1.65,0,3,1.35,3,3s-1.35,3-3,3s-3-1.35-3-3S10.35,9,12,9 M12,7c-2.76,0-5,2.24-5,5s2.24,5,5,5s5-2.24,5-5 S14.76,7,12,7L12,7z M2,13l2,0c0.55,0,1-0.45,1-1s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S1.45,13,2,13z M20,13l2,0c0.55,0,1-0.45,1-1 s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S19.45,13,20,13z M11,2v2c0,0.55,0.45,1,1,1s1-0.45,1-1V2c0-0.55-0.45-1-1-1S11,1.45,11,2z M11,20v2c0,0.55,0.45,1,1,1s1-0.45,1-1v-2c0-0.55-0.45-1-1-1C11.45,19,11,19.45,11,20z M5.99,4.58c-0.39-0.39-1.03-0.39-1.41,0 c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0s0.39-1.03,0-1.41L5.99,4.58z M18.36,16.95 c-0.39-0.39-1.03-0.39-1.41,0c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0c0.39-0.39,0.39-1.03,0-1.41 L18.36,16.95z M19.42,5.99c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06c-0.39,0.39-0.39,1.03,0,1.41 s1.03,0.39,1.41,0L19.42,5.99z M7.05,18.36c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06 c-0.39,0.39-0.39,1.03,0,1.41s1.03,0.39,1.41,0L7.05,18.36z"></path></svg><svg viewBox="0 0 24 24" width="24" height="24" aria-hidden="true" class="toggleIcon_g3eP darkToggleIcon_wfgR"><path fill="currentColor" d="M9.37,5.51C9.19,6.15,9.1,6.82,9.1,7.5c0,4.08,3.32,7.4,7.4,7.4c0.68,0,1.35-0.09,1.99-0.27C17.45,17.19,14.93,19,12,19 c-3.86,0-7-3.14-7-7C5,9.07,6.81,6.55,9.37,5.51z M12,3c-4.97,0-9,4.03-9,9s4.03,9,9,9s9-4.03,9-9c0-0.46-0.04-0.92-0.1-1.36 c-0.98,1.37-2.58,2.26-4.4,2.26c-2.98,0-5.4-2.42-5.4-5.4c0-1.81,0.89-3.42,2.26-4.4C12.92,3.04,12.46,3,12,3L12,3z"></path></svg><svg viewBox="0 0 24 24" width="24" height="24" aria-hidden="true" class="toggleIcon_g3eP systemToggleIcon_QzmC"><path fill="currentColor" d="m12 21c4.971 0 9-4.029 9-9s-4.029-9-9-9-9 4.029-9 9 4.029 
9 9 9zm4.95-13.95c1.313 1.313 2.05 3.093 2.05 4.95s-0.738 3.637-2.05 4.95c-1.313 1.313-3.093 2.05-4.95 2.05v-14c1.857 0 3.637 0.737 4.95 2.05z"></path></svg></button></div><div class="navbarSearchContainer_Bca1"></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div id="__docusaurus_skipToContent_fallback" class="theme-layout-main main-wrapper mainWrapper_z2l0"><div class="docsWrapper_hBAB"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_sjWU" type="button"></button><div class="docRoot_UBD9"><aside class="theme-doc-sidebar-container docSidebarContainer_YfHR"><div class="sidebarViewport_aRkj"><div class="sidebar_njMd"><nav aria-label="Docs sidebar" class="menu thin-scrollbar menu_SIkG"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/"><span title="About OpenRAG" class="linkLabel_WmDU">About OpenRAG</span></a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/quickstart"><span title="Quickstart" class="linkLabel_WmDU">Quickstart</span></a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/install"><span title="Install OpenRAG with TUI" class="linkLabel_WmDU">Install OpenRAG with TUI</span></a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docker"><span title="Install OpenRAG containers" class="linkLabel_WmDU">Install OpenRAG containers</span></a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/agents"><span title="Flows" class="linkLabel_WmDU">Flows</span></a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 
menu__list-item"><div class="menu__list-item-collapsible"><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret menu__link--active" role="button" aria-expanded="true" href="/knowledge"><span title="Knowledge" class="categoryLinkLabel_W154">Knowledge</span></a></div><ul class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link menu__link--active" aria-current="page" tabindex="0" href="/knowledge"><span title="Configure knowledge" class="linkLabel_WmDU">Configure knowledge</span></a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/ingestion"><span title="Ingest knowledge" class="linkLabel_WmDU">Ingest knowledge</span></a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/knowledge-filters"><span title="Filter knowledge" class="linkLabel_WmDU">Filter knowledge</span></a></li></ul></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/chat"><span title="Chat" class="linkLabel_WmDU">Chat</span></a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/reference/configuration"><span title="Environment variables" class="linkLabel_WmDU">Environment variables</span></a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/support/troubleshoot"><span title="Troubleshoot OpenRAG" class="linkLabel_WmDU">Troubleshoot OpenRAG</span></a></li></ul></nav></div></div></aside><main class="docMainContainer_TBSr"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_VOVn"><div class="docItemContainer_Djhp"><article><nav class="theme-doc-breadcrumbs 
breadcrumbsContainer_Z_bl" aria-label="Breadcrumbs"><ul class="breadcrumbs"><li class="breadcrumbs__item"><a aria-label="Home page" class="breadcrumbs__link" href="/"><svg viewBox="0 0 24 24" class="breadcrumbHomeIcon_YNFT"><path d="M10 19v-5h4v5c0 .55.45 1 1 1h3c.55 0 1-.45 1-1v-7h1.7c.46 0 .68-.57.33-.87L12.67 3.6c-.38-.34-.96-.34-1.34 0l-8.36 7.53c-.34.3-.13.87.33.87H5v7c0 .55.45 1 1 1h3c.55 0 1-.45 1-1z" fill="currentColor"></path></svg></a></li><li class="breadcrumbs__item"><span class="breadcrumbs__link">Knowledge</span></li><li class="breadcrumbs__item breadcrumbs__item--active"><span class="breadcrumbs__link">Configure knowledge</span></li></ul></nav><div class="tocCollapsible_ETCw theme-doc-toc-mobile tocMobile_ITEo"><button type="button" class="clean-btn tocCollapsibleButton_TO0P">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>Configure knowledge</h1></header><p>OpenRAG includes a built-in <a href="https://docs.opensearch.org/latest/" target="_blank" rel="noopener noreferrer" class="">OpenSearch</a> instance that serves as the underlying datastore for your <em>knowledge</em> (documents).
This specialized database is used to store and retrieve your documents and the associated vector data (embeddings).</p>
<p>The documents in your OpenSearch knowledge base provide specialized context in addition to the general knowledge available to the language model that you select when you <a class="" href="/install">install OpenRAG</a> or <a class="" href="/agents">edit a flow</a>.</p>
<p>You can <a class="" href="/ingestion">upload documents</a> from a variety of sources to populate your knowledge base with unique content, such as your own company documents, research papers, or websites.
Documents are processed through OpenRAG&#x27;s knowledge ingestion flows with Docling.</p>
<p>Then, the <a class="" href="/chat">OpenRAG <strong>Chat</strong></a> can run <a href="https://www.ibm.com/think/topics/vector-search" target="_blank" rel="noopener noreferrer" class="">similarity searches</a> against your OpenSearch database to retrieve relevant information and generate context-aware responses.</p>
<p>You can configure how documents are ingested and how the <strong>Chat</strong> interacts with your knowledge base.</p>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id="browse-knowledge">Browse knowledge<a href="#browse-knowledge" class="hash-link" aria-label="Direct link to Browse knowledge" title="Direct link to Browse knowledge" translate="no"></a></h2>
<p>The <strong>Knowledge</strong> page lists the documents OpenRAG has ingested into your OpenSearch database, specifically in an <a href="https://docs.opensearch.org/latest/getting-started/intro/#index" target="_blank" rel="noopener noreferrer" class="">OpenSearch index</a> named <code>documents</code>.</p>
<p>To explore the raw contents of your knowledge base, click <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-library" aria-hidden="true"><path d="m16 6 4 14"></path><path d="M12 6v14"></path><path d="M8 8v12"></path><path d="M4 4v16"></path></svg> <strong>Knowledge</strong> to get a list of all ingested documents.
Click a document to view the chunks produced from splitting the document during ingestion.</p>
<p>OpenRAG includes some initial documents about OpenRAG. You can use these documents to ask OpenRAG about itself, and to test the <a class="" href="/chat"><strong>Chat</strong></a> feature before uploading your own documents.
If you <a href="#delete-knowledge" class="">delete these documents</a>, you won&#x27;t be able to ask OpenRAG about itself and its own functionality.
It is recommended that you keep these documents, and use <a class="" href="/knowledge-filters">filters</a> to separate them from your other knowledge.</p>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id="auth">OpenSearch authentication and document access<a href="#auth" class="hash-link" aria-label="Direct link to OpenSearch authentication and document access" title="Direct link to OpenSearch authentication and document access" translate="no"></a></h2>
<p>When you <a class="" href="/install">install OpenRAG</a>, you can choose between two setup modes: <strong>Basic Setup</strong> and <strong>Advanced Setup</strong>.
The mode you choose determines how OpenRAG authenticates with OpenSearch and controls access to documents:</p>
<ul>
<li class="">
<p><strong>Basic Setup (no-auth mode)</strong>: If you choose <strong>Basic Setup</strong>, then OpenRAG is installed in no-auth mode.
This mode uses a single anonymous JWT token for OpenSearch authentication.
There is no differentiation between users.
All users that access your OpenRAG instance can access all documents uploaded to your OpenSearch knowledge base.</p>
</li>
<li class="">
<p><strong>Advanced Setup (OAuth mode)</strong>: If you choose <strong>Advanced Setup</strong>, then OpenRAG is installed in OAuth mode.
This mode uses a unique JWT token for each OpenRAG user, and each document is tagged with user ownership. Documents are filtered by user owner.
This means users see only the documents that they uploaded or have access to.</p>
</li>
</ul>
<p>You can enable OAuth mode after installation.
For more information, see <a class="" href="/ingestion#oauth-ingestion">Ingest files with OAuth connectors</a>.</p>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id="opensearch-indexes">OpenSearch indexes<a href="#opensearch-indexes" class="hash-link" aria-label="Direct link to OpenSearch indexes" title="Direct link to OpenSearch indexes" translate="no"></a></h2>
<p>An <a href="https://docs.opensearch.org/latest/getting-started/intro/#index" target="_blank" rel="noopener noreferrer" class="">OpenSearch index</a> is a collection of documents in an OpenSearch database.</p>
<p>By default, all documents you upload to your OpenRAG knowledge base are stored in an index named <code>documents</code>.</p>
<p>It is possible to change the index name by <a class="" href="/agents#inspect-and-modify-flows">editing the ingestion flow</a>.
However, this can impact dependent processes, such as the <a class="" href="/knowledge-filters">filters</a> and <a class="" href="/chat"><strong>Chat</strong></a> flow, that reference the <code>documents</code> index by default.
Make sure you edit other flows as needed to ensure all processes use the same index name.</p>
<p>If you encounter errors or unexpected behavior after changing the index name, you can <a class="" href="/agents#revert-a-built-in-flow-to-its-original-configuration">revert the flows to their original configuration</a>, or <a class="" href="/knowledge#delete-knowledge">delete knowledge</a> to clear the existing documents from your knowledge base.</p>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id="knowledge-ingestion-settings">Knowledge ingestion settings<a href="#knowledge-ingestion-settings" class="hash-link" aria-label="Direct link to Knowledge ingestion settings" title="Direct link to Knowledge ingestion settings" translate="no"></a></h2>
<div class="theme-admonition theme-admonition-warning admonition_xJq3 alert alert--warning"><div class="admonitionHeading_Gvgb"><span class="admonitionIcon_Rf37"><svg viewBox="0 0 16 16"><path fill-rule="evenodd" d="M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"></path></svg></span>warning</div><div class="admonitionContent_BuS1"><p>Knowledge ingestion settings apply to documents you upload after making the changes.
Documents uploaded before changing these settings aren&#x27;t reprocessed.</p></div></div>
<p>After changing knowledge ingestion settings, you must determine if you need to reupload any documents to be consistent with the new settings.</p>
<p>It isn&#x27;t always necessary to reupload documents after changing knowledge ingestion settings.
For example, it is typical to upload some documents with OCR enabled and others without OCR enabled.</p>
<p>If needed, you can use <a class="" href="/knowledge-filters">filters</a> to separate documents that you uploaded with different settings, such as different embedding models.</p>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id="set-the-embedding-model-and-dimensions">Set the embedding model and dimensions<a href="#set-the-embedding-model-and-dimensions" class="hash-link" aria-label="Direct link to Set the embedding model and dimensions" title="Direct link to Set the embedding model and dimensions" translate="no"></a></h3>
<p>When you <a class="" href="/install">install OpenRAG</a>, you select at least one embedding model during <a class="" href="/install#application-onboarding">application onboarding</a>.
OpenRAG automatically detects and configures the appropriate vector dimensions for your selected embedding model, ensuring optimal search performance and compatibility.</p>
<p>In the OpenRAG repository, you can find the complete list of supported models in <a href="https://github.com/langflow-ai/openrag/blob/main/src/services/models_service.py" target="_blank" rel="noopener noreferrer" class=""><code>models_service.py</code></a> and the corresponding vector dimensions in <a href="https://github.com/langflow-ai/openrag/blob/main/src/config/settings.py" target="_blank" rel="noopener noreferrer" class=""><code>settings.py</code></a>.</p>
<p>During application onboarding, you can select from the supported models.
The default embedding dimension is <code>1536</code>, and the default model is the OpenAI <code>text-embedding-3-small</code>.</p>
<p>If you want to use an unsupported model, you must manually set the model in your <a class="" href="/reference/configuration">OpenRAG configuration</a>.
If you use an unsupported embedding model that doesn&#x27;t have defined dimensions in <code>settings.py</code>, then OpenRAG falls back to the default dimensions (1536) and logs a warning. OpenRAG&#x27;s OpenSearch instance and flows continue to work, but <a href="https://www.ibm.com/think/topics/vector-search" target="_blank" rel="noopener noreferrer" class="">similarity search</a> quality can be affected if the actual model dimensions aren&#x27;t 1536.</p>
<p>To change the embedding model after onboarding, it is recommended that you modify the embedding model setting in the OpenRAG <strong>Settings</strong> page or in your <a class="" href="/reference/configuration">OpenRAG configuration</a>.
This will automatically update all relevant <a class="" href="/agents">OpenRAG flows</a> to use the new embedding model configuration.</p>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id="set-docling-parameters">Set Docling parameters<a href="#set-docling-parameters" class="hash-link" aria-label="Direct link to Set Docling parameters" title="Direct link to Set Docling parameters" translate="no"></a></h3>
<p>OpenRAG uses <a href="https://docling-project.github.io/docling/" target="_blank" rel="noopener noreferrer" class="">Docling</a> for document ingestion because it supports many file formats, processes tables and images well, and performs efficiently.</p>
<p>When you <a class="" href="/ingestion">upload documents</a>, Docling processes the files, splits them into chunks, and stores them as separate, structured documents in your OpenSearch knowledge base.</p>
<p>You can use either Docling Serve or OpenRAG&#x27;s built-in Docling ingestion pipeline to process documents.</p>
<div class="theme-tabs-container tabs-container tabList__CuJ"><ul role="tablist" aria-orientation="horizontal" class="tabs"><li role="tab" tabindex="0" aria-selected="true" class="tabs__item tabItem_LNqP tabs__item--active">Docling Serve ingestion</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_LNqP">Built-in Docling ingestion</li></ul><div class="margin-top--md"><div role="tabpanel" class="tabItem_Ymn6"><p>By default, OpenRAG uses <a href="https://github.com/docling-project/docling-serve" target="_blank" rel="noopener noreferrer" class="">Docling Serve</a>.
This means that OpenRAG starts a <code>docling serve</code> process on your local machine and runs Docling ingestion through an API service.</p></div><div role="tabpanel" class="tabItem_Ymn6" hidden=""><p>If you want to use OpenRAG&#x27;s built-in Docling ingestion pipeline instead of the separate Docling Serve service, set <code>DISABLE_INGEST_WITH_LANGFLOW=true</code> in your <a class="" href="/reference/configuration#document-processing">OpenRAG environment variables</a>.</p><p>The built-in pipeline uses the Docling processor directly instead of through the Docling Serve API.</p><p>For the underlying functionality, see <a href="https://github.com/langflow-ai/openrag/blob/main/src/models/processors.py#L58" target="_blank" rel="noopener noreferrer" class=""><code>processors.py</code></a> in the OpenRAG repository.</p></div></div></div>
<p>To modify the Docling ingestion and embedding parameters, click <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-settings2 lucide-settings-2" aria-hidden="true"><path d="M14 17H5"></path><path d="M19 7h-9"></path><circle cx="17" cy="17" r="3"></circle><circle cx="7" cy="7" r="3"></circle></svg> <strong>Settings</strong> in the OpenRAG user interface.</p>
<div class="theme-admonition theme-admonition-tip admonition_xJq3 alert alert--success"><div class="admonitionHeading_Gvgb"><span class="admonitionIcon_Rf37"><svg viewBox="0 0 12 16"><path fill-rule="evenodd" d="M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"></path></svg></span>tip</div><div class="admonitionContent_BuS1"><p>OpenRAG warns you if <code>docling serve</code> isn&#x27;t running.
You can <a class="" href="/install#tui-container-management">start and stop OpenRAG services</a> from the TUI main menu with <strong>Start Native Services</strong> or <strong>Stop Native Services</strong>.</p></div></div>
<ul>
<li class="">
<p><strong>Embedding model</strong>: Select the model to use to generate vector embeddings for your documents.</p>
<p>This is initially set during installation.
The recommended way to change this setting is in the OpenRAG <strong>Settings</strong> or your <a class="" href="/reference/configuration">OpenRAG configuration</a>.
This will automatically update all relevant <a class="" href="/agents">OpenRAG flows</a> to use the new embedding model configuration.</p>
<p>If you uploaded documents prior to changing the embedding model, you can <a class="" href="/knowledge-filters">create filters</a> to separate documents embedded with different models, or you can reupload all documents to regenerate embeddings with the new model.
If you use multiple embedding models, similarity search (in the <strong>Chat</strong>) can take longer because it searches each model&#x27;s embeddings separately.</p>
</li>
<li class="">
<p><strong>Chunk size</strong>: Set the number of characters for each text chunk when breaking down a file.
Larger chunks yield more context per chunk, but can include irrelevant information. Smaller chunks yield more precise semantic search, but can lack context.
The default value is 1000 characters, which is usually a good balance between context and precision.</p>
</li>
<li class="">
<p><strong>Chunk overlap</strong>: Set the number of characters that overlap across chunk boundaries.
Use larger overlap values for documents where context is most important. Use smaller overlap values for simpler documents or when optimization is most important.
The default value is 200 characters, which represents an overlap of 20 percent if the <strong>Chunk size</strong> is 1000. This is suitable for general use. For faster processing, decrease the overlap to approximately 10 percent. For more complex documents where you need to preserve context across chunks, increase it to approximately 40 percent.</p>
</li>
<li class="">
<p><strong>Table Structure</strong>: Enables Docling&#x27;s <a href="https://docling-project.github.io/docling/reference/document_converter/" target="_blank" rel="noopener noreferrer" class=""><code>DocumentConverter</code></a> tool for parsing tables. Instead of treating tables as plain text, tables are output as structured table data with preserved relationships and metadata. This option is enabled by default.</p>
</li>
<li class="">
<p><strong>OCR</strong>: Enables Optical Character Recognition (OCR) processing when extracting text from images and ingesting scanned documents. When OCR is disabled, Docling&#x27;s <a href="https://docling-project.github.io/docling/reference/document_converter/" target="_blank" rel="noopener noreferrer" class=""><code>DocumentConverter</code></a> processes text-based documents faster, but images are ignored and not processed.</p>
<p>This option is disabled by default. Enabling OCR can slow ingestion performance.</p>
<p>If OpenRAG detects that the local machine is running on macOS, OpenRAG uses the <a href="https://www.piwheels.org/project/ocrmac/" target="_blank" rel="noopener noreferrer" class="">ocrmac</a> OCR engine. Other platforms use <a href="https://www.jaided.ai/easyocr/" target="_blank" rel="noopener noreferrer" class="">easyocr</a>.</p>
</li>
<li class="">
<p><strong>Picture descriptions</strong>: Only applicable if <strong>OCR</strong> is enabled. Adds image descriptions generated by the <a href="https://huggingface.co/HuggingFaceTB/SmolVLM-256M-Instruct" target="_blank" rel="noopener noreferrer" class=""><code>SmolVLM-256M-Instruct</code></a> model. Enabling picture descriptions can slow ingestion performance.</p>
</li>
</ul>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id="set-the-local-documents-path">Set the local documents path<a href="#set-the-local-documents-path" class="hash-link" aria-label="Direct link to Set the local documents path" title="Direct link to Set the local documents path" translate="no"></a></h3>
<p>The default path for local uploads is the <code>./openrag-documents</code> subdirectory in your OpenRAG installation directory. This is mounted to the <code>/app/openrag-documents/</code> directory inside the OpenRAG container. Files added to the host or container directory are visible in both locations.</p>
<p>To change this location, modify the <strong>Documents Paths</strong> variable in either the <a class="" href="/install#setup"><strong>Advanced Setup</strong> menu</a> or in the <code>.env</code> used by Docker Compose.</p>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id="delete-knowledge">Delete knowledge<a href="#delete-knowledge" class="hash-link" aria-label="Direct link to Delete knowledge" title="Direct link to Delete knowledge" translate="no"></a></h2>
<p>To clear your entire knowledge base, delete the contents of the <code>./opensearch-data</code> folder in your OpenRAG installation directory.
This is a destructive operation that cannot be undone.</p>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id="see-also">See also<a href="#see-also" class="hash-link" aria-label="Direct link to See also" title="Direct link to See also" translate="no"></a></h2>
<ul>
<li class=""><a class="" href="/ingestion">Ingest knowledge</a></li>
<li class=""><a class="" href="/knowledge-filters">Filter knowledge</a></li>
<li class=""><a class="" href="/chat">Chat with knowledge</a></li>
<li class=""><a class="" href="/agents#inspect-and-modify-flows">Inspect and modify flows</a></li>
</ul></div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="row margin-top--sm theme-doc-footer-edit-meta-row"><div class="col noPrint_WFHX"><a href="https://github.com/openrag/openrag/tree/main/docs/docs/core-components/knowledge.mdx" target="_blank" rel="noopener noreferrer" class="theme-edit-this-page"><svg fill="currentColor" height="20" width="20" viewBox="0 0 40 40" class="iconEdit_Z9Sw" aria-hidden="true"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div><div class="col lastUpdated_JAkA"></div></div></footer></article><nav class="docusaurus-mt-lg pagination-nav" aria-label="Docs pages"><a class="pagination-nav__link pagination-nav__link--prev" href="/agents"><div class="pagination-nav__sublabel">Previous</div><div class="pagination-nav__label">Flows</div></a><a class="pagination-nav__link pagination-nav__link--next" href="/ingestion"><div class="pagination-nav__sublabel">Next</div><div class="pagination-nav__label">Ingest knowledge</div></a></nav></div></div><div class="col col--3"><div class="tableOfContents_bqdL thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#browse-knowledge" class="table-of-contents__link toc-highlight">Browse knowledge</a></li><li><a href="#auth" class="table-of-contents__link toc-highlight">OpenSearch authentication and document access</a></li><li><a href="#opensearch-indexes" class="table-of-contents__link toc-highlight">OpenSearch indexes</a></li><li><a href="#knowledge-ingestion-settings" class="table-of-contents__link toc-highlight">Knowledge ingestion settings</a><ul><li><a href="#set-the-embedding-model-and-dimensions" class="table-of-contents__link toc-highlight">Set the embedding model and dimensions</a></li><li><a href="#set-docling-parameters" class="table-of-contents__link toc-highlight">Set Docling 
parameters</a></li><li><a href="#set-the-local-documents-path" class="table-of-contents__link toc-highlight">Set the local documents path</a></li></ul></li><li><a href="#delete-knowledge" class="table-of-contents__link toc-highlight">Delete knowledge</a></li><li><a href="#see-also" class="table-of-contents__link toc-highlight">See also</a></li></ul></div></div></div></div></main></div></div></div><footer class="theme-layout-footer footer"><div class="container container-fluid"><div class="row footer__links"><div class="theme-layout-footer-column col footer__col"><div class="footer__title"></div><ul class="footer__items clean-list"><li class="footer__item"><div class="footer-links">
<span>© 2025 OpenRAG</span>
</div></li></ul></div></div></div></footer></div>
</body>
</html>