diff --git a/404.html b/404.html index 4c0b14a7..cec24a43 100644 --- a/404.html +++ b/404.html @@ -4,7 +4,7 @@ OpenRAG - + diff --git a/agents/index.html b/agents/index.html index 4afeb758..44e20441 100644 --- a/agents/index.html +++ b/agents/index.html @@ -4,7 +4,7 @@ Use Langflow in OpenRAG | OpenRAG - + diff --git a/assets/js/ca2c3c0c.dc47ac79.js b/assets/js/ca2c3c0c.f282f8bd.js similarity index 70% rename from assets/js/ca2c3c0c.dc47ac79.js rename to assets/js/ca2c3c0c.f282f8bd.js index 74ec875c..2e451b02 100644 --- a/assets/js/ca2c3c0c.dc47ac79.js +++ b/assets/js/ca2c3c0c.f282f8bd.js @@ -1 +1 @@ -"use strict";(globalThis.webpackChunkopenrag_docs=globalThis.webpackChunkopenrag_docs||[]).push([[6919],{1381:(e,n,s)=>{s.d(n,{Ay:()=>l,RM:()=>r});var t=s(4848),o=s(8453);const r=[];function i(e){const n={code:"code",pre:"pre",...(0,o.R)(),...e.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-bash",metastring:'title="Docker"',children:"docker compose up -d\n"})}),"\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-bash",metastring:'title="Podman"',children:"podman compose up -d\n"})})]})}function l(e={}){const{wrapper:n}={...(0,o.R)(),...e.components};return n?(0,t.jsx)(n,{...e,children:(0,t.jsx)(i,{...e})}):i(e)}},1470:(e,n,s)=>{s.d(n,{A:()=>A});var t=s(6540),o=s(4164),r=s(7559),i=s(3104),l=s(6347),a=s(205),c=s(7485),d=s(1682),h=s(679);function u(e){return t.Children.toArray(e).filter(e=>"\n"!==e).map(e=>{if(!e||(0,t.isValidElement)(e)&&function(e){const{props:n}=e;return!!n&&"object"==typeof n&&"value"in n}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)})?.filter(Boolean)??[]}function p(e){const{values:n,children:s}=e;return(0,t.useMemo)(()=>{const e=n??function(e){return u(e).map(({props:{value:e,label:n,attributes:s,default:t}})=>({value:e,label:n,attributes:s,default:t}))}(s);return function(e){const n=(0,d.XI)(e,(e,n)=>e.value===n.value);if(n.length>0)throw new Error(`Docusaurus error: Duplicate values "${n.map(e=>e.value).join(", ")}" found in . Every value needs to be unique.`)}(e),e},[n,s])}function g({value:e,tabValues:n}){return n.some(n=>n.value===e)}function f({queryString:e=!1,groupId:n}){const s=(0,l.W6)(),o=function({queryString:e=!1,groupId:n}){if("string"==typeof e)return e;if(!1===e)return null;if(!0===e&&!n)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. You can also provide an explicit value such as queryString="my-search-param".');return n??null}({queryString:e,groupId:n});return[(0,c.aZ)(o),(0,t.useCallback)(e=>{if(!o)return;const n=new URLSearchParams(s.location.search);n.set(o,e),s.replace({...s.location,search:n.toString()})},[o,s])]}function m(e){const{defaultValue:n,queryString:s=!1,groupId:o}=e,r=p(e),[i,l]=(0,t.useState)(()=>function({defaultValue:e,tabValues:n}){if(0===n.length)throw new Error("Docusaurus error: the component requires at least one children component");if(e){if(!g({value:e,tabValues:n}))throw new Error(`Docusaurus error: The has a defaultValue "${e}" but none of its children has the corresponding value. Available values are: ${n.map(e=>e.value).join(", ")}. If you intend to show no default tab, use defaultValue={null} instead.`);return e}const s=n.find(e=>e.default)??n[0];if(!s)throw new Error("Unexpected error: 0 tabValues");return s.value}({defaultValue:n,tabValues:r})),[c,d]=f({queryString:s,groupId:o}),[u,m]=function({groupId:e}){const n=function(e){return e?`docusaurus.tab.${e}`:null}(e),[s,o]=(0,h.Dv)(n);return[s,(0,t.useCallback)(e=>{n&&o.set(e)},[n,o])]}({groupId:o}),x=(()=>{const e=c??u;return g({value:e,tabValues:r})?e:null})();(0,a.A)(()=>{x&&l(x)},[x]);return{selectedValue:i,selectValue:(0,t.useCallback)(e=>{if(!g({value:e,tabValues:r}))throw new Error(`Can't select invalid tab value=${e}`);l(e),d(e),m(e)},[d,m,r]),tabValues:r}}var x=s(2303);const j={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};var w=s(4848);function b({className:e,block:n,selectedValue:s,selectValue:t,tabValues:r}){const l=[],{blockElementScrollPositionUntilNextRender:a}=(0,i.a_)(),c=e=>{const n=e.currentTarget,o=l.indexOf(n),i=r[o].value;i!==s&&(a(n),t(i))},d=e=>{let n=null;switch(e.key){case"Enter":c(e);break;case"ArrowRight":{const s=l.indexOf(e.currentTarget)+1;n=l[s]??l[0];break}case"ArrowLeft":{const s=l.indexOf(e.currentTarget)-1;n=l[s]??l[l.length-1];break}}n?.focus()};return(0,w.jsx)("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,o.A)("tabs",{"tabs--block":n},e),children:r.map(({value:e,label:n,attributes:t})=>(0,w.jsx)("li",{role:"tab",tabIndex:s===e?0:-1,"aria-selected":s===e,ref:e=>{l.push(e)},onKeyDown:d,onClick:c,...t,className:(0,o.A)("tabs__item",j.tabItem,t?.className,{"tabs__item--active":s===e}),children:n??e},e))})}function y({lazy:e,children:n,selectedValue:s}){const r=(Array.isArray(n)?n:[n]).filter(Boolean);if(e){const e=r.find(e=>e.props.value===s);return e?(0,t.cloneElement)(e,{className:(0,o.A)("margin-top--md",e.props.className)}):null}return(0,w.jsx)("div",{className:"margin-top--md",children:r.map((e,n)=>(0,t.cloneElement)(e,{key:n,hidden:e.props.value!==s}))})}function v(e){const n=m(e);return(0,w.jsxs)("div",{className:(0,o.A)(r.G.tabs.container,"tabs-container",j.tabList),children:[(0,w.jsx)(b,{...n,...e}),(0,w.jsx)(y,{...n,...e})]})}function A(e){const n=(0,x.A)();return(0,w.jsx)(v,{...e,children:u(e.children)},String(n))}},4577:(e,n,s)=>{s.d(n,{Ay:()=>l,RM:()=>r});var t=s(4848),o=s(8453);const r=[];function i(e){const n={code:"code",pre:"pre",...(0,o.R)(),...e.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-bash",metastring:'title="Docker"',children:"docker stop $(docker ps -q)\n"})}),"\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-bash",metastring:'title="Podman"',children:"podman stop --all\n"})})]})}function l(e={}){const{wrapper:n}={...(0,o.R)(),...e.components};return n?(0,t.jsx)(n,{...e,children:(0,t.jsx)(i,{...e})}):i(e)}},5421:(e,n,s)=>{s.r(n),s.d(n,{assets:()=>x,contentTitle:()=>m,default:()=>b,frontMatter:()=>f,metadata:()=>t,toc:()=>j});const t=JSON.parse('{"id":"core-components/ingestion","title":"Ingest knowledge","description":"Upload documents to your OpenRAG OpenSearch instance to populate your knowledge base with unique content, such as your own company documents, research papers, or websites.","source":"@site/docs/core-components/ingestion.mdx","sourceDirName":"core-components","slug":"/ingestion","permalink":"/ingestion","draft":false,"unlisted":false,"editUrl":"https://github.com/openrag/openrag/tree/main/docs/docs/core-components/ingestion.mdx","tags":[],"version":"current","frontMatter":{"title":"Ingest knowledge","slug":"/ingestion"},"sidebar":"tutorialSidebar","previous":{"title":"Configure knowledge","permalink":"/knowledge"},"next":{"title":"Filter knowledge","permalink":"/knowledge-filters"}}');var o=s(4848),r=s(8453),i=s(9179),l=s(1470),a=s(9365),c=s(8401);const d=[];function h(e){const n={a:"a",code:"code",li:"li",p:"p",strong:"strong",ul:"ul",...(0,r.R)(),...e.components},{Details:s}=n;return s||function(e,n){throw new Error("Expected "+(n?"component":"object")+" `"+e+"` to be defined: you likely forgot to import, pass, or provide it.")}("Details",!0),(0,o.jsxs)(s,{children:[(0,o.jsx)("summary",{children:"About the OpenSearch Ingestion flow"}),(0,o.jsxs)(n.p,{children:["When you upload documents locally or with OAuth connectors, the ",(0,o.jsx)(n.strong,{children:"OpenSearch Ingestion"})," flow runs in the background.\nBy default, this flow uses Docling Serve to import and process documents."]}),(0,o.jsxs)(n.p,{children:["Like all ",(0,o.jsx)(n.a,{href:"/agents",children:"OpenRAG flows"}),", you can ",(0,o.jsx)(n.a,{href:"/agents#inspect-and-modify-flows",children:"inspect the flow in Langflow"}),", and you can customize it if you want to change the knowledge ingestion settings."]}),(0,o.jsxs)(n.p,{children:["The ",(0,o.jsx)(n.strong,{children:"OpenSearch Ingestion"})," flow is comprised of several components that work together to process and store documents in your knowledge base:"]}),(0,o.jsxs)(n.ul,{children:["\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:[(0,o.jsxs)(n.a,{href:"https://docs.langflow.org/bundles-docling#docling-serve",children:[(0,o.jsx)(n.strong,{children:"Docling Serve"})," component"]}),": Ingests files and processes them by connecting to OpenRAG's local Docling Serve service. The output is ",(0,o.jsx)(n.code,{children:"DoclingDocument"})," data that contains the extracted text and metadata from the documents."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:[(0,o.jsxs)(n.a,{href:"https://docs.langflow.org/bundles-docling#export-doclingdocument",children:[(0,o.jsx)(n.strong,{children:"Export DoclingDocument"})," component"]}),": Exports processed ",(0,o.jsx)(n.code,{children:"DoclingDocument"})," data to Markdown format with image placeholders. This conversion standardizes the document data in preparation for further processing."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:[(0,o.jsxs)(n.a,{href:"https://docs.langflow.org/dataframe-operations",children:[(0,o.jsx)(n.strong,{children:"DataFrame Operations"})," component"]}),": Three of these components run sequentially to add metadata to the document data: ",(0,o.jsx)(n.code,{children:"filename"}),", ",(0,o.jsx)(n.code,{children:"file_size"}),", and ",(0,o.jsx)(n.code,{children:"mimetype"}),"."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:[(0,o.jsxs)(n.a,{href:"https://docs.langflow.org/split-text",children:[(0,o.jsx)(n.strong,{children:"Split Text"})," component"]}),": Splits the processed text into chunks, based on the configured ",(0,o.jsx)(n.a,{href:"/knowledge#knowledge-ingestion-settings",children:"chunk size and overlap settings"}),"."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:[(0,o.jsx)(n.strong,{children:"Secret Input"})," component: If needed, four of these components securely fetch the ",(0,o.jsx)(n.a,{href:"/knowledge#auth",children:"OAuth authentication"})," configuration variables: ",(0,o.jsx)(n.code,{children:"CONNECTOR_TYPE"}),", ",(0,o.jsx)(n.code,{children:"OWNER"}),", ",(0,o.jsx)(n.code,{children:"OWNER_EMAIL"}),", and ",(0,o.jsx)(n.code,{children:"OWNER_NAME"}),"."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:[(0,o.jsx)(n.strong,{children:"Create Data"})," component: Combines the authentication credentials from the ",(0,o.jsx)(n.strong,{children:"Secret Input"})," components into a structured data object that is associated with the document embeddings."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:[(0,o.jsxs)(n.a,{href:"https://docs.langflow.org/components-embedding-models",children:[(0,o.jsx)(n.strong,{children:"Embedding Model"})," component"]}),": Generates vector embeddings using your selected ",(0,o.jsx)(n.a,{href:"/knowledge#set-the-embedding-model-and-dimensions",children:"embedding model"}),"."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:[(0,o.jsxs)(n.a,{href:"https://docs.langflow.org/bundles-elastic#opensearch",children:[(0,o.jsx)(n.strong,{children:"OpenSearch"})," component"]}),": Stores the processed documents and their embeddings in a ",(0,o.jsx)(n.code,{children:"documents"})," index of your OpenRAG ",(0,o.jsx)(n.a,{href:"/knowledge",children:"OpenSearch knowledge base"}),"."]}),"\n",(0,o.jsxs)(n.p,{children:["The default address for the OpenSearch instance is ",(0,o.jsx)(n.code,{children:"https://opensearch:9200"}),". To change this address, edit the ",(0,o.jsx)(n.code,{children:"OPENSEARCH_PORT"})," ",(0,o.jsx)(n.a,{href:"/reference/configuration#opensearch-settings",children:"environment variable"}),"."]}),"\n",(0,o.jsxs)(n.p,{children:["The default authentication method is JSON Web Token (JWT) authentication. If you ",(0,o.jsx)(n.a,{href:"/agents#inspect-and-modify-flows",children:"edit the flow"}),", you can select ",(0,o.jsx)(n.code,{children:"basic"})," auth mode, which uses the ",(0,o.jsx)(n.code,{children:"OPENSEARCH_USERNAME"})," and ",(0,o.jsx)(n.code,{children:"OPENSEARCH_PASSWORD"})," ",(0,o.jsx)(n.a,{href:"/reference/configuration#opensearch-settings",children:"environment variables"})," for authentication instead of JWT."]}),"\n"]}),"\n"]})]})}function u(e={}){const{wrapper:n}={...(0,r.R)(),...e.components};return n?(0,o.jsx)(n,{...e,children:(0,o.jsx)(h,{...e})}):h(e)}var p=s(1381),g=s(4577);const f={title:"Ingest knowledge",slug:"/ingestion"},m=void 0,x={},j=[{value:"Ingest local files and folders",id:"ingest-local-files-and-folders",level:2},...d,{value:"Ingest local files temporarily",id:"ingest-local-files-temporarily",level:2},...c.RM,{value:"Ingest files with OAuth connectors",id:"oauth-ingestion",level:2},{value:"Enable OAuth connectors",id:"enable-oauth-connectors",level:3},...g.RM,...p.RM,{value:"Authenticate and ingest files from cloud storage",id:"authenticate-and-ingest-files-from-cloud-storage",level:3},...d,{value:"Ingest knowledge from URLs",id:"url-flow",level:2},{value:"Monitor ingestion",id:"monitor-ingestion",level:2},{value:"Ingestion performance expectations",id:"ingestion-performance-expectations",level:3},{value:"Troubleshoot ingestion",id:"troubleshoot-ingestion",level:2},{value:"Failed or slow ingestion",id:"failed-or-slow-ingestion",level:3},{value:"Problems when referencing documents in chat",id:"problems-when-referencing-documents-in-chat",level:3},{value:"See also",id:"see-also",level:2}];function w(e){const n={a:"a",admonition:"admonition",code:"code",h2:"h2",h3:"h3",li:"li",ol:"ol",p:"p",pre:"pre",strong:"strong",ul:"ul",...(0,r.R)(),...e.components},{Details:s}=n;return s||function(e,n){throw new Error("Expected "+(n?"component":"object")+" `"+e+"` to be defined: you likely forgot to import, pass, or provide it.")}("Details",!0),(0,o.jsxs)(o.Fragment,{children:[(0,o.jsxs)(n.p,{children:["Upload documents to your ",(0,o.jsx)(n.a,{href:"/knowledge",children:"OpenRAG OpenSearch instance"})," to populate your knowledge base with unique content, such as your own company documents, research papers, or websites.\nDocuments are processed through OpenRAG's knowledge ingestion flows with Docling."]}),"\n",(0,o.jsx)(n.p,{children:"OpenRAG can ingest knowledge from direct file uploads, URLs, and OAuth authenticated connectors."}),"\n",(0,o.jsxs)(n.p,{children:["Knowledge ingestion is powered by OpenRAG's built-in knowledge ingestion flows that use Docling to process documents before storing the documents in your OpenSearch database.\nDuring ingestion, documents are broken into smaller chunks of content that are then embedded using your selected ",(0,o.jsx)(n.a,{href:"/knowledge#set-the-embedding-model-and-dimensions",children:"embedding model"}),".\nThen, the chunks, embeddings, and associated metadata (which connects chunks of the same document) are stored in your OpenSearch database."]}),"\n",(0,o.jsxs)(n.p,{children:["To modify chunking behavior and other ingestion settings, see ",(0,o.jsx)(n.a,{href:"/knowledge#knowledge-ingestion-settings",children:"Knowledge ingestion settings"})," and ",(0,o.jsx)(n.a,{href:"/agents#inspect-and-modify-flows",children:"Inspect and modify flows"}),"."]}),"\n",(0,o.jsx)(n.h2,{id:"ingest-local-files-and-folders",children:"Ingest local files and folders"}),"\n",(0,o.jsx)(n.p,{children:"You can upload files and folders from your local machine to your knowledge base:"}),"\n",(0,o.jsxs)(n.ol,{children:["\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:["Click ",(0,o.jsx)(i.A,{name:"Library","aria-hidden":"true"})," ",(0,o.jsx)(n.strong,{children:"Knowledge"})," to view your OpenSearch knowledge base."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:["Click ",(0,o.jsx)(n.strong,{children:"Add Knowledge"})," to add your own documents to your OpenRAG knowledge base."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:["To upload one file, click ",(0,o.jsx)(i.A,{name:"File","aria-hidden":"true"})," ",(0,o.jsx)(n.strong,{children:"File"}),". To upload all documents in a folder, click ",(0,o.jsx)(i.A,{name:"Folder","aria-hidden":"true"})," ",(0,o.jsx)(n.strong,{children:"Folder"}),"."]}),"\n",(0,o.jsxs)(n.p,{children:["The default path is ",(0,o.jsx)(n.code,{children:"~/.openrag/documents"}),".\nTo change this path, see ",(0,o.jsx)(n.a,{href:"/knowledge#set-the-local-documents-path",children:"Set the local documents path"}),"."]}),"\n"]}),"\n"]}),"\n",(0,o.jsxs)(n.p,{children:["The selected files are processed in the background through the ",(0,o.jsx)(n.strong,{children:"OpenSearch Ingestion"})," flow."]}),"\n",(0,o.jsx)(u,{}),"\n",(0,o.jsxs)(n.p,{children:["You can ",(0,o.jsx)(n.a,{href:"#monitor-ingestion",children:"monitor ingestion"})," to see the progress of the uploads and check for failed uploads."]}),"\n",(0,o.jsx)(n.h2,{id:"ingest-local-files-temporarily",children:"Ingest local files temporarily"}),"\n",(0,o.jsx)(c.Ay,{}),"\n",(0,o.jsx)(n.h2,{id:"oauth-ingestion",children:"Ingest files with OAuth connectors"}),"\n",(0,o.jsx)(n.p,{children:"OpenRAG can use OAuth authenticated connectors to ingest documents from the following external services:"}),"\n",(0,o.jsxs)(n.ul,{children:["\n",(0,o.jsx)(n.li,{children:"AWS S3"}),"\n",(0,o.jsx)(n.li,{children:"Google Drive"}),"\n",(0,o.jsx)(n.li,{children:"Microsoft OneDrive"}),"\n",(0,o.jsx)(n.li,{children:"Microsoft Sharepoint"}),"\n"]}),"\n",(0,o.jsx)(n.p,{children:"These connectors enable seamless ingestion of files from cloud storage to your OpenRAG knowledge base."}),"\n",(0,o.jsx)(n.p,{children:"Individual users can connect their personal cloud storage accounts to OpenRAG. Each user must separately authorize OpenRAG to access their own cloud storage. When a user connects a cloud storage service, they are redirected to authenticate with that service provider and grant OpenRAG permission to sync documents from their personal cloud storage."}),"\n",(0,o.jsx)(n.h3,{id:"enable-oauth-connectors",children:"Enable OAuth connectors"}),"\n",(0,o.jsx)(n.p,{children:"Before users can connect their own cloud storage accounts, you must configure the provider's OAuth credentials in OpenRAG. Typically, this requires that you register OpenRAG as an OAuth application in your cloud provider, and then obtain the app's OAuth credentials, such as a client ID and secret key.\nTo enable multiple connectors, you must register an app and generate credentials for each provider."}),"\n",(0,o.jsxs)(l.A,{children:[(0,o.jsxs)(a.A,{value:"TUI",label:"TUI-managed services",default:!0,children:[(0,o.jsxs)(n.p,{children:["If you use the ",(0,o.jsx)(n.a,{href:"/tui",children:"Terminal User Interface (TUI)"})," to manage your OpenRAG services, enter OAuth credentials on the ",(0,o.jsx)(n.strong,{children:"Advanced Setup"})," page.\nYou can do this during ",(0,o.jsx)(n.a,{href:"/install#setup",children:"installation"}),", or you can add the credentials afterwards:"]}),(0,o.jsxs)(n.ol,{children:["\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:["If OpenRAG is running, click ",(0,o.jsx)(n.strong,{children:"Stop All Services"})," in the TUI."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:["Open the ",(0,o.jsx)(n.strong,{children:"Advanced Setup"})," page, and then add the OAuth credentials for the cloud storage providers that you want to use under ",(0,o.jsx)(n.strong,{children:"API Keys"}),":"]}),"\n",(0,o.jsxs)(n.ul,{children:["\n",(0,o.jsxs)(n.li,{children:[(0,o.jsx)(n.strong,{children:"Google"}),": Provide your Google OAuth Client ID and Google OAuth Client Secret. You can generate these in the ",(0,o.jsx)(n.a,{href:"https://console.cloud.google.com/apis/credentials",children:"Google Cloud Console"}),". For more information, see the ",(0,o.jsx)(n.a,{href:"https://developers.google.com/identity/protocols/oauth2",children:"Google OAuth client documentation"}),"."]}),"\n",(0,o.jsxs)(n.li,{children:[(0,o.jsx)(n.strong,{children:"Microsoft"}),": For the Microsoft OAuth Client ID and Microsoft OAuth Client Secret, provide ",(0,o.jsx)(n.a,{href:"https://learn.microsoft.com/en-us/onedrive/developer/rest-api/getting-started/app-registration?view=odsp-graph-online",children:"Azure application registration credentials for SharePoint and OneDrive"}),". For more information, see the ",(0,o.jsx)(n.a,{href:"https://learn.microsoft.com/en-us/onedrive/developer/rest-api/getting-started/graph-oauth",children:"Microsoft Graph OAuth client documentation"}),"."]}),"\n",(0,o.jsxs)(n.li,{children:[(0,o.jsx)(n.strong,{children:"Amazon"}),": Provide your AWS Access Key ID and AWS Secret Access Key with access to your S3 instance. For more information, see the AWS documentation on ",(0,o.jsx)(n.a,{href:"https://docs.aws.amazon.com/singlesignon/latest/userguide/manage-your-applications.html",children:"Configuring access to AWS applications"}),"."]}),"\n"]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsx)(n.p,{children:"Register the redirect URIs shown in the TUI in your OAuth provider.\nThese are the URLs your OAuth provider will use to redirect users back to OpenRAG after they sign in."}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:["Click ",(0,o.jsx)(n.strong,{children:"Save Configuration"})," to add the OAuth credentials to your ",(0,o.jsxs)(n.a,{href:"/reference/configuration",children:["OpenRAG ",(0,o.jsx)(n.code,{children:".env"})," file"]}),"."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:["Click ",(0,o.jsx)(n.strong,{children:"Start Services"})," to restart the OpenRAG containers with OAuth enabled."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsx)(n.p,{children:"Launch the OpenRAG app.\nYou should be prompted to sign in to your OAuth provider before being redirected to your OpenRAG instance."}),"\n"]}),"\n"]})]}),(0,o.jsxs)(a.A,{value:"env",label:"Self-managed services",children:[(0,o.jsxs)(n.p,{children:["If you ",(0,o.jsx)(n.a,{href:"/docker",children:"installed OpenRAG with self-managed services"}),", set OAuth credentials in your ",(0,o.jsxs)(n.a,{href:"/reference/configuration",children:["OpenRAG ",(0,o.jsx)(n.code,{children:".env"})," file"]}),"."]}),(0,o.jsxs)(n.p,{children:["You can do this during ",(0,o.jsx)(n.a,{href:"/docker#setup",children:"initial set up"}),", or you can add the credentials afterwards:"]}),(0,o.jsxs)(n.ol,{children:["\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsx)(n.p,{children:"Stop all OpenRAG containers:"}),"\n",(0,o.jsx)(g.Ay,{}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:["Edit your OpenRAG ",(0,o.jsx)(n.code,{children:".env"})," file to add the OAuth credentials for the cloud storage providers that you want to use:"]}),"\n",(0,o.jsxs)(n.ul,{children:["\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:[(0,o.jsx)(n.strong,{children:"Google"}),": Provide your Google OAuth Client ID and Google OAuth Client Secret. You can generate these in the ",(0,o.jsx)(n.a,{href:"https://console.cloud.google.com/apis/credentials",children:"Google Cloud Console"}),". For more information, see the ",(0,o.jsx)(n.a,{href:"https://developers.google.com/identity/protocols/oauth2",children:"Google OAuth client documentation"}),"."]}),"\n",(0,o.jsx)(n.pre,{children:(0,o.jsx)(n.code,{className:"language-env",children:"GOOGLE_OAUTH_CLIENT_ID=\nGOOGLE_OAUTH_CLIENT_SECRET=\n"})}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:[(0,o.jsx)(n.strong,{children:"Microsoft"}),": For the Microsoft OAuth Client ID and Microsoft OAuth Client Secret, provide ",(0,o.jsx)(n.a,{href:"https://learn.microsoft.com/en-us/onedrive/developer/rest-api/getting-started/app-registration?view=odsp-graph-online",children:"Azure application registration credentials for SharePoint and OneDrive"}),". For more information, see the ",(0,o.jsx)(n.a,{href:"https://learn.microsoft.com/en-us/onedrive/developer/rest-api/getting-started/graph-oauth",children:"Microsoft Graph OAuth client documentation"}),"."]}),"\n",(0,o.jsx)(n.pre,{children:(0,o.jsx)(n.code,{className:"language-env",children:"MICROSOFT_GRAPH_OAUTH_CLIENT_ID=\nMICROSOFT_GRAPH_OAUTH_CLIENT_SECRET=\n"})}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:[(0,o.jsx)(n.strong,{children:"Amazon"}),": Provide your AWS Access Key ID and AWS Secret Access Key with access to your S3 instance. For more information, see the AWS documentation on ",(0,o.jsx)(n.a,{href:"https://docs.aws.amazon.com/singlesignon/latest/userguide/manage-your-applications.html",children:"Configuring access to AWS applications"}),"."]}),"\n",(0,o.jsx)(n.pre,{children:(0,o.jsx)(n.code,{className:"language-env",children:"AWS_ACCESS_KEY_ID=\nAWS_SECRET_ACCESS_KEY=\n"})}),"\n"]}),"\n"]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:["Save the ",(0,o.jsx)(n.code,{children:".env"})," file."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsx)(n.p,{children:"Restart your OpenRAG containers:"}),"\n",(0,o.jsx)(p.Ay,{}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:["Access the OpenRAG frontend at ",(0,o.jsx)(n.code,{children:"http://localhost:3000"}),".\nYou should be prompted to sign in to your OAuth provider before being redirected to your OpenRAG instance."]}),"\n"]}),"\n"]})]})]}),"\n",(0,o.jsx)(n.h3,{id:"authenticate-and-ingest-files-from-cloud-storage",children:"Authenticate and ingest files from cloud storage"}),"\n",(0,o.jsxs)(n.p,{children:["After you start OpenRAG with OAuth connectors enabled, each user is prompted to authenticate with the OAuth provider upon accessing your OpenRAG instance.\nIndividual authentication is required to access a user's cloud storage from your OpenRAG instance.\nFor example, if a user navigates to the default OpenRAG URL at ",(0,o.jsx)(n.code,{children:"http://localhost:3000"}),", they are redirected to the OAuth provider's sign-in page.\nAfter authenticating and granting the required permissions for OpenRAG, the user is redirected back to OpenRAG."]}),"\n",(0,o.jsx)(n.p,{children:"To ingest knowledge with an OAuth connector, do the following:"}),"\n",(0,o.jsxs)(n.ol,{children:["\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:["Click ",(0,o.jsx)(i.A,{name:"Library","aria-hidden":"true"})," ",(0,o.jsx)(n.strong,{children:"Knowledge"})," to view your OpenSearch knowledge base."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:["Click ",(0,o.jsx)(n.strong,{children:"Add Knowledge"}),", and then select a storage provider."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:["On the ",(0,o.jsx)(n.strong,{children:"Add Cloud Knowledge"})," page, click ",(0,o.jsx)(n.strong,{children:"Add Files"}),", and then select the files and folders to ingest from the connected storage."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:["Click ",(0,o.jsx)(n.strong,{children:"Ingest Files"}),"."]}),"\n"]}),"\n"]}),"\n",(0,o.jsxs)(n.p,{children:["The selected files are processed in the background through the ",(0,o.jsx)(n.strong,{children:"OpenSearch Ingestion"})," flow."]}),"\n",(0,o.jsx)(u,{}),"\n",(0,o.jsxs)(n.p,{children:["You can ",(0,o.jsx)(n.a,{href:"#monitor-ingestion",children:"monitor ingestion"})," to see the progress of the uploads and check for failed uploads."]}),"\n",(0,o.jsx)(n.h2,{id:"url-flow",children:"Ingest knowledge from URLs"}),"\n",(0,o.jsx)(n.p,{children:"When using the OpenRAG chat, you can enter URLs into the chat to be ingested in real-time during your conversation."}),"\n",(0,o.jsx)(n.admonition,{type:"tip",children:(0,o.jsxs)(n.p,{children:["Use ",(0,o.jsx)(n.a,{href:"https://www.w3schools.com/tags/ref_urlencode.ASP",children:"UTF-8 encoding"})," for URLs with special characters other than the standard slash, period, and colon characters.\nFor example, use ",(0,o.jsx)(n.code,{children:"https://en.wikipedia.org/wiki/Caf%C3%A9"})," instead of ",(0,o.jsx)(n.code,{children:"https://en.wikipedia.org/wiki/Caf\xe9"})," or ",(0,o.jsx)(n.code,{children:"https://en.wikipedia.org/wiki/Coffee%5Fculture"})," instead of ",(0,o.jsx)(n.code,{children:"https://en.wikipedia.org/wiki/Coffee_culture"}),"."]})}),"\n",(0,o.jsxs)(n.p,{children:["The ",(0,o.jsx)(n.strong,{children:"OpenSearch URL Ingestion"})," flow is used to ingest web content from URLs.\nThis flow isn't directly accessible from the OpenRAG user interface.\nInstead, this flow is called by the ",(0,o.jsxs)(n.a,{href:"/chat#flow",children:[(0,o.jsx)(n.strong,{children:"OpenRAG OpenSearch Agent"})," flow"]})," as a Model Context Protocol (MCP) tool.\nThe agent can call this component to fetch web content from a given URL, and then ingest that content into your OpenSearch knowledge base.\nLike all OpenRAG flows, you can ",(0,o.jsx)(n.a,{href:"/agents#inspect-and-modify-flows",children:"inspect the flow in Langflow"}),", and you can customize it.\nFor more information about MCP in Langflow, see the Langflow documentation on ",(0,o.jsx)(n.a,{href:"https://docs.langflow.org/mcp-client",children:"MCP clients"})," and ",(0,o.jsx)(n.a,{href:"https://docs.langflow.org/mcp-tutorial",children:"MCP servers"}),"."]}),"\n",(0,o.jsx)(n.h2,{id:"monitor-ingestion",children:"Monitor ingestion"}),"\n",(0,o.jsx)(n.p,{children:"Document ingestion tasks run in the background."}),"\n",(0,o.jsxs)(n.p,{children:["In the OpenRAG user interface, a badge is shown on ",(0,o.jsx)(i.A,{name:"Bell","aria-hidden":"true"})," ",(0,o.jsx)(n.strong,{children:"Tasks"})," when OpenRAG tasks are active.\nClick ",(0,o.jsx)(i.A,{name:"Bell","aria-hidden":"true"})," ",(0,o.jsx)(n.strong,{children:"Tasks"})," to inspect and cancel tasks:"]}),"\n",(0,o.jsxs)(n.ul,{children:["\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:[(0,o.jsx)(n.strong,{children:"Active Tasks"}),": All tasks that are ",(0,o.jsx)(n.strong,{children:"Pending"}),", ",(0,o.jsx)(n.strong,{children:"Running"}),", or ",(0,o.jsx)(n.strong,{children:"Processing"}),".\nFor each active task, depending on its state, you can find the task ID, start time, duration, number of files processed, and the total files enqueued for processing."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:[(0,o.jsx)(n.strong,{children:"Pending"}),": The task is queued and waiting to start."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:[(0,o.jsx)(n.strong,{children:"Running"}),": The task is actively processing files."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:[(0,o.jsx)(n.strong,{children:"Processing"}),": The task is performing ingestion operations."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:[(0,o.jsx)(n.strong,{children:"Failed"}),": Something went wrong during ingestion, or the task was manually canceled.\nFor troubleshooting advice, see ",(0,o.jsx)(n.a,{href:"#troubleshoot-ingestion",children:"Troubleshoot ingestion"}),"."]}),"\n"]}),"\n"]}),"\n",(0,o.jsxs)(n.p,{children:["To stop an active task, click ",(0,o.jsx)(i.A,{name:"X","aria-hidden":"true"})," ",(0,o.jsx)(n.strong,{children:"Cancel"}),". Canceling a task stops processing immediately and marks the task as ",(0,o.jsx)(n.strong,{children:"Failed"}),"."]}),"\n",(0,o.jsx)(n.h3,{id:"ingestion-performance-expectations",children:"Ingestion performance expectations"}),"\n",(0,o.jsx)(n.p,{children:"The following performance test was conducted with Docling Serve."}),"\n",(0,o.jsx)(n.p,{children:"On a local VM with 7 vCPUs and 8 GiB RAM, OpenRAG ingested approximately 5.03 GB across 1,083 files in about 42 minutes.\nThis equates to approximately 2.4 documents per second."}),"\n",(0,o.jsx)(n.p,{children:"You can generally expect equal or better performance on developer laptops, and significantly faster performance on servers.\nThroughput scales with CPU cores, memory, storage speed, and configuration choices, such as the embedding model, chunk size, overlap, and concurrency."}),"\n",(0,o.jsx)(n.p,{children:"This test returned 12 error, approximately 1.1 percent of the total files ingested.\nAll errors were file-specific, and they didn't stop the pipeline."}),"\n",(0,o.jsxs)(s,{children:[(0,o.jsx)("summary",{children:"Ingestion performance test details"}),(0,o.jsxs)(n.ul,{children:["\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsx)(n.p,{children:"Ingestion dataset:"}),"\n",(0,o.jsxs)(n.ul,{children:["\n",(0,o.jsx)(n.li,{children:"Total files: 1,083 items mounted"}),"\n",(0,o.jsx)(n.li,{children:"Total size on disk: 5,026,474,862 bytes (approximately 5.03 GB)"}),"\n"]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsx)(n.p,{children:"Hardware specifications:"}),"\n",(0,o.jsxs)(n.ul,{children:["\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsx)(n.p,{children:"Machine: Apple M4 Pro"}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsx)(n.p,{children:"Podman VM:"}),"\n",(0,o.jsxs)(n.ul,{children:["\n",(0,o.jsx)(n.li,{children:"Name: podman-machine-default"}),"\n",(0,o.jsx)(n.li,{children:"Type: applehv"}),"\n",(0,o.jsx)(n.li,{children:"vCPUs: 7"}),"\n",(0,o.jsx)(n.li,{children:"Memory: 8 GiB"}),"\n",(0,o.jsx)(n.li,{children:"Disk size: 100 GiB"}),"\n"]}),"\n"]}),"\n"]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsx)(n.p,{children:"Test results:"}),"\n",(0,o.jsx)(n.pre,{children:(0,o.jsx)(n.code,{className:"language-text",children:"2025-09-24T22:40:45.542190Z /app/src/main.py:231 Ingesting default documents when ready disable_langflow_ingest=False\n2025-09-24T22:40:45.546385Z /app/src/main.py:270 Using Langflow ingestion pipeline for default documents file_count=1082\n...\n2025-09-24T23:19:44.866365Z /app/src/main.py:351 Langflow ingestion completed success_count=1070 error_count=12 total_files=1082\n"})}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsx)(n.p,{children:"Elapsed time: Approximately 42 minutes 15 seconds (2,535 seconds)"}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsx)(n.p,{children:"Throughput: Approximately 2.4 documents per second"}),"\n"]}),"\n"]})]}),"\n",(0,o.jsx)(n.h2,{id:"troubleshoot-ingestion",children:"Troubleshoot ingestion"}),"\n",(0,o.jsx)(n.p,{children:"The following issues can occur during document ingestion."}),"\n",(0,o.jsx)(n.h3,{id:"failed-or-slow-ingestion",children:"Failed or slow ingestion"}),"\n",(0,o.jsx)(n.p,{children:"If an ingestion task fails, do the following:"}),"\n",(0,o.jsxs)(n.ul,{children:["\n",(0,o.jsx)(n.li,{children:"Make sure you are uploading supported file types."}),"\n",(0,o.jsx)(n.li,{children:"Split excessively large files into smaller files before uploading."}),"\n",(0,o.jsx)(n.li,{children:"Remove unusual embedded content, such as videos or animations, before uploading. Although Docling can replace some non-text content with placeholders during ingestion, some embedded content might cause errors."}),"\n",(0,o.jsxs)(n.li,{children:["Make sure your Podman/Docker VM has sufficient memory for the ingestion tasks.\nThe minimum recommendation is 8 GB of RAM.\nIf you regularly upload large files, more RAM is recommended.\nFor more information, see ",(0,o.jsx)(n.a,{href:"/support/troubleshoot#memory-issue-with-podman-on-macos",children:"Memory issue with Podman on macOS"})," and ",(0,o.jsx)(n.a,{href:"/support/troubleshoot#container-out-of-memory-errors",children:"Container out of memory errors"}),"."]}),"\n",(0,o.jsxs)(n.li,{children:["If OCR ingestion fails due to OCR missing, see ",(0,o.jsx)(n.a,{href:"/support/troubleshoot#ocr-ingestion-fails-easyocr-not-installed",children:"OCR ingestion fails (easyocr not installed)"}),"."]}),"\n"]}),"\n",(0,o.jsx)(n.h3,{id:"problems-when-referencing-documents-in-chat",children:"Problems when referencing documents in chat"}),"\n",(0,o.jsxs)(n.p,{children:["If the OpenRAG ",(0,o.jsx)(n.strong,{children:"Chat"})," doesn't seem to use your documents correctly, ",(0,o.jsx)(n.a,{href:"/knowledge#browse-knowledge",children:"browse your knowledge base"})," to confirm that the documents are uploaded in full, and the chunks are correct."]}),"\n",(0,o.jsxs)(n.p,{children:["If the documents are present and well-formed, check your ",(0,o.jsx)(n.a,{href:"/knowledge-filters",children:"knowledge filters"}),".\nIf a global filter is applied, make sure the expected documents are included in the global filter.\nIf the global filter excludes any documents, the agent cannot access those documents unless you apply a chat-level filter or change the global filter."]}),"\n",(0,o.jsx)(n.p,{children:"If text is missing or incorrectly processed, you need to reupload the documents after modifying the ingestion parameters or the documents themselves.\nFor example:"}),"\n",(0,o.jsxs)(n.ul,{children:["\n",(0,o.jsx)(n.li,{children:"Break combined documents into separate files for better metadata context."}),"\n",(0,o.jsxs)(n.li,{children:["Make sure scanned documents are legible enough for extraction, and enable the ",(0,o.jsx)(n.strong,{children:"OCR"})," option. Poorly scanned documents might require additional preparation or rescanning before ingestion."]}),"\n",(0,o.jsxs)(n.li,{children:["Adjust the ",(0,o.jsx)(n.strong,{children:"Chunk Size"})," and ",(0,o.jsx)(n.strong,{children:"Chunk Overlap"})," settings to better suit your documents. Larger chunks provide more context but can include irrelevant information, while smaller chunks yield more precise semantic search but can lack context."]}),"\n"]}),"\n",(0,o.jsxs)(n.p,{children:["For more information about modifying ingestion parameters and flows, see ",(0,o.jsx)(n.a,{href:"/knowledge#knowledge-ingestion-settings",children:"Knowledge ingestion settings"}),"."]}),"\n",(0,o.jsx)(n.h2,{id:"see-also",children:"See also"}),"\n",(0,o.jsxs)(n.ul,{children:["\n",(0,o.jsx)(n.li,{children:(0,o.jsx)(n.a,{href:"/knowledge",children:"Configure knowledge"})}),"\n",(0,o.jsx)(n.li,{children:(0,o.jsx)(n.a,{href:"/knowledge-filters",children:"Filter knowledge"})}),"\n",(0,o.jsx)(n.li,{children:(0,o.jsx)(n.a,{href:"/chat",children:"Chat with knowledge"})}),"\n",(0,o.jsx)(n.li,{children:(0,o.jsx)(n.a,{href:"/agents#inspect-and-modify-flows",children:"Inspect and modify flows"})}),"\n"]})]})}function b(e={}){const{wrapper:n}={...(0,r.R)(),...e.components};return n?(0,o.jsx)(n,{...e,children:(0,o.jsx)(w,{...e})}):w(e)}},8401:(e,n,s)=>{s.d(n,{Ay:()=>a,RM:()=>i});var t=s(4848),o=s(8453),r=s(9179);const i=[];function l(e){const n={p:"p",strong:"strong",...(0,o.R)(),...e.components};return(0,t.jsxs)(n.p,{children:["When using the OpenRAG ",(0,t.jsx)(n.strong,{children:"Chat"}),", click ",(0,t.jsx)(r.A,{name:"Plus","aria-hidden":"true"})," in the chat input field to upload a file to the current chat session.\nFiles added this way are processed and made available to the agent for the current conversation only.\nThese files aren't stored in the knowledge base permanently."]})}function a(e={}){const{wrapper:n}={...(0,o.R)(),...e.components};return n?(0,t.jsx)(n,{...e,children:(0,t.jsx)(l,{...e})}):l(e)}},9179:(e,n,s)=>{s.d(n,{A:()=>r});s(6540);var t=s(7856),o=s(4848);function r({name:e,...n}){const s=t[e];return s?(0,o.jsx)(s,{...n}):null}},9365:(e,n,s)=>{s.d(n,{A:()=>i});s(6540);var t=s(4164);const o={tabItem:"tabItem_Ymn6"};var r=s(4848);function i({children:e,hidden:n,className:s}){return(0,r.jsx)("div",{role:"tabpanel",className:(0,t.A)(o.tabItem,s),hidden:n,children:e})}}}]); \ No newline at end of file +"use strict";(globalThis.webpackChunkopenrag_docs=globalThis.webpackChunkopenrag_docs||[]).push([[6919],{1381:(e,n,s)=>{s.d(n,{Ay:()=>l,RM:()=>r});var t=s(4848),o=s(8453);const r=[];function i(e){const n={code:"code",pre:"pre",...(0,o.R)(),...e.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-bash",metastring:'title="Docker"',children:"docker compose up -d\n"})}),"\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-bash",metastring:'title="Podman"',children:"podman compose up -d\n"})})]})}function l(e={}){const{wrapper:n}={...(0,o.R)(),...e.components};return n?(0,t.jsx)(n,{...e,children:(0,t.jsx)(i,{...e})}):i(e)}},1470:(e,n,s)=>{s.d(n,{A:()=>A});var t=s(6540),o=s(4164),r=s(7559),i=s(3104),l=s(6347),a=s(205),c=s(7485),d=s(1682),h=s(679);function u(e){return t.Children.toArray(e).filter(e=>"\n"!==e).map(e=>{if(!e||(0,t.isValidElement)(e)&&function(e){const{props:n}=e;return!!n&&"object"==typeof n&&"value"in n}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)})?.filter(Boolean)??[]}function p(e){const{values:n,children:s}=e;return(0,t.useMemo)(()=>{const e=n??function(e){return u(e).map(({props:{value:e,label:n,attributes:s,default:t}})=>({value:e,label:n,attributes:s,default:t}))}(s);return function(e){const n=(0,d.XI)(e,(e,n)=>e.value===n.value);if(n.length>0)throw new Error(`Docusaurus error: Duplicate values "${n.map(e=>e.value).join(", ")}" found in . Every value needs to be unique.`)}(e),e},[n,s])}function g({value:e,tabValues:n}){return n.some(n=>n.value===e)}function f({queryString:e=!1,groupId:n}){const s=(0,l.W6)(),o=function({queryString:e=!1,groupId:n}){if("string"==typeof e)return e;if(!1===e)return null;if(!0===e&&!n)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. You can also provide an explicit value such as queryString="my-search-param".');return n??null}({queryString:e,groupId:n});return[(0,c.aZ)(o),(0,t.useCallback)(e=>{if(!o)return;const n=new URLSearchParams(s.location.search);n.set(o,e),s.replace({...s.location,search:n.toString()})},[o,s])]}function m(e){const{defaultValue:n,queryString:s=!1,groupId:o}=e,r=p(e),[i,l]=(0,t.useState)(()=>function({defaultValue:e,tabValues:n}){if(0===n.length)throw new Error("Docusaurus error: the component requires at least one children component");if(e){if(!g({value:e,tabValues:n}))throw new Error(`Docusaurus error: The has a defaultValue "${e}" but none of its children has the corresponding value. Available values are: ${n.map(e=>e.value).join(", ")}. If you intend to show no default tab, use defaultValue={null} instead.`);return e}const s=n.find(e=>e.default)??n[0];if(!s)throw new Error("Unexpected error: 0 tabValues");return s.value}({defaultValue:n,tabValues:r})),[c,d]=f({queryString:s,groupId:o}),[u,m]=function({groupId:e}){const n=function(e){return e?`docusaurus.tab.${e}`:null}(e),[s,o]=(0,h.Dv)(n);return[s,(0,t.useCallback)(e=>{n&&o.set(e)},[n,o])]}({groupId:o}),x=(()=>{const e=c??u;return g({value:e,tabValues:r})?e:null})();(0,a.A)(()=>{x&&l(x)},[x]);return{selectedValue:i,selectValue:(0,t.useCallback)(e=>{if(!g({value:e,tabValues:r}))throw new Error(`Can't select invalid tab value=${e}`);l(e),d(e),m(e)},[d,m,r]),tabValues:r}}var x=s(2303);const j={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};var w=s(4848);function b({className:e,block:n,selectedValue:s,selectValue:t,tabValues:r}){const l=[],{blockElementScrollPositionUntilNextRender:a}=(0,i.a_)(),c=e=>{const n=e.currentTarget,o=l.indexOf(n),i=r[o].value;i!==s&&(a(n),t(i))},d=e=>{let n=null;switch(e.key){case"Enter":c(e);break;case"ArrowRight":{const s=l.indexOf(e.currentTarget)+1;n=l[s]??l[0];break}case"ArrowLeft":{const s=l.indexOf(e.currentTarget)-1;n=l[s]??l[l.length-1];break}}n?.focus()};return(0,w.jsx)("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,o.A)("tabs",{"tabs--block":n},e),children:r.map(({value:e,label:n,attributes:t})=>(0,w.jsx)("li",{role:"tab",tabIndex:s===e?0:-1,"aria-selected":s===e,ref:e=>{l.push(e)},onKeyDown:d,onClick:c,...t,className:(0,o.A)("tabs__item",j.tabItem,t?.className,{"tabs__item--active":s===e}),children:n??e},e))})}function y({lazy:e,children:n,selectedValue:s}){const r=(Array.isArray(n)?n:[n]).filter(Boolean);if(e){const e=r.find(e=>e.props.value===s);return e?(0,t.cloneElement)(e,{className:(0,o.A)("margin-top--md",e.props.className)}):null}return(0,w.jsx)("div",{className:"margin-top--md",children:r.map((e,n)=>(0,t.cloneElement)(e,{key:n,hidden:e.props.value!==s}))})}function v(e){const n=m(e);return(0,w.jsxs)("div",{className:(0,o.A)(r.G.tabs.container,"tabs-container",j.tabList),children:[(0,w.jsx)(b,{...n,...e}),(0,w.jsx)(y,{...n,...e})]})}function A(e){const n=(0,x.A)();return(0,w.jsx)(v,{...e,children:u(e.children)},String(n))}},4577:(e,n,s)=>{s.d(n,{Ay:()=>l,RM:()=>r});var t=s(4848),o=s(8453);const r=[];function i(e){const n={code:"code",pre:"pre",...(0,o.R)(),...e.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-bash",metastring:'title="Docker"',children:"docker stop $(docker ps -q)\n"})}),"\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-bash",metastring:'title="Podman"',children:"podman stop --all\n"})})]})}function l(e={}){const{wrapper:n}={...(0,o.R)(),...e.components};return n?(0,t.jsx)(n,{...e,children:(0,t.jsx)(i,{...e})}):i(e)}},5421:(e,n,s)=>{s.r(n),s.d(n,{assets:()=>x,contentTitle:()=>m,default:()=>b,frontMatter:()=>f,metadata:()=>t,toc:()=>j});const t=JSON.parse('{"id":"core-components/ingestion","title":"Ingest knowledge","description":"Upload documents to your OpenRAG OpenSearch instance to populate your knowledge base with unique content, such as your own company documents, research papers, or websites.","source":"@site/docs/core-components/ingestion.mdx","sourceDirName":"core-components","slug":"/ingestion","permalink":"/ingestion","draft":false,"unlisted":false,"editUrl":"https://github.com/openrag/openrag/tree/main/docs/docs/core-components/ingestion.mdx","tags":[],"version":"current","frontMatter":{"title":"Ingest knowledge","slug":"/ingestion"},"sidebar":"tutorialSidebar","previous":{"title":"Configure knowledge","permalink":"/knowledge"},"next":{"title":"Filter knowledge","permalink":"/knowledge-filters"}}');var o=s(4848),r=s(8453),i=s(9179),l=s(1470),a=s(9365),c=s(8401);const d=[];function h(e){const n={a:"a",code:"code",li:"li",p:"p",strong:"strong",ul:"ul",...(0,r.R)(),...e.components},{Details:s}=n;return s||function(e,n){throw new Error("Expected "+(n?"component":"object")+" `"+e+"` to be defined: you likely forgot to import, pass, or provide it.")}("Details",!0),(0,o.jsxs)(s,{children:[(0,o.jsx)("summary",{children:"About the OpenSearch Ingestion flow"}),(0,o.jsxs)(n.p,{children:["When you upload documents locally or with OAuth connectors, the ",(0,o.jsx)(n.strong,{children:"OpenSearch Ingestion"})," flow runs in the background.\nBy default, this flow uses Docling Serve to import and process documents."]}),(0,o.jsxs)(n.p,{children:["Like all ",(0,o.jsx)(n.a,{href:"/agents",children:"OpenRAG flows"}),", you can ",(0,o.jsx)(n.a,{href:"/agents#inspect-and-modify-flows",children:"inspect the flow in Langflow"}),", and you can customize it if you want to change the knowledge ingestion settings."]}),(0,o.jsxs)(n.p,{children:["The ",(0,o.jsx)(n.strong,{children:"OpenSearch Ingestion"})," flow is comprised of several components that work together to process and store documents in your knowledge base:"]}),(0,o.jsxs)(n.ul,{children:["\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:[(0,o.jsxs)(n.a,{href:"https://docs.langflow.org/bundles-docling#docling-serve",children:[(0,o.jsx)(n.strong,{children:"Docling Serve"})," component"]}),": Ingests files and processes them by connecting to OpenRAG's local Docling Serve service. The output is ",(0,o.jsx)(n.code,{children:"DoclingDocument"})," data that contains the extracted text and metadata from the documents."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:[(0,o.jsxs)(n.a,{href:"https://docs.langflow.org/bundles-docling#export-doclingdocument",children:[(0,o.jsx)(n.strong,{children:"Export DoclingDocument"})," component"]}),": Exports processed ",(0,o.jsx)(n.code,{children:"DoclingDocument"})," data to Markdown format with image placeholders. This conversion standardizes the document data in preparation for further processing."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:[(0,o.jsxs)(n.a,{href:"https://docs.langflow.org/dataframe-operations",children:[(0,o.jsx)(n.strong,{children:"DataFrame Operations"})," component"]}),": Three of these components run sequentially to add metadata to the document data: ",(0,o.jsx)(n.code,{children:"filename"}),", ",(0,o.jsx)(n.code,{children:"file_size"}),", and ",(0,o.jsx)(n.code,{children:"mimetype"}),"."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:[(0,o.jsxs)(n.a,{href:"https://docs.langflow.org/split-text",children:[(0,o.jsx)(n.strong,{children:"Split Text"})," component"]}),": Splits the processed text into chunks, based on the configured ",(0,o.jsx)(n.a,{href:"/knowledge#knowledge-ingestion-settings",children:"chunk size and overlap settings"}),"."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:[(0,o.jsx)(n.strong,{children:"Secret Input"})," component: If needed, four of these components securely fetch the ",(0,o.jsx)(n.a,{href:"/knowledge#auth",children:"OAuth authentication"})," configuration variables: ",(0,o.jsx)(n.code,{children:"CONNECTOR_TYPE"}),", ",(0,o.jsx)(n.code,{children:"OWNER"}),", ",(0,o.jsx)(n.code,{children:"OWNER_EMAIL"}),", and ",(0,o.jsx)(n.code,{children:"OWNER_NAME"}),"."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:[(0,o.jsx)(n.strong,{children:"Create Data"})," component: Combines the authentication credentials from the ",(0,o.jsx)(n.strong,{children:"Secret Input"})," components into a structured data object that is associated with the document embeddings."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:[(0,o.jsxs)(n.a,{href:"https://docs.langflow.org/components-embedding-models",children:[(0,o.jsx)(n.strong,{children:"Embedding Model"})," component"]}),": Generates vector embeddings using your selected ",(0,o.jsx)(n.a,{href:"/knowledge#set-the-embedding-model-and-dimensions",children:"embedding model"}),"."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:[(0,o.jsxs)(n.a,{href:"https://docs.langflow.org/bundles-elastic#opensearch",children:[(0,o.jsx)(n.strong,{children:"OpenSearch"})," component"]}),": Stores the processed documents and their embeddings in a ",(0,o.jsx)(n.code,{children:"documents"})," index of your OpenRAG ",(0,o.jsx)(n.a,{href:"/knowledge",children:"OpenSearch knowledge base"}),"."]}),"\n",(0,o.jsxs)(n.p,{children:["The default address for the OpenSearch instance is ",(0,o.jsx)(n.code,{children:"https://opensearch:9200"}),". To change this address, edit the ",(0,o.jsx)(n.code,{children:"OPENSEARCH_PORT"})," ",(0,o.jsx)(n.a,{href:"/reference/configuration#opensearch-settings",children:"environment variable"}),"."]}),"\n",(0,o.jsxs)(n.p,{children:["The default authentication method is JSON Web Token (JWT) authentication. If you ",(0,o.jsx)(n.a,{href:"/agents#inspect-and-modify-flows",children:"edit the flow"}),", you can select ",(0,o.jsx)(n.code,{children:"basic"})," auth mode, which uses the ",(0,o.jsx)(n.code,{children:"OPENSEARCH_USERNAME"})," and ",(0,o.jsx)(n.code,{children:"OPENSEARCH_PASSWORD"})," ",(0,o.jsx)(n.a,{href:"/reference/configuration#opensearch-settings",children:"environment variables"})," for authentication instead of JWT."]}),"\n"]}),"\n"]})]})}function u(e={}){const{wrapper:n}={...(0,r.R)(),...e.components};return n?(0,o.jsx)(n,{...e,children:(0,o.jsx)(h,{...e})}):h(e)}var p=s(1381),g=s(4577);const f={title:"Ingest knowledge",slug:"/ingestion"},m=void 0,x={},j=[{value:"Ingest local files and folders",id:"ingest-local-files-and-folders",level:2},...d,{value:"Ingest local files temporarily",id:"ingest-local-files-temporarily",level:2},...c.RM,{value:"Ingest files with OAuth connectors",id:"oauth-ingestion",level:2},{value:"Enable OAuth connectors",id:"enable-oauth-connectors",level:3},...g.RM,...p.RM,{value:"Authenticate and ingest files from cloud storage",id:"authenticate-and-ingest-files-from-cloud-storage",level:3},...d,{value:"Ingest knowledge from URLs",id:"url-flow",level:2},{value:"Monitor ingestion",id:"monitor-ingestion",level:2},{value:"Ingestion performance expectations",id:"ingestion-performance-expectations",level:3},{value:"Troubleshoot ingestion",id:"troubleshoot-ingestion",level:2},{value:"Failed or slow ingestion",id:"failed-or-slow-ingestion",level:3},{value:"Problems when referencing documents in chat",id:"problems-when-referencing-documents-in-chat",level:3},{value:"See also",id:"see-also",level:2}];function w(e){const n={a:"a",admonition:"admonition",code:"code",h2:"h2",h3:"h3",li:"li",ol:"ol",p:"p",pre:"pre",strong:"strong",ul:"ul",...(0,r.R)(),...e.components},{Details:s}=n;return s||function(e,n){throw new Error("Expected "+(n?"component":"object")+" `"+e+"` to be defined: you likely forgot to import, pass, or provide it.")}("Details",!0),(0,o.jsxs)(o.Fragment,{children:[(0,o.jsxs)(n.p,{children:["Upload documents to your ",(0,o.jsx)(n.a,{href:"/knowledge",children:"OpenRAG OpenSearch instance"})," to populate your knowledge base with unique content, such as your own company documents, research papers, or websites.\nDocuments are processed through OpenRAG's knowledge ingestion flows with Docling."]}),"\n",(0,o.jsx)(n.p,{children:"OpenRAG can ingest knowledge from direct file uploads, URLs, and OAuth authenticated connectors."}),"\n",(0,o.jsxs)(n.p,{children:["Knowledge ingestion is powered by OpenRAG's built-in knowledge ingestion flows that use Docling to process documents before storing the documents in your OpenSearch database.\nDuring ingestion, documents are broken into smaller chunks of content that are then embedded using your selected ",(0,o.jsx)(n.a,{href:"/knowledge#set-the-embedding-model-and-dimensions",children:"embedding model"}),".\nThen, the chunks, embeddings, and associated metadata (which connects chunks of the same document) are stored in your OpenSearch database."]}),"\n",(0,o.jsxs)(n.p,{children:["To modify chunking behavior and other ingestion settings, see ",(0,o.jsx)(n.a,{href:"/knowledge#knowledge-ingestion-settings",children:"Knowledge ingestion settings"})," and ",(0,o.jsx)(n.a,{href:"/agents#inspect-and-modify-flows",children:"Inspect and modify flows"}),"."]}),"\n",(0,o.jsx)(n.h2,{id:"ingest-local-files-and-folders",children:"Ingest local files and folders"}),"\n",(0,o.jsx)(n.p,{children:"You can upload files and folders from your local machine to your knowledge base:"}),"\n",(0,o.jsxs)(n.ol,{children:["\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:["Click ",(0,o.jsx)(i.A,{name:"Library","aria-hidden":"true"})," ",(0,o.jsx)(n.strong,{children:"Knowledge"})," to view your OpenSearch knowledge base."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:["Click ",(0,o.jsx)(n.strong,{children:"Add Knowledge"})," to add your own documents to your OpenRAG knowledge base."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:["To upload one file, click ",(0,o.jsx)(i.A,{name:"File","aria-hidden":"true"})," ",(0,o.jsx)(n.strong,{children:"File"}),". To upload all documents in a folder, click ",(0,o.jsx)(i.A,{name:"Folder","aria-hidden":"true"})," ",(0,o.jsx)(n.strong,{children:"Folder"}),"."]}),"\n",(0,o.jsxs)(n.p,{children:["The default path is ",(0,o.jsx)(n.code,{children:"~/.openrag/documents"}),".\nTo change this path, see ",(0,o.jsx)(n.a,{href:"/knowledge#set-the-local-documents-path",children:"Set the local documents path"}),"."]}),"\n"]}),"\n"]}),"\n",(0,o.jsxs)(n.p,{children:["The selected files are processed in the background through the ",(0,o.jsx)(n.strong,{children:"OpenSearch Ingestion"})," flow."]}),"\n",(0,o.jsx)(u,{}),"\n",(0,o.jsxs)(n.p,{children:["You can ",(0,o.jsx)(n.a,{href:"#monitor-ingestion",children:"monitor ingestion"})," to see the progress of the uploads and check for failed uploads."]}),"\n",(0,o.jsx)(n.h2,{id:"ingest-local-files-temporarily",children:"Ingest local files temporarily"}),"\n",(0,o.jsx)(c.Ay,{}),"\n",(0,o.jsx)(n.h2,{id:"oauth-ingestion",children:"Ingest files with OAuth connectors"}),"\n",(0,o.jsx)(n.p,{children:"OpenRAG can use OAuth authenticated connectors to ingest documents from the following external services:"}),"\n",(0,o.jsxs)(n.ul,{children:["\n",(0,o.jsx)(n.li,{children:"AWS S3"}),"\n",(0,o.jsx)(n.li,{children:"Google Drive"}),"\n",(0,o.jsx)(n.li,{children:"Microsoft OneDrive"}),"\n",(0,o.jsx)(n.li,{children:"Microsoft Sharepoint"}),"\n"]}),"\n",(0,o.jsx)(n.p,{children:"These connectors enable seamless ingestion of files from cloud storage to your OpenRAG knowledge base."}),"\n",(0,o.jsx)(n.p,{children:"Individual users can connect their personal cloud storage accounts to OpenRAG. Each user must separately authorize OpenRAG to access their own cloud storage. When a user connects a cloud storage service, they are redirected to authenticate with that service provider and grant OpenRAG permission to sync documents from their personal cloud storage."}),"\n",(0,o.jsx)(n.h3,{id:"enable-oauth-connectors",children:"Enable OAuth connectors"}),"\n",(0,o.jsx)(n.p,{children:"Before users can connect their own cloud storage accounts, you must configure the provider's OAuth credentials in OpenRAG. Typically, this requires that you register OpenRAG as an OAuth application in your cloud provider, and then obtain the app's OAuth credentials, such as a client ID and secret key.\nTo enable multiple connectors, you must register an app and generate credentials for each provider."}),"\n",(0,o.jsxs)(l.A,{children:[(0,o.jsxs)(a.A,{value:"TUI",label:"TUI-managed services",default:!0,children:[(0,o.jsxs)(n.p,{children:["If you use the ",(0,o.jsx)(n.a,{href:"/tui",children:"Terminal User Interface (TUI)"})," to manage your OpenRAG services, enter OAuth credentials on the ",(0,o.jsx)(n.strong,{children:"Advanced Setup"})," page.\nYou can do this during ",(0,o.jsx)(n.a,{href:"/install#setup",children:"installation"}),", or you can add the credentials afterwards:"]}),(0,o.jsxs)(n.ol,{children:["\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:["If OpenRAG is running, click ",(0,o.jsx)(n.strong,{children:"Stop All Services"})," in the TUI."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:["Open the ",(0,o.jsx)(n.strong,{children:"Advanced Setup"})," page, and then add the OAuth credentials for the cloud storage providers that you want to use under ",(0,o.jsx)(n.strong,{children:"API Keys"}),":"]}),"\n",(0,o.jsxs)(n.ul,{children:["\n",(0,o.jsxs)(n.li,{children:[(0,o.jsx)(n.strong,{children:"Google"}),": Provide your Google OAuth Client ID and Google OAuth Client Secret. You can generate these in the ",(0,o.jsx)(n.a,{href:"https://console.cloud.google.com/apis/credentials",children:"Google Cloud Console"}),". For more information, see the ",(0,o.jsx)(n.a,{href:"https://developers.google.com/identity/protocols/oauth2",children:"Google OAuth client documentation"}),"."]}),"\n",(0,o.jsxs)(n.li,{children:[(0,o.jsx)(n.strong,{children:"Microsoft"}),": For the Microsoft OAuth Client ID and Microsoft OAuth Client Secret, provide ",(0,o.jsx)(n.a,{href:"https://learn.microsoft.com/en-us/onedrive/developer/rest-api/getting-started/app-registration?view=odsp-graph-online",children:"Azure application registration credentials for SharePoint and OneDrive"}),". For more information, see the ",(0,o.jsx)(n.a,{href:"https://learn.microsoft.com/en-us/onedrive/developer/rest-api/getting-started/graph-oauth",children:"Microsoft Graph OAuth client documentation"}),"."]}),"\n",(0,o.jsxs)(n.li,{children:[(0,o.jsx)(n.strong,{children:"Amazon"}),": Provide your AWS Access Key ID and AWS Secret Access Key with access to your S3 instance. For more information, see the AWS documentation on ",(0,o.jsx)(n.a,{href:"https://docs.aws.amazon.com/singlesignon/latest/userguide/manage-your-applications.html",children:"Configuring access to AWS applications"}),"."]}),"\n"]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsx)(n.p,{children:"Register the redirect URIs shown in the TUI in your OAuth provider.\nThese are the URLs your OAuth provider will use to redirect users back to OpenRAG after they sign in."}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:["Click ",(0,o.jsx)(n.strong,{children:"Save Configuration"})," to add the OAuth credentials to your ",(0,o.jsxs)(n.a,{href:"/reference/configuration",children:["OpenRAG ",(0,o.jsx)(n.code,{children:".env"})," file"]}),"."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:["Click ",(0,o.jsx)(n.strong,{children:"Start Services"})," to restart the OpenRAG containers with OAuth enabled."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsx)(n.p,{children:"Launch the OpenRAG app.\nYou should be prompted to sign in to your OAuth provider before being redirected to your OpenRAG instance."}),"\n"]}),"\n"]})]}),(0,o.jsxs)(a.A,{value:"env",label:"Self-managed services",children:[(0,o.jsxs)(n.p,{children:["If you ",(0,o.jsx)(n.a,{href:"/docker",children:"installed OpenRAG with self-managed services"}),", set OAuth credentials in your ",(0,o.jsxs)(n.a,{href:"/reference/configuration",children:["OpenRAG ",(0,o.jsx)(n.code,{children:".env"})," file"]}),"."]}),(0,o.jsxs)(n.p,{children:["You can do this during ",(0,o.jsx)(n.a,{href:"/docker#setup",children:"initial set up"}),", or you can add the credentials afterwards:"]}),(0,o.jsxs)(n.ol,{children:["\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsx)(n.p,{children:"Stop all OpenRAG containers:"}),"\n",(0,o.jsx)(g.Ay,{}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:["Edit your OpenRAG ",(0,o.jsx)(n.code,{children:".env"})," file to add the OAuth credentials for the cloud storage providers that you want to use:"]}),"\n",(0,o.jsxs)(n.ul,{children:["\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:[(0,o.jsx)(n.strong,{children:"Google"}),": Provide your Google OAuth Client ID and Google OAuth Client Secret. You can generate these in the ",(0,o.jsx)(n.a,{href:"https://console.cloud.google.com/apis/credentials",children:"Google Cloud Console"}),". For more information, see the ",(0,o.jsx)(n.a,{href:"https://developers.google.com/identity/protocols/oauth2",children:"Google OAuth client documentation"}),"."]}),"\n",(0,o.jsx)(n.pre,{children:(0,o.jsx)(n.code,{className:"language-env",children:"GOOGLE_OAUTH_CLIENT_ID=\nGOOGLE_OAUTH_CLIENT_SECRET=\n"})}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:[(0,o.jsx)(n.strong,{children:"Microsoft"}),": For the Microsoft OAuth Client ID and Microsoft OAuth Client Secret, provide ",(0,o.jsx)(n.a,{href:"https://learn.microsoft.com/en-us/onedrive/developer/rest-api/getting-started/app-registration?view=odsp-graph-online",children:"Azure application registration credentials for SharePoint and OneDrive"}),". For more information, see the ",(0,o.jsx)(n.a,{href:"https://learn.microsoft.com/en-us/onedrive/developer/rest-api/getting-started/graph-oauth",children:"Microsoft Graph OAuth client documentation"}),"."]}),"\n",(0,o.jsx)(n.pre,{children:(0,o.jsx)(n.code,{className:"language-env",children:"MICROSOFT_GRAPH_OAUTH_CLIENT_ID=\nMICROSOFT_GRAPH_OAUTH_CLIENT_SECRET=\n"})}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:[(0,o.jsx)(n.strong,{children:"Amazon"}),": Provide your AWS Access Key ID and AWS Secret Access Key with access to your S3 instance. For more information, see the AWS documentation on ",(0,o.jsx)(n.a,{href:"https://docs.aws.amazon.com/singlesignon/latest/userguide/manage-your-applications.html",children:"Configuring access to AWS applications"}),"."]}),"\n",(0,o.jsx)(n.pre,{children:(0,o.jsx)(n.code,{className:"language-env",children:"AWS_ACCESS_KEY_ID=\nAWS_SECRET_ACCESS_KEY=\n"})}),"\n"]}),"\n"]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:["Save the ",(0,o.jsx)(n.code,{children:".env"})," file."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsx)(n.p,{children:"Restart your OpenRAG containers:"}),"\n",(0,o.jsx)(p.Ay,{}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:["Access the OpenRAG frontend at ",(0,o.jsx)(n.code,{children:"http://localhost:3000"}),".\nYou should be prompted to sign in to your OAuth provider before being redirected to your OpenRAG instance."]}),"\n"]}),"\n"]})]})]}),"\n",(0,o.jsx)(n.h3,{id:"authenticate-and-ingest-files-from-cloud-storage",children:"Authenticate and ingest files from cloud storage"}),"\n",(0,o.jsxs)(n.p,{children:["After you start OpenRAG with OAuth connectors enabled, each user is prompted to authenticate with the OAuth provider upon accessing your OpenRAG instance.\nIndividual authentication is required to access a user's cloud storage from your OpenRAG instance.\nFor example, if a user navigates to the default OpenRAG URL at ",(0,o.jsx)(n.code,{children:"http://localhost:3000"}),", they are redirected to the OAuth provider's sign-in page.\nAfter authenticating and granting the required permissions for OpenRAG, the user is redirected back to OpenRAG."]}),"\n",(0,o.jsx)(n.p,{children:"To ingest knowledge with an OAuth connector, do the following:"}),"\n",(0,o.jsxs)(n.ol,{children:["\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:["Click ",(0,o.jsx)(i.A,{name:"Library","aria-hidden":"true"})," ",(0,o.jsx)(n.strong,{children:"Knowledge"})," to view your OpenSearch knowledge base."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:["Click ",(0,o.jsx)(n.strong,{children:"Add Knowledge"}),", and then select a storage provider."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:["On the ",(0,o.jsx)(n.strong,{children:"Add Cloud Knowledge"})," page, click ",(0,o.jsx)(n.strong,{children:"Add Files"}),", and then select the files and folders to ingest from the connected storage."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:["Click ",(0,o.jsx)(n.strong,{children:"Ingest Files"}),"."]}),"\n"]}),"\n"]}),"\n",(0,o.jsxs)(n.p,{children:["The selected files are processed in the background through the ",(0,o.jsx)(n.strong,{children:"OpenSearch Ingestion"})," flow."]}),"\n",(0,o.jsx)(u,{}),"\n",(0,o.jsxs)(n.p,{children:["You can ",(0,o.jsx)(n.a,{href:"#monitor-ingestion",children:"monitor ingestion"})," to see the progress of the uploads and check for failed uploads."]}),"\n",(0,o.jsx)(n.h2,{id:"url-flow",children:"Ingest knowledge from URLs"}),"\n",(0,o.jsx)(n.p,{children:"When using the OpenRAG chat, you can enter URLs into the chat to be ingested in real-time during your conversation."}),"\n",(0,o.jsx)(n.admonition,{type:"info",children:(0,o.jsxs)(n.p,{children:["The chat cannot ingest URLs that end in static document file extensions like ",(0,o.jsx)(n.code,{children:".pdf"}),".\nTo upload these types of files, see ",(0,o.jsx)(n.a,{href:"#ingest-local-files-and-folders",children:"Ingest local files and folders"})," and ",(0,o.jsx)(n.a,{href:"#oauth-ingestion",children:"Ingest files with OAuth connectors"}),"."]})}),"\n",(0,o.jsxs)(n.p,{children:["OpenRAG runs the ",(0,o.jsx)(n.strong,{children:"OpenSearch URL Ingestion"})," flow to ingest web content from URLs.\nThis flow isn't directly accessible from the OpenRAG user interface.\nInstead, this flow is called by the ",(0,o.jsxs)(n.a,{href:"/chat#flow",children:[(0,o.jsx)(n.strong,{children:"OpenRAG OpenSearch Agent"})," flow"]})," as a Model Context Protocol (MCP) tool.\nThe agent can call this component to fetch web content from a given URL, and then ingest that content into your OpenSearch knowledge base.\nLike all OpenRAG flows, you can ",(0,o.jsx)(n.a,{href:"/agents#inspect-and-modify-flows",children:"inspect the flow in Langflow"}),", and you can customize it.\nFor more information about MCP in Langflow, see the Langflow documentation on ",(0,o.jsx)(n.a,{href:"https://docs.langflow.org/mcp-client",children:"MCP clients"})," and ",(0,o.jsx)(n.a,{href:"https://docs.langflow.org/mcp-tutorial",children:"MCP servers"}),"."]}),"\n",(0,o.jsx)(n.h2,{id:"monitor-ingestion",children:"Monitor ingestion"}),"\n",(0,o.jsx)(n.p,{children:"Document ingestion tasks run in the background."}),"\n",(0,o.jsxs)(n.p,{children:["In the OpenRAG user interface, a badge is shown on ",(0,o.jsx)(i.A,{name:"Bell","aria-hidden":"true"})," ",(0,o.jsx)(n.strong,{children:"Tasks"})," when OpenRAG tasks are active.\nClick ",(0,o.jsx)(i.A,{name:"Bell","aria-hidden":"true"})," ",(0,o.jsx)(n.strong,{children:"Tasks"})," to inspect and cancel tasks:"]}),"\n",(0,o.jsxs)(n.ul,{children:["\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:[(0,o.jsx)(n.strong,{children:"Active Tasks"}),": All tasks that are ",(0,o.jsx)(n.strong,{children:"Pending"}),", ",(0,o.jsx)(n.strong,{children:"Running"}),", or ",(0,o.jsx)(n.strong,{children:"Processing"}),".\nFor each active task, depending on its state, you can find the task ID, start time, duration, number of files processed, and the total files enqueued for processing."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:[(0,o.jsx)(n.strong,{children:"Pending"}),": The task is queued and waiting to start."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:[(0,o.jsx)(n.strong,{children:"Running"}),": The task is actively processing files."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:[(0,o.jsx)(n.strong,{children:"Processing"}),": The task is performing ingestion operations."]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsxs)(n.p,{children:[(0,o.jsx)(n.strong,{children:"Failed"}),": Something went wrong during ingestion, or the task was manually canceled.\nFor troubleshooting advice, see ",(0,o.jsx)(n.a,{href:"#troubleshoot-ingestion",children:"Troubleshoot ingestion"}),"."]}),"\n"]}),"\n"]}),"\n",(0,o.jsxs)(n.p,{children:["To stop an active task, click ",(0,o.jsx)(i.A,{name:"X","aria-hidden":"true"})," ",(0,o.jsx)(n.strong,{children:"Cancel"}),". Canceling a task stops processing immediately and marks the task as ",(0,o.jsx)(n.strong,{children:"Failed"}),"."]}),"\n",(0,o.jsx)(n.h3,{id:"ingestion-performance-expectations",children:"Ingestion performance expectations"}),"\n",(0,o.jsx)(n.p,{children:"The following performance test was conducted with Docling Serve."}),"\n",(0,o.jsx)(n.p,{children:"On a local VM with 7 vCPUs and 8 GiB RAM, OpenRAG ingested approximately 5.03 GB across 1,083 files in about 42 minutes.\nThis equates to approximately 2.4 documents per second."}),"\n",(0,o.jsx)(n.p,{children:"You can generally expect equal or better performance on developer laptops, and significantly faster performance on servers.\nThroughput scales with CPU cores, memory, storage speed, and configuration choices, such as the embedding model, chunk size, overlap, and concurrency."}),"\n",(0,o.jsx)(n.p,{children:"This test returned 12 error, approximately 1.1 percent of the total files ingested.\nAll errors were file-specific, and they didn't stop the pipeline."}),"\n",(0,o.jsxs)(s,{children:[(0,o.jsx)("summary",{children:"Ingestion performance test details"}),(0,o.jsxs)(n.ul,{children:["\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsx)(n.p,{children:"Ingestion dataset:"}),"\n",(0,o.jsxs)(n.ul,{children:["\n",(0,o.jsx)(n.li,{children:"Total files: 1,083 items mounted"}),"\n",(0,o.jsx)(n.li,{children:"Total size on disk: 5,026,474,862 bytes (approximately 5.03 GB)"}),"\n"]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsx)(n.p,{children:"Hardware specifications:"}),"\n",(0,o.jsxs)(n.ul,{children:["\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsx)(n.p,{children:"Machine: Apple M4 Pro"}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsx)(n.p,{children:"Podman VM:"}),"\n",(0,o.jsxs)(n.ul,{children:["\n",(0,o.jsx)(n.li,{children:"Name: podman-machine-default"}),"\n",(0,o.jsx)(n.li,{children:"Type: applehv"}),"\n",(0,o.jsx)(n.li,{children:"vCPUs: 7"}),"\n",(0,o.jsx)(n.li,{children:"Memory: 8 GiB"}),"\n",(0,o.jsx)(n.li,{children:"Disk size: 100 GiB"}),"\n"]}),"\n"]}),"\n"]}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsx)(n.p,{children:"Test results:"}),"\n",(0,o.jsx)(n.pre,{children:(0,o.jsx)(n.code,{className:"language-text",children:"2025-09-24T22:40:45.542190Z /app/src/main.py:231 Ingesting default documents when ready disable_langflow_ingest=False\n2025-09-24T22:40:45.546385Z /app/src/main.py:270 Using Langflow ingestion pipeline for default documents file_count=1082\n...\n2025-09-24T23:19:44.866365Z /app/src/main.py:351 Langflow ingestion completed success_count=1070 error_count=12 total_files=1082\n"})}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsx)(n.p,{children:"Elapsed time: Approximately 42 minutes 15 seconds (2,535 seconds)"}),"\n"]}),"\n",(0,o.jsxs)(n.li,{children:["\n",(0,o.jsx)(n.p,{children:"Throughput: Approximately 2.4 documents per second"}),"\n"]}),"\n"]})]}),"\n",(0,o.jsx)(n.h2,{id:"troubleshoot-ingestion",children:"Troubleshoot ingestion"}),"\n",(0,o.jsx)(n.p,{children:"The following issues can occur during document ingestion."}),"\n",(0,o.jsx)(n.h3,{id:"failed-or-slow-ingestion",children:"Failed or slow ingestion"}),"\n",(0,o.jsx)(n.p,{children:"If an ingestion task fails, do the following:"}),"\n",(0,o.jsxs)(n.ul,{children:["\n",(0,o.jsx)(n.li,{children:"Make sure you are uploading supported file types."}),"\n",(0,o.jsx)(n.li,{children:"Split excessively large files into smaller files before uploading."}),"\n",(0,o.jsx)(n.li,{children:"Remove unusual embedded content, such as videos or animations, before uploading. Although Docling can replace some non-text content with placeholders during ingestion, some embedded content might cause errors."}),"\n",(0,o.jsxs)(n.li,{children:["Make sure your Podman/Docker VM has sufficient memory for the ingestion tasks.\nThe minimum recommendation is 8 GB of RAM.\nIf you regularly upload large files, more RAM is recommended.\nFor more information, see ",(0,o.jsx)(n.a,{href:"/support/troubleshoot#memory-issue-with-podman-on-macos",children:"Memory issue with Podman on macOS"})," and ",(0,o.jsx)(n.a,{href:"/support/troubleshoot#container-out-of-memory-errors",children:"Container out of memory errors"}),"."]}),"\n",(0,o.jsxs)(n.li,{children:["If OCR ingestion fails due to OCR missing, see ",(0,o.jsx)(n.a,{href:"/support/troubleshoot#ocr-ingestion-fails-easyocr-not-installed",children:"OCR ingestion fails (easyocr not installed)"}),"."]}),"\n"]}),"\n",(0,o.jsx)(n.h3,{id:"problems-when-referencing-documents-in-chat",children:"Problems when referencing documents in chat"}),"\n",(0,o.jsxs)(n.p,{children:["If the OpenRAG ",(0,o.jsx)(n.strong,{children:"Chat"})," doesn't seem to use your documents correctly, ",(0,o.jsx)(n.a,{href:"/knowledge#browse-knowledge",children:"browse your knowledge base"})," to confirm that the documents are uploaded in full, and the chunks are correct."]}),"\n",(0,o.jsxs)(n.p,{children:["If the documents are present and well-formed, check your ",(0,o.jsx)(n.a,{href:"/knowledge-filters",children:"knowledge filters"}),".\nIf a global filter is applied, make sure the expected documents are included in the global filter.\nIf the global filter excludes any documents, the agent cannot access those documents unless you apply a chat-level filter or change the global filter."]}),"\n",(0,o.jsx)(n.p,{children:"If text is missing or incorrectly processed, you need to reupload the documents after modifying the ingestion parameters or the documents themselves.\nFor example:"}),"\n",(0,o.jsxs)(n.ul,{children:["\n",(0,o.jsx)(n.li,{children:"Break combined documents into separate files for better metadata context."}),"\n",(0,o.jsxs)(n.li,{children:["Make sure scanned documents are legible enough for extraction, and enable the ",(0,o.jsx)(n.strong,{children:"OCR"})," option. Poorly scanned documents might require additional preparation or rescanning before ingestion."]}),"\n",(0,o.jsxs)(n.li,{children:["Adjust the ",(0,o.jsx)(n.strong,{children:"Chunk Size"})," and ",(0,o.jsx)(n.strong,{children:"Chunk Overlap"})," settings to better suit your documents. Larger chunks provide more context but can include irrelevant information, while smaller chunks yield more precise semantic search but can lack context."]}),"\n"]}),"\n",(0,o.jsxs)(n.p,{children:["For more information about modifying ingestion parameters and flows, see ",(0,o.jsx)(n.a,{href:"/knowledge#knowledge-ingestion-settings",children:"Knowledge ingestion settings"}),"."]}),"\n",(0,o.jsx)(n.h2,{id:"see-also",children:"See also"}),"\n",(0,o.jsxs)(n.ul,{children:["\n",(0,o.jsx)(n.li,{children:(0,o.jsx)(n.a,{href:"/knowledge",children:"Configure knowledge"})}),"\n",(0,o.jsx)(n.li,{children:(0,o.jsx)(n.a,{href:"/knowledge-filters",children:"Filter knowledge"})}),"\n",(0,o.jsx)(n.li,{children:(0,o.jsx)(n.a,{href:"/chat",children:"Chat with knowledge"})}),"\n",(0,o.jsx)(n.li,{children:(0,o.jsx)(n.a,{href:"/agents#inspect-and-modify-flows",children:"Inspect and modify flows"})}),"\n"]})]})}function b(e={}){const{wrapper:n}={...(0,r.R)(),...e.components};return n?(0,o.jsx)(n,{...e,children:(0,o.jsx)(w,{...e})}):w(e)}},8401:(e,n,s)=>{s.d(n,{Ay:()=>a,RM:()=>i});var t=s(4848),o=s(8453),r=s(9179);const i=[];function l(e){const n={p:"p",strong:"strong",...(0,o.R)(),...e.components};return(0,t.jsxs)(n.p,{children:["When using the OpenRAG ",(0,t.jsx)(n.strong,{children:"Chat"}),", click ",(0,t.jsx)(r.A,{name:"Plus","aria-hidden":"true"})," in the chat input field to upload a file to the current chat session.\nFiles added this way are processed and made available to the agent for the current conversation only.\nThese files aren't stored in the knowledge base permanently."]})}function a(e={}){const{wrapper:n}={...(0,o.R)(),...e.components};return n?(0,t.jsx)(n,{...e,children:(0,t.jsx)(l,{...e})}):l(e)}},9179:(e,n,s)=>{s.d(n,{A:()=>r});s(6540);var t=s(7856),o=s(4848);function r({name:e,...n}){const s=t[e];return s?(0,o.jsx)(s,{...n}):null}},9365:(e,n,s)=>{s.d(n,{A:()=>i});s(6540);var t=s(4164);const o={tabItem:"tabItem_Ymn6"};var r=s(4848);function i({children:e,hidden:n,className:s}){return(0,r.jsx)("div",{role:"tabpanel",className:(0,t.A)(o.tabItem,s),hidden:n,children:e})}}}]); \ No newline at end of file diff --git a/assets/js/runtime~main.8df7d349.js b/assets/js/runtime~main.3e7bcd02.js similarity index 98% rename from assets/js/runtime~main.8df7d349.js rename to assets/js/runtime~main.3e7bcd02.js index e87d4ee9..32db5d6f 100644 --- a/assets/js/runtime~main.8df7d349.js +++ b/assets/js/runtime~main.3e7bcd02.js @@ -1 +1 @@ -(()=>{"use strict";var e,a,r,t,d,f={},c={};function o(e){var a=c[e];if(void 0!==a)return a.exports;var r=c[e]={exports:{}};return f[e].call(r.exports,r,r.exports,o),r.exports}o.m=f,e=[],o.O=(a,r,t,d)=>{if(!r){var f=1/0;for(i=0;i=d)&&Object.keys(o.O).every(e=>o.O[e](r[b]))?r.splice(b--,1):(c=!1,d0&&e[i-1][2]>d;i--)e[i]=e[i-1];e[i]=[r,t,d]},o.n=e=>{var a=e&&e.__esModule?()=>e.default:()=>e;return o.d(a,{a:a}),a},r=Object.getPrototypeOf?e=>Object.getPrototypeOf(e):e=>e.__proto__,o.t=function(e,t){if(1&t&&(e=this(e)),8&t)return e;if("object"==typeof e&&e){if(4&t&&e.__esModule)return e;if(16&t&&"function"==typeof e.then)return e}var d=Object.create(null);o.r(d);var f={};a=a||[null,r({}),r([]),r(r)];for(var c=2&t&&e;("object"==typeof c||"function"==typeof c)&&!~a.indexOf(c);c=r(c))Object.getOwnPropertyNames(c).forEach(a=>f[a]=()=>e[a]);return f.default=()=>e,o.d(d,f),d},o.d=(e,a)=>{for(var r in a)o.o(a,r)&&!o.o(e,r)&&Object.defineProperty(e,r,{enumerable:!0,get:a[r]})},o.f={},o.e=e=>Promise.all(Object.keys(o.f).reduce((a,r)=>(o.f[r](e,a),a),[])),o.u=e=>"assets/js/"+({98:"af920ffe",571:"0ba6a408",961:"21afe7ac",1567:"22dd74f7",2272:"749371cc",2668:"eb5b356a",3207:"27b4a875",5490:"71478a5d",5668:"c5b64a02",5742:"aba21aa0",5750:"d0314b07",5848:"d03060d0",6152:"52896773",6190:"7b5e550f",6235:"915a24fd",6919:"ca2c3c0c",6976:"4ba45368",7098:"a7bd4aaa",8186:"03ef5215",8401:"17896441",8615:"ae242f53",8617:"441f609b",9026:"c8078f0a",9048:"a94703ab",9172:"e633a5ea",9532:"33362219",9647:"5e95c892"}[e]||e)+"."+{98:"fae170b0",165:"7b1d067d",291:"1b64972f",571:"e346183d",617:"e8fa27b8",961:"7f9f70e7",1e3:"f9af7a41",1203:"b618bb61",1567:"8f33e94e",1741:"b927934f",1746:"1ce4b99b",2130:"09232a19",2237:"70c48bab",2272:"fa610c63",2279:"44341cb1",2291:"2753844f",2325:"7c4239a7",2334:"6d98e48e",2492:"e6c78669",2668:"07349ef3",2821:"7e574346",3207:"e4ca90dd",3490:"026802f4",3624:"a9a49a51",3815:"f87fc96a",4218:"0662e556",4250:"77e8e6c6",4616:"0d0b0873",4802:"a8b2857c",4981:"61cf4b0a",5480:"23265b76",5490:"6bdf311d",5668:"8aade99d",5742:"2f625fe1",5750:"c52456be",5848:"4e243f92",5901:"f6ccb00b",5955:"14464ff3",5996:"b2d1663e",6152:"7edb6c39",6190:"a397b62a",6235:"34562fc1",6241:"a1fa513c",6319:"9020eb44",6366:"0c77f825",6567:"08800141",6919:"dc47ac79",6976:"9417b2ce",6992:"eb4cc2ed",7098:"9fea9356",7465:"7e0b8008",7592:"d6165eff",7873:"083fcc2e",7928:"5f633e47",8142:"5edfc34d",8186:"0f05b0ce",8249:"4f81048e",8401:"afd63cb1",8565:"6a562290",8615:"33b6f886",8617:"ca64d962",8731:"4bc22343",8756:"74d5516d",9026:"75fa4391",9032:"f4369ef2",9048:"fa9eaf65",9172:"88a60d4e",9412:"ca1dda85",9510:"81e2df6a",9532:"9fafa665",9647:"d28d5273"}[e]+".js",o.miniCssF=e=>{},o.o=(e,a)=>Object.prototype.hasOwnProperty.call(e,a),t={},d="openrag-docs:",o.l=(e,a,r,f)=>{if(t[e])t[e].push(a);else{var c,b;if(void 0!==r)for(var n=document.getElementsByTagName("script"),i=0;i{c.onerror=c.onload=null,clearTimeout(s);var d=t[e];if(delete t[e],c.parentNode&&c.parentNode.removeChild(c),d&&d.forEach(e=>e(r)),a)return a(r)},s=setTimeout(u.bind(null,void 0,{type:"timeout",target:c}),12e4);c.onerror=u.bind(null,c.onerror),c.onload=u.bind(null,c.onload),b&&document.head.appendChild(c)}},o.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},o.p="/",o.gca=function(e){return e={17896441:"8401",33362219:"9532",52896773:"6152",af920ffe:"98","0ba6a408":"571","21afe7ac":"961","22dd74f7":"1567","749371cc":"2272",eb5b356a:"2668","27b4a875":"3207","71478a5d":"5490",c5b64a02:"5668",aba21aa0:"5742",d0314b07:"5750",d03060d0:"5848","7b5e550f":"6190","915a24fd":"6235",ca2c3c0c:"6919","4ba45368":"6976",a7bd4aaa:"7098","03ef5215":"8186",ae242f53:"8615","441f609b":"8617",c8078f0a:"9026",a94703ab:"9048",e633a5ea:"9172","5e95c892":"9647"}[e]||e,o.p+o.u(e)},(()=>{var e={5354:0,1869:0};o.f.j=(a,r)=>{var t=o.o(e,a)?e[a]:void 0;if(0!==t)if(t)r.push(t[2]);else if(/^(1869|5354)$/.test(a))e[a]=0;else{var d=new Promise((r,d)=>t=e[a]=[r,d]);r.push(t[2]=d);var f=o.p+o.u(a),c=new Error;o.l(f,r=>{if(o.o(e,a)&&(0!==(t=e[a])&&(e[a]=void 0),t)){var d=r&&("load"===r.type?"missing":r.type),f=r&&r.target&&r.target.src;c.message="Loading chunk "+a+" failed.\n("+d+": "+f+")",c.name="ChunkLoadError",c.type=d,c.request=f,t[1](c)}},"chunk-"+a,a)}},o.O.j=a=>0===e[a];var a=(a,r)=>{var t,d,[f,c,b]=r,n=0;if(f.some(a=>0!==e[a])){for(t in c)o.o(c,t)&&(o.m[t]=c[t]);if(b)var i=b(o)}for(a&&a(r);n{"use strict";var e,a,r,t,d,f={},c={};function o(e){var a=c[e];if(void 0!==a)return a.exports;var r=c[e]={exports:{}};return f[e].call(r.exports,r,r.exports,o),r.exports}o.m=f,e=[],o.O=(a,r,t,d)=>{if(!r){var f=1/0;for(i=0;i=d)&&Object.keys(o.O).every(e=>o.O[e](r[b]))?r.splice(b--,1):(c=!1,d0&&e[i-1][2]>d;i--)e[i]=e[i-1];e[i]=[r,t,d]},o.n=e=>{var a=e&&e.__esModule?()=>e.default:()=>e;return o.d(a,{a:a}),a},r=Object.getPrototypeOf?e=>Object.getPrototypeOf(e):e=>e.__proto__,o.t=function(e,t){if(1&t&&(e=this(e)),8&t)return e;if("object"==typeof e&&e){if(4&t&&e.__esModule)return e;if(16&t&&"function"==typeof e.then)return e}var d=Object.create(null);o.r(d);var f={};a=a||[null,r({}),r([]),r(r)];for(var c=2&t&&e;("object"==typeof c||"function"==typeof c)&&!~a.indexOf(c);c=r(c))Object.getOwnPropertyNames(c).forEach(a=>f[a]=()=>e[a]);return f.default=()=>e,o.d(d,f),d},o.d=(e,a)=>{for(var r in a)o.o(a,r)&&!o.o(e,r)&&Object.defineProperty(e,r,{enumerable:!0,get:a[r]})},o.f={},o.e=e=>Promise.all(Object.keys(o.f).reduce((a,r)=>(o.f[r](e,a),a),[])),o.u=e=>"assets/js/"+({98:"af920ffe",571:"0ba6a408",961:"21afe7ac",1567:"22dd74f7",2272:"749371cc",2668:"eb5b356a",3207:"27b4a875",5490:"71478a5d",5668:"c5b64a02",5742:"aba21aa0",5750:"d0314b07",5848:"d03060d0",6152:"52896773",6190:"7b5e550f",6235:"915a24fd",6919:"ca2c3c0c",6976:"4ba45368",7098:"a7bd4aaa",8186:"03ef5215",8401:"17896441",8615:"ae242f53",8617:"441f609b",9026:"c8078f0a",9048:"a94703ab",9172:"e633a5ea",9532:"33362219",9647:"5e95c892"}[e]||e)+"."+{98:"fae170b0",165:"7b1d067d",291:"1b64972f",571:"e346183d",617:"e8fa27b8",961:"7f9f70e7",1e3:"f9af7a41",1203:"b618bb61",1567:"8f33e94e",1741:"b927934f",1746:"1ce4b99b",2130:"09232a19",2237:"70c48bab",2272:"fa610c63",2279:"44341cb1",2291:"2753844f",2325:"7c4239a7",2334:"6d98e48e",2492:"e6c78669",2668:"07349ef3",2821:"7e574346",3207:"e4ca90dd",3490:"026802f4",3624:"a9a49a51",3815:"f87fc96a",4218:"0662e556",4250:"77e8e6c6",4616:"0d0b0873",4802:"a8b2857c",4981:"61cf4b0a",5480:"23265b76",5490:"6bdf311d",5668:"8aade99d",5742:"2f625fe1",5750:"c52456be",5848:"4e243f92",5901:"f6ccb00b",5955:"14464ff3",5996:"b2d1663e",6152:"7edb6c39",6190:"a397b62a",6235:"34562fc1",6241:"a1fa513c",6319:"9020eb44",6366:"0c77f825",6567:"08800141",6919:"f282f8bd",6976:"9417b2ce",6992:"eb4cc2ed",7098:"9fea9356",7465:"7e0b8008",7592:"d6165eff",7873:"083fcc2e",7928:"5f633e47",8142:"5edfc34d",8186:"0f05b0ce",8249:"4f81048e",8401:"afd63cb1",8565:"6a562290",8615:"33b6f886",8617:"ca64d962",8731:"4bc22343",8756:"74d5516d",9026:"75fa4391",9032:"f4369ef2",9048:"fa9eaf65",9172:"88a60d4e",9412:"ca1dda85",9510:"81e2df6a",9532:"9fafa665",9647:"d28d5273"}[e]+".js",o.miniCssF=e=>{},o.o=(e,a)=>Object.prototype.hasOwnProperty.call(e,a),t={},d="openrag-docs:",o.l=(e,a,r,f)=>{if(t[e])t[e].push(a);else{var c,b;if(void 0!==r)for(var n=document.getElementsByTagName("script"),i=0;i{c.onerror=c.onload=null,clearTimeout(s);var d=t[e];if(delete t[e],c.parentNode&&c.parentNode.removeChild(c),d&&d.forEach(e=>e(r)),a)return a(r)},s=setTimeout(u.bind(null,void 0,{type:"timeout",target:c}),12e4);c.onerror=u.bind(null,c.onerror),c.onload=u.bind(null,c.onload),b&&document.head.appendChild(c)}},o.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},o.p="/",o.gca=function(e){return e={17896441:"8401",33362219:"9532",52896773:"6152",af920ffe:"98","0ba6a408":"571","21afe7ac":"961","22dd74f7":"1567","749371cc":"2272",eb5b356a:"2668","27b4a875":"3207","71478a5d":"5490",c5b64a02:"5668",aba21aa0:"5742",d0314b07:"5750",d03060d0:"5848","7b5e550f":"6190","915a24fd":"6235",ca2c3c0c:"6919","4ba45368":"6976",a7bd4aaa:"7098","03ef5215":"8186",ae242f53:"8615","441f609b":"8617",c8078f0a:"9026",a94703ab:"9048",e633a5ea:"9172","5e95c892":"9647"}[e]||e,o.p+o.u(e)},(()=>{var e={5354:0,1869:0};o.f.j=(a,r)=>{var t=o.o(e,a)?e[a]:void 0;if(0!==t)if(t)r.push(t[2]);else if(/^(1869|5354)$/.test(a))e[a]=0;else{var d=new Promise((r,d)=>t=e[a]=[r,d]);r.push(t[2]=d);var f=o.p+o.u(a),c=new Error;o.l(f,r=>{if(o.o(e,a)&&(0!==(t=e[a])&&(e[a]=void 0),t)){var d=r&&("load"===r.type?"missing":r.type),f=r&&r.target&&r.target.src;c.message="Loading chunk "+a+" failed.\n("+d+": "+f+")",c.name="ChunkLoadError",c.type=d,c.request=f,t[1](c)}},"chunk-"+a,a)}},o.O.j=a=>0===e[a];var a=(a,r)=>{var t,d,[f,c,b]=r,n=0;if(f.some(a=>0!==e[a])){for(t in c)o.o(c,t)&&(o.m[t]=c[t]);if(b)var i=b(o)}for(a&&a(r);n Chat in OpenRAG | OpenRAG - + diff --git a/docker/index.html b/docker/index.html index 14a0156a..7b00367f 100644 --- a/docker/index.html +++ b/docker/index.html @@ -4,7 +4,7 @@ Deploy OpenRAG with self-managed services | OpenRAG - + diff --git a/index.html b/index.html index d99e9b51..1102f75a 100644 --- a/index.html +++ b/index.html @@ -4,7 +4,7 @@ What is OpenRAG? | OpenRAG - + diff --git a/ingestion/index.html b/ingestion/index.html index 6c9d8454..9a3b46eb 100644 --- a/ingestion/index.html +++ b/ingestion/index.html @@ -4,7 +4,7 @@ Ingest knowledge | OpenRAG - + @@ -197,9 +197,9 @@ By default, this flow uses Docling Serve to import and process documents.

You can monitor ingestion to see the progress of the uploads and check for failed uploads.

Ingest knowledge from URLs

When using the OpenRAG chat, you can enter URLs into the chat to be ingested in real-time during your conversation.

-
tip

Use UTF-8 encoding for URLs with special characters other than the standard slash, period, and colon characters. -For example, use https://en.wikipedia.org/wiki/Caf%C3%A9 instead of https://en.wikipedia.org/wiki/Café or https://en.wikipedia.org/wiki/Coffee%5Fculture instead of https://en.wikipedia.org/wiki/Coffee_culture.

-

The OpenSearch URL Ingestion flow is used to ingest web content from URLs. +

info

The chat cannot ingest URLs that end in static document file extensions like .pdf. +To upload these types of files, see Ingest local files and folders and Ingest files with OAuth connectors.

+

OpenRAG runs the OpenSearch URL Ingestion flow to ingest web content from URLs. This flow isn't directly accessible from the OpenRAG user interface. Instead, this flow is called by the OpenRAG OpenSearch Agent flow as a Model Context Protocol (MCP) tool. The agent can call this component to fetch web content from a given URL, and then ingest that content into your OpenSearch knowledge base. diff --git a/install-options/index.html b/install-options/index.html index e7ec0cdb..e21fc2c7 100644 --- a/install-options/index.html +++ b/install-options/index.html @@ -4,7 +4,7 @@ Select an installation method | OpenRAG - + diff --git a/install-uv/index.html b/install-uv/index.html index f5a0971d..0253f85f 100644 --- a/install-uv/index.html +++ b/install-uv/index.html @@ -4,7 +4,7 @@ Install OpenRAG in a Python project with uv | OpenRAG - + diff --git a/install-uvx/index.html b/install-uvx/index.html index 901e50eb..8f5428ee 100644 --- a/install-uvx/index.html +++ b/install-uvx/index.html @@ -4,7 +4,7 @@ Invoke OpenRAG with uvx | OpenRAG - + diff --git a/install-windows/index.html b/install-windows/index.html index 0a6107a3..1221f961 100644 --- a/install-windows/index.html +++ b/install-windows/index.html @@ -4,7 +4,7 @@ Install OpenRAG on Microsoft Windows | OpenRAG - + diff --git a/install/index.html b/install/index.html index 904b50d6..cb7ba73e 100644 --- a/install/index.html +++ b/install/index.html @@ -4,7 +4,7 @@ Install OpenRAG with the automatic installer script | OpenRAG - + diff --git a/knowledge-filters/index.html b/knowledge-filters/index.html index d6926b1c..465646e6 100644 --- a/knowledge-filters/index.html +++ b/knowledge-filters/index.html @@ -4,7 +4,7 @@ Filter knowledge | OpenRAG - + diff --git a/knowledge/index.html b/knowledge/index.html index 6af971c1..c9d0f51e 100644 --- a/knowledge/index.html +++ b/knowledge/index.html @@ -4,7 +4,7 @@ Configure knowledge | OpenRAG - + diff --git a/manage-services/index.html b/manage-services/index.html index 7c99aba9..4605ff39 100644 --- a/manage-services/index.html +++ b/manage-services/index.html @@ -4,7 +4,7 @@ Manage OpenRAG containers and services | OpenRAG - + diff --git a/quickstart/index.html b/quickstart/index.html index 39933620..24fcfab2 100644 --- a/quickstart/index.html +++ b/quickstart/index.html @@ -4,7 +4,7 @@ Quickstart | OpenRAG - + diff --git a/reference/configuration/index.html b/reference/configuration/index.html index 8c66f5ba..923f26a5 100644 --- a/reference/configuration/index.html +++ b/reference/configuration/index.html @@ -4,7 +4,7 @@ Environment variables | OpenRAG - + diff --git a/reinstall/index.html b/reinstall/index.html index 8874ac41..05cf6729 100644 --- a/reinstall/index.html +++ b/reinstall/index.html @@ -4,7 +4,7 @@ Reinstall OpenRAG | OpenRAG - + diff --git a/support/contribute/index.html b/support/contribute/index.html index 89c91159..305b6f6f 100644 --- a/support/contribute/index.html +++ b/support/contribute/index.html @@ -4,7 +4,7 @@ Contribute to OpenRAG | OpenRAG - + diff --git a/support/troubleshoot/index.html b/support/troubleshoot/index.html index 9021947c..c0029632 100644 --- a/support/troubleshoot/index.html +++ b/support/troubleshoot/index.html @@ -4,7 +4,7 @@ Troubleshoot OpenRAG | OpenRAG - + diff --git a/tui/index.html b/tui/index.html index 3b00389f..0dde6aaa 100644 --- a/tui/index.html +++ b/tui/index.html @@ -4,7 +4,7 @@ Use the TUI | OpenRAG - + diff --git a/uninstall/index.html b/uninstall/index.html index dba5c30a..67e0822e 100644 --- a/uninstall/index.html +++ b/uninstall/index.html @@ -4,7 +4,7 @@ Remove OpenRAG | OpenRAG - + diff --git a/upgrade/index.html b/upgrade/index.html index 71011d29..2e2c694b 100644 --- a/upgrade/index.html +++ b/upgrade/index.html @@ -4,7 +4,7 @@ Upgrade OpenRAG | OpenRAG - +