openrag/assets/js/e633a5ea.380cba07.js


			
				
				
					
						
						
						
							
							
							"use strict";
/**
 * Webpack chunk 9172 of the `openrag_docs` bundle.
 *
 * Registers module 9359 by pushing `[[chunkId], {moduleId: factory}]` onto the
 * shared `self.webpackChunkopenrag_docs` array (created here if it does not
 * exist yet). The module is the compiled form of
 * `docs/get-started/what-is-openrag.mdx` and exports, through getter
 * functions: `assets`, `contentTitle`, `default`, `frontMatter`, `metadata`
 * and `toc`.
 *
 * Every string literal is kept on a single physical line: a raw line break
 * inside a quoted string is a syntax error, so long literals must not be
 * hard-wrapped.
 */
(self.webpackChunkopenrag_docs = self.webpackChunkopenrag_docs || []).push([
  [9172],
  {
    /**
     * Module factory for the "What is OpenRAG?" page.
     *
     * @param {object} e - Module object supplied by the bundler runtime (unused here).
     * @param {object} n - Exports object that the getters below are attached to.
     * @param {Function} r - Bundler runtime `require`; also carries the `r` and `d` helpers.
     */
    9359: (e, n, r) => {
      // Flag the exports object via the runtime helper, then register lazy
      // getters so each export resolves to the binding declared further down.
      r.r(n),
        r.d(n, {
          assets: () => l,
          contentTitle: () => a,
          default: () => h,
          frontMatter: () => o,
          metadata: () => s,
          toc: () => c,
        });

      // Page metadata, embedded as a JSON string and parsed once at module load.
      const s = JSON.parse('{"id":"get-started/what-is-openrag","title":"What is OpenRAG?","description":"OpenRAG is an open-source package for building agentic RAG systems that integrates with a wide range of orchestration tools, vector databases, and LLM providers.","source":"@site/docs/get-started/what-is-openrag.mdx","sourceDirName":"get-started","slug":"/","permalink":"/","draft":false,"unlisted":false,"editUrl":"https://github.com/openrag/openrag/tree/main/docs/docs/get-started/what-is-openrag.mdx","tags":[],"version":"current","frontMatter":{"title":"What is OpenRAG?","slug":"/"},"sidebar":"tutorialSidebar","next":{"title":"Install OpenRAG with TUI","permalink":"/install"}}');

      // t: module 4848, used below for `jsx`, `jsxs` and `Fragment`.
      // i: module 8453, whose `R()` result is spread into the component map
      //    (presumably the MDX components hook -- confirm against the bundle).
      var t = r(4848),
        i = r(8453);

      // Front matter, content title (none), assets (none) and table of contents.
      const o = { title: "What is OpenRAG?", slug: "/" },
        a = void 0,
        l = {},
        c = [
          { value: "OpenRAG architecture", id: "openrag-architecture", level: 2 },
          { value: "Performance expectations", id: "performance-expectations", level: 2 },
        ];

      /**
       * Builds the page body as a fragment of JSX-runtime elements.
       *
       * @param {object} e - Props; `e.components` overrides entries of the tag map.
       * @returns {*} The element returned by `t.jsxs(t.Fragment, ...)`.
       */
      function d(e) {
        // Default tag names, overridden first by `i.R()` and then by the
        // caller-supplied `e.components` (later spreads win).
        const n = {
          a: "a",
          code: "code",
          h2: "h2",
          li: "li",
          mermaid: "mermaid",
          p: "p",
          pre: "pre",
          strong: "strong",
          ul: "ul",
          ...(0, i.R)(),
          ...e.components,
        };
        return (0, t.jsxs)(t.Fragment, {
          children: [
            (0, t.jsx)(n.p, { children: "OpenRAG is an open-source package for building agentic RAG systems that integrates with a wide range of orchestration tools, vector databases, and LLM providers." }),
            "\n",
            (0, t.jsx)(n.p, { children: "OpenRAG connects and amplifies three popular, proven open-source projects into one powerful platform:" }),
            "\n",
            (0, t.jsxs)(n.ul, {
              children: [
                "\n",
                (0, t.jsxs)(n.li, {
                  children: [
                    "\n",
                    (0, t.jsxs)(n.p, {
                      children: [
                        (0, t.jsx)(n.a, { href: "https://docs.langflow.org", children: "Langflow" }),
                        ": Langflow is a versatile tool for building and deploying AI agents and MCP servers. It supports all major LLMs, vector databases, and a growing library of AI tools.",
                      ],
                    }),
                    "\n",
                  ],
                }),
                "\n",
                (0, t.jsxs)(n.li, {
                  children: [
                    "\n",
                    (0, t.jsxs)(n.p, {
                      children: [
                        (0, t.jsx)(n.a, { href: "https://docs.opensearch.org/latest/", children: "OpenSearch" }),
                        ": OpenSearch is a community-driven, Apache 2.0-licensed open source search and analytics suite that makes it easy to ingest, search, visualize, and analyze data.",
                      ],
                    }),
                    "\n",
                  ],
                }),
                "\n",
                (0, t.jsxs)(n.li, {
                  children: [
                    "\n",
                    (0, t.jsxs)(n.p, {
                      children: [
                        (0, t.jsx)(n.a, { href: "https://docling-project.github.io/docling/", children: "Docling" }),
                        ": Docling simplifies document processing, parsing diverse formats \u2014 including advanced PDF understanding \u2014 and providing seamless integrations with the gen AI ecosystem.",
                      ],
                    }),
                    "\n",
                  ],
                }),
                "\n",
              ],
            }),
            "\n",
            (0, t.jsx)(n.p, { children: "OpenRAG builds on Langflow's familiar interface while adding OpenSearch for vector storage and Docling for simplified document parsing, with opinionated flows that serve as ready-to-use recipes for ingestion, retrieval, and generation from popular sources like Google Drive, OneDrive, and Sharepoint." }),
            "\n",
            (0, t.jsx)(n.p, { children: "What's more, every part of the stack is swappable. Write your own custom components in Python, try different language models, and customize your flows to build an agentic RAG system." }),
            "\n",
            (0, t.jsxs)(n.p, {
              children: [
                "Ready to get started? ",
                (0, t.jsx)(n.a, { href: "/install", children: "Install OpenRAG" }),
                " and then run the ",
                (0, t.jsx)(n.a, { href: "/quickstart", children: "Quickstart" }),
                " to create a powerful RAG pipeline.",
              ],
            }),
            "\n",
            (0, t.jsx)(n.h2, { id: "openrag-architecture", children: "OpenRAG architecture" }),
            "\n",
            (0, t.jsxs)(n.p, {
              children: [
                "OpenRAG deploys and orchestrates a lightweight, container-based architecture that combines ",
                (0, t.jsx)(n.strong, { children: "Langflow" }),
                ", ",
                (0, t.jsx)(n.strong, { children: "OpenSearch" }),
                ", and ",
                (0, t.jsx)(n.strong, { children: "Docling" }),
                " into a cohesive RAG platform.",
              ],
            }),
            "\n",
            // Mermaid diagram source; `\x3e` is the escaped form of `>` in the arrows.
            (0, t.jsx)(n.mermaid, {
              value: '%%{init: {\'theme\': \'dark\', \'flowchart\': {\'useMaxWidth\': false, \'width\': \'100%\'}}}%%\nflowchart LR\n    %% Encapsulate the entire diagram in a rectangle with black background\n    subgraph DiagramContainer["OpenRAG Architecture"]\n        style DiagramContainer fill:#000000,stroke:#ffffff,color:white,stroke-width:2px\n\n        %% Define subgraphs for the different sections\n        subgraph LocalService["Local Service"]\n            DoclingSrv[Docling Serve]\n            style DoclingSrv fill:#a8d1ff,stroke:#0066cc,color:black,stroke-width:2px\n        end\n\n        subgraph Containers\n            Backend["OpenRAG Backend"]\n            style Backend fill:#e6ffe6,stroke:#006600,color:black,stroke-width:2px\n            Langflow\n            style Langflow fill:#e6ffe6,stroke:#006600,color:black,stroke-width:2px\n            OpenSearch\n            style OpenSearch fill:#e6ffe6,stroke:#006600,color:black,stroke-width:2px\n            Frontend["OpenRAG Frontend"]\n            style Frontend fill:#ffcc99,stroke:#ff6600,color:black,stroke-width:2px\n        end\n\n        subgraph ThirdParty["Third Party Services"]\n            GoogleDrive["Google Drive"]\n            style GoogleDrive fill:#f2e6ff,stroke:#6600cc,color:black,stroke-width:2px\n            OneDrive\n            style OneDrive fill:#f2e6ff,stroke:#6600cc,color:black,stroke-width:2px\n            SharePoint["SharePoint"]\n            style SharePoint fill:#f2e6ff,stroke:#6600cc,color:black,stroke-width:2px\n            More[...]\n            style More fill:#f2e6ff,stroke:#6600cc,color:black,stroke-width:2px\n        end\n\n        %% Define connections\n        DoclingSrv --\x3e Backend\n        GoogleDrive --\x3e Backend\n        OneDrive --\x3e Backend\n        SharePoint --\x3e Backend\n        More --\x3e Backend\n        Backend --\x3e Langflow\n        Langflow <--\x3e OpenSearch\n        Backend <--\x3e Frontend\n\n        %% Style subgraphs\n        style LocalService fill:#333333,stroke:#666666,color:white,stroke-width:2px\n        style Containers fill:#444444,stroke:#666666,color:white,stroke-width:2px\n        style ThirdParty fill:#333333,stroke:#666666,color:white,stroke-width:2px\n    end',
            }),
            "\n",
            (0, t.jsxs)(n.p, {
              children: [
                "The ",
                (0, t.jsx)(n.strong, { children: "OpenRAG Backend" }),
                " is the central orchestration service that coordinates all other components.",
              ],
            }),
            "\n",
            (0, t.jsxs)(n.p, {
              children: [
                (0, t.jsx)(n.strong, { children: "Langflow" }),
                " provides a visual workflow engine for building AI agents, and connects to ",
                (0, t.jsx)(n.strong, { children: "OpenSearch" }),
                " for vector storage and retrieval.",
              ],
            }),
            "\n",
            (0, t.jsxs)(n.p, {
              children: [
                (0, t.jsx)(n.strong, { children: "Docling Serve" }),
                " is a local document processing service managed by the ",
                (0, t.jsx)(n.strong, { children: "OpenRAG Backend" }),
                ".",
              ],
            }),
            "\n",
            (0, t.jsxs)(n.p, {
              children: [
                (0, t.jsx)(n.strong, { children: "Third Party Services" }),
                " like ",
                (0, t.jsx)(n.strong, { children: "Google Drive" }),
                " connect to the ",
                (0, t.jsx)(n.strong, { children: "OpenRAG Backend" }),
                " through OAuth authentication, allowing synchronization of cloud storage with the OpenSearch knowledge base.",
              ],
            }),
            "\n",
            (0, t.jsxs)(n.p, {
              children: [
                "The ",
                (0, t.jsx)(n.strong, { children: "OpenRAG Frontend" }),
                " provides the user interface for interacting with the system.",
              ],
            }),
            "\n",
            (0, t.jsx)(n.h2, { id: "performance-expectations", children: "Performance expectations" }),
            "\n",
            (0, t.jsx)(n.p, { children: "On a local VM with 7 vCPUs and 8\u202fGiB RAM, OpenRAG ingested approximately 5.03 GB across 1,083 files in about 42 minutes.\nThis equates to approximately 2.4 documents per second." }),
            "\n",
            (0, t.jsx)(n.p, { children: "You can generally expect equal or better performance on developer laptops and significantly faster on servers.\nThroughput scales with CPU cores, memory, storage speed, and configuration choices such as embedding model, chunk size and overlap, and concurrency." }),
            "\n",
            (0, t.jsx)(n.p, { children: "This test returned 12 errors (approximately 1.1%).\nAll errors were file\u2011specific, and they didn't stop the pipeline." }),
            "\n",
            (0, t.jsx)(n.p, { children: "Ingestion dataset:" }),
            "\n",
            (0, t.jsxs)(n.ul, {
              children: [
                "\n",
                (0, t.jsx)(n.li, { children: "Total files: 1,083 items mounted" }),
                "\n",
                (0, t.jsx)(n.li, { children: "Total size on disk: 5,026,474,862 bytes (approximately 5.03 GB)" }),
                "\n",
              ],
            }),
            "\n",
            (0, t.jsx)(n.p, { children: "Hardware specifications:" }),
            "\n",
            (0, t.jsxs)(n.ul, {
              children: [
                "\n",
                (0, t.jsx)(n.li, { children: "Machine: Apple M4 Pro" }),
                "\n",
                (0, t.jsxs)(n.li, {
                  children: [
                    "Podman VM:",
                    "\n",
                    (0, t.jsxs)(n.ul, {
                      children: [
                        "\n",
                        (0, t.jsxs)(n.li, {
                          children: ["Name: ", (0, t.jsx)(n.code, { children: "podman-machine-default" })],
                        }),
                        "\n",
                        (0, t.jsxs)(n.li, {
                          children: ["Type: ", (0, t.jsx)(n.code, { children: "applehv" })],
                        }),
                        "\n",
                        (0, t.jsx)(n.li, { children: "vCPUs: 7" }),
                        "\n",
                        (0, t.jsx)(n.li, { children: "Memory: 8 GiB" }),
                        "\n",
                        (0, t.jsx)(n.li, { children: "Disk size: 100 GiB" }),
                        "\n",
                      ],
                    }),
                    "\n",
                  ],
                }),
                "\n",
              ],
            }),
            "\n",
            (0, t.jsx)(n.p, { children: "Test results:" }),
            "\n",
            (0, t.jsx)(n.pre, {
              children: (0, t.jsx)(n.code, {
                className: "language-text",
                children: "2025-09-24T22:40:45.542190Z /app/src/main.py:231 Ingesting default documents when ready disable_langflow_ingest=False\n2025-09-24T22:40:45.546385Z /app/src/main.py:270 Using Langflow ingestion pipeline for default documents file_count=1082\n...\n2025-09-24T23:19:44.866365Z /app/src/main.py:351 Langflow ingestion completed success_count=1070 error_count=12 total_files=1082\n",
              }),
            }),
            "\n",
            (0, t.jsx)(n.p, { children: "Elapsed time: ~42 minutes 15 seconds (2,535 seconds)" }),
            "\n",
            (0, t.jsx)(n.p, { children: "Throughput: ~2.4 documents/second" }),
          ],
        });
      }

      /**
       * Default export: renders the page body, wrapped in a `wrapper`
       * component when one is present in `i.R()` or `e.components`.
       *
       * @param {object} [e={}] - Props forwarded to the body (and to the wrapper, if any).
       * @returns {*} The wrapped element, or the bare result of `d(e)` when no wrapper is set.
       */
      function h(e = {}) {
        const { wrapper: n } = { ...(0, i.R)(), ...e.components };
        return n ? (0, t.jsx)(n, { ...e, children: (0, t.jsx)(d, { ...e }) }) : d(e);
      }
    },
  },
]);
						
						
					
				
				
					
						Reference in a new issue
					
					View git blame
					Copy permalink