From 340bef217572e60585bb0425b41e562a0c24b135 Mon Sep 17 00:00:00 2001 From: Brent O'Neill Date: Mon, 29 Sep 2025 11:16:55 -0600 Subject: [PATCH 01/77] fix: issues with chunk copy --- frontend/src/app/knowledge/chunks/page.tsx | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx index cdc9fcc3..d680ebc5 100644 --- a/frontend/src/app/knowledge/chunks/page.tsx +++ b/frontend/src/app/knowledge/chunks/page.tsx @@ -2,6 +2,7 @@ import { ArrowLeft, + Check, Copy, File as FileIcon, Loader2, @@ -41,6 +42,9 @@ function ChunksPageContent() { ChunkResult[] >([]); const [selectedChunks, setSelectedChunks] = useState>(new Set()); + const [activeCopiedChunkIndex, setActiveCopiedChunkIndex] = useState< + number | null + >(null); // Calculate average chunk length const averageChunkLength = useMemo( @@ -70,8 +74,11 @@ function ChunksPageContent() { } }, [queryInputText, chunks]); - const handleCopy = useCallback((text: string) => { - navigator.clipboard.writeText(text); + const handleCopy = useCallback((text: string, index: number) => { + // Trime whitespace and remove new lines/tabs for cleaner copy + navigator.clipboard.writeText(text.trim().replace(/[\n\r\t]/gm, "")); + setActiveCopiedChunkIndex(index); + setTimeout(() => setActiveCopiedChunkIndex(null), 30 * 1000); // 30 seconds }, []); const fileData = (data as File[]).find( @@ -86,7 +93,7 @@ function ChunksPageContent() { } setChunks(fileData?.chunks || []); - }, [data, filename]); + }, [data, filename, setChunks, fileData]); // Set selected state for all checkboxes when selectAll changes useEffect(() => { @@ -238,11 +245,15 @@ function ChunksPageContent() {
From 677a01d634cc0f670a308d4da3cb7842be243f38 Mon Sep 17 00:00:00 2001 From: Brent O'Neill Date: Mon, 29 Sep 2025 11:17:41 -0600 Subject: [PATCH 02/77] fix typo --- frontend/src/app/knowledge/chunks/page.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx index d680ebc5..264af4dc 100644 --- a/frontend/src/app/knowledge/chunks/page.tsx +++ b/frontend/src/app/knowledge/chunks/page.tsx @@ -75,7 +75,7 @@ function ChunksPageContent() { }, [queryInputText, chunks]); const handleCopy = useCallback((text: string, index: number) => { - // Trime whitespace and remove new lines/tabs for cleaner copy + // Trim whitespace and remove new lines/tabs for cleaner copy navigator.clipboard.writeText(text.trim().replace(/[\n\r\t]/gm, "")); setActiveCopiedChunkIndex(index); setTimeout(() => setActiveCopiedChunkIndex(null), 30 * 1000); // 30 seconds From f45441a9e5986c4ecbc2252387ed376b6f37426a Mon Sep 17 00:00:00 2001 From: Mendon Kissling <59585235+mendonk@users.noreply.github.com> Date: Mon, 29 Sep 2025 17:09:53 -0400 Subject: [PATCH 03/77] slight-cleanup --- docs/docs/configure/configuration.md | 127 +++++++++++++++++---------- 1 file changed, 80 insertions(+), 47 deletions(-) diff --git a/docs/docs/configure/configuration.md b/docs/docs/configure/configuration.md index 2387c2ce..79a5226a 100644 --- a/docs/docs/configure/configuration.md +++ b/docs/docs/configure/configuration.md @@ -1,48 +1,20 @@ --- -title: Configuration +title: Environment variables and configuration values slug: /configure/configuration --- -# Configuration - OpenRAG supports multiple configuration methods with the following priority: 1. **Environment Variables** (highest priority) 2. **Configuration File** (`config.yaml`) -3. **Langflow Flow Settings** (runtime override) -4. **Default Values** (fallback) +3. 
**Default Values** (fallback) -## Configuration File +## Environment variables -Create a `config.yaml` file in the project root to configure OpenRAG: +Environment variables will override configuration file settings. +You can create a `.env` file in the project root to set these variables. -```yaml -# OpenRAG Configuration File -provider: - model_provider: "openai" # openai, anthropic, azure, etc. - api_key: "your-api-key" # or use OPENAI_API_KEY env var - -knowledge: - embedding_model: "text-embedding-3-small" - chunk_size: 1000 - chunk_overlap: 200 - ocr: true - picture_descriptions: false - -agent: - llm_model: "gpt-4o-mini" - system_prompt: "You are a helpful AI assistant..." -``` - -## Environment Variables - -Environment variables will override configuration file settings. You can still use `.env` files: - -```bash -cp .env.example .env -``` - -## Required Variables +## Required variables | Variable | Description | | ----------------------------- | ------------------------------------------- | @@ -54,7 +26,7 @@ cp .env.example .env | `LANGFLOW_INGEST_FLOW_ID` | ID of your Langflow ingestion flow | | `NUDGES_FLOW_ID` | ID of your Langflow nudges/suggestions flow | -## Ingestion Configuration +## Ingestion configuration | Variable | Description | | ------------------------------ | ------------------------------------------------------ | @@ -63,10 +35,14 @@ cp .env.example .env - `false` or unset: Uses Langflow pipeline (upload → ingest → delete) - `true`: Uses traditional OpenRAG processor for document ingestion -## Optional Variables +## Optional variables | Variable | Description | | ------------------------------------------------------------------------- | ------------------------------------------------------------------ | +| `OPENSEARCH_HOST` | OpenSearch host (default: `localhost`) | +| `OPENSEARCH_PORT` | OpenSearch port (default: `9200`) | +| `OPENSEARCH_USERNAME` | OpenSearch username (default: `admin`) | +| `LANGFLOW_URL` | Langflow URL (default: 
`http://localhost:7860`) | | `LANGFLOW_PUBLIC_URL` | Public URL for Langflow (default: `http://localhost:7860`) | | `GOOGLE_OAUTH_CLIENT_ID` / `GOOGLE_OAUTH_CLIENT_SECRET` | Google OAuth authentication | | `MICROSOFT_GRAPH_OAUTH_CLIENT_ID` / `MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET` | Microsoft OAuth | @@ -75,20 +51,27 @@ cp .env.example .env | `SESSION_SECRET` | Session management (default: auto-generated, change in production) | | `LANGFLOW_KEY` | Explicit Langflow API key (auto-generated if not provided) | | `LANGFLOW_SECRET_KEY` | Secret key for Langflow internal operations | +| `DOCLING_OCR_ENGINE` | OCR engine for document processing | +| `LANGFLOW_AUTO_LOGIN` | Enable auto-login for Langflow (default: `False`) | +| `LANGFLOW_NEW_USER_IS_ACTIVE` | New users are active by default (default: `False`) | +| `LANGFLOW_ENABLE_SUPERUSER_CLI` | Enable superuser CLI (default: `False`) | +| `OPENRAG_DOCUMENTS_PATHS` | Document paths for ingestion (default: `./documents`) | -## OpenRAG Configuration Variables +## OpenRAG configuration variables These environment variables override settings in `config.yaml`: -### Provider Settings +### Provider settings -| Variable | Description | Default | -| ------------------ | ---------------------------------------- | -------- | -| `MODEL_PROVIDER` | Model provider (openai, anthropic, etc.) | `openai` | -| `PROVIDER_API_KEY` | API key for the model provider | | -| `OPENAI_API_KEY` | OpenAI API key (backward compatibility) | | +| Variable | Description | Default | +| -------------------- | ---------------------------------------- | -------- | +| `MODEL_PROVIDER` | Model provider (openai, anthropic, etc.) 
| `openai` | +| `PROVIDER_API_KEY` | API key for the model provider | | +| `PROVIDER_ENDPOINT` | Custom provider endpoint (e.g., Watson) | | +| `PROVIDER_PROJECT_ID`| Project ID for providers (e.g., Watson) | | +| `OPENAI_API_KEY` | OpenAI API key (backward compatibility) | | -### Knowledge Settings +### Knowledge settings | Variable | Description | Default | | ------------------------------ | --------------------------------------- | ------------------------ | @@ -98,11 +81,61 @@ These environment variables override settings in `config.yaml`: | `OCR_ENABLED` | Enable OCR for image processing | `true` | | `PICTURE_DESCRIPTIONS_ENABLED` | Enable picture descriptions | `false` | -### Agent Settings +### Agent settings | Variable | Description | Default | | --------------- | --------------------------------- | ------------------------ | | `LLM_MODEL` | Language model for the chat agent | `gpt-4o-mini` | -| `SYSTEM_PROMPT` | System prompt for the agent | Default assistant prompt | +| `SYSTEM_PROMPT` | System prompt for the agent | "You are a helpful AI assistant with access to a knowledge base. Answer questions based on the provided context." | -See `.env.example` for a complete list with descriptions, and `docker-compose*.yml` for runtime usage. +See `docker-compose-*.yml` files for runtime usage examples. + +## Configuration file + +Create a `config.yaml` file in the project root to configure OpenRAG: + +```yaml +# OpenRAG Configuration File +provider: + model_provider: "openai" # openai, anthropic, azure, etc. 
+ api_key: "your-api-key" # or use OPENAI_API_KEY env var + endpoint: "" # For custom provider endpoints (e.g., Watson/IBM) + project_id: "" # For providers that need project IDs (e.g., Watson/IBM) + +knowledge: + embedding_model: "text-embedding-3-small" + chunk_size: 1000 + chunk_overlap: 200 + doclingPresets: "standard" # standard, ocr, picture_description, VLM + ocr: true + picture_descriptions: false + +agent: + llm_model: "gpt-4o-mini" + system_prompt: "You are a helpful AI assistant with access to a knowledge base. Answer questions based on the provided context." +``` + +## Default Values and Fallbacks + +When no environment variables or configuration file values are provided, OpenRAG uses default values. +These values can be found in the code base at the following locations. + +### OpenRAG configuration defaults + +These values are defined in `src/config/config_manager.py`. + +### System configuration defaults + +These fallback values are defined in `src/config/settings.py`. + +### TUI default values + +These values are defined in `src/tui/managers/env_manager.py`. + +### Frontend default values + +These values are defined in `frontend/src/lib/constants.ts`. + +### Docling preset configurations + +These values are defined in `src/api/settings.py`. 
\ No newline at end of file From 90e5ed3d2dfe86e6b93ff24a215aae60227bfff0 Mon Sep 17 00:00:00 2001 From: Mendon Kissling <59585235+mendonk@users.noreply.github.com> Date: Mon, 29 Sep 2025 17:25:03 -0400 Subject: [PATCH 04/77] tweaks-override-defaults --- README.md | 2 +- .../{configure => reference}/configuration.md | 27 ++++++++++++------- docs/sidebars.js | 12 +++------ 3 files changed, 22 insertions(+), 19 deletions(-) rename docs/docs/{configure => reference}/configuration.md (88%) diff --git a/README.md b/README.md index d79011a0..bc020dae 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ LANGFLOW_CHAT_FLOW_ID=your_chat_flow_id LANGFLOW_INGEST_FLOW_ID=your_ingest_flow_id NUDGES_FLOW_ID=your_nudges_flow_id ``` -See extended configuration, including ingestion and optional variables: [docs/configure/configuration.md](docs/docs/configure/configuration.md) +See extended configuration, including ingestion and optional variables: [docs/reference/configuration.md](docs/docs/reference/configuration.md) ### 3. Start OpenRAG ```bash diff --git a/docs/docs/configure/configuration.md b/docs/docs/reference/configuration.md similarity index 88% rename from docs/docs/configure/configuration.md rename to docs/docs/reference/configuration.md index 79a5226a..a8aeff5f 100644 --- a/docs/docs/configure/configuration.md +++ b/docs/docs/reference/configuration.md @@ -1,18 +1,19 @@ --- title: Environment variables and configuration values -slug: /configure/configuration +slug: /reference/configuration --- -OpenRAG supports multiple configuration methods with the following priority: +OpenRAG supports multiple configuration methods with the following priority, from highest to lowest: -1. **Environment Variables** (highest priority) -2. **Configuration File** (`config.yaml`) -3. **Default Values** (fallback) +1. [Environment variables](#environment-variables) +2. [Configuration file (`config.yaml`)](#configuration-file) +3. 
[Langflow runtime overrides](#langflow-runtime-overrides) +4. [Default or fallback values](#default-values-and-fallbacks) ## Environment variables -Environment variables will override configuration file settings. -You can create a `.env` file in the project root to set these variables. +Environment variables override configuration file settings. +You can create a `.env` file in the project root to set these variables, or set them in the TUI, which will create a `.env` file for you. ## Required variables @@ -90,7 +91,15 @@ These environment variables override settings in `config.yaml`: See `docker-compose-*.yml` files for runtime usage examples. -## Configuration file +## Langflow runtime overrides + +Langflow runtime overrides allow you to modify component settings at runtime without changing the base configuration. + +Runtime overrides are implemented through **tweaks** - parameter modifications that are passed to specific Langflow components during flow execution. + +For more information on tweaks, see [Input schema (tweaks)](https://docs.langflow.org/concepts-publish#input-schema). + +## Configuration file (`config.yaml`) {#configuration-file} Create a `config.yaml` file in the project root to configure OpenRAG: @@ -115,7 +124,7 @@ agent: system_prompt: "You are a helpful AI assistant with access to a knowledge base. Answer questions based on the provided context." ``` -## Default Values and Fallbacks +## Default values and fallbacks When no environment variables or configuration file values are provided, OpenRAG uses default values. These values can be found in the code base at the following locations. 
diff --git a/docs/sidebars.js b/docs/sidebars.js index 3048cb70..1e81a3cd 100644 --- a/docs/sidebars.js +++ b/docs/sidebars.js @@ -65,19 +65,13 @@ const sidebars = { }, { type: "category", - label: "Configuration", + label: "Reference", items: [ { type: "doc", - id: "configure/configuration", - label: "Environment Variables" + id: "reference/configuration", + label: "Environment Variables and Configuration File" }, - ], - }, - { - type: "category", - label: "Reference", - items: [ { type: "doc", id: "reference/troubleshooting", From 5a4cb3fa85476627c1c690d0e17c4b83a928b08f Mon Sep 17 00:00:00 2001 From: Mendon Kissling <59585235+mendonk@users.noreply.github.com> Date: Mon, 29 Sep 2025 17:46:11 -0400 Subject: [PATCH 05/77] fix-build-errors --- docs/docs/get-started/install.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/docs/get-started/install.mdx b/docs/docs/get-started/install.mdx index dcb5c5f1..65693875 100644 --- a/docs/docs/get-started/install.mdx +++ b/docs/docs/get-started/install.mdx @@ -178,8 +178,8 @@ To install OpenRAG with Docker Compose: LANGFLOW_SUPERUSER_PASSWORD=your_langflow_password LANGFLOW_SECRET_KEY=your_secret_key ``` - For more information on configuring OpenRAG with environment variables, see [Environment variables](/configure/configuration). - For additional configuration values, including `config.yaml`, see [Configuration](/configure/configuration). + For more information on configuring OpenRAG with environment variables, see [Environment variables](/reference/configuration). + For additional configuration values, including `config.yaml`, see [Configuration](/reference/configuration). 4. Deploy OpenRAG with Docker Compose based on your deployment type. 
From ddeeb7e5b6bbfd4fd86950aa83574e1c29499bc6 Mon Sep 17 00:00:00 2001 From: Mendon Kissling <59585235+mendonk@users.noreply.github.com> Date: Tue, 30 Sep 2025 09:55:11 -0400 Subject: [PATCH 06/77] fix-error --- docs/docs/get-started/docker.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/docs/get-started/docker.mdx b/docs/docs/get-started/docker.mdx index 84f0fca6..594a26b9 100644 --- a/docs/docs/get-started/docker.mdx +++ b/docs/docs/get-started/docker.mdx @@ -39,8 +39,8 @@ To install OpenRAG with Docker Compose: LANGFLOW_SUPERUSER_PASSWORD=your_langflow_password LANGFLOW_SECRET_KEY=your_secret_key ``` - For more information on configuring OpenRAG with environment variables, see [Environment variables](/configure/configuration). - For additional configuration values, including `config.yaml`, see [Configuration](/configure/configuration). + For more information on configuring OpenRAG with environment variables, see [Environment variables](/reference/configuration). + For additional configuration values, including `config.yaml`, see [Configuration](/reference/configuration). 4. Deploy OpenRAG with Docker Compose based on your deployment type. 
From 717b864fec13bc486bb6002360a6e4e8acc76882 Mon Sep 17 00:00:00 2001 From: Mendon Kissling <59585235+mendonk@users.noreply.github.com> Date: Tue, 30 Sep 2025 10:21:42 -0400 Subject: [PATCH 07/77] partial-for-onboarding --- docs/docs/_partial-onboarding.mdx | 51 +++++++++++++++++++++++++++++++ docs/docs/get-started/docker.mdx | 7 +++-- docs/docs/get-started/install.mdx | 41 ++----------------------- 3 files changed, 59 insertions(+), 40 deletions(-) create mode 100644 docs/docs/_partial-onboarding.mdx diff --git a/docs/docs/_partial-onboarding.mdx b/docs/docs/_partial-onboarding.mdx new file mode 100644 index 00000000..aaead4ad --- /dev/null +++ b/docs/docs/_partial-onboarding.mdx @@ -0,0 +1,51 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +### Application onboarding + +The first time you start OpenRAG, whether using the TUI or a `.env` file, a `config.yaml` file is generated if OpenRAG detects one doesn't exist. +The `config.yaml` file controls application configuration, including language model and embedding model provider, Docling ingestion settings, and API keys. + +Values input during onboarding can be changed later in the OpenRAG **Settings** page, except for the language model and embedding model _provider_. The provider can only be selected during onboarding, and you must use the same provider for your language model and embedding model. + +1. Select your language model and embedding model provider, and complete the required fields. + **Your provider can only be selected once, and you must use the same provider for your language model and embedding model.** + The language model can be changed, but the embedding model cannot be changed. + To change your provider selection, you must restart OpenRAG and delete the `config.yaml` file. + + + + 2. If you already entered a value for `OPENAI_API_KEY` in the TUI in Step 5, enable **Get API key from environment variable**. + 3. 
Under **Advanced settings**, select your **Embedding Model** and **Language Model**. + 4. To load 2 sample PDFs, enable **Sample dataset**. + This is recommended, but not required. + 5. Click **Complete**. + + + + 2. Complete the fields for **watsonx.ai API Endpoint**, **IBM API key**, and **IBM Project ID**. + These values are found in your IBM watsonx deployment. + 3. Under **Advanced settings**, select your **Embedding Model** and **Language Model**. + 4. To load 2 sample PDFs, enable **Sample dataset**. + This is recommended, but not required. + 5. Click **Complete**. + + + + :::tip + Ollama is not included with OpenRAG. To install Ollama, see the [Ollama documentation](https://docs.ollama.com/). + ::: + 2. Enter your Ollama server's base URL address. + The default Ollama server address is `http://localhost:11434`. + Since OpenRAG is running in a container, you may need to change `localhost` to access services outside of the container. For example, change `http://localhost:11434` to `http://host.docker.internal:11434` to connect to Ollama. + OpenRAG automatically sends a test connection to your Ollama server to confirm connectivity. + 3. Select the **Embedding Model** and **Language Model** your Ollama server is running. + OpenRAG automatically lists the available models from your Ollama server. + 4. To load 2 sample PDFs, enable **Sample dataset**. + This is recommended, but not required. + 5. Click **Complete**. + + + + +6. Continue with the [Quickstart](/quickstart). \ No newline at end of file diff --git a/docs/docs/get-started/docker.mdx b/docs/docs/get-started/docker.mdx index 594a26b9..219e9814 100644 --- a/docs/docs/get-started/docker.mdx +++ b/docs/docs/get-started/docker.mdx @@ -3,6 +3,8 @@ title: Docker deployment slug: /get-started/docker --- +import PartialOnboarding from '@site/docs/_partial-onboarding.mdx'; + There are two different Docker Compose files. They deploy the same applications and containers, but to different environments. 
@@ -34,7 +36,6 @@ To install OpenRAG with Docker Compose: ```bash OPENSEARCH_PASSWORD=your_secure_password OPENAI_API_KEY=your_openai_api_key - LANGFLOW_SUPERUSER=admin LANGFLOW_SUPERUSER_PASSWORD=your_langflow_password LANGFLOW_SECRET_KEY=your_secret_key @@ -75,7 +76,9 @@ To install OpenRAG with Docker Compose: - **Backend API**: http://localhost:8000 - **Langflow**: http://localhost:7860 -Continue with the [Quickstart](/quickstart). +6. To use the OpenRAG application and continue with application onboarding, access the frontend at `http://localhost:3000`. + + ## Rebuild all Docker containers diff --git a/docs/docs/get-started/install.mdx b/docs/docs/get-started/install.mdx index 27cafb44..040f555f 100644 --- a/docs/docs/get-started/install.mdx +++ b/docs/docs/get-started/install.mdx @@ -5,6 +5,7 @@ slug: /install import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; +import PartialOnboarding from '@site/docs/_partial-onboarding.mdx'; OpenRAG can be installed in multiple ways: @@ -79,46 +80,10 @@ For more information on virtual environments, see [uv](https://docs.astral.sh/uv Command completed successfully ``` -7. To open the OpenRAG application, click **Open App**, press 6, or navigate to `http://localhost:3000`. +7. To open the OpenRAG application and continue with application onboarding, click **Open App**, press 6, or navigate to `http://localhost:3000`. The application opens. -8. Select your language model and embedding model provider, and complete the required fields. - **Your provider can only be selected once, and you must use the same provider for your language model and embedding model.** - The language model can be changed, but the embeddings model cannot be changed. - To change your provider selection, you must restart OpenRAG and delete the `config.yml` file. - - - 9. If you already entered a value for `OPENAI_API_KEY` in the TUI in Step 5, enable **Get API key from environment variable**. - 10. 
Under **Advanced settings**, select your **Embedding Model** and **Language Model**. - 11. To load 2 sample PDFs, enable **Sample dataset**. - This is recommended, but not required. - 12. Click **Complete**. - - - - 9. Complete the fields for **watsonx.ai API Endpoint**, **IBM API key**, and **IBM Project ID**. - These values are found in your IBM watsonx deployment. - 10. Under **Advanced settings**, select your **Embedding Model** and **Language Model**. - 11. To load 2 sample PDFs, enable **Sample dataset**. - This is recommended, but not required. - 12. Click **Complete**. - - - - 9. Enter your Ollama server's base URL address. - The default Ollama server address is `http://localhost:11434`. - Since OpenRAG is running in a container, you may need to change `localhost` to access services outside of the container. For example, change `http://localhost:11434` to `http://host.docker.internal:11434` to connect to Ollama. - OpenRAG automatically sends a test connection to your Ollama server to confirm connectivity. - 10. Select the **Embedding Model** and **Language Model** your Ollama server is running. - OpenRAG automatically lists the available models from your Ollama server. - 11. To load 2 sample PDFs, enable **Sample dataset**. - This is recommended, but not required. - 12. Click **Complete**. - - - - -13. Continue with the [Quickstart](/quickstart). 
+ ### Advanced Setup {#advanced-setup} From 4dd7f5722f42f14817238a1c9b2ec6948f21f01a Mon Sep 17 00:00:00 2001 From: Mendon Kissling <59585235+mendonk@users.noreply.github.com> Date: Tue, 30 Sep 2025 10:30:31 -0400 Subject: [PATCH 08/77] clarify-yaml-and-make-configuration-mdx --- .../{configuration.md => configuration.mdx} | 58 +++++++++---------- 1 file changed, 27 insertions(+), 31 deletions(-) rename docs/docs/reference/{configuration.md => configuration.mdx} (85%) diff --git a/docs/docs/reference/configuration.md b/docs/docs/reference/configuration.mdx similarity index 85% rename from docs/docs/reference/configuration.md rename to docs/docs/reference/configuration.mdx index a8aeff5f..815397e1 100644 --- a/docs/docs/reference/configuration.md +++ b/docs/docs/reference/configuration.mdx @@ -5,8 +5,8 @@ slug: /reference/configuration OpenRAG supports multiple configuration methods with the following priority, from highest to lowest: -1. [Environment variables](#environment-variables) -2. [Configuration file (`config.yaml`)](#configuration-file) +1. [Environment variables](#environment-variables) - Environment variables in the `.env` file control Langflow authentication, OAuth settings, and the required OpenAI API key. +2. [Configuration file (`config.yaml`)](#configuration-file) - The `config.yaml` file is generated with values input during [Application onboarding](/install#application-onboarding). If the same value is available in `.env` and `config.yaml`, the value in `.env` takes precedence. 3. [Langflow runtime overrides](#langflow-runtime-overrides) 4. 
[Default or fallback values](#default-values-and-fallbacks) @@ -58,9 +58,7 @@ You can create a `.env` file in the project root to set these variables, or set | `LANGFLOW_ENABLE_SUPERUSER_CLI` | Enable superuser CLI (default: `False`) | | `OPENRAG_DOCUMENTS_PATHS` | Document paths for ingestion (default: `./documents`) | -## OpenRAG configuration variables - -These environment variables override settings in `config.yaml`: +## OpenRAG configuration variables {#openrag-config-variables} ### Provider settings @@ -89,7 +87,30 @@ These environment variables override settings in `config.yaml`: | `LLM_MODEL` | Language model for the chat agent | `gpt-4o-mini` | | `SYSTEM_PROMPT` | System prompt for the agent | "You are a helpful AI assistant with access to a knowledge base. Answer questions based on the provided context." | -See `docker-compose-*.yml` files for runtime usage examples. +## Configuration file (`config.yaml) {#configuration-file} + +The `config.yaml` file created during [Application onboarding](/install#application-onboarding) can control the variables in [OpenRAG configuration variables](#openrag-configuration-variables-openrag-config-variables), but is overridden by the `.env` if the variable is present both files. +The `config.yaml` file controls application configuration, including language model and embedding model provider, Docling ingestion settings, and API keys. + +```yaml +config.yaml: +provider: + model_provider: openai + api_key: ${PROVIDER_API_KEY} # optional: can be literal instead + endpoint: https://api.example.com + project_id: my-project + +knowledge: + embedding_model: text-embedding-3-small + chunk_size: 1000 + chunk_overlap: 200 + ocr: true + picture_descriptions: false + +agent: + llm_model: gpt-4o-mini + system_prompt: "You are a helpful AI assistant..." 
+``` ## Langflow runtime overrides @@ -99,31 +120,6 @@ Runtime overrides are implemented through **tweaks** - parameter modifications t For more information on tweaks, see [Input schema (tweaks)](https://docs.langflow.org/concepts-publish#input-schema). -## Configuration file (`config.yaml) {#configuration-file} - -Create a `config.yaml` file in the project root to configure OpenRAG: - -```yaml -# OpenRAG Configuration File -provider: - model_provider: "openai" # openai, anthropic, azure, etc. - api_key: "your-api-key" # or use OPENAI_API_KEY env var - endpoint: "" # For custom provider endpoints (e.g., Watson/IBM) - project_id: "" # For providers that need project IDs (e.g., Watson/IBM) - -knowledge: - embedding_model: "text-embedding-3-small" - chunk_size: 1000 - chunk_overlap: 200 - doclingPresets: "standard" # standard, ocr, picture_description, VLM - ocr: true - picture_descriptions: false - -agent: - llm_model: "gpt-4o-mini" - system_prompt: "You are a helpful AI assistant with access to a knowledge base. Answer questions based on the provided context." -``` - ## Default values and fallbacks When no environment variables or configuration file values are provided, OpenRAG uses default values. From 9056461523319ab145428893a6aeca2166964596 Mon Sep 17 00:00:00 2001 From: Mendon Kissling <59585235+mendonk@users.noreply.github.com> Date: Tue, 30 Sep 2025 10:32:39 -0400 Subject: [PATCH 09/77] better-title --- docs/docs/get-started/install.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/get-started/install.mdx b/docs/docs/get-started/install.mdx index 040f555f..0bb5919d 100644 --- a/docs/docs/get-started/install.mdx +++ b/docs/docs/get-started/install.mdx @@ -21,7 +21,7 @@ OpenRAG can be installed in multiple ways: - [Docker Compose](https://docs.docker.com/compose/install/) installed. 
If using Podman, use [podman-compose](https://docs.podman.io/en/latest/markdown/podman-compose.1.html) or alias Docker compose commands to Podman commands. - For GPU support: (TBD) -## Python wheel {#install-python-wheel} +## Install the OpenRAG Python wheel {#install-python-wheel} The Python wheel is currently available internally, but will be available on PyPI at launch. The wheel installs the OpenRAG wheel, which includes the TUI for installing, running, and managing OpenRAG. From f9159b67127b27377620a5396c470774b31bb7e2 Mon Sep 17 00:00:00 2001 From: Mendon Kissling <59585235+mendonk@users.noreply.github.com> Date: Tue, 30 Sep 2025 10:33:30 -0400 Subject: [PATCH 10/77] docs-broken-anchor --- docs/docs/reference/configuration.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/reference/configuration.mdx b/docs/docs/reference/configuration.mdx index 815397e1..fc29e2f7 100644 --- a/docs/docs/reference/configuration.mdx +++ b/docs/docs/reference/configuration.mdx @@ -89,7 +89,7 @@ You can create a `.env` file in the project root to set these variables, or set ## Configuration file (`config.yaml) {#configuration-file} -The `config.yaml` file created during [Application onboarding](/install#application-onboarding) can control the variables in [OpenRAG configuration variables](#openrag-configuration-variables-openrag-config-variables), but is overridden by the `.env` if the variable is present both files. +The `config.yaml` file created during [Application onboarding](/install#application-onboarding) can control the variables in [OpenRAG configuration variables](#openrag-config-variables), but is overridden by the `.env` file if the variable is present in both files. The `config.yaml` file controls application configuration, including language model and embedding model provider, Docling ingestion settings, and API keys. 
```yaml From bc055319affb26f10f34256232ad04c722d01dc5 Mon Sep 17 00:00:00 2001 From: Deon Sanchez <69873175+deon-sanchez@users.noreply.github.com> Date: Tue, 30 Sep 2025 09:45:39 -0600 Subject: [PATCH 11/77] fixed empty state --- frontend/src/app/knowledge/page.tsx | 47 ++++++++++++++--------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/frontend/src/app/knowledge/page.tsx b/frontend/src/app/knowledge/page.tsx index 5155f4e2..dbd7ac2f 100644 --- a/frontend/src/app/knowledge/page.tsx +++ b/frontend/src/app/knowledge/page.tsx @@ -55,7 +55,7 @@ function SearchPage() { const { data = [], isFetching } = useGetSearchQuery( parsedFilterData?.query || "*", - parsedFilterData, + parsedFilterData ); const handleTableSearch = (e: ChangeEvent) => { @@ -63,7 +63,7 @@ function SearchPage() { }; // Convert TaskFiles to File format and merge with backend results - const taskFilesAsFiles: File[] = taskFiles.map((taskFile) => { + const taskFilesAsFiles: File[] = taskFiles.map(taskFile => { return { filename: taskFile.filename, mimetype: taskFile.mimetype, @@ -76,11 +76,11 @@ function SearchPage() { const backendFiles = data as File[]; - const filteredTaskFiles = taskFilesAsFiles.filter((taskFile) => { + const filteredTaskFiles = taskFilesAsFiles.filter(taskFile => { return ( taskFile.status !== "active" && !backendFiles.some( - (backendFile) => backendFile.filename === taskFile.filename, + backendFile => backendFile.filename === taskFile.filename ) ); }); @@ -106,8 +106,8 @@ function SearchPage() { onClick={() => { router.push( `/knowledge/chunks?filename=${encodeURIComponent( - data?.filename ?? "", - )}`, + data?.filename ?? "" + )}` ); }} > @@ -122,7 +122,7 @@ function SearchPage() { { field: "size", headerName: "Size", - valueFormatter: (params) => + valueFormatter: params => params.value ? 
`${Math.round(params.value / 1024)} KB` : "-", }, { @@ -132,13 +132,13 @@ function SearchPage() { { field: "owner", headerName: "Owner", - valueFormatter: (params) => + valueFormatter: params => params.data?.owner_name || params.data?.owner_email || "—", }, { field: "chunkCount", headerName: "Chunks", - valueFormatter: (params) => params.data?.chunkCount?.toString() || "-", + valueFormatter: params => params.data?.chunkCount?.toString() || "-", }, { field: "avgScore", @@ -200,8 +200,8 @@ function SearchPage() { try { // Delete each file individually since the API expects one filename at a time - const deletePromises = selectedRows.map((row) => - deleteDocumentMutation.mutateAsync({ filename: row.filename }), + const deletePromises = selectedRows.map(row => + deleteDocumentMutation.mutateAsync({ filename: row.filename }) ); await Promise.all(deletePromises); @@ -209,7 +209,7 @@ function SearchPage() { toast.success( `Successfully deleted ${selectedRows.length} document${ selectedRows.length > 1 ? "s" : "" - }`, + }` ); setSelectedRows([]); setShowBulkDeleteDialog(false); @@ -222,7 +222,7 @@ function SearchPage() { toast.error( error instanceof Error ? error.message - : "Failed to delete some documents", + : "Failed to delete some documents" ); } }; @@ -313,18 +313,17 @@ function SearchPage() { rowSelection="multiple" rowMultiSelectWithClick={false} suppressRowClickSelection={true} - getRowId={(params) => params.data.filename} - domLayout="autoHeight" + getRowId={params => params.data.filename} + domLayout="normal" onSelectionChanged={onSelectionChanged} noRowsOverlayComponent={() => ( -
- -

- No documents found -

-

- Try adjusting your search terms -

+
+
+ No knowledge +
+
+ Add files from local or your preferred cloud. +
)} /> @@ -342,7 +341,7 @@ function SearchPage() { }? This will remove all chunks and data associated with these documents. This action cannot be undone. Documents to be deleted: -${selectedRows.map((row) => `• ${row.filename}`).join("\n")}`} +${selectedRows.map(row => `• ${row.filename}`).join("\n")}`} confirmText="Delete All" onConfirm={handleBulkDelete} isLoading={deleteDocumentMutation.isPending} From 0a0dbe15ef5c91a2f5878570d5c64859d999076e Mon Sep 17 00:00:00 2001 From: Deon Sanchez <69873175+deon-sanchez@users.noreply.github.com> Date: Tue, 30 Sep 2025 10:56:43 -0600 Subject: [PATCH 12/77] Enhance settings page with new routing and UI updates; update connector descriptions for clarity --- frontend/src/app/settings/page.tsx | 321 +++++++++++------------ src/connectors/google_drive/connector.py | 2 +- src/connectors/onedrive/connector.py | 2 +- src/connectors/sharepoint/connector.py | 2 +- 4 files changed, 162 insertions(+), 165 deletions(-) diff --git a/frontend/src/app/settings/page.tsx b/frontend/src/app/settings/page.tsx index a63d91d3..72f36d91 100644 --- a/frontend/src/app/settings/page.tsx +++ b/frontend/src/app/settings/page.tsx @@ -1,7 +1,7 @@ "use client"; -import { ArrowUpRight, Loader2, PlugZap, RefreshCw } from "lucide-react"; -import { useSearchParams } from "next/navigation"; +import { ArrowUpRight, Loader2, PlugZap, Plus, RefreshCw } from "lucide-react"; +import { useRouter, useSearchParams } from "next/navigation"; import { Suspense, useCallback, useEffect, useState } from "react"; import { useUpdateFlowSettingMutation } from "@/app/api/mutations/useUpdateFlowSettingMutation"; import { @@ -35,15 +35,14 @@ import { Textarea } from "@/components/ui/textarea"; import { useAuth } from "@/contexts/auth-context"; import { useTask } from "@/contexts/task-context"; import { useDebounce } from "@/lib/debounce"; -import { DEFAULT_AGENT_SETTINGS, DEFAULT_KNOWLEDGE_SETTINGS, UI_CONSTANTS } from "@/lib/constants"; +import { + DEFAULT_AGENT_SETTINGS, + 
DEFAULT_KNOWLEDGE_SETTINGS, + UI_CONSTANTS, +} from "@/lib/constants"; import { getFallbackModels, type ModelProvider } from "./helpers/model-helpers"; import { ModelSelectItems } from "./helpers/model-select-item"; import { LabelWrapper } from "@/components/label-wrapper"; -import { - Tooltip, - TooltipContent, - TooltipTrigger, -} from "@radix-ui/react-tooltip"; const { MAX_SYSTEM_PROMPT_CHARS } = UI_CONSTANTS; @@ -97,6 +96,7 @@ function KnowledgeSourcesPage() { const { isAuthenticated, isNoAuthMode } = useAuth(); const { addTask, tasks } = useTask(); const searchParams = useSearchParams(); + const router = useRouter(); // Connectors state const [connectors, setConnectors] = useState([]); @@ -162,7 +162,7 @@ function KnowledgeSourcesPage() { onSuccess: () => { console.log("Setting updated successfully"); }, - onError: (error) => { + onError: error => { console.error("Failed to update setting:", error.message); }, }); @@ -280,8 +280,8 @@ function KnowledgeSourcesPage() { // Initialize connectors list with metadata from backend const initialConnectors = connectorTypes - .filter((type) => connectorsResult.connectors[type].available) // Only show available connectors - .map((type) => ({ + .filter(type => connectorsResult.connectors[type].available) // Only show available connectors + .map(type => ({ id: type, name: connectorsResult.connectors[type].name, description: connectorsResult.connectors[type].description, @@ -304,8 +304,8 @@ function KnowledgeSourcesPage() { ); const isConnected = activeConnection !== undefined; - setConnectors((prev) => - prev.map((c) => + setConnectors(prev => + prev.map(c => c.type === connectorType ? 
{ ...c, @@ -324,7 +324,7 @@ function KnowledgeSourcesPage() { const handleConnect = async (connector: Connector) => { setIsConnecting(connector.id); - setSyncResults((prev) => ({ ...prev, [connector.id]: null })); + setSyncResults(prev => ({ ...prev, [connector.id]: null })); try { // Use the shared auth callback URL, same as connectors page @@ -374,58 +374,58 @@ function KnowledgeSourcesPage() { } }; - const handleSync = async (connector: Connector) => { - if (!connector.connectionId) return; + // const handleSync = async (connector: Connector) => { + // if (!connector.connectionId) return; - setIsSyncing(connector.id); - setSyncResults((prev) => ({ ...prev, [connector.id]: null })); + // setIsSyncing(connector.id); + // setSyncResults(prev => ({ ...prev, [connector.id]: null })); - try { - const syncBody: { - connection_id: string; - max_files?: number; - selected_files?: string[]; - } = { - connection_id: connector.connectionId, - max_files: syncAllFiles ? 0 : maxFiles || undefined, - }; + // try { + // const syncBody: { + // connection_id: string; + // max_files?: number; + // selected_files?: string[]; + // } = { + // connection_id: connector.connectionId, + // max_files: syncAllFiles ? 
0 : maxFiles || undefined, + // }; - // Note: File selection is now handled via the cloud connectors dialog + // // Note: File selection is now handled via the cloud connectors dialog - const response = await fetch(`/api/connectors/${connector.type}/sync`, { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify(syncBody), - }); + // const response = await fetch(`/api/connectors/${connector.type}/sync`, { + // method: "POST", + // headers: { + // "Content-Type": "application/json", + // }, + // body: JSON.stringify(syncBody), + // }); - const result = await response.json(); + // const result = await response.json(); - if (response.status === 201) { - const taskId = result.task_id; - if (taskId) { - addTask(taskId); - setSyncResults((prev) => ({ - ...prev, - [connector.id]: { - processed: 0, - total: result.total_files || 0, - }, - })); - } - } else if (response.ok) { - setSyncResults((prev) => ({ ...prev, [connector.id]: result })); - // Note: Stats will auto-refresh via task completion watcher for async syncs - } else { - console.error("Sync failed:", result.error); - } - } catch (error) { - console.error("Sync error:", error); - } finally { - setIsSyncing(null); - } - }; + // if (response.status === 201) { + // const taskId = result.task_id; + // if (taskId) { + // addTask(taskId); + // setSyncResults(prev => ({ + // ...prev, + // [connector.id]: { + // processed: 0, + // total: result.total_files || 0, + // }, + // })); + // } + // } else if (response.ok) { + // setSyncResults(prev => ({ ...prev, [connector.id]: result })); + // // Note: Stats will auto-refresh via task completion watcher for async syncs + // } else { + // console.error("Sync failed:", result.error); + // } + // } catch (error) { + // console.error("Sync error:", error); + // } finally { + // setIsSyncing(null); + // } + // }; const getStatusBadge = (status: Connector["status"]) => { switch (status) { @@ -461,6 +461,11 @@ function KnowledgeSourcesPage() 
{ } }; + const navigateToKnowledgePage = (connector: Connector) => { + const provider = connector.type.replace(/-/g, "_"); + router.push(`/upload/${provider}`); + }; + // Check connector status on mount and when returning from OAuth useEffect(() => { if (isAuthenticated) { @@ -480,9 +485,9 @@ function KnowledgeSourcesPage() { // Watch for task completions and refresh stats useEffect(() => { // Find newly completed tasks by comparing with previous state - const newlyCompletedTasks = tasks.filter((task) => { + const newlyCompletedTasks = tasks.filter(task => { const wasCompleted = - prevTasks.find((prev) => prev.task_id === task.task_id)?.status === + prevTasks.find(prev => prev.task_id === task.task_id)?.status === "completed"; return task.status === "completed" && !wasCompleted; }); @@ -536,7 +541,7 @@ function KnowledgeSourcesPage() { fetch(`/api/reset-flow/retrieval`, { method: "POST", }) - .then((response) => { + .then(response => { if (response.ok) { return response.json(); } @@ -549,7 +554,7 @@ function KnowledgeSourcesPage() { handleModelChange(DEFAULT_AGENT_SETTINGS.llm_model); closeDialog(); // Close after successful completion }) - .catch((error) => { + .catch(error => { console.error("Error restoring retrieval flow:", error); closeDialog(); // Close even on error (could show error toast instead) }); @@ -559,7 +564,7 @@ function KnowledgeSourcesPage() { fetch(`/api/reset-flow/ingest`, { method: "POST", }) - .then((response) => { + .then(response => { if (response.ok) { return response.json(); } @@ -572,7 +577,7 @@ function KnowledgeSourcesPage() { setProcessingMode(DEFAULT_KNOWLEDGE_SETTINGS.processing_mode); closeDialog(); // Close after successful completion }) - .catch((error) => { + .catch(error => { console.error("Error restoring ingest flow:", error); closeDialog(); // Close even on error (could show error toast instead) }); @@ -589,85 +594,88 @@ function KnowledgeSourcesPage() {
{/* Conditional Sync Settings or No-Auth Message */} - {isNoAuthMode ? ( - - - - Cloud connectors are only available with auth mode enabled - - - Please provide the following environment variables and restart: - - - -
-
- # make here https://console.cloud.google.com/apis/credentials + { + isNoAuthMode ? ( + + + + Cloud connectors are only available with auth mode enabled + + + Please provide the following environment variables and + restart: + + + +
+
+ # make here + https://console.cloud.google.com/apis/credentials +
+
GOOGLE_OAUTH_CLIENT_ID=
+
GOOGLE_OAUTH_CLIENT_SECRET=
-
GOOGLE_OAUTH_CLIENT_ID=
-
GOOGLE_OAUTH_CLIENT_SECRET=
-
- - - ) : ( -
-
-

Sync Settings

-

- Configure how many files to sync when manually triggering a sync -

-
-
-
- { - setSyncAllFiles(!!checked); - if (checked) { - setMaxFiles(0); - } else { - setMaxFiles(10); - } - }} - /> - -
- -
- setMaxFiles(parseInt(e.target.value) || 10)} - disabled={syncAllFiles} - className="w-16 min-w-16 max-w-16 flex-shrink-0 disabled:opacity-50 disabled:cursor-not-allowed" - min="1" - max="100" - title={ - syncAllFiles - ? "Disabled when 'Sync all files' is checked" - : "Leave blank or set to 0 for unlimited" - } - /> -
-
-
- )} + + + ) : null + //
+ //
+ //

Sync Settings

+ //

+ // Configure how many files to sync when manually triggering a sync + //

+ //
+ //
+ //
+ // { + // setSyncAllFiles(!!checked); + // if (checked) { + // setMaxFiles(0); + // } else { + // setMaxFiles(10); + // } + // }} + // /> + // + //
+ // + //
+ // setMaxFiles(parseInt(e.target.value) || 10)} + // disabled={syncAllFiles} + // className="w-16 min-w-16 max-w-16 flex-shrink-0 disabled:opacity-50 disabled:cursor-not-allowed" + // min="1" + // max="100" + // title={ + // syncAllFiles + // ? "Disabled when 'Sync all files' is checked" + // : "Leave blank or set to 0 for unlimited" + // } + // /> + //
+ //
+ //
+ } {/* Connectors Grid */}
- {connectors.map((connector) => ( + {connectors.map(connector => (
@@ -689,22 +697,13 @@ function KnowledgeSourcesPage() { {connector.status === "connected" ? (
{syncResults[connector.id] && ( @@ -810,7 +809,7 @@ function KnowledgeSourcesPage() { } confirmText="Proceed" confirmIcon={} - onConfirm={(closeDialog) => + onConfirm={closeDialog => handleEditInLangflow("chat", closeDialog) } variant="warning" @@ -830,8 +829,7 @@ function KnowledgeSourcesPage() { : null} + id="search-query" + type="text" + defaultValue={parsedFilterData?.query} + value={queryInputText} + onChange={(e) => setQueryInputText(e.target.value)} + placeholder="Search chunks..." + /> +
+
-
- setQueryInputText(e.target.value)} - placeholder="Search chunks..." - className="flex-1 bg-muted/20 rounded-lg border border-border/50 px-4 py-3 focus-visible:ring-1 focus-visible:ring-ring" - /> - -
{/* Content Area - matches knowledge page structure */} -
+
{isFetching ? (
@@ -244,10 +234,9 @@ function ChunksPageContent() {