diff --git a/docker-compose-cpu.yml b/docker-compose-cpu.yml
index 45aaee01..61b9a2f6 100644
--- a/docker-compose-cpu.yml
+++ b/docker-compose-cpu.yml
@@ -81,7 +81,7 @@ services:
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
volumes:
- - ./documents:/app/documents:Z
+ - ./openrag-documents:/app/documents:Z
- ./keys:/app/keys:Z
- ./flows:/app/flows:U,z
diff --git a/docker-compose.yml b/docker-compose.yml
index a3371a8e..2ed199a5 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -80,7 +80,7 @@ services:
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
volumes:
- - ./documents:/app/documents:Z
+ - ./openrag-documents:/app/documents:Z
- ./keys:/app/keys:Z
- ./flows:/app/flows:U,z
diff --git a/docs/README.md b/docs/README.md
index 436d7830..bad5759c 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -42,7 +42,7 @@ If you are using GitHub pages for hosting, this command is a convenient way to b
## Update the OpenRAG documentation PDF
-The documentation PDF at `openrag/documents/openrag-documentation.pdf` is used by the OpenRAG application, so keep it up to date.
+The documentation PDF at `openrag/openrag-documents/openrag-documentation.pdf` is used by the OpenRAG application, so keep it up to date.
To update the PDF, do the following:
@@ -68,7 +68,7 @@ To remove these items, give the following prompt or something similar to your ID
2. Check your `.mdx` files to confirm these elements are removed.
Don't commit the changes.
-3. From `openrag/docs`, run this command to build the site with the changes, and create a PDF at `openrag/documents`.
+3. From `openrag/docs`, run this command to build the site with the changes, and create a PDF at `openrag/openrag-documents`.
```
npm run build:pdf
diff --git a/docs/docs/core-components/knowledge.mdx b/docs/docs/core-components/knowledge.mdx
index 80a997c2..63edbaa3 100644
--- a/docs/docs/core-components/knowledge.mdx
+++ b/docs/docs/core-components/knowledge.mdx
@@ -29,7 +29,7 @@ To configure the knowledge ingestion pipeline parameters, see [Docling Ingestion
The **Knowledge Ingest** flow uses Langflow's [**File** component](https://docs.langflow.org/components-data#file) to split and embed files loaded from your local machine into the OpenSearch database.
-The default path to your local folder is mounted from the `./documents` folder in your OpenRAG project directory to the `/app/documents/` directory inside the Docker container. Files added to the host or the container will be visible in both locations. To configure this location, modify the **Documents Paths** variable in either the TUI's [Advanced Setup](/install#setup) menu or in the `.env` used by Docker Compose.
+By default, the `./openrag-documents` folder in your OpenRAG project directory is mounted to the `/app/documents/` directory inside the Docker container. Files added in either location are visible in both. To change this location, modify the **Documents Paths** variable in either the TUI's [Advanced Setup](/install#setup) menu or the `.env` file used by Docker Compose.
To load and process a single file from the mapped location, click **Add Knowledge**, and then click **File**.
The file is loaded into your OpenSearch database, and appears in the Knowledge page.
diff --git a/docs/docs/get-started/docker.mdx b/docs/docs/get-started/docker.mdx
index 0771b08e..af2e191f 100644
--- a/docs/docs/get-started/docker.mdx
+++ b/docs/docs/get-started/docker.mdx
@@ -187,7 +187,7 @@ docker compose up -d --force-recreate
Reset state by rebuilding all of your containers.
Your OpenSearch and Langflow databases will be lost.
-Documents stored in the `./documents` directory will persist, since the directory is mounted as a volume in the OpenRAG backend container.
+Documents stored in the `./openrag-documents` directory will persist, since the directory is mounted as a volume in the OpenRAG backend container.
```bash
docker compose up --build --force-recreate --remove-orphans
diff --git a/docs/docs/get-started/quickstart.mdx b/docs/docs/get-started/quickstart.mdx
index 2e333a14..9c7feb4e 100644
--- a/docs/docs/get-started/quickstart.mdx
+++ b/docs/docs/get-started/quickstart.mdx
@@ -101,7 +101,7 @@ You can click a document to view the chunks of the document as they are stored i
For this quickstart, use either the **File** or **Folder** upload options to load documents from your local machine.
**Folder** uploads an entire directory.
- The default directory is the `/documents` subdirectory in your OpenRAG installation directory.
+ The default directory is the `openrag-documents` subdirectory in your OpenRAG installation directory.
For information about the cloud storage provider options, see [Ingest files through OAuth connectors](/knowledge#oauth-ingestion).
diff --git a/docs/docs/reference/configuration.mdx b/docs/docs/reference/configuration.mdx
index 1dbc4198..30edfaf4 100644
--- a/docs/docs/reference/configuration.mdx
+++ b/docs/docs/reference/configuration.mdx
@@ -81,7 +81,7 @@ For more information, see [Ingestion](/ingestion).
| `DISABLE_INGEST_WITH_LANGFLOW` | `false` | Disable Langflow ingestion pipeline. |
| `DOCLING_OCR_ENGINE` | - | OCR engine for document processing. |
| `OCR_ENABLED` | `false` | Enable OCR for image processing. |
-| `OPENRAG_DOCUMENTS_PATHS` | `./documents` | Document paths for ingestion. |
+| `OPENRAG_DOCUMENTS_PATHS` | `./openrag-documents` | Comma-separated document paths for ingestion. |
| `PICTURE_DESCRIPTIONS_ENABLED` | `false` | Enable picture descriptions. |
### Langflow settings
diff --git a/docs/package.json b/docs/package.json
index 21b47600..1faaf5ce 100644
--- a/docs/package.json
+++ b/docs/package.json
@@ -6,7 +6,7 @@
"docusaurus": "docusaurus",
"start": "docusaurus start",
"build": "docusaurus build",
- "build:pdf": "rm -f ../documents/openrag-documentation.pdf && npm run build && npm run serve & sleep 10 && npx docusaurus-to-pdf && pkill -f 'docusaurus serve'",
+ "build:pdf": "rm -f ../openrag-documents/openrag-documentation.pdf && npm run build && npm run serve & sleep 10 && npx docusaurus-to-pdf && pkill -f 'docusaurus serve'",
"swizzle": "docusaurus swizzle",
"deploy": "docusaurus deploy",
"clear": "docusaurus clear",
diff --git a/docs/scraper.config.json b/docs/scraper.config.json
index 8a3d3daf..7157c054 100644
--- a/docs/scraper.config.json
+++ b/docs/scraper.config.json
@@ -1,7 +1,7 @@
{
"baseUrl": "http://localhost:3000",
"entryPoint": "http://localhost:3000",
- "outputDir": "../documents/openrag-documentation.pdf",
+ "outputDir": "../openrag-documents/openrag-documentation.pdf",
"customStyles": "table { max-width: 3500px !important; } .navbar, .footer, .breadcrumbs { display: none !important; }",
"forceImages": true
}
\ No newline at end of file
diff --git a/documents/docling.pdf b/openrag-documents/docling.pdf
similarity index 100%
rename from documents/docling.pdf
rename to openrag-documents/docling.pdf
diff --git a/documents/ibm_anthropic.pdf b/openrag-documents/ibm_anthropic.pdf
similarity index 100%
rename from documents/ibm_anthropic.pdf
rename to openrag-documents/ibm_anthropic.pdf
diff --git a/documents/openrag-documentation.pdf b/openrag-documents/openrag-documentation.pdf
similarity index 100%
rename from documents/openrag-documentation.pdf
rename to openrag-documents/openrag-documentation.pdf
diff --git a/documents/warmup_ocr.pdf b/openrag-documents/warmup_ocr.pdf
similarity index 100%
rename from documents/warmup_ocr.pdf
rename to openrag-documents/warmup_ocr.pdf
diff --git a/src/main.py b/src/main.py
index 59584298..14c84e4d 100644
--- a/src/main.py
+++ b/src/main.py
@@ -2,6 +2,7 @@
from connectors.langflow_connector_service import LangflowConnectorService
from connectors.service import ConnectorService
from services.flows_service import FlowsService
+from utils.container_utils import detect_container_environment
from utils.embeddings import create_dynamic_index_body
from utils.logging_config import configure_from_env, get_logger
@@ -13,6 +14,7 @@ import atexit
import mimetypes
import multiprocessing
import os
+import shutil
import subprocess
from functools import partial
@@ -300,6 +302,21 @@ async def init_index_when_ready():
)
+def _get_documents_dir():
+ """Return the absolute path of the default documents directory for the current runtime (container or local)."""
+ # Containers read the host's ./openrag-documents folder through the compose bind mount at /app/documents.
+ # Local (non-container) runs resolve openrag-documents relative to the current working directory.
+ container_env = detect_container_environment()
+ if container_env:
+ path = os.path.abspath("/app/documents")
+ logger.debug(f"Running in {container_env}, using container path: {path}")
+ return path
+ else:
+ path = os.path.abspath(os.path.join(os.getcwd(), "openrag-documents"))
+ logger.debug(f"Running locally, using local path: {path}")
+ return path
+
+
async def ingest_default_documents_when_ready(services):
"""Scan the local documents folder and ingest files like a non-auth upload."""
try:
@@ -307,7 +324,7 @@ async def ingest_default_documents_when_ready(services):
"Ingesting default documents when ready",
disable_langflow_ingest=DISABLE_INGEST_WITH_LANGFLOW,
)
- base_dir = os.path.abspath(os.path.join(os.getcwd(), "documents"))
+ base_dir = _get_documents_dir()
if not os.path.isdir(base_dir):
logger.info(
"Default documents directory not found; skipping ingestion",
diff --git a/src/tui/main.py b/src/tui/main.py
index d27db184..3a40ca46 100644
--- a/src/tui/main.py
+++ b/src/tui/main.py
@@ -455,7 +455,7 @@ def _copy_assets(resource_tree, destination: Path, allowed_suffixes: Optional[It
def copy_sample_documents(*, force: bool = False) -> None:
"""Copy sample documents from package to current directory if they don't exist."""
- documents_dir = Path("documents")
+ documents_dir = Path("openrag-documents")
try:
assets_files = files("tui._assets.documents")
diff --git a/src/tui/managers/env_manager.py b/src/tui/managers/env_manager.py
index b1fd73bb..2910e485 100644
--- a/src/tui/managers/env_manager.py
+++ b/src/tui/managers/env_manager.py
@@ -64,7 +64,7 @@ class EnvConfig:
nudges_flow_id: str = "ebc01d31-1976-46ce-a385-b0240327226c"
# Document paths (comma-separated)
- openrag_documents_paths: str = "./documents"
+ openrag_documents_paths: str = "./openrag-documents"
# OpenSearch data path
opensearch_data_path: str = "./opensearch-data"
@@ -454,7 +454,7 @@ class EnvManager:
(
"openrag_documents_paths",
"Documents Paths",
- "./documents,/path/to/more/docs",
+ "./openrag-documents,/path/to/more/docs",
False,
),
]
@@ -521,7 +521,7 @@ class EnvManager:
)
if not is_valid:
- return ["./documents:/app/documents:Z"] # fallback
+ return ["./openrag-documents:/app/documents:Z"] # fallback
volume_mounts = []
for i, path in enumerate(validated_paths):
diff --git a/src/tui/screens/config.py b/src/tui/screens/config.py
index 51662964..0f51532c 100644
--- a/src/tui/screens/config.py
+++ b/src/tui/screens/config.py
@@ -523,7 +523,7 @@ class ConfigScreen(Screen):
yield Label("Documents Paths")
current_value = getattr(self.env_manager.config, "openrag_documents_paths", "")
input_widget = Input(
- placeholder="./documents,/path/to/more/docs",
+ placeholder="./openrag-documents,/path/to/more/docs",
value=current_value,
validators=[DocumentsPathValidator()],
id="input-openrag_documents_paths",
diff --git a/tests/integration/test_startup_ingest.py b/tests/integration/test_startup_ingest.py
index 44d1e8b2..78402392 100644
--- a/tests/integration/test_startup_ingest.py
+++ b/tests/integration/test_startup_ingest.py
@@ -29,7 +29,7 @@ async def wait_for_ready(client: httpx.AsyncClient, timeout_s: float = 30.0):
def count_files_in_documents() -> int:
- base_dir = Path(os.getcwd()) / "documents"
+ base_dir = Path(os.getcwd()) / "openrag-documents"  # keep in sync with the local path in src/main.py's _get_documents_dir()
if not base_dir.is_dir():
return 0
return sum(1 for _ in base_dir.rglob("*") if _.is_file() and _.name not in EXCLUDED_INGESTION_FILES)