Merge pull request #521 from langflow-ai/doc-directory-name
change documents directory to openrag-documents
This commit is contained in:
commit
79bb998211
18 changed files with 34 additions and 17 deletions
|
|
@ -81,7 +81,7 @@ services:
|
|||
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
|
||||
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
|
||||
volumes:
|
||||
- ./documents:/app/documents:Z
|
||||
- ./openrag-documents:/app/documents:Z
|
||||
- ./keys:/app/keys:Z
|
||||
- ./flows:/app/flows:U,z
|
||||
|
||||
|
|
|
|||
|
|
@ -80,7 +80,7 @@ services:
|
|||
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
|
||||
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
|
||||
volumes:
|
||||
- ./documents:/app/documents:Z
|
||||
- ./openrag-documents:/app/documents:Z
|
||||
- ./keys:/app/keys:Z
|
||||
- ./flows:/app/flows:U,z
|
||||
|
||||
|
|
|
|||
|
|
@ -42,7 +42,7 @@ If you are using GitHub pages for hosting, this command is a convenient way to b
|
|||
|
||||
## Update the OpenRAG documentation PDF
|
||||
|
||||
The documentation PDF at `openrag/documents/openrag-documentation.pdf` is used by the OpenRAG application, so keep it up to date.
|
||||
The documentation PDF at `openrag/openrag-documents/openrag-documentation.pdf` is used by the OpenRAG application, so keep it up to date.
|
||||
|
||||
To update the PDF, do the following:
|
||||
|
||||
|
|
@ -68,7 +68,7 @@ To remove these items, give the following prompt or something similar to your ID
|
|||
2. Check your `.mdx` files to confirm these elements are removed.
|
||||
Don't commit the changes.
|
||||
|
||||
3. From `openrag/docs`, run this command to build the site with the changes, and create a PDF at `openrag/documents`.
|
||||
3. From `openrag/docs`, run this command to build the site with the changes, and create a PDF at `openrag/openrag-documents`.
|
||||
|
||||
```
|
||||
npm run build:pdf
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ To configure the knowledge ingestion pipeline parameters, see [Docling Ingestion
|
|||
|
||||
The **Knowledge Ingest** flow uses Langflow's [**File** component](https://docs.langflow.org/components-data#file) to split and embed files loaded from your local machine into the OpenSearch database.
|
||||
|
||||
The default path to your local folder is mounted from the `./documents` folder in your OpenRAG project directory to the `/app/documents/` directory inside the Docker container. Files added to the host or the container will be visible in both locations. To configure this location, modify the **Documents Paths** variable in either the TUI's [Advanced Setup](/install#setup) menu or in the `.env` used by Docker Compose.
|
||||
The default path to your local folder is mounted from the `./openrag-documents` folder in your OpenRAG project directory to the `/app/documents/` directory inside the Docker container. Files added to the host or the container will be visible in both locations. To configure this location, modify the **Documents Paths** variable in either the TUI's [Advanced Setup](/install#setup) menu or in the `.env` used by Docker Compose.
|
||||
|
||||
To load and process a single file from the mapped location, click **Add Knowledge**, and then click <Icon name="File" aria-hidden="true"/> **File**.
|
||||
The file is loaded into your OpenSearch database, and appears in the Knowledge page.
|
||||
|
|
|
|||
|
|
@ -187,7 +187,7 @@ docker compose up -d --force-recreate
|
|||
|
||||
Reset state by rebuilding all of your containers.
|
||||
Your OpenSearch and Langflow databases will be lost.
|
||||
Documents stored in the `./documents` directory will persist, since the directory is mounted as a volume in the OpenRAG backend container.
|
||||
Documents stored in the `./openrag-documents` directory will persist, since the directory is mounted as a volume in the OpenRAG backend container.
|
||||
|
||||
```bash
|
||||
docker compose up --build --force-recreate --remove-orphans
|
||||
|
|
|
|||
|
|
@ -101,7 +101,7 @@ You can click a document to view the chunks of the document as they are stored i
|
|||
|
||||
For this quickstart, use either the <Icon name="File" aria-hidden="true"/> **File** or <Icon name="Folder" aria-hidden="true"/> **Folder** upload options to load documents from your local machine.
|
||||
**Folder** uploads an entire directory.
|
||||
The default directory is the `/documents` subdirectory in your OpenRAG installation directory.
|
||||
The default directory is the `./openrag-documents` subdirectory in your OpenRAG installation directory.
|
||||
|
||||
For information about the cloud storage provider options, see [Ingest files through OAuth connectors](/knowledge#oauth-ingestion).
|
||||
|
||||
|
|
|
|||
|
|
@ -81,7 +81,7 @@ For more information, see [Ingestion](/ingestion).
|
|||
| `DISABLE_INGEST_WITH_LANGFLOW` | `false` | Disable Langflow ingestion pipeline. |
|
||||
| `DOCLING_OCR_ENGINE` | - | OCR engine for document processing. |
|
||||
| `OCR_ENABLED` | `false` | Enable OCR for image processing. |
|
||||
| `OPENRAG_DOCUMENTS_PATHS` | `./documents` | Document paths for ingestion. |
|
||||
| `OPENRAG_DOCUMENTS_PATHS` | `./openrag-documents` | Document paths for ingestion. |
|
||||
| `PICTURE_DESCRIPTIONS_ENABLED` | `false` | Enable picture descriptions. |
|
||||
|
||||
### Langflow settings
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
"docusaurus": "docusaurus",
|
||||
"start": "docusaurus start",
|
||||
"build": "docusaurus build",
|
||||
"build:pdf": "rm -f ../documents/openrag-documentation.pdf && npm run build && npm run serve & sleep 10 && npx docusaurus-to-pdf && pkill -f 'docusaurus serve'",
|
||||
"build:pdf": "rm -f ../openrag-documents/openrag-documentation.pdf && npm run build && npm run serve & sleep 10 && npx docusaurus-to-pdf && pkill -f 'docusaurus serve'",
|
||||
"swizzle": "docusaurus swizzle",
|
||||
"deploy": "docusaurus deploy",
|
||||
"clear": "docusaurus clear",
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
{
|
||||
"baseUrl": "http://localhost:3000",
|
||||
"entryPoint": "http://localhost:3000",
|
||||
"outputDir": "../documents/openrag-documentation.pdf",
|
||||
"outputDir": "../openrag-documents/openrag-documentation.pdf",
|
||||
"customStyles": "table { max-width: 3500px !important; } .navbar, .footer, .breadcrumbs { display: none !important; }",
|
||||
"forceImages": true
|
||||
}
|
||||
19
src/main.py
19
src/main.py
|
|
@ -2,6 +2,7 @@
|
|||
from connectors.langflow_connector_service import LangflowConnectorService
|
||||
from connectors.service import ConnectorService
|
||||
from services.flows_service import FlowsService
|
||||
from utils.container_utils import detect_container_environment
|
||||
from utils.embeddings import create_dynamic_index_body
|
||||
from utils.logging_config import configure_from_env, get_logger
|
||||
|
||||
|
|
@ -13,6 +14,7 @@ import atexit
|
|||
import mimetypes
|
||||
import multiprocessing
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
from functools import partial
|
||||
|
||||
|
|
@ -300,6 +302,21 @@ async def init_index_when_ready():
|
|||
)
|
||||
|
||||
|
||||
def _get_documents_dir():
    """Return the absolute path of the default documents directory.

    When a container environment is detected, the documents volume is
    expected at /app/documents. Otherwise the directory is
    ``openrag-documents`` under the current working directory.
    """
    runtime = detect_container_environment()

    # Local (non-container) run: resolve relative to the working directory.
    if not runtime:
        local_dir = os.path.abspath(os.path.join(os.getcwd(), "openrag-documents"))
        logger.debug(f"Running locally, using local path: {local_dir}")
        return local_dir

    # Container run: use the fixed mount point of the documents volume.
    container_dir = os.path.abspath("/app/documents")
    logger.debug(f"Running in {runtime}, using container path: {container_dir}")
    return container_dir
|
||||
|
||||
|
||||
async def ingest_default_documents_when_ready(services):
|
||||
"""Scan the local documents folder and ingest files like a non-auth upload."""
|
||||
try:
|
||||
|
|
@ -307,7 +324,7 @@ async def ingest_default_documents_when_ready(services):
|
|||
"Ingesting default documents when ready",
|
||||
disable_langflow_ingest=DISABLE_INGEST_WITH_LANGFLOW,
|
||||
)
|
||||
base_dir = os.path.abspath(os.path.join(os.getcwd(), "documents"))
|
||||
base_dir = _get_documents_dir()
|
||||
if not os.path.isdir(base_dir):
|
||||
logger.info(
|
||||
"Default documents directory not found; skipping ingestion",
|
||||
|
|
|
|||
|
|
@ -455,7 +455,7 @@ def _copy_assets(resource_tree, destination: Path, allowed_suffixes: Optional[It
|
|||
|
||||
def copy_sample_documents(*, force: bool = False) -> None:
|
||||
"""Copy sample documents from package to current directory if they don't exist."""
|
||||
documents_dir = Path("documents")
|
||||
documents_dir = Path("openrag-documents")
|
||||
|
||||
try:
|
||||
assets_files = files("tui._assets.documents")
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ class EnvConfig:
|
|||
nudges_flow_id: str = "ebc01d31-1976-46ce-a385-b0240327226c"
|
||||
|
||||
# Document paths (comma-separated)
|
||||
openrag_documents_paths: str = "./documents"
|
||||
openrag_documents_paths: str = "./openrag-documents"
|
||||
|
||||
# OpenSearch data path
|
||||
opensearch_data_path: str = "./opensearch-data"
|
||||
|
|
@ -454,7 +454,7 @@ class EnvManager:
|
|||
(
|
||||
"openrag_documents_paths",
|
||||
"Documents Paths",
|
||||
"./documents,/path/to/more/docs",
|
||||
"./openrag-documents,/path/to/more/docs",
|
||||
False,
|
||||
),
|
||||
]
|
||||
|
|
@ -521,7 +521,7 @@ class EnvManager:
|
|||
)
|
||||
|
||||
if not is_valid:
|
||||
return ["./documents:/app/documents:Z"] # fallback
|
||||
return ["./openrag-documents:/app/documents:Z"] # fallback
|
||||
|
||||
volume_mounts = []
|
||||
for i, path in enumerate(validated_paths):
|
||||
|
|
|
|||
|
|
@ -523,7 +523,7 @@ class ConfigScreen(Screen):
|
|||
yield Label("Documents Paths")
|
||||
current_value = getattr(self.env_manager.config, "openrag_documents_paths", "")
|
||||
input_widget = Input(
|
||||
placeholder="./documents,/path/to/more/docs",
|
||||
placeholder="./openrag-documents,/path/to/more/docs",
|
||||
value=current_value,
|
||||
validators=[DocumentsPathValidator()],
|
||||
id="input-openrag_documents_paths",
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ async def wait_for_ready(client: httpx.AsyncClient, timeout_s: float = 30.0):
|
|||
|
||||
|
||||
def count_files_in_documents() -> int:
|
||||
base_dir = Path(os.getcwd()) / "documents"
|
||||
base_dir = Path(os.getcwd()) / "openrag-documents"
|
||||
if not base_dir.is_dir():
|
||||
return 0
|
||||
return sum(1 for _ in base_dir.rglob("*") if _.is_file() and _.name not in EXCLUDED_INGESTION_FILES)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue