fix: openrag documents ingestion (#545)

* Changed references from /app/documents to /app/openrag-documents

* bump version
This commit is contained in:
Lucas Oliveira 2025-11-27 15:22:09 -03:00 committed by GitHub
parent 268d5b3d00
commit 3624b4f82b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 332 additions and 332 deletions

View file

@ -81,7 +81,7 @@ services:
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
volumes: volumes:
- ./openrag-documents:/app/documents:Z - ./openrag-documents:/app/openrag-documents:Z
- ./keys:/app/keys:Z - ./keys:/app/keys:Z
- ./flows:/app/flows:U,z - ./flows:/app/flows:U,z
- ./config:/app/config:Z - ./config:/app/config:Z

View file

@ -29,7 +29,7 @@ To configure the knowledge ingestion pipeline parameters, see [Docling Ingestion
The **Knowledge Ingest** flow uses Langflow's [**File** component](https://docs.langflow.org/components-data#file) to split and embed files loaded from your local machine into the OpenSearch database. The **Knowledge Ingest** flow uses Langflow's [**File** component](https://docs.langflow.org/components-data#file) to split and embed files loaded from your local machine into the OpenSearch database.
The default path to your local folder is mounted from the `./openrag-documents` folder in your OpenRAG project directory to the `/app/documents/` directory inside the Docker container. Files added to the host or the container will be visible in both locations. To configure this location, modify the **Documents Paths** variable in either the TUI's [Advanced Setup](/install#setup) menu or in the `.env` used by Docker Compose. The default path to your local folder is mounted from the `./openrag-documents` folder in your OpenRAG project directory to the `/app/openrag-documents/` directory inside the Docker container. Files added to the host or the container will be visible in both locations. To configure this location, modify the **Documents Paths** variable in either the TUI's [Advanced Setup](/install#setup) menu or in the `.env` used by Docker Compose.
To load and process a single file from the mapped location, click **Add Knowledge**, and then click <Icon name="File" aria-hidden="true"/> **File**. To load and process a single file from the mapped location, click **Add Knowledge**, and then click <Icon name="File" aria-hidden="true"/> **File**.
The file is loaded into your OpenSearch database, and appears in the Knowledge page. The file is loaded into your OpenSearch database, and appears in the Knowledge page.

View file

@ -1,364 +1,364 @@
"use client"; "use client";
import { useState, useEffect } from "react"; import { Cloud, FolderOpen, Loader2, Upload } from "lucide-react";
import { useEffect, useState } from "react";
import { ProtectedRoute } from "@/components/protected-route";
import { Button } from "@/components/ui/button"; import { Button } from "@/components/ui/button";
import { import {
Card, Card,
CardContent, CardContent,
CardDescription, CardDescription,
CardHeader, CardHeader,
CardTitle, CardTitle,
} from "@/components/ui/card"; } from "@/components/ui/card";
import { Input } from "@/components/ui/input"; import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label"; import { Label } from "@/components/ui/label";
import { Upload, FolderOpen, Loader2, Cloud } from "lucide-react";
import { ProtectedRoute } from "@/components/protected-route";
import { useTask } from "@/contexts/task-context"; import { useTask } from "@/contexts/task-context";
function AdminPage() { function AdminPage() {
console.log("AdminPage component rendered!"); console.log("AdminPage component rendered!");
const [fileUploadLoading, setFileUploadLoading] = useState(false); const [fileUploadLoading, setFileUploadLoading] = useState(false);
const [pathUploadLoading, setPathUploadLoading] = useState(false); const [pathUploadLoading, setPathUploadLoading] = useState(false);
const [selectedFile, setSelectedFile] = useState<File | null>(null); const [selectedFile, setSelectedFile] = useState<File | null>(null);
const [folderPath, setFolderPath] = useState("/app/documents/"); const [folderPath, setFolderPath] = useState("/app/openrag-documents/");
const [bucketUploadLoading, setBucketUploadLoading] = useState(false); const [bucketUploadLoading, setBucketUploadLoading] = useState(false);
const [bucketUrl, setBucketUrl] = useState("s3://"); const [bucketUrl, setBucketUrl] = useState("s3://");
const [uploadStatus, setUploadStatus] = useState<string>(""); const [uploadStatus, setUploadStatus] = useState<string>("");
const [awsEnabled, setAwsEnabled] = useState(false); const [awsEnabled, setAwsEnabled] = useState(false);
const { addTask } = useTask(); const { addTask } = useTask();
useEffect(() => { useEffect(() => {
console.log("AdminPage useEffect running - checking AWS availability"); console.log("AdminPage useEffect running - checking AWS availability");
const checkAws = async () => { const checkAws = async () => {
try { try {
console.log("Making request to /api/upload_options"); console.log("Making request to /api/upload_options");
const res = await fetch("/api/upload_options"); const res = await fetch("/api/upload_options");
console.log("Response status:", res.status, "OK:", res.ok); console.log("Response status:", res.status, "OK:", res.ok);
if (res.ok) { if (res.ok) {
const data = await res.json(); const data = await res.json();
console.log("Response data:", data); console.log("Response data:", data);
setAwsEnabled(Boolean(data.aws)); setAwsEnabled(Boolean(data.aws));
} }
} catch (err) { } catch (err) {
console.error("Failed to check AWS availability", err); console.error("Failed to check AWS availability", err);
} }
}; };
checkAws(); checkAws();
}, []); }, []);
const handleFileUpload = async (e: React.FormEvent) => { const handleFileUpload = async (e: React.FormEvent) => {
e.preventDefault(); e.preventDefault();
if (!selectedFile) return; if (!selectedFile) return;
setFileUploadLoading(true); setFileUploadLoading(true);
setUploadStatus(""); setUploadStatus("");
try { try {
const formData = new FormData(); const formData = new FormData();
formData.append("file", selectedFile); formData.append("file", selectedFile);
const response = await fetch("/api/router/upload_ingest", { const response = await fetch("/api/router/upload_ingest", {
method: "POST", method: "POST",
body: formData, body: formData,
}); });
const result = await response.json(); const result = await response.json();
if (response.ok) { if (response.ok) {
setUploadStatus(`File uploaded successfully! ID: ${result.id}`); setUploadStatus(`File uploaded successfully! ID: ${result.id}`);
setSelectedFile(null); setSelectedFile(null);
// Reset the file input // Reset the file input
const fileInput = document.getElementById( const fileInput = document.getElementById(
"file-input", "file-input",
) as HTMLInputElement; ) as HTMLInputElement;
if (fileInput) fileInput.value = ""; if (fileInput) fileInput.value = "";
} else { } else {
setUploadStatus(`Error: ${result.error || "Upload failed"}`); setUploadStatus(`Error: ${result.error || "Upload failed"}`);
} }
} catch (error) { } catch (error) {
setUploadStatus( setUploadStatus(
`Error: ${error instanceof Error ? error.message : "Upload failed"}`, `Error: ${error instanceof Error ? error.message : "Upload failed"}`,
); );
} finally { } finally {
setFileUploadLoading(false); setFileUploadLoading(false);
} }
}; };
const handleBucketUpload = async (e: React.FormEvent) => { const handleBucketUpload = async (e: React.FormEvent) => {
e.preventDefault(); e.preventDefault();
if (!bucketUrl.trim()) return; if (!bucketUrl.trim()) return;
setBucketUploadLoading(true); setBucketUploadLoading(true);
setUploadStatus(""); setUploadStatus("");
try { try {
const response = await fetch("/api/upload_bucket", { const response = await fetch("/api/upload_bucket", {
method: "POST", method: "POST",
headers: { headers: {
"Content-Type": "application/json", "Content-Type": "application/json",
}, },
body: JSON.stringify({ s3_url: bucketUrl }), body: JSON.stringify({ s3_url: bucketUrl }),
}); });
const result = await response.json(); const result = await response.json();
if (response.status === 201) { if (response.status === 201) {
const taskId = result.task_id || result.id; const taskId = result.task_id || result.id;
const totalFiles = result.total_files || 0; const totalFiles = result.total_files || 0;
if (!taskId) { if (!taskId) {
throw new Error("No task ID received from server"); throw new Error("No task ID received from server");
} }
addTask(taskId); addTask(taskId);
setUploadStatus( setUploadStatus(
`🔄 Processing started for ${totalFiles} files. Check the task notification panel for real-time progress. (Task ID: ${taskId})`, `🔄 Processing started for ${totalFiles} files. Check the task notification panel for real-time progress. (Task ID: ${taskId})`,
); );
setBucketUrl(""); setBucketUrl("");
} else { } else {
setUploadStatus(`Error: ${result.error || "Bucket processing failed"}`); setUploadStatus(`Error: ${result.error || "Bucket processing failed"}`);
} }
} catch (error) { } catch (error) {
setUploadStatus( setUploadStatus(
`Error: ${error instanceof Error ? error.message : "Bucket processing failed"}`, `Error: ${error instanceof Error ? error.message : "Bucket processing failed"}`,
); );
} finally { } finally {
setBucketUploadLoading(false); setBucketUploadLoading(false);
} }
}; };
const handlePathUpload = async (e: React.FormEvent) => { const handlePathUpload = async (e: React.FormEvent) => {
e.preventDefault(); e.preventDefault();
if (!folderPath.trim()) return; if (!folderPath.trim()) return;
setPathUploadLoading(true); setPathUploadLoading(true);
setUploadStatus(""); setUploadStatus("");
try { try {
const response = await fetch("/api/upload_path", { const response = await fetch("/api/upload_path", {
method: "POST", method: "POST",
headers: { headers: {
"Content-Type": "application/json", "Content-Type": "application/json",
}, },
body: JSON.stringify({ path: folderPath }), body: JSON.stringify({ path: folderPath }),
}); });
const result = await response.json(); const result = await response.json();
if (response.status === 201) { if (response.status === 201) {
// New flow: Got task ID, use centralized tracking // New flow: Got task ID, use centralized tracking
const taskId = result.task_id || result.id; const taskId = result.task_id || result.id;
const totalFiles = result.total_files || 0; const totalFiles = result.total_files || 0;
if (!taskId) { if (!taskId) {
throw new Error("No task ID received from server"); throw new Error("No task ID received from server");
} }
// Add task to centralized tracking // Add task to centralized tracking
addTask(taskId); addTask(taskId);
setUploadStatus( setUploadStatus(
`🔄 Processing started for ${totalFiles} files. Check the task notification panel for real-time progress. (Task ID: ${taskId})`, `🔄 Processing started for ${totalFiles} files. Check the task notification panel for real-time progress. (Task ID: ${taskId})`,
); );
setFolderPath(""); setFolderPath("");
setPathUploadLoading(false); setPathUploadLoading(false);
} else if (response.ok) { } else if (response.ok) {
// Original flow: Direct response with results // Original flow: Direct response with results
const successful = const successful =
result.results?.filter( result.results?.filter(
(r: { status: string }) => r.status === "indexed", (r: { status: string }) => r.status === "indexed",
).length || 0; ).length || 0;
const total = result.results?.length || 0; const total = result.results?.length || 0;
setUploadStatus( setUploadStatus(
`Path processed successfully! ${successful}/${total} files indexed.`, `Path processed successfully! ${successful}/${total} files indexed.`,
); );
setFolderPath(""); setFolderPath("");
setPathUploadLoading(false); setPathUploadLoading(false);
} else { } else {
setUploadStatus(`Error: ${result.error || "Path upload failed"}`); setUploadStatus(`Error: ${result.error || "Path upload failed"}`);
setPathUploadLoading(false); setPathUploadLoading(false);
} }
} catch (error) { } catch (error) {
setUploadStatus( setUploadStatus(
`Error: ${error instanceof Error ? error.message : "Path upload failed"}`, `Error: ${error instanceof Error ? error.message : "Path upload failed"}`,
); );
setPathUploadLoading(false); setPathUploadLoading(false);
} }
}; };
// Remove the old pollPathTaskStatus function since we're using centralized system // Remove the old pollPathTaskStatus function since we're using centralized system
return ( return (
<div className="space-y-8"> <div className="space-y-8">
<div> <div>
<h1 className="text-3xl font-bold">Ingest</h1> <h1 className="text-3xl font-bold">Ingest</h1>
<p className="text-muted-foreground"> <p className="text-muted-foreground">
Upload and manage documents in your database Upload and manage documents in your database
</p> </p>
</div> </div>
{uploadStatus && ( {uploadStatus && (
<Card <Card
className={ className={
uploadStatus.includes("Error") uploadStatus.includes("Error")
? "border-destructive" ? "border-destructive"
: "border-green-500" : "border-green-500"
} }
> >
<CardContent className="pt-6"> <CardContent className="pt-6">
<p <p
className={ className={
uploadStatus.includes("Error") uploadStatus.includes("Error")
? "text-destructive" ? "text-destructive"
: "text-green-600" : "text-green-600"
} }
> >
{uploadStatus} {uploadStatus}
</p> </p>
</CardContent> </CardContent>
</Card> </Card>
)} )}
<div className="grid gap-6 md:grid-cols-3"> <div className="grid gap-6 md:grid-cols-3">
<Card> <Card>
<CardHeader> <CardHeader>
<CardTitle className="flex items-center gap-2"> <CardTitle className="flex items-center gap-2">
<Upload className="h-5 w-5" /> <Upload className="h-5 w-5" />
Upload File Upload File
</CardTitle> </CardTitle>
<CardDescription> <CardDescription>
Upload a single document to be indexed and searchable Upload a single document to be indexed and searchable
</CardDescription> </CardDescription>
</CardHeader> </CardHeader>
<CardContent> <CardContent>
<form onSubmit={handleFileUpload} className="space-y-4"> <form onSubmit={handleFileUpload} className="space-y-4">
<div className="space-y-2"> <div className="space-y-2">
<Label htmlFor="file-input">Select File</Label> <Label htmlFor="file-input">Select File</Label>
<Input <Input
id="file-input" id="file-input"
type="file" type="file"
onChange={(e) => setSelectedFile(e.target.files?.[0] || null)} onChange={(e) => setSelectedFile(e.target.files?.[0] || null)}
accept=".pdf,.doc,.docx,.txt,.md" accept=".pdf,.doc,.docx,.txt,.md"
className="cursor-pointer" className="cursor-pointer"
/> />
</div> </div>
<Button <Button
type="submit" type="submit"
disabled={!selectedFile || fileUploadLoading} disabled={!selectedFile || fileUploadLoading}
className="w-full" className="w-full"
> >
{fileUploadLoading ? ( {fileUploadLoading ? (
<> <>
<Loader2 className="mr-2 h-4 w-4 animate-spin" /> <Loader2 className="mr-2 h-4 w-4 animate-spin" />
Uploading... Uploading...
</> </>
) : ( ) : (
<> <>
<Upload className="mr-2 h-4 w-4" /> <Upload className="mr-2 h-4 w-4" />
Upload File Upload File
</> </>
)} )}
</Button> </Button>
</form> </form>
</CardContent> </CardContent>
</Card> </Card>
<Card> <Card>
<CardHeader> <CardHeader>
<CardTitle className="flex items-center gap-2"> <CardTitle className="flex items-center gap-2">
<FolderOpen className="h-5 w-5" /> <FolderOpen className="h-5 w-5" />
Upload Folder Upload Folder
</CardTitle> </CardTitle>
<CardDescription> <CardDescription>
Process all documents in a folder path on the server Process all documents in a folder path on the server
</CardDescription> </CardDescription>
</CardHeader> </CardHeader>
<CardContent> <CardContent>
<form onSubmit={handlePathUpload} className="space-y-4"> <form onSubmit={handlePathUpload} className="space-y-4">
<div className="space-y-2"> <div className="space-y-2">
<Label htmlFor="folder-path">Folder Path</Label> <Label htmlFor="folder-path">Folder Path</Label>
<Input <Input
id="folder-path" id="folder-path"
type="text" type="text"
placeholder="/path/to/documents" placeholder="/path/to/documents"
value={folderPath} value={folderPath}
onChange={(e) => setFolderPath(e.target.value)} onChange={(e) => setFolderPath(e.target.value)}
/> />
</div> </div>
<Button <Button
type="submit" type="submit"
disabled={!folderPath.trim() || pathUploadLoading} disabled={!folderPath.trim() || pathUploadLoading}
className="w-full" className="w-full"
> >
{pathUploadLoading ? ( {pathUploadLoading ? (
<> <>
<Loader2 className="mr-2 h-4 w-4 animate-spin" /> <Loader2 className="mr-2 h-4 w-4 animate-spin" />
Processing... Processing...
</> </>
) : ( ) : (
<> <>
<FolderOpen className="mr-2 h-4 w-4" /> <FolderOpen className="mr-2 h-4 w-4" />
Process Folder Process Folder
</> </>
)} )}
</Button> </Button>
</form> </form>
</CardContent> </CardContent>
</Card> </Card>
{awsEnabled && ( {awsEnabled && (
<Card> <Card>
<CardHeader> <CardHeader>
<CardTitle className="flex items-center gap-2"> <CardTitle className="flex items-center gap-2">
<Cloud className="h-5 w-5" /> <Cloud className="h-5 w-5" />
Process Bucket Process Bucket
</CardTitle> </CardTitle>
<CardDescription> <CardDescription>
Process all documents from an S3 bucket. AWS credentials must be Process all documents from an S3 bucket. AWS credentials must be
set as environment variables. set as environment variables.
</CardDescription> </CardDescription>
</CardHeader> </CardHeader>
<CardContent> <CardContent>
<form onSubmit={handleBucketUpload} className="space-y-4"> <form onSubmit={handleBucketUpload} className="space-y-4">
<div className="space-y-2"> <div className="space-y-2">
<Label htmlFor="bucket-url">S3 URL</Label> <Label htmlFor="bucket-url">S3 URL</Label>
<Input <Input
id="bucket-url" id="bucket-url"
type="text" type="text"
placeholder="s3://bucket/path" placeholder="s3://bucket/path"
value={bucketUrl} value={bucketUrl}
onChange={(e) => setBucketUrl(e.target.value)} onChange={(e) => setBucketUrl(e.target.value)}
/> />
</div> </div>
<Button <Button
type="submit" type="submit"
disabled={!bucketUrl.trim() || bucketUploadLoading} disabled={!bucketUrl.trim() || bucketUploadLoading}
className="w-full" className="w-full"
> >
{bucketUploadLoading ? ( {bucketUploadLoading ? (
<> <>
<Loader2 className="mr-2 h-4 w-4 animate-spin" /> <Loader2 className="mr-2 h-4 w-4 animate-spin" />
Processing... Processing...
</> </>
) : ( ) : (
<> <>
<Cloud className="mr-2 h-4 w-4" /> <Cloud className="mr-2 h-4 w-4" />
Process Bucket Process Bucket
</> </>
)} )}
</Button> </Button>
</form> </form>
</CardContent> </CardContent>
</Card> </Card>
)} )}
</div> </div>
</div> </div>
); );
} }
export default function ProtectedAdminPage() { export default function ProtectedAdminPage() {
return ( return (
<ProtectedRoute> <ProtectedRoute>
<AdminPage /> <AdminPage />
</ProtectedRoute> </ProtectedRoute>
); );
} }

View file

@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project] [project]
name = "openrag" name = "openrag"
version = "0.1.45" version = "0.1.46"
description = "Add your description here" description = "Add your description here"
readme = "README.md" readme = "README.md"
requires-python = ">=3.13" requires-python = ">=3.13"

View file

@ -304,7 +304,7 @@ async def init_index_when_ready():
def _get_documents_dir(): def _get_documents_dir():
"""Get the documents directory path, handling both Docker and local environments.""" """Get the documents directory path, handling both Docker and local environments."""
# In Docker, the volume is mounted at /app/documents # In Docker, the volume is mounted at /app/openrag-documents
# Locally, we use openrag-documents # Locally, we use openrag-documents
container_env = detect_container_environment() container_env = detect_container_environment()
if container_env: if container_env:

View file

@ -521,15 +521,15 @@ class EnvManager:
) )
if not is_valid: if not is_valid:
return ["./openrag-documents:/app/documents:Z"] # fallback return ["./openrag-documents:/app/openrag-documents:Z"] # fallback
volume_mounts = [] volume_mounts = []
for i, path in enumerate(validated_paths): for i, path in enumerate(validated_paths):
if i == 0: if i == 0:
# First path maps to the default /app/documents # First path maps to the default /app/openrag-documents
volume_mounts.append(f"{path}:/app/documents:Z") volume_mounts.append(f"{path}:/app/openrag-documents:Z")
else: else:
# Additional paths map to numbered directories # Additional paths map to numbered directories
volume_mounts.append(f"{path}:/app/documents{i + 1}:Z") volume_mounts.append(f"{path}:/app/openrag-documents{i + 1}:Z")
return volume_mounts return volume_mounts

2
uv.lock generated
View file

@ -2353,7 +2353,7 @@ wheels = [
[[package]] [[package]]
name = "openrag" name = "openrag"
version = "0.1.44" version = "0.1.46"
source = { editable = "." } source = { editable = "." }
dependencies = [ dependencies = [
{ name = "agentd" }, { name = "agentd" },