fix: openrag documents ingestion (#545)

* Changed references to /app/documents to reference /app/openrag-documents

* bump version
This commit is contained in:
Lucas Oliveira 2025-11-27 15:22:09 -03:00 committed by GitHub
parent 268d5b3d00
commit 3624b4f82b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 332 additions and 332 deletions

View file

@ -81,7 +81,7 @@ services:
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
volumes:
- ./openrag-documents:/app/documents:Z
- ./openrag-documents:/app/openrag-documents:Z
- ./keys:/app/keys:Z
- ./flows:/app/flows:U,z
- ./config:/app/config:Z

View file

@ -29,7 +29,7 @@ To configure the knowledge ingestion pipeline parameters, see [Docling Ingestion
The **Knowledge Ingest** flow uses Langflow's [**File** component](https://docs.langflow.org/components-data#file) to split and embed files loaded from your local machine into the OpenSearch database.
The default path to your local folder is mounted from the `./openrag-documents` folder in your OpenRAG project directory to the `/app/documents/` directory inside the Docker container. Files added to the host or the container will be visible in both locations. To configure this location, modify the **Documents Paths** variable in either the TUI's [Advanced Setup](/install#setup) menu or in the `.env` used by Docker Compose.
The default path to your local folder is mounted from the `./openrag-documents` folder in your OpenRAG project directory to the `/app/openrag-documents/` directory inside the Docker container. Files added to the host or the container will be visible in both locations. To configure this location, modify the **Documents Paths** variable in either the TUI's [Advanced Setup](/install#setup) menu or in the `.env` used by Docker Compose.
To load and process a single file from the mapped location, click **Add Knowledge**, and then click <Icon name="File" aria-hidden="true"/> **File**.
The file is loaded into your OpenSearch database, and appears in the Knowledge page.

View file

@ -1,364 +1,364 @@
"use client";
import { useState, useEffect } from "react";
import { Cloud, FolderOpen, Loader2, Upload } from "lucide-react";
import { useEffect, useState } from "react";
import { ProtectedRoute } from "@/components/protected-route";
import { Button } from "@/components/ui/button";
import {
Card,
CardContent,
CardDescription,
CardHeader,
CardTitle,
Card,
CardContent,
CardDescription,
CardHeader,
CardTitle,
} from "@/components/ui/card";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { Upload, FolderOpen, Loader2, Cloud } from "lucide-react";
import { ProtectedRoute } from "@/components/protected-route";
import { useTask } from "@/contexts/task-context";
/**
 * Admin "Ingest" page.
 *
 * Renders up to three upload cards and one shared status banner:
 *  - "Upload File": posts one selected file to /api/router/upload_ingest.
 *  - "Upload Folder": posts a server-side folder path to /api/upload_path.
 *  - "Process Bucket": posts an S3 URL to /api/upload_bucket; this card is
 *    rendered only when /api/upload_options reports a truthy `aws` field.
 *
 * NOTE(review): this text appears to be a rendered commit diff. Every hunk
 * shows up twice in a row: first the pre-commit copy, then the post-commit
 * copy (e.g. the default folder path "/app/documents/" is followed by
 * "/app/openrag-documents/"). The comments below are attached to the second
 * copy of each pair.
 */
function AdminPage() {
console.log("AdminPage component rendered!");
const [fileUploadLoading, setFileUploadLoading] = useState(false);
const [pathUploadLoading, setPathUploadLoading] = useState(false);
const [selectedFile, setSelectedFile] = useState<File | null>(null);
const [folderPath, setFolderPath] = useState("/app/documents/");
const [bucketUploadLoading, setBucketUploadLoading] = useState(false);
const [bucketUrl, setBucketUrl] = useState("s3://");
const [uploadStatus, setUploadStatus] = useState<string>("");
const [awsEnabled, setAwsEnabled] = useState(false);
const { addTask } = useTask();
// Debug trace emitted on every render of this component.
console.log("AdminPage component rendered!");
// One loading flag per upload form so each submit button disables independently.
const [fileUploadLoading, setFileUploadLoading] = useState(false);
const [pathUploadLoading, setPathUploadLoading] = useState(false);
const [selectedFile, setSelectedFile] = useState<File | null>(null);
// Pre-filled folder path; presumably the in-container mount point of the
// documents volume — confirm against the docker-compose volume mapping.
const [folderPath, setFolderPath] = useState("/app/openrag-documents/");
const [bucketUploadLoading, setBucketUploadLoading] = useState(false);
const [bucketUrl, setBucketUrl] = useState("s3://");
// Single status message shared by all three forms; each handler clears it on submit.
const [uploadStatus, setUploadStatus] = useState<string>("");
// Gates rendering of the "Process Bucket" card.
const [awsEnabled, setAwsEnabled] = useState(false);
// addTask hands a server task id to the task context for tracking.
const { addTask } = useTask();
useEffect(() => {
console.log("AdminPage useEffect running - checking AWS availability");
const checkAws = async () => {
try {
console.log("Making request to /api/upload_options");
const res = await fetch("/api/upload_options");
console.log("Response status:", res.status, "OK:", res.ok);
if (res.ok) {
const data = await res.json();
console.log("Response data:", data);
setAwsEnabled(Boolean(data.aws));
}
} catch (err) {
console.error("Failed to check AWS availability", err);
}
};
checkAws();
}, []);
// Runs once after mount (empty dependency list): asks the backend whether
// AWS uploads are available. A non-OK response or a thrown error leaves
// awsEnabled at its initial false; errors are only logged.
useEffect(() => {
console.log("AdminPage useEffect running - checking AWS availability");
const checkAws = async () => {
try {
console.log("Making request to /api/upload_options");
const res = await fetch("/api/upload_options");
console.log("Response status:", res.status, "OK:", res.ok);
if (res.ok) {
const data = await res.json();
console.log("Response data:", data);
setAwsEnabled(Boolean(data.aws));
}
} catch (err) {
console.error("Failed to check AWS availability", err);
}
};
checkAws();
}, []);
const handleFileUpload = async (e: React.FormEvent) => {
e.preventDefault();
if (!selectedFile) return;
// Submit handler for the "Upload File" form. Does nothing when no file is
// selected. Sends the file as multipart form data under the field name
// "file" and writes a success or error message into uploadStatus.
const handleFileUpload = async (e: React.FormEvent) => {
e.preventDefault();
if (!selectedFile) return;
setFileUploadLoading(true);
setUploadStatus("");
setFileUploadLoading(true);
setUploadStatus("");
try {
const formData = new FormData();
formData.append("file", selectedFile);
try {
const formData = new FormData();
formData.append("file", selectedFile);
const response = await fetch("/api/router/upload_ingest", {
method: "POST",
body: formData,
});
const response = await fetch("/api/router/upload_ingest", {
method: "POST",
body: formData,
});
const result = await response.json();
// The body is parsed before response.ok is checked, so a non-JSON body
// throws here and is reported by the catch block below.
const result = await response.json();
if (response.ok) {
setUploadStatus(`File uploaded successfully! ID: ${result.id}`);
setSelectedFile(null);
// Reset the file input
const fileInput = document.getElementById(
"file-input",
) as HTMLInputElement;
if (fileInput) fileInput.value = "";
} else {
setUploadStatus(`Error: ${result.error || "Upload failed"}`);
}
} catch (error) {
setUploadStatus(
`Error: ${error instanceof Error ? error.message : "Upload failed"}`,
);
} finally {
setFileUploadLoading(false);
}
};
if (response.ok) {
setUploadStatus(`File uploaded successfully! ID: ${result.id}`);
setSelectedFile(null);
// Reset the file input
const fileInput = document.getElementById(
"file-input",
) as HTMLInputElement;
if (fileInput) fileInput.value = "";
} else {
setUploadStatus(`Error: ${result.error || "Upload failed"}`);
}
} catch (error) {
setUploadStatus(
`Error: ${error instanceof Error ? error.message : "Upload failed"}`,
);
} finally {
setFileUploadLoading(false);
}
};
const handleBucketUpload = async (e: React.FormEvent) => {
e.preventDefault();
if (!bucketUrl.trim()) return;
// Submit handler for the "Process Bucket" form. Does nothing when the URL
// is blank. Sends { s3_url } as JSON; only HTTP 201 is treated as success,
// every other status is reported as an error.
const handleBucketUpload = async (e: React.FormEvent) => {
e.preventDefault();
if (!bucketUrl.trim()) return;
setBucketUploadLoading(true);
setUploadStatus("");
setBucketUploadLoading(true);
setUploadStatus("");
try {
const response = await fetch("/api/upload_bucket", {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({ s3_url: bucketUrl }),
});
try {
const response = await fetch("/api/upload_bucket", {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({ s3_url: bucketUrl }),
});
const result = await response.json();
const result = await response.json();
if (response.status === 201) {
const taskId = result.task_id || result.id;
const totalFiles = result.total_files || 0;
if (response.status === 201) {
// The task id is read from `task_id`, falling back to `id`.
const taskId = result.task_id || result.id;
const totalFiles = result.total_files || 0;
if (!taskId) {
throw new Error("No task ID received from server");
}
// Thrown inside the try, so the catch below turns it into an "Error: ..." status.
if (!taskId) {
throw new Error("No task ID received from server");
}
addTask(taskId);
setUploadStatus(
`🔄 Processing started for ${totalFiles} files. Check the task notification panel for real-time progress. (Task ID: ${taskId})`,
);
setBucketUrl("");
} else {
setUploadStatus(`Error: ${result.error || "Bucket processing failed"}`);
}
} catch (error) {
setUploadStatus(
`Error: ${error instanceof Error ? error.message : "Bucket processing failed"}`,
);
} finally {
setBucketUploadLoading(false);
}
};
addTask(taskId);
setUploadStatus(
`🔄 Processing started for ${totalFiles} files. Check the task notification panel for real-time progress. (Task ID: ${taskId})`,
);
setBucketUrl("");
} else {
setUploadStatus(`Error: ${result.error || "Bucket processing failed"}`);
}
} catch (error) {
setUploadStatus(
`Error: ${error instanceof Error ? error.message : "Bucket processing failed"}`,
);
} finally {
setBucketUploadLoading(false);
}
};
const handlePathUpload = async (e: React.FormEvent) => {
e.preventDefault();
if (!folderPath.trim()) return;
// Submit handler for the "Upload Folder" form. Does nothing when the path
// is blank. Sends { path } as JSON and handles three outcomes: HTTP 201
// (task started), any other OK status (direct results), or an error.
// Unlike the other two handlers there is no `finally`; the loading flag
// is cleared explicitly in every branch and in the catch.
const handlePathUpload = async (e: React.FormEvent) => {
e.preventDefault();
if (!folderPath.trim()) return;
setPathUploadLoading(true);
setUploadStatus("");
setPathUploadLoading(true);
setUploadStatus("");
try {
const response = await fetch("/api/upload_path", {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({ path: folderPath }),
});
try {
const response = await fetch("/api/upload_path", {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({ path: folderPath }),
});
const result = await response.json();
const result = await response.json();
if (response.status === 201) {
// New flow: Got task ID, use centralized tracking
const taskId = result.task_id || result.id;
const totalFiles = result.total_files || 0;
if (response.status === 201) {
// New flow: Got task ID, use centralized tracking
const taskId = result.task_id || result.id;
const totalFiles = result.total_files || 0;
if (!taskId) {
throw new Error("No task ID received from server");
}
if (!taskId) {
throw new Error("No task ID received from server");
}
// Add task to centralized tracking
addTask(taskId);
// Add task to centralized tracking
addTask(taskId);
setUploadStatus(
`🔄 Processing started for ${totalFiles} files. Check the task notification panel for real-time progress. (Task ID: ${taskId})`,
);
setFolderPath("");
setPathUploadLoading(false);
} else if (response.ok) {
// Original flow: Direct response with results
const successful =
result.results?.filter(
(r: { status: string }) => r.status === "indexed",
).length || 0;
const total = result.results?.length || 0;
setUploadStatus(
`Path processed successfully! ${successful}/${total} files indexed.`,
);
setFolderPath("");
setPathUploadLoading(false);
} else {
setUploadStatus(`Error: ${result.error || "Path upload failed"}`);
setPathUploadLoading(false);
}
} catch (error) {
setUploadStatus(
`Error: ${error instanceof Error ? error.message : "Path upload failed"}`,
);
setPathUploadLoading(false);
}
};
setUploadStatus(
`🔄 Processing started for ${totalFiles} files. Check the task notification panel for real-time progress. (Task ID: ${taskId})`,
);
setFolderPath("");
setPathUploadLoading(false);
} else if (response.ok) {
// Original flow: Direct response with results
// Counts the entries of result.results whose status is "indexed";
// a missing results array yields 0/0.
const successful =
result.results?.filter(
(r: { status: string }) => r.status === "indexed",
).length || 0;
const total = result.results?.length || 0;
setUploadStatus(
`Path processed successfully! ${successful}/${total} files indexed.`,
);
setFolderPath("");
setPathUploadLoading(false);
} else {
setUploadStatus(`Error: ${result.error || "Path upload failed"}`);
setPathUploadLoading(false);
}
} catch (error) {
setUploadStatus(
`Error: ${error instanceof Error ? error.message : "Path upload failed"}`,
);
setPathUploadLoading(false);
}
};
// Remove the old pollPathTaskStatus function since we're using centralized system
// Remove the old pollPathTaskStatus function since we're using centralized system
return (
<div className="space-y-8">
<div>
<h1 className="text-3xl font-bold">Ingest</h1>
<p className="text-muted-foreground">
Upload and manage documents in your database
</p>
</div>
// Render: page heading, then the status banner (shown only when uploadStatus
// is non-empty; styled as destructive when the text contains "Error", green
// otherwise), then the grid of upload cards.
return (
<div className="space-y-8">
<div>
<h1 className="text-3xl font-bold">Ingest</h1>
<p className="text-muted-foreground">
Upload and manage documents in your database
</p>
</div>
{uploadStatus && (
<Card
className={
uploadStatus.includes("Error")
? "border-destructive"
: "border-green-500"
}
>
<CardContent className="pt-6">
<p
className={
uploadStatus.includes("Error")
? "text-destructive"
: "text-green-600"
}
>
{uploadStatus}
</p>
</CardContent>
</Card>
)}
{uploadStatus && (
<Card
className={
uploadStatus.includes("Error")
? "border-destructive"
: "border-green-500"
}
>
<CardContent className="pt-6">
<p
className={
uploadStatus.includes("Error")
? "text-destructive"
: "text-green-600"
}
>
{uploadStatus}
</p>
</CardContent>
</Card>
)}
<div className="grid gap-6 md:grid-cols-3">
<Card>
<CardHeader>
<CardTitle className="flex items-center gap-2">
<Upload className="h-5 w-5" />
Upload File
</CardTitle>
<CardDescription>
Upload a single document to be indexed and searchable
</CardDescription>
</CardHeader>
<CardContent>
<form onSubmit={handleFileUpload} className="space-y-4">
<div className="space-y-2">
<Label htmlFor="file-input">Select File</Label>
<Input
id="file-input"
type="file"
onChange={(e) => setSelectedFile(e.target.files?.[0] || null)}
accept=".pdf,.doc,.docx,.txt,.md"
className="cursor-pointer"
/>
</div>
<Button
type="submit"
disabled={!selectedFile || fileUploadLoading}
className="w-full"
>
{fileUploadLoading ? (
<>
<Loader2 className="mr-2 h-4 w-4 animate-spin" />
Uploading...
</>
) : (
<>
<Upload className="mr-2 h-4 w-4" />
Upload File
</>
)}
</Button>
</form>
</CardContent>
</Card>
<div className="grid gap-6 md:grid-cols-3">
<Card>
<CardHeader>
<CardTitle className="flex items-center gap-2">
<Upload className="h-5 w-5" />
Upload File
</CardTitle>
<CardDescription>
Upload a single document to be indexed and searchable
</CardDescription>
</CardHeader>
<CardContent>
<form onSubmit={handleFileUpload} className="space-y-4">
<div className="space-y-2">
<Label htmlFor="file-input">Select File</Label>
<Input
id="file-input"
type="file"
onChange={(e) => setSelectedFile(e.target.files?.[0] || null)}
accept=".pdf,.doc,.docx,.txt,.md"
className="cursor-pointer"
/>
</div>
<Button
type="submit"
disabled={!selectedFile || fileUploadLoading}
className="w-full"
>
{fileUploadLoading ? (
<>
<Loader2 className="mr-2 h-4 w-4 animate-spin" />
Uploading...
</>
) : (
<>
<Upload className="mr-2 h-4 w-4" />
Upload File
</>
)}
</Button>
</form>
</CardContent>
</Card>
<Card>
<CardHeader>
<CardTitle className="flex items-center gap-2">
<FolderOpen className="h-5 w-5" />
Upload Folder
</CardTitle>
<CardDescription>
Process all documents in a folder path on the server
</CardDescription>
</CardHeader>
<CardContent>
<form onSubmit={handlePathUpload} className="space-y-4">
<div className="space-y-2">
<Label htmlFor="folder-path">Folder Path</Label>
<Input
id="folder-path"
type="text"
placeholder="/path/to/documents"
value={folderPath}
onChange={(e) => setFolderPath(e.target.value)}
/>
</div>
<Button
type="submit"
disabled={!folderPath.trim() || pathUploadLoading}
className="w-full"
>
{pathUploadLoading ? (
<>
<Loader2 className="mr-2 h-4 w-4 animate-spin" />
Processing...
</>
) : (
<>
<FolderOpen className="mr-2 h-4 w-4" />
Process Folder
</>
)}
</Button>
</form>
</CardContent>
</Card>
{awsEnabled && (
<Card>
<CardHeader>
<CardTitle className="flex items-center gap-2">
<Cloud className="h-5 w-5" />
Process Bucket
</CardTitle>
<CardDescription>
Process all documents from an S3 bucket. AWS credentials must be
set as environment variables.
</CardDescription>
</CardHeader>
<CardContent>
<form onSubmit={handleBucketUpload} className="space-y-4">
<div className="space-y-2">
<Label htmlFor="bucket-url">S3 URL</Label>
<Input
id="bucket-url"
type="text"
placeholder="s3://bucket/path"
value={bucketUrl}
onChange={(e) => setBucketUrl(e.target.value)}
/>
</div>
<Button
type="submit"
disabled={!bucketUrl.trim() || bucketUploadLoading}
className="w-full"
>
{bucketUploadLoading ? (
<>
<Loader2 className="mr-2 h-4 w-4 animate-spin" />
Processing...
</>
) : (
<>
<Cloud className="mr-2 h-4 w-4" />
Process Bucket
</>
)}
</Button>
</form>
</CardContent>
</Card>
)}
</div>
</div>
);
<Card>
<CardHeader>
<CardTitle className="flex items-center gap-2">
<FolderOpen className="h-5 w-5" />
Upload Folder
</CardTitle>
<CardDescription>
Process all documents in a folder path on the server
</CardDescription>
</CardHeader>
<CardContent>
<form onSubmit={handlePathUpload} className="space-y-4">
<div className="space-y-2">
<Label htmlFor="folder-path">Folder Path</Label>
<Input
id="folder-path"
type="text"
placeholder="/path/to/documents"
value={folderPath}
onChange={(e) => setFolderPath(e.target.value)}
/>
</div>
<Button
type="submit"
disabled={!folderPath.trim() || pathUploadLoading}
className="w-full"
>
{pathUploadLoading ? (
<>
<Loader2 className="mr-2 h-4 w-4 animate-spin" />
Processing...
</>
) : (
<>
<FolderOpen className="mr-2 h-4 w-4" />
Process Folder
</>
)}
</Button>
</form>
</CardContent>
</Card>
{awsEnabled && (
<Card>
<CardHeader>
<CardTitle className="flex items-center gap-2">
<Cloud className="h-5 w-5" />
Process Bucket
</CardTitle>
<CardDescription>
Process all documents from an S3 bucket. AWS credentials must be
set as environment variables.
</CardDescription>
</CardHeader>
<CardContent>
<form onSubmit={handleBucketUpload} className="space-y-4">
<div className="space-y-2">
<Label htmlFor="bucket-url">S3 URL</Label>
<Input
id="bucket-url"
type="text"
placeholder="s3://bucket/path"
value={bucketUrl}
onChange={(e) => setBucketUrl(e.target.value)}
/>
</div>
<Button
type="submit"
disabled={!bucketUrl.trim() || bucketUploadLoading}
className="w-full"
>
{bucketUploadLoading ? (
<>
<Loader2 className="mr-2 h-4 w-4 animate-spin" />
Processing...
</>
) : (
<>
<Cloud className="mr-2 h-4 w-4" />
Process Bucket
</>
)}
</Button>
</form>
</CardContent>
</Card>
)}
</div>
</div>
);
}
/**
 * Default export of the page module: renders AdminPage wrapped in
 * ProtectedRoute.
 *
 * NOTE(review): the return statement appears twice because this text looks
 * like a rendered diff (pre-commit copy followed by post-commit copy); the
 * two copies contain identical tokens.
 */
export default function ProtectedAdminPage() {
return (
<ProtectedRoute>
<AdminPage />
</ProtectedRoute>
);
return (
<ProtectedRoute>
<AdminPage />
</ProtectedRoute>
);
}

View file

@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "openrag"
version = "0.1.45"
version = "0.1.46"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.13"

View file

@ -304,7 +304,7 @@ async def init_index_when_ready():
def _get_documents_dir():
"""Get the documents directory path, handling both Docker and local environments."""
# In Docker, the volume is mounted at /app/documents
# In Docker, the volume is mounted at /app/openrag-documents
# Locally, we use openrag-documents
container_env = detect_container_environment()
if container_env:

View file

@ -521,15 +521,15 @@ class EnvManager:
)
if not is_valid:
return ["./openrag-documents:/app/documents:Z"] # fallback
return ["./openrag-documents:/app/openrag-documents:Z"] # fallback
volume_mounts = []
for i, path in enumerate(validated_paths):
if i == 0:
# First path maps to the default /app/documents
volume_mounts.append(f"{path}:/app/documents:Z")
# First path maps to the default /app/openrag-documents
volume_mounts.append(f"{path}:/app/openrag-documents:Z")
else:
# Additional paths map to numbered directories
volume_mounts.append(f"{path}:/app/documents{i + 1}:Z")
volume_mounts.append(f"{path}:/app/openrag-documents{i + 1}:Z")
return volume_mounts

2
uv.lock generated
View file

@ -2353,7 +2353,7 @@ wheels = [
[[package]]
name = "openrag"
version = "0.1.44"
version = "0.1.46"
source = { editable = "." }
dependencies = [
{ name = "agentd" },