Merge branch 'main' into lfx-openrag-update-flows

Edwin Jose 2025-09-24 11:39:09 -04:00
commit f710ae2137
26 changed files with 4435 additions and 1517 deletions

View file

@ -8,6 +8,8 @@ LANGFLOW_SECRET_KEY=
# flow ids for chat and ingestion flows
LANGFLOW_CHAT_FLOW_ID=1098eea1-6649-4e1d-aed1-b77249fb8dd0
LANGFLOW_INGEST_FLOW_ID=5488df7c-b93f-4f87-a446-b67028bc0813
# Ingest flow using docling
LANGFLOW_INGEST_FLOW_ID=1402618b-e6d1-4ff2-9a11-d6ce71186915
NUDGES_FLOW_ID=ebc01d31-1976-46ce-a385-b0240327226c
# Set a strong admin password for OpenSearch; a bcrypt hash is generated at
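Note: both `LANGFLOW_INGEST_FLOW_ID` assignments above are active; most env loaders (python-dotenv, Docker Compose's `env_file`) keep the last assignment, so the docling ingest flow ID is the one that takes effect. Comment out whichever of the two you don't want.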

View file

@ -1,48 +0,0 @@
---
title: What is OpenRAG?
slug: /
---
# OpenRAG Introduction
Let's discover **Docusaurus in less than 5 minutes**.
## Getting Started
Get started by **creating a new site**.
Or **try Docusaurus immediately** with **[docusaurus.new](https://docusaurus.new)**.
### What you'll need
- [Node.js](https://nodejs.org/en/download/) version 18.0 or above:
- When installing Node.js, you are recommended to check all checkboxes related to dependencies.
## Generate a new site
Generate a new Docusaurus site using the **classic template**.
The classic template will automatically be added to your project after you run the command:
```bash
npm init docusaurus@latest my-website classic
```
You can type this command into Command Prompt, Powershell, Terminal, or any other integrated terminal of your code editor.
The command also installs all necessary dependencies you need to run Docusaurus.
## Start your site
Run the development server:
```bash
cd my-website
npm run start
```
The `cd` command changes the directory you're working with. In order to work with your newly created Docusaurus site, you'll need to navigate the terminal there.
The `npm run start` command builds your website locally and serves it through a development server, ready for you to view at http://localhost:3000/.
Open `docs/intro.md` (this page) and edit some lines: the site **reloads automatically** and displays your changes.

View file

@ -0,0 +1,19 @@
---
title: What is OpenRAG?
slug: /what-is-openrag
---
OpenRAG is an open-source package for building agentic RAG systems.
It supports integration with a wide range of orchestration tools, vector databases, and LLM providers.
OpenRAG connects and amplifies three popular, proven open-source projects into one powerful platform:
* [Langflow](https://docs.langflow.org) - Langflow is a powerful tool to build and deploy AI agents and MCP servers. It supports all major LLMs, vector databases and a growing library of AI tools.
* [OpenSearch](https://docs.opensearch.org/latest/) - OpenSearch is a community-driven, Apache 2.0-licensed open source search and analytics suite that makes it easy to ingest, search, visualize, and analyze data.
* [Docling](https://docling-project.github.io/docling/) - Docling simplifies document processing, parsing diverse formats — including advanced PDF understanding — and providing seamless integrations with the gen AI ecosystem.
OpenRAG builds on Langflow's familiar interface while adding OpenSearch for vector storage and Docling for simplified document parsing, with opinionated flows that serve as ready-to-use recipes for ingestion, retrieval, and generation from popular sources like OneDrive, Google Drive, and AWS. And don't fear: every part of the stack is swappable. Write your own custom components in Python, try different language models, and customize your flows to build an agentic RAG system that fits your use case.
Ready to get started? Install OpenRAG and then run the Quickstart to create a powerful RAG pipeline.

View file

@ -71,7 +71,7 @@ const config = {
logo: {
alt: 'OpenRAG Logo',
src: 'img/logo.svg',
href: '/',
href: 'what-is-openrag',
},
items: [
{
@ -89,7 +89,7 @@ const config = {
items: [
{
label: 'Getting Started',
to: '/',
to: 'what-is-openrag',
},
],
},

docs/package-lock.json (generated)
View file

@ -12,6 +12,7 @@
"@docusaurus/preset-classic": "3.8.1",
"@mdx-js/react": "^3.0.0",
"clsx": "^2.0.0",
"lucide-react": "^0.544.0",
"prism-react-renderer": "^2.3.0",
"react": "^19.0.0",
"react-dom": "^19.0.0"
@ -9801,6 +9802,15 @@
"yallist": "^3.0.2"
}
},
"node_modules/lucide-react": {
"version": "0.544.0",
"resolved": "https://registry.npmjs.org/lucide-react/-/lucide-react-0.544.0.tgz",
"integrity": "sha512-t5tS44bqd825zAW45UQxpG2CvcC4urOwn2TrwSH8u+MjeE+1NnWl6QqeQ/6NdjMqdOygyiT9p3Ev0p1NJykxjw==",
"license": "ISC",
"peerDependencies": {
"react": "^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0"
}
},
"node_modules/markdown-extensions": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/markdown-extensions/-/markdown-extensions-2.0.0.tgz",

View file

@ -19,6 +19,7 @@
"@docusaurus/preset-classic": "3.8.1",
"@mdx-js/react": "^3.0.0",
"clsx": "^2.0.0",
"lucide-react": "^0.544.0",
"prism-react-renderer": "^2.3.0",
"react": "^19.0.0",
"react-dom": "^19.0.0"

View file

@ -22,7 +22,7 @@ const sidebars = {
items: [
{
type: "doc",
id: "get-started/intro",
id: "get-started/what-is-openrag",
label: "Introduction"
},
{

View file

@ -0,0 +1,19 @@
import React from "react";
import * as LucideIcons from "lucide-react";
/*
How to use this component:
import Icon from "@site/src/components/icon";
<Icon name="AlertCircle" size={24} color="red" />
*/
type IconProps = {
name: string;
} & LucideIcons.LucideProps;
export default function Icon({ name, ...props }: IconProps) {
// Look up the icon component by name; the namespace import is widened so
// string indexing type-checks. Unknown icon names render nothing.
const LucideIcon = (LucideIcons as unknown as Record<string, React.ComponentType<LucideIcons.LucideProps>>)[name];
return LucideIcon ? <LucideIcon {...props} /> : null;
}

File diff suppressed because it is too large

File diff suppressed because one or more lines are too long

View file

@ -14,7 +14,7 @@ interface DeleteDocumentResponse {
}
const deleteDocument = async (
data: DeleteDocumentRequest
data: DeleteDocumentRequest,
): Promise<DeleteDocumentResponse> => {
const response = await fetch("/api/documents/delete-by-filename", {
method: "POST",
@ -37,9 +37,11 @@ export const useDeleteDocument = () => {
return useMutation({
mutationFn: deleteDocument,
onSuccess: () => {
onSettled: () => {
// Invalidate and refetch search queries to update the UI
queryClient.invalidateQueries({ queryKey: ["search"] });
setTimeout(() => {
queryClient.invalidateQueries({ queryKey: ["search"] });
}, 1000);
},
});
};
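The move from `onSuccess` to `onSettled` with a one-second delay is presumably to accommodate OpenSearch's near-real-time indexing: an immediate refetch can still return the just-deleted document. A minimal sketch of the pattern, assuming `@tanstack/react-query` v5 (the hook name is illustrative):

```ts
import { useQueryClient } from "@tanstack/react-query";

// Invalidate the "search" queries after a short delay so the index has
// time to reflect the mutation; 1s matches the hook above.
export function useDelayedSearchInvalidation(delayMs = 1000) {
  const queryClient = useQueryClient();
  return () => {
    setTimeout(() => {
      queryClient.invalidateQueries({ queryKey: ["search"] });
    }, delayMs);
  };
}
```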

View file

@ -54,7 +54,7 @@ export const useGetOpenAIModelsQuery = (
queryKey: ["models", "openai", params],
queryFn: getOpenAIModels,
retry: 2,
enabled: options?.enabled !== false, // Allow enabling/disabling from options
enabled: !!params?.apiKey,
staleTime: 0, // Always fetch fresh data
gcTime: 0, // Don't cache results
...options,

View file

@ -34,21 +34,28 @@ export interface ChunkResult {
export interface File {
filename: string;
mimetype: string;
chunkCount: number;
avgScore: number;
chunkCount?: number;
avgScore?: number;
source_url: string;
owner: string;
owner_name: string;
owner_email: string;
owner?: string;
owner_name?: string;
owner_email?: string;
size: number;
connector_type: string;
chunks: ChunkResult[];
status?:
| "processing"
| "active"
| "unavailable"
| "failed"
| "hidden"
| "sync";
chunks?: ChunkResult[];
}
export const useGetSearchQuery = (
query: string,
queryData?: ParsedQueryData | null,
options?: Omit<UseQueryOptions, "queryKey" | "queryFn">
options?: Omit<UseQueryOptions, "queryKey" | "queryFn">,
) => {
const queryClient = useQueryClient();
@ -149,7 +156,7 @@ export const useGetSearchQuery = (
}
});
const files: File[] = Array.from(fileMap.values()).map(file => ({
const files: File[] = Array.from(fileMap.values()).map((file) => ({
filename: file.filename,
mimetype: file.mimetype,
chunkCount: file.chunks.length,
@ -173,11 +180,11 @@ export const useGetSearchQuery = (
const queryResult = useQuery(
{
queryKey: ["search", effectiveQuery],
placeholderData: prev => prev,
placeholderData: (prev) => prev,
queryFn: getFiles,
...options,
},
queryClient
queryClient,
);
return queryResult;
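With `chunkCount`, `avgScore`, the `owner*` fields, and `chunks` now optional (files still being ingested don't have them yet), consumers should narrow before use. A hypothetical guard, not part of the codebase:

```ts
import type { ChunkResult, File } from "./useGetSearchQuery";

// True once a file has been fully indexed and carries chunk data.
export function isIndexedFile(
  file: File,
): file is File & { chunks: ChunkResult[] } {
  return Array.isArray(file.chunks);
}
```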

View file

@ -1,17 +1,14 @@
"use client";
import {
Building2,
Cloud,
FileText,
HardDrive,
ArrowLeft,
Copy,
File as FileIcon,
Loader2,
Search,
} from "lucide-react";
import { Suspense, useCallback, useEffect, useState } from "react";
import { Suspense, useCallback, useEffect, useMemo, useState } from "react";
import { useRouter, useSearchParams } from "next/navigation";
import { SiGoogledrive } from "react-icons/si";
import { TbBrandOnedrive } from "react-icons/tb";
import { ProtectedRoute } from "@/components/protected-route";
import { Button } from "@/components/ui/button";
import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context";
@ -21,22 +18,16 @@ import {
type File,
useGetSearchQuery,
} from "../../api/queries/useGetSearchQuery";
import { Label } from "@/components/ui/label";
import { Checkbox } from "@/components/ui/checkbox";
import { Input } from "@/components/ui/input";
// Function to get the appropriate icon for a connector type
function getSourceIcon(connectorType?: string) {
switch (connectorType) {
case "google_drive":
return <SiGoogledrive className="h-4 w-4 text-foreground" />;
case "onedrive":
return <TbBrandOnedrive className="h-4 w-4 text-foreground" />;
case "sharepoint":
return <Building2 className="h-4 w-4 text-foreground" />;
case "s3":
return <Cloud className="h-4 w-4 text-foreground" />;
default:
return <HardDrive className="h-4 w-4 text-muted-foreground" />;
}
}
const getFileTypeLabel = (mimetype: string) => {
if (mimetype === "application/pdf") return "PDF";
if (mimetype === "text/plain") return "Text";
if (mimetype === "application/msword") return "Word Document";
return "Unknown";
};
function ChunksPageContent() {
const router = useRouter();
@ -46,10 +37,47 @@ function ChunksPageContent() {
const filename = searchParams.get("filename");
const [chunks, setChunks] = useState<ChunkResult[]>([]);
const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState<
ChunkResult[]
>([]);
const [selectedChunks, setSelectedChunks] = useState<Set<number>>(new Set());
// Calculate average chunk length
const averageChunkLength = useMemo(
() =>
chunks.reduce((acc, chunk) => acc + chunk.text.length, 0) /
chunks.length || 0,
[chunks]
);
const [selectAll, setSelectAll] = useState(false);
const [queryInputText, setQueryInputText] = useState(
parsedFilterData?.query ?? ""
);
// Use the same search query as the knowledge page, but we'll filter for the specific file
const { data = [], isFetching } = useGetSearchQuery("*", parsedFilterData);
useEffect(() => {
if (queryInputText === "") {
setChunksFilteredByQuery(chunks);
} else {
setChunksFilteredByQuery(
chunks.filter((chunk) =>
chunk.text.toLowerCase().includes(queryInputText.toLowerCase())
)
);
}
}, [queryInputText, chunks]);
const handleCopy = useCallback((text: string) => {
navigator.clipboard.writeText(text);
}, []);
const fileData = (data as File[]).find(
(file: File) => file.filename === filename
);
// Extract chunks for the specific file
useEffect(() => {
if (!filename || !(data as File[]).length) {
@ -57,16 +85,37 @@ function ChunksPageContent() {
return;
}
const fileData = (data as File[]).find(
(file: File) => file.filename === filename
);
setChunks(fileData?.chunks || []);
}, [data, filename]);
// Set selected state for all checkboxes when selectAll changes
useEffect(() => {
if (selectAll) {
setSelectedChunks(new Set(chunks.map((_, index) => index)));
} else {
setSelectedChunks(new Set());
}
}, [selectAll, setSelectedChunks, chunks]);
const handleBack = useCallback(() => {
router.back();
router.push("/knowledge");
}, [router]);
const handleChunkCardCheckboxChange = useCallback(
(index: number) => {
setSelectedChunks((prevSelected) => {
const newSelected = new Set(prevSelected);
if (newSelected.has(index)) {
newSelected.delete(index);
} else {
newSelected.add(index);
}
return newSelected;
});
},
[setSelectedChunks]
);
if (!filename) {
return (
<div className="flex items-center justify-center h-64">
@ -83,7 +132,7 @@ function ChunksPageContent() {
return (
<div
className={`fixed inset-0 md:left-72 top-[53px] flex flex-col transition-all duration-300 ${
className={`fixed inset-0 md:left-72 top-[53px] flex flex-row transition-all duration-300 ${
isMenuOpen && isPanelOpen
? "md:right-[704px]"
: // Both open: 384px (menu) + 320px (KF panel)
@ -98,29 +147,47 @@ function ChunksPageContent() {
>
<div className="flex-1 flex flex-col min-h-0 px-6 py-6">
{/* Header */}
<div className="flex items-center justify-between mb-6">
<div className="flex items-center gap-3">
<Button
variant="ghost"
size="sm"
onClick={handleBack}
className="text-muted-foreground hover:text-foreground px-2"
>
Back
<div className="flex flex-col mb-6">
<div className="flex items-center gap-3 mb-2">
<Button variant="ghost" onClick={handleBack}>
<ArrowLeft size={18} />
<FileIcon className="text-muted-foreground" size={18} />
<h1 className="text-lg font-semibold">
{filename.replace(/\.[^/.]+$/, "")}
</h1>
</Button>
<div className="flex flex-col">
<h2 className="text-lg font-semibold">Document Chunks</h2>
<p className="text-sm text-muted-foreground truncate max-w-md">
{decodeURIComponent(filename)}
</p>
</div>
</div>
<div className="text-sm text-muted-foreground">
{!isFetching && chunks.length > 0 && (
<span>
{chunks.length} chunk{chunks.length !== 1 ? "s" : ""} found
</span>
)}
<div className="flex items-center gap-3 pl-4 mt-2">
<div className="flex items-center gap-2">
<Checkbox
id="selectAllChunks"
checked={selectAll}
onCheckedChange={(checked) =>
setSelectAll(!!checked)
}
/>
<Label
htmlFor="selectAllChunks"
className="font-medium text-muted-foreground whitespace-nowrap cursor-pointer"
>
Select all
</Label>
</div>
<div className="flex-1 flex items-center gap-2">
<Input
name="search-query"
id="search-query"
type="text"
defaultValue={parsedFilterData?.query}
value={queryInputText}
onChange={(e) => setQueryInputText(e.target.value)}
placeholder="Search chunks..."
className="flex-1 bg-muted/20 rounded-lg border border-border/50 px-4 py-3 focus-visible:ring-1 focus-visible:ring-ring"
/>
<Button variant="outline" size="sm">
<Search />
</Button>
</div>
</div>
</div>
@ -147,41 +214,130 @@ function ChunksPageContent() {
</div>
) : (
<div className="space-y-4 pb-6">
{chunks.map((chunk, index) => (
{chunksFilteredByQuery.map((chunk, index) => (
<div
key={chunk.filename + index}
className="bg-muted/20 rounded-lg p-4 border border-border/50"
className="bg-muted rounded-lg p-4 border border-border/50"
>
<div className="flex items-center justify-between mb-2">
<div className="flex items-center gap-2">
<FileText className="h-4 w-4 text-blue-400" />
<span className="font-medium truncate">
{chunk.filename}
<div className="flex items-center gap-3">
<div>
<Checkbox
checked={selectedChunks.has(index)}
onCheckedChange={() =>
handleChunkCardCheckboxChange(index)
}
/>
</div>
<span className="text-sm font-bold">
Chunk {chunk.page}
</span>
{chunk.connector_type && (
<div className="ml-2">
{getSourceIcon(chunk.connector_type)}
</div>
)}
<span className="bg-background p-1 rounded text-xs text-muted-foreground/70">
{chunk.text.length} chars
</span>
<div className="py-1">
<Button
className="p-1"
onClick={() => handleCopy(chunk.text)}
variant="ghost"
size="xs"
>
<Copy className="text-muted-foreground" />
</Button>
</div>
</div>
<span className="text-xs text-green-400 bg-green-400/20 px-2 py-1 rounded">
{chunk.score.toFixed(2)}
</span>
{/* TODO: Update to use active toggle */}
{/* <span className="px-2 py-1 text-green-500">
<Switch
className="ml-2 bg-green-500"
checked={true}
/>
Active
</span> */}
</div>
<div className="flex items-center gap-4 text-sm text-muted-foreground mb-3">
<span>{chunk.mimetype}</span>
<span>Page {chunk.page}</span>
{chunk.owner_name && <span>Owner: {chunk.owner_name}</span>}
</div>
<p className="text-sm text-foreground/90 leading-relaxed">
<blockquote className="text-sm text-muted-foreground leading-relaxed border-l-2 border-input ml-1.5 pl-4">
{chunk.text}
</p>
</blockquote>
</div>
))}
</div>
)}
</div>
</div>
{/* Right panel - Summary (TODO), Technical details, Original document */}
<div className="w-[320px] py-20 px-2">
<div className="mb-8">
<h2 className="text-xl font-semibold mt-3 mb-4">Technical details</h2>
<dl>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Total chunks</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{chunks.length}
</dd>
</div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Avg length</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{averageChunkLength.toFixed(0)} chars
</dd>
</div>
{/* TODO: Uncomment after data is available */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Process time</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
</dd>
</div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Model</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
</dd>
</div> */}
</dl>
</div>
<div className="mb-8">
<h2 className="text-xl font-semibold mt-2 mb-3">Original document</h2>
<dl>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Name</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData?.filename}
</dd>
</div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Type</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"}
</dd>
</div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Size</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData?.size
? `${Math.round(fileData.size / 1024)} KB`
: "Unknown"}
</dd>
</div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Uploaded</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
N/A
</dd>
</div>
{/* TODO: Uncomment after data is available */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Source</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"></dd>
</div> */}
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Updated</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
N/A
</dd>
</div>
</dl>
</div>
</div>
</div>
);
}

View file

@ -1,16 +1,10 @@
"use client";
import {
Building2,
Cloud,
HardDrive,
Search,
Trash2,
X,
} from "lucide-react";
import { AgGridReact, CustomCellRendererProps } from "ag-grid-react";
import { useCallback, useState, useRef, ChangeEvent } from "react";
import type { ColDef } from "ag-grid-community";
import { AgGridReact, type CustomCellRendererProps } from "ag-grid-react";
import { Building2, Cloud, HardDrive, Search, Trash2, X } from "lucide-react";
import { useRouter } from "next/navigation";
import { type ChangeEvent, useCallback, useRef, useState } from "react";
import { SiGoogledrive } from "react-icons/si";
import { TbBrandOnedrive } from "react-icons/tb";
import { KnowledgeDropdown } from "@/components/knowledge-dropdown";
@ -19,13 +13,13 @@ import { Button } from "@/components/ui/button";
import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context";
import { useTask } from "@/contexts/task-context";
import { type File, useGetSearchQuery } from "../api/queries/useGetSearchQuery";
import { ColDef } from "ag-grid-community";
import "@/components/AgGrid/registerAgGridModules";
import "@/components/AgGrid/agGridStyles.css";
import { toast } from "sonner";
import { KnowledgeActionsDropdown } from "@/components/knowledge-actions-dropdown";
import { StatusBadge } from "@/components/ui/status-badge";
import { DeleteConfirmationDialog } from "../../../components/confirmation-dialog";
import { useDeleteDocument } from "../api/mutations/useDeleteDocument";
import { toast } from "sonner";
// Function to get the appropriate icon for a connector type
function getSourceIcon(connectorType?: string) {
@ -51,7 +45,7 @@ function getSourceIcon(connectorType?: string) {
function SearchPage() {
const router = useRouter();
const { isMenuOpen } = useTask();
const { isMenuOpen, files: taskFiles } = useTask();
const { selectedFilter, setSelectedFilter, parsedFilterData, isPanelOpen } =
useKnowledgeFilter();
const [selectedRows, setSelectedRows] = useState<File[]>([]);
@ -61,14 +55,38 @@ function SearchPage() {
const { data = [], isFetching } = useGetSearchQuery(
parsedFilterData?.query || "*",
parsedFilterData
parsedFilterData,
);
const handleTableSearch = (e: ChangeEvent<HTMLInputElement>) => {
gridRef.current?.api.setGridOption("quickFilterText", e.target.value);
};
const fileResults = data as File[];
// Convert TaskFiles to File format and merge with backend results
const taskFilesAsFiles: File[] = taskFiles.map((taskFile) => {
return {
filename: taskFile.filename,
mimetype: taskFile.mimetype,
source_url: taskFile.source_url,
size: taskFile.size,
connector_type: taskFile.connector_type,
status: taskFile.status,
};
});
const backendFiles = data as File[];
const filteredTaskFiles = taskFilesAsFiles.filter((taskFile) => {
return (
taskFile.status !== "active" &&
!backendFiles.some(
(backendFile) => backendFile.filename === taskFile.filename,
)
);
});
// Combine backend files first, then any still-processing task files
const fileResults = [...backendFiles, ...filteredTaskFiles];
const gridRef = useRef<AgGridReact>(null);
@ -82,13 +100,14 @@ function SearchPage() {
minWidth: 220,
cellRenderer: ({ data, value }: CustomCellRendererProps<File>) => {
return (
<div
className="flex items-center gap-2 cursor-pointer hover:text-blue-600 transition-colors"
<button
type="button"
className="flex items-center gap-2 cursor-pointer hover:text-blue-600 transition-colors text-left w-full"
onClick={() => {
router.push(
`/knowledge/chunks?filename=${encodeURIComponent(
data?.filename ?? ""
)}`
data?.filename ?? "",
)}`,
);
}}
>
@ -96,7 +115,7 @@ function SearchPage() {
<span className="font-medium text-foreground truncate">
{value}
</span>
</div>
</button>
);
},
},
@ -119,6 +138,7 @@ function SearchPage() {
{
field: "chunkCount",
headerName: "Chunks",
valueFormatter: (params) => params.data?.chunkCount?.toString() || "-",
},
{
field: "avgScore",
@ -127,11 +147,20 @@ function SearchPage() {
cellRenderer: ({ value }: CustomCellRendererProps<File>) => {
return (
<span className="text-xs text-green-400 bg-green-400/20 px-2 py-1 rounded">
{value.toFixed(2)}
{value?.toFixed(2) ?? "-"}
</span>
);
},
},
{
field: "status",
headerName: "Status",
cellRenderer: ({ data }: CustomCellRendererProps<File>) => {
// Default to 'active' status if no status is provided
const status = data?.status || "active";
return <StatusBadge status={status} />;
},
},
{
cellRenderer: ({ data }: CustomCellRendererProps<File>) => {
return <KnowledgeActionsDropdown filename={data?.filename || ""} />;
@ -172,7 +201,7 @@ function SearchPage() {
try {
// Delete each file individually since the API expects one filename at a time
const deletePromises = selectedRows.map((row) =>
deleteDocumentMutation.mutateAsync({ filename: row.filename })
deleteDocumentMutation.mutateAsync({ filename: row.filename }),
);
await Promise.all(deletePromises);
@ -180,7 +209,7 @@ function SearchPage() {
toast.success(
`Successfully deleted ${selectedRows.length} document${
selectedRows.length > 1 ? "s" : ""
}`
}`,
);
setSelectedRows([]);
setShowBulkDeleteDialog(false);
@ -193,7 +222,7 @@ function SearchPage() {
toast.error(
error instanceof Error
? error.message
: "Failed to delete some documents"
: "Failed to delete some documents",
);
}
};

View file

@ -4,11 +4,13 @@ import { Loader2, PlugZap, RefreshCw } from "lucide-react";
import { useSearchParams } from "next/navigation";
import { Suspense, useCallback, useEffect, useState } from "react";
import { useUpdateFlowSettingMutation } from "@/app/api/mutations/useUpdateFlowSettingMutation";
import {
useGetIBMModelsQuery,
useGetOllamaModelsQuery,
useGetOpenAIModelsQuery,
} from "@/app/api/queries/useGetModelsQuery";
import { useGetSettingsQuery } from "@/app/api/queries/useGetSettingsQuery";
import { useGetOpenAIModelsQuery, useGetOllamaModelsQuery, useGetIBMModelsQuery } from "@/app/api/queries/useGetModelsQuery";
import { ConfirmationDialog } from "@/components/confirmation-dialog";
import { ModelSelectItems } from "./helpers/model-select-item";
import { getFallbackModels, type ModelProvider } from "./helpers/model-helpers";
import { ProtectedRoute } from "@/components/protected-route";
import { Badge } from "@/components/ui/badge";
import { Button } from "@/components/ui/button";
@ -33,6 +35,8 @@ import { Textarea } from "@/components/ui/textarea";
import { useAuth } from "@/contexts/auth-context";
import { useTask } from "@/contexts/task-context";
import { useDebounce } from "@/lib/debounce";
import { getFallbackModels, type ModelProvider } from "./helpers/model-helpers";
import { ModelSelectItems } from "./helpers/model-select-item";
const MAX_SYSTEM_PROMPT_CHARS = 2000;
@ -105,42 +109,46 @@ function KnowledgeSourcesPage() {
// Fetch settings using React Query
const { data: settings = {} } = useGetSettingsQuery({
enabled: isAuthenticated,
enabled: isAuthenticated || isNoAuthMode,
});
// Get the current provider from settings
const currentProvider = (settings.provider?.model_provider || 'openai') as ModelProvider;
const currentProvider = (settings.provider?.model_provider ||
"openai") as ModelProvider;
// Fetch available models based on provider
const { data: openaiModelsData } = useGetOpenAIModelsQuery(
undefined, // Let backend use stored API key from configuration
{
enabled: isAuthenticated && currentProvider === 'openai',
}
enabled:
(isAuthenticated || isNoAuthMode) && currentProvider === "openai",
},
);
const { data: ollamaModelsData } = useGetOllamaModelsQuery(
undefined, // No params for now, could be extended later
{
enabled: isAuthenticated && currentProvider === 'ollama',
}
enabled:
(isAuthenticated || isNoAuthMode) && currentProvider === "ollama",
},
);
const { data: ibmModelsData } = useGetIBMModelsQuery(
undefined, // No params for now, could be extended later
{
enabled: isAuthenticated && currentProvider === 'ibm',
}
enabled: (isAuthenticated || isNoAuthMode) && currentProvider === "ibm",
},
);
// Select the appropriate models data based on provider
const modelsData = currentProvider === 'openai'
? openaiModelsData
: currentProvider === 'ollama'
? ollamaModelsData
: currentProvider === 'ibm'
? ibmModelsData
: openaiModelsData; // fallback to openai
const modelsData =
currentProvider === "openai"
? openaiModelsData
: currentProvider === "ollama"
? ollamaModelsData
: currentProvider === "ibm"
? ibmModelsData
: openaiModelsData; // fallback to openai
// Mutations
const updateFlowSettingMutation = useUpdateFlowSettingMutation({
@ -219,10 +227,10 @@ function KnowledgeSourcesPage() {
// Update processing mode
const handleProcessingModeChange = (mode: string) => {
setProcessingMode(mode);
// Update the configuration setting (backend will also update the flow automatically)
debouncedUpdate({ doclingPresets: mode });
};
// Helper function to get connector icon
const getConnectorIcon = useCallback((iconName: string) => {
const iconMap: { [key: string]: React.ReactElement } = {
@ -611,7 +619,11 @@ function KnowledgeSourcesPage() {
Language Model
</Label>
<Select
value={settings.agent?.llm_model || modelsData?.language_models?.find(m => m.default)?.value || "gpt-4"}
value={
settings.agent?.llm_model ||
modelsData?.language_models?.find((m) => m.default)?.value ||
"gpt-4"
}
onValueChange={handleModelChange}
>
<SelectTrigger id="model-select">
@ -636,10 +648,20 @@ function KnowledgeSourcesPage() {
value={systemPrompt}
onChange={(e) => setSystemPrompt(e.target.value)}
rows={6}
className={`resize-none ${systemPrompt.length > MAX_SYSTEM_PROMPT_CHARS ? 'border-red-500 focus:border-red-500' : ''}`}
className={`resize-none ${
systemPrompt.length > MAX_SYSTEM_PROMPT_CHARS
? "border-red-500 focus:border-red-500"
: ""
}`}
/>
<div className="flex justify-start">
<span className={`text-xs ${systemPrompt.length > MAX_SYSTEM_PROMPT_CHARS ? 'text-red-500' : 'text-muted-foreground'}`}>
<span
className={`text-xs ${
systemPrompt.length > MAX_SYSTEM_PROMPT_CHARS
? "text-red-500"
: "text-muted-foreground"
}`}
>
{systemPrompt.length}/{MAX_SYSTEM_PROMPT_CHARS} characters
</span>
</div>
@ -647,7 +669,10 @@ function KnowledgeSourcesPage() {
<div className="flex justify-end pt-2">
<Button
onClick={handleSystemPromptSave}
disabled={updateFlowSettingMutation.isPending || systemPrompt.length > MAX_SYSTEM_PROMPT_CHARS}
disabled={
updateFlowSettingMutation.isPending ||
systemPrompt.length > MAX_SYSTEM_PROMPT_CHARS
}
className="min-w-[120px]"
size="sm"
variant="outline"
@ -734,7 +759,9 @@ function KnowledgeSourcesPage() {
</Label>
<Select
value={
settings.knowledge?.embedding_model || modelsData?.embedding_models?.find(m => m.default)?.value || "text-embedding-ada-002"
settings.knowledge?.embedding_model ||
modelsData?.embedding_models?.find((m) => m.default)?.value ||
"text-embedding-ada-002"
}
onValueChange={handleEmbeddingModelChange}
>
@ -744,7 +771,9 @@ function KnowledgeSourcesPage() {
<SelectContent>
<ModelSelectItems
models={modelsData?.embedding_models}
fallbackModels={getFallbackModels(currentProvider).embedding}
fallbackModels={
getFallbackModels(currentProvider).embedding
}
provider={currentProvider}
/>
</SelectContent>
@ -805,7 +834,10 @@ function KnowledgeSourcesPage() {
<div className="flex items-center space-x-3">
<RadioGroupItem value="standard" id="standard" />
<div className="flex-1">
<Label htmlFor="standard" className="text-base font-medium cursor-pointer">
<Label
htmlFor="standard"
className="text-base font-medium cursor-pointer"
>
Standard
</Label>
<div className="text-sm text-muted-foreground">
@ -816,18 +848,28 @@ function KnowledgeSourcesPage() {
<div className="flex items-center space-x-3">
<RadioGroupItem value="ocr" id="ocr" />
<div className="flex-1">
<Label htmlFor="ocr" className="text-base font-medium cursor-pointer">
<Label
htmlFor="ocr"
className="text-base font-medium cursor-pointer"
>
Extract text from images
</Label>
<div className="text-sm text-muted-foreground">
Uses OCR to extract text from images/PDFs. Ingest is slower when enabled
Uses OCR to extract text from images/PDFs. Ingest is
slower when enabled
</div>
</div>
</div>
<div className="flex items-center space-x-3">
<RadioGroupItem value="picture_description" id="picture_description" />
<RadioGroupItem
value="picture_description"
id="picture_description"
/>
<div className="flex-1">
<Label htmlFor="picture_description" className="text-base font-medium cursor-pointer">
<Label
htmlFor="picture_description"
className="text-base font-medium cursor-pointer"
>
Generate Description
</Label>
<div className="text-sm text-muted-foreground">
@ -838,11 +880,15 @@ function KnowledgeSourcesPage() {
<div className="flex items-center space-x-3">
<RadioGroupItem value="VLM" id="VLM" />
<div className="flex-1">
<Label htmlFor="VLM" className="text-base font-medium cursor-pointer">
<Label
htmlFor="VLM"
className="text-base font-medium cursor-pointer"
>
AI Vision
</Label>
<div className="text-sm text-muted-foreground">
Advanced processing with vision language models. Highest quality but most expensive
Advanced processing with vision language models. Highest
quality but most expensive
</div>
</div>
</div>

View file

@ -0,0 +1,49 @@
interface AnimatedProcessingIconProps {
className?: string;
size?: number;
}
export const AnimatedProcessingIcon = ({
className = "",
size = 10,
}: AnimatedProcessingIconProps) => {
const width = Math.round((size * 6) / 10);
const height = size;
return (
<svg
width={width}
height={height}
viewBox="0 0 6 10"
fill="none"
xmlns="http://www.w3.org/2000/svg"
className={className}
>
<style>
{`
.dot-1 { animation: pulse-wave 1.5s infinite; animation-delay: 0s; }
.dot-2 { animation: pulse-wave 1.5s infinite; animation-delay: 0.1s; }
.dot-3 { animation: pulse-wave 1.5s infinite; animation-delay: 0.2s; }
.dot-4 { animation: pulse-wave 1.5s infinite; animation-delay: 0.3s; }
.dot-5 { animation: pulse-wave 1.5s infinite; animation-delay: 0.4s; }
@keyframes pulse-wave {
0%, 60%, 100% {
opacity: 0.25;
transform: scale(1);
}
30% {
opacity: 1;
transform: scale(1.2);
}
}
`}
</style>
<circle className="dot-1" cx="1" cy="5" r="1" fill="currentColor" />
<circle className="dot-2" cx="1" cy="9" r="1" fill="currentColor" />
<circle className="dot-3" cx="5" cy="1" r="1" fill="currentColor" />
<circle className="dot-4" cx="5" cy="5" r="1" fill="currentColor" />
<circle className="dot-5" cx="5" cy="9" r="1" fill="currentColor" />
</svg>
);
};

View file

@ -0,0 +1,58 @@
import { AnimatedProcessingIcon } from "./animated-processing-icon";
export type Status =
| "processing"
| "active"
| "unavailable"
| "hidden"
| "sync"
| "failed";
interface StatusBadgeProps {
status: Status;
className?: string;
}
const statusConfig = {
processing: {
label: "Processing",
className: "text-muted-foreground dark:text-muted-foreground ",
},
active: {
label: "Active",
className: "text-emerald-600 dark:text-emerald-400 ",
},
unavailable: {
label: "Unavailable",
className: "text-red-600 dark:text-red-400 ",
},
failed: {
label: "Failed",
className: "text-red-600 dark:text-red-400 ",
},
hidden: {
label: "Hidden",
className: "text-zinc-400 dark:text-zinc-500 ",
},
sync: {
label: "Sync",
className: "text-amber-700 dark:text-amber-300 underline",
},
};
export const StatusBadge = ({ status, className }: StatusBadgeProps) => {
const config = statusConfig[status];
return (
<div
className={`inline-flex items-center gap-1 ${config.className} ${
className || ""
}`}
>
{status === "processing" && (
<AnimatedProcessingIcon className="text-current mr-2" size={10} />
)}
{config.label}
</div>
);
};
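Usage mirrors the knowledge table's new Status column earlier in this diff, where files without an explicit status default to `active`:

```tsx
import { StatusBadge, type Status } from "@/components/ui/status-badge";

// Illustrative wrapper: treat a missing status as "active", matching the
// table's cellRenderer.
export const FileStatusCell = ({ status }: { status?: Status }) => (
  <StatusBadge status={status ?? "active"} />
);
```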

View file

@ -35,9 +35,22 @@ export interface Task {
files?: Record<string, Record<string, unknown>>;
}
export interface TaskFile {
filename: string;
mimetype: string;
source_url: string;
size: number;
connector_type: string;
status: "active" | "failed" | "processing";
task_id: string;
created_at: string;
updated_at: string;
}
interface TaskContextType {
tasks: Task[];
files: TaskFile[];
addTask: (taskId: string) => void;
addFiles: (files: Partial<TaskFile>[], taskId: string) => void;
removeTask: (taskId: string) => void;
refreshTasks: () => Promise<void>;
cancelTask: (taskId: string) => Promise<void>;
@ -51,6 +64,7 @@ const TaskContext = createContext<TaskContextType | undefined>(undefined);
export function TaskProvider({ children }: { children: React.ReactNode }) {
const [tasks, setTasks] = useState<Task[]>([]);
const [files, setFiles] = useState<TaskFile[]>([]);
const [isPolling, setIsPolling] = useState(false);
const [isFetching, setIsFetching] = useState(false);
const [isMenuOpen, setIsMenuOpen] = useState(false);
@ -58,12 +72,32 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
const queryClient = useQueryClient();
const refetchSearch = () => {
const refetchSearch = useCallback(() => {
queryClient.invalidateQueries({
queryKey: ["search"],
exact: false,
});
};
}, [queryClient]);
const addFiles = useCallback(
(newFiles: Partial<TaskFile>[], taskId: string) => {
const now = new Date().toISOString();
const filesToAdd: TaskFile[] = newFiles.map((file) => ({
filename: file.filename || "",
mimetype: file.mimetype || "",
source_url: file.source_url || "",
size: file.size || 0,
connector_type: file.connector_type || "local",
status: "processing",
task_id: taskId,
created_at: now,
updated_at: now,
}));
setFiles((prevFiles) => [...prevFiles, ...filesToAdd]);
},
[],
);
const fetchTasks = useCallback(async () => {
if (!isAuthenticated && !isNoAuthMode) return;
@ -76,13 +110,87 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
const newTasks = data.tasks || [];
// Update tasks and check for status changes in the same state update
setTasks(prevTasks => {
setTasks((prevTasks) => {
// Check for newly completed tasks to show toasts
if (prevTasks.length > 0) {
newTasks.forEach((newTask: Task) => {
const oldTask = prevTasks.find(
t => t.task_id === newTask.task_id
(t) => t.task_id === newTask.task_id,
);
// Update or add files from task.files if available
if (newTask.files && typeof newTask.files === "object") {
const taskFileEntries = Object.entries(newTask.files);
const now = new Date().toISOString();
taskFileEntries.forEach(([filePath, fileInfo]) => {
if (typeof fileInfo === "object" && fileInfo) {
const fileName = filePath.split("/").pop() || filePath;
const fileStatus = fileInfo.status as string;
// Map backend file status to our TaskFile status
let mappedStatus: TaskFile["status"];
switch (fileStatus) {
case "pending":
case "running":
mappedStatus = "processing";
break;
case "completed":
mappedStatus = "active";
break;
case "failed":
mappedStatus = "failed";
break;
default:
mappedStatus = "processing";
}
setFiles((prevFiles) => {
const existingFileIndex = prevFiles.findIndex(
(f) =>
f.source_url === filePath &&
f.task_id === newTask.task_id,
);
// Detect connector type based on file path or other indicators
let connectorType = "local";
if (filePath.includes("/") && !filePath.startsWith("/")) {
// Likely S3 key format (bucket/path/file.ext)
connectorType = "s3";
}
const fileEntry: TaskFile = {
filename: fileName,
mimetype: "", // We don't have this info from the task
source_url: filePath,
size: 0, // We don't have this info from the task
connector_type: connectorType,
status: mappedStatus,
task_id: newTask.task_id,
created_at:
typeof fileInfo.created_at === "string"
? fileInfo.created_at
: now,
updated_at:
typeof fileInfo.updated_at === "string"
? fileInfo.updated_at
: now,
};
if (existingFileIndex >= 0) {
// Update existing file
const updatedFiles = [...prevFiles];
updatedFiles[existingFileIndex] = fileEntry;
return updatedFiles;
} else {
// Add new file
return [...prevFiles, fileEntry];
}
});
}
});
}
if (
oldTask &&
oldTask.status !== "completed" &&
@ -99,9 +207,14 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
refetchSearch();
// Dispatch knowledge updated event for all knowledge-related pages
console.log(
"Task completed successfully, dispatching knowledgeUpdated event"
"Task completed successfully, dispatching knowledgeUpdated event",
);
window.dispatchEvent(new CustomEvent("knowledgeUpdated"));
// Remove files for this completed task from the files list
setFiles((prevFiles) =>
prevFiles.filter((file) => file.task_id !== newTask.task_id),
);
} else if (
oldTask &&
oldTask.status !== "failed" &&
@ -114,6 +227,8 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
newTask.error || "Unknown error"
}`,
});
// Files will be updated to failed status by the file parsing logic above
}
});
}
@ -126,7 +241,7 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
} finally {
setIsFetching(false);
}
}, [isAuthenticated, isNoAuthMode]); // Removed 'tasks' from dependencies to prevent infinite loop!
}, [isAuthenticated, isNoAuthMode, refetchSearch]); // Removed 'tasks' from dependencies to prevent infinite loop!
const addTask = useCallback((taskId: string) => {
// Immediately start aggressive polling for the new task
@ -140,19 +255,21 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
const data = await response.json();
const newTasks = data.tasks || [];
const foundTask = newTasks.find(
(task: Task) => task.task_id === taskId
(task: Task) => task.task_id === taskId,
);
if (foundTask) {
// Task found! Update the tasks state
setTasks(prevTasks => {
setTasks((prevTasks) => {
// Check if task is already in the list
const exists = prevTasks.some(t => t.task_id === taskId);
const exists = prevTasks.some((t) => t.task_id === taskId);
if (!exists) {
return [...prevTasks, foundTask];
}
// Update existing task
return prevTasks.map(t => (t.task_id === taskId ? foundTask : t));
return prevTasks.map((t) =>
t.task_id === taskId ? foundTask : t,
);
});
return; // Stop polling, we found it
}
@ -177,7 +294,7 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
}, [fetchTasks]);
const removeTask = useCallback((taskId: string) => {
setTasks(prev => prev.filter(task => task.task_id !== taskId));
setTasks((prev) => prev.filter((task) => task.task_id !== taskId));
}, []);
const cancelTask = useCallback(
@ -204,11 +321,11 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
});
}
},
[fetchTasks]
[fetchTasks],
);
const toggleMenu = useCallback(() => {
setIsMenuOpen(prev => !prev);
setIsMenuOpen((prev) => !prev);
}, []);
// Periodic polling for task updates
@ -231,7 +348,9 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
const value: TaskContextType = {
tasks,
files,
addTask,
addFiles,
removeTask,
refreshTasks,
cancelTask,
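For reference, the per-file map this provider parses out of task responses looks roughly like the following; this is inferred from the `file_statuses` dict built in the task service later in this diff, not a documented contract:

```ts
// Completed files are omitted server-side; "completed" is still mapped to
// "active" defensively in fetchTasks above.
interface TaskFileInfo {
  status: "pending" | "running" | "failed" | string;
  result: unknown;
  error: string | null;
  retry_count: number;
  created_at: number | string; // fetchTasks falls back to "now" if not a string
  updated_at: number | string;
  duration_seconds: number | null;
}

type TaskFilesMap = Record<string, TaskFileInfo>; // keyed by source file path
```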

View file

@ -106,7 +106,6 @@ async def async_response_stream(
model: str,
extra_headers: dict = None,
previous_response_id: str = None,
tweaks: dict = None,
log_prefix: str = "response",
):
logger.info("User prompt received", prompt=prompt)
@ -121,8 +120,6 @@ async def async_response_stream(
}
if previous_response_id is not None:
request_params["previous_response_id"] = previous_response_id
if tweaks:
request_params["tweaks"] = tweaks
if "x-api-key" not in client.default_headers:
if hasattr(client, "api_key") and extra_headers is not None:
@ -199,7 +196,6 @@ async def async_response(
model: str,
extra_headers: dict = None,
previous_response_id: str = None,
tweaks: dict = None,
log_prefix: str = "response",
):
try:
@ -214,8 +210,6 @@ async def async_response(
}
if previous_response_id is not None:
request_params["previous_response_id"] = previous_response_id
if tweaks:
request_params["tweaks"] = tweaks
if extra_headers:
request_params["extra_headers"] = extra_headers
@ -249,7 +243,6 @@ async def async_stream(
model: str,
extra_headers: dict = None,
previous_response_id: str = None,
tweaks: dict = None,
log_prefix: str = "response",
):
async for chunk in async_response_stream(
@ -258,7 +251,6 @@ async def async_stream(
model,
extra_headers=extra_headers,
previous_response_id=previous_response_id,
tweaks=tweaks,
log_prefix=log_prefix,
):
yield chunk
@ -271,7 +263,6 @@ async def async_langflow(
prompt: str,
extra_headers: dict = None,
previous_response_id: str = None,
tweaks: dict = None,
):
response_text, response_id, response_obj = await async_response(
langflow_client,
@ -279,7 +270,6 @@ async def async_langflow(
flow_id,
extra_headers=extra_headers,
previous_response_id=previous_response_id,
tweaks=tweaks,
log_prefix="langflow",
)
return response_text, response_id
@ -292,7 +282,6 @@ async def async_langflow_stream(
prompt: str,
extra_headers: dict = None,
previous_response_id: str = None,
tweaks: dict = None,
):
logger.debug("Starting langflow stream", prompt=prompt)
try:
@ -302,8 +291,7 @@ async def async_langflow_stream(
flow_id,
extra_headers=extra_headers,
previous_response_id=previous_response_id,
tweaks=tweaks,
log_prefix="langflow",
log_prefix="langflow",
):
logger.debug(
"Yielding chunk from langflow stream",
@ -463,7 +451,6 @@ async def async_langflow_chat(
user_id: str,
extra_headers: dict = None,
previous_response_id: str = None,
tweaks: dict = None,
store_conversation: bool = True,
):
logger.debug(
@ -497,7 +484,6 @@ async def async_langflow_chat(
flow_id,
extra_headers=extra_headers,
previous_response_id=previous_response_id,
tweaks=tweaks,
log_prefix="langflow",
)
logger.debug(
@ -576,7 +562,6 @@ async def async_langflow_chat_stream(
user_id: str,
extra_headers: dict = None,
previous_response_id: str = None,
tweaks: dict = None,
):
logger.debug(
"async_langflow_chat_stream called",
@ -603,7 +588,6 @@ async def async_langflow_chat_stream(
flow_id,
extra_headers=extra_headers,
previous_response_id=previous_response_id,
tweaks=tweaks,
log_prefix="langflow",
):
# Extract text content to build full response for history

View file

@ -17,14 +17,18 @@ async def get_openai_models(request, models_service, session_manager):
try:
config = get_openrag_config()
api_key = config.provider.api_key
logger.info(f"Retrieved API key from config: {'yes' if api_key else 'no'}")
logger.info(
f"Retrieved API key from config: {'yes' if api_key else 'no'}"
)
except Exception as e:
logger.error(f"Failed to get config: {e}")
if not api_key:
return JSONResponse(
{"error": "OpenAI API key is required either as query parameter or in configuration"},
status_code=400
{
"error": "OpenAI API key is required either as query parameter or in configuration"
},
status_code=400,
)
models = await models_service.get_openai_models(api_key=api_key)
@ -32,8 +36,7 @@ async def get_openai_models(request, models_service, session_manager):
except Exception as e:
logger.error(f"Failed to get OpenAI models: {str(e)}")
return JSONResponse(
{"error": f"Failed to retrieve OpenAI models: {str(e)}"},
status_code=500
{"error": f"Failed to retrieve OpenAI models: {str(e)}"}, status_code=500
)
@ -44,13 +47,31 @@ async def get_ollama_models(request, models_service, session_manager):
query_params = dict(request.query_params)
endpoint = query_params.get("endpoint")
# If no endpoint provided, try to get it from stored configuration
if not endpoint:
try:
config = get_openrag_config()
endpoint = config.provider.endpoint
logger.info(
f"Retrieved endpoint from config: {'yes' if endpoint else 'no'}"
)
except Exception as e:
logger.error(f"Failed to get config: {e}")
if not endpoint:
return JSONResponse(
{
"error": "Endpoint is required either as query parameter or in configuration"
},
status_code=400,
)
models = await models_service.get_ollama_models(endpoint=endpoint)
return JSONResponse(models)
except Exception as e:
logger.error(f"Failed to get Ollama models: {str(e)}")
return JSONResponse(
{"error": f"Failed to retrieve Ollama models: {str(e)}"},
status_code=500
{"error": f"Failed to retrieve Ollama models: {str(e)}"}, status_code=500
)
@ -63,15 +84,65 @@ async def get_ibm_models(request, models_service, session_manager):
api_key = query_params.get("api_key")
project_id = query_params.get("project_id")
config = get_openrag_config()
# If no API key provided, try to get it from stored configuration
if not api_key:
try:
api_key = config.provider.api_key
logger.info(
f"Retrieved API key from config: {'yes' if api_key else 'no'}"
)
except Exception as e:
logger.error(f"Failed to get config: {e}")
if not api_key:
return JSONResponse(
{
"error": "OpenAI API key is required either as query parameter or in configuration"
},
status_code=400,
)
if not endpoint:
try:
endpoint = config.provider.endpoint
logger.info(
f"Retrieved endpoint from config: {'yes' if endpoint else 'no'}"
)
except Exception as e:
logger.error(f"Failed to get config: {e}")
if not endpoint:
return JSONResponse(
{
"error": "Endpoint is required either as query parameter or in configuration"
},
status_code=400,
)
if not project_id:
try:
project_id = config.provider.project_id
logger.info(
f"Retrieved project ID from config: {'yes' if project_id else 'no'}"
)
except Exception as e:
logger.error(f"Failed to get config: {e}")
if not project_id:
return JSONResponse(
{
"error": "Project ID is required either as query parameter or in configuration"
},
status_code=400,
)
models = await models_service.get_ibm_models(
endpoint=endpoint,
api_key=api_key,
project_id=project_id
endpoint=endpoint, api_key=api_key, project_id=project_id
)
return JSONResponse(models)
except Exception as e:
logger.error(f"Failed to get IBM models: {str(e)}")
return JSONResponse(
{"error": f"Failed to retrieve IBM models: {str(e)}"},
status_code=500
)
{"error": f"Failed to retrieve IBM models: {str(e)}"}, status_code=500
)

View file

@ -7,6 +7,7 @@ from config.settings import (
LANGFLOW_CHAT_FLOW_ID,
LANGFLOW_INGEST_FLOW_ID,
LANGFLOW_PUBLIC_URL,
DOCLING_COMPONENT_ID,
clients,
get_openrag_config,
config_manager,
@ -46,22 +47,7 @@ def get_docling_preset_configs():
}
def get_docling_tweaks(docling_preset: str = None) -> dict:
"""Get Langflow tweaks for docling component based on preset"""
if not docling_preset:
# Get current preset from config
openrag_config = get_openrag_config()
docling_preset = openrag_config.knowledge.doclingPresets
preset_configs = get_docling_preset_configs()
if docling_preset not in preset_configs:
docling_preset = "standard" # fallback
preset_config = preset_configs[docling_preset]
docling_serve_opts = json.dumps(preset_config)
return {"DoclingRemote-ayRdw": {"docling_serve_opts": docling_serve_opts}}
async def get_settings(request, session_manager):
@ -234,6 +220,15 @@ async def update_settings(request, session_manager):
current_config.knowledge.doclingPresets = body["doclingPresets"]
config_updated = True
# Also update the flow with the new docling preset
try:
await _update_flow_docling_preset(body["doclingPresets"], preset_configs[body["doclingPresets"]])
logger.info(f"Successfully updated docling preset in flow to '{body['doclingPresets']}'")
except Exception as e:
logger.error(f"Failed to update docling preset in flow: {str(e)}")
# Don't fail the entire settings update if flow update fails
# The config will still be saved
if "chunk_size" in body:
if not isinstance(body["chunk_size"], int) or body["chunk_size"] <= 0:
return JSONResponse(
@ -527,3 +522,93 @@ async def onboarding(request, flows_service):
{"error": f"Failed to update onboarding settings: {str(e)}"},
status_code=500,
)
async def _update_flow_docling_preset(preset: str, preset_config: dict):
"""Helper function to update docling preset in the ingest flow"""
if not LANGFLOW_INGEST_FLOW_ID:
raise ValueError("LANGFLOW_INGEST_FLOW_ID is not configured")
# Get the current flow data from Langflow
response = await clients.langflow_request(
"GET", f"/api/v1/flows/{LANGFLOW_INGEST_FLOW_ID}"
)
if response.status_code != 200:
raise Exception(f"Failed to get ingest flow: HTTP {response.status_code} - {response.text}")
flow_data = response.json()
# Find the target node in the flow using the component ID from the environment
nodes = flow_data.get("data", {}).get("nodes", [])
target_node = None
target_node_index = None
for i, node in enumerate(nodes):
if node.get("id") == DOCLING_COMPONENT_ID:
target_node = node
target_node_index = i
break
if target_node is None:
raise Exception(f"Docling component '{DOCLING_COMPONENT_ID}' not found in ingest flow")
# Update the docling_serve_opts value directly in the existing node
if (target_node.get("data", {}).get("node", {}).get("template", {}).get("docling_serve_opts")):
flow_data["data"]["nodes"][target_node_index]["data"]["node"]["template"]["docling_serve_opts"]["value"] = preset_config
else:
raise Exception(f"docling_serve_opts field not found in node '{DOCLING_COMPONENT_ID}'")
# Update the flow via PATCH request
patch_response = await clients.langflow_request(
"PATCH", f"/api/v1/flows/{LANGFLOW_INGEST_FLOW_ID}", json=flow_data
)
if patch_response.status_code != 200:
raise Exception(f"Failed to update ingest flow: HTTP {patch_response.status_code} - {patch_response.text}")
async def update_docling_preset(request, session_manager):
"""Update docling preset in the ingest flow"""
try:
# Parse request body
body = await request.json()
# Validate preset parameter
if "preset" not in body:
return JSONResponse(
{"error": "preset parameter is required"},
status_code=400
)
preset = body["preset"]
preset_configs = get_docling_preset_configs()
if preset not in preset_configs:
valid_presets = list(preset_configs.keys())
return JSONResponse(
{"error": f"Invalid preset '{preset}'. Valid presets: {', '.join(valid_presets)}"},
status_code=400
)
# Get the preset configuration
preset_config = preset_configs[preset]
# Use the helper function to update the flow
await _update_flow_docling_preset(preset, preset_config)
logger.info(f"Successfully updated docling preset to '{preset}' in ingest flow")
return JSONResponse({
"message": f"Successfully updated docling preset to '{preset}'",
"preset": preset,
"preset_config": preset_config
})
except Exception as e:
logger.error("Failed to update docling preset", error=str(e))
return JSONResponse(
{"error": f"Failed to update docling preset: {str(e)}"},
status_code=500
)
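A sketch of calling the new endpoint from the frontend; the route path and body shape come from `update_docling_preset` and the `/settings/docling-preset` route registered later in this diff, while the preset names are taken from the settings page's radio options. Whether the UI reaches this route directly or through an `/api` proxy is an assumption:

```ts
// Presets seen in this PR: "standard" | "ocr" | "picture_description" | "VLM".
// Assumes an authenticated session, since the route is wrapped in require_auth.
async function setDoclingPreset(preset: string): Promise<void> {
  const res = await fetch("/settings/docling-preset", {
    method: "PATCH",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ preset }),
  });
  if (!res.ok) {
    const { error } = await res.json();
    throw new Error(error ?? `HTTP ${res.status}`);
  }
}
```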

View file

@ -544,6 +544,9 @@ OLLAMA_LLM_TEXT_COMPONENT_ID = os.getenv(
"OLLAMA_LLM_TEXT_COMPONENT_ID", "OllamaModel-XDGqZ"
)
# Docling component ID for ingest flow
DOCLING_COMPONENT_ID = os.getenv("DOCLING_COMPONENT_ID", "DoclingRemote-78KoX")
# Global clients instance
clients = AppClients()

View file

@ -971,12 +971,23 @@ async def create_app():
"/onboarding",
require_auth(services["session_manager"])(
partial(
settings.onboarding,
settings.onboarding,
flows_service=services["flows_service"]
)
),
methods=["POST"],
),
# Docling preset update endpoint
Route(
"/settings/docling-preset",
require_auth(services["session_manager"])(
partial(
settings.update_docling_preset,
session_manager=services["session_manager"]
)
),
methods=["PATCH"],
),
Route(
"/nudges",
require_auth(services["session_manager"])(

View file

@ -2,7 +2,6 @@ import json
from config.settings import NUDGES_FLOW_ID, clients, LANGFLOW_URL, LANGFLOW_CHAT_FLOW_ID
from agent import async_chat, async_langflow, async_chat_stream
from auth_context import set_auth_context
from api.settings import get_docling_tweaks
from utils.logging_config import get_logger
logger = get_logger(__name__)
@ -127,8 +126,6 @@ class ChatService:
"Langflow client not initialized. Ensure LANGFLOW is reachable or set LANGFLOW_KEY."
)
# Get docling tweaks based on current configuration
docling_tweaks = get_docling_tweaks()
if stream:
from agent import async_langflow_chat_stream
@ -140,7 +137,6 @@ class ChatService:
user_id,
extra_headers=extra_headers,
previous_response_id=previous_response_id,
tweaks=docling_tweaks,
)
else:
from agent import async_langflow_chat
@ -152,7 +148,6 @@ class ChatService:
user_id,
extra_headers=extra_headers,
previous_response_id=previous_response_id,
tweaks=docling_tweaks,
)
response_data = {"response": response_text}
if response_id:
@ -202,8 +197,6 @@ class ChatService:
from agent import async_langflow_chat
# Get docling tweaks (might not be used by nudges flow, but keeping consistent)
docling_tweaks = get_docling_tweaks()
response_text, response_id = await async_langflow_chat(
langflow_client,
@ -211,7 +204,6 @@ class ChatService:
prompt,
user_id,
extra_headers=extra_headers,
tweaks=docling_tweaks,
store_conversation=False,
)
response_data = {"response": response_text}
@ -242,8 +234,6 @@ class ChatService:
raise ValueError(
"Langflow client not initialized. Ensure LANGFLOW is reachable or set LANGFLOW_KEY."
)
# Get docling tweaks based on current configuration
docling_tweaks = get_docling_tweaks()
response_text, response_id = await async_langflow(
langflow_client=langflow_client,
@ -251,7 +241,6 @@ class ChatService:
prompt=document_prompt,
extra_headers=extra_headers,
previous_response_id=previous_response_id,
tweaks=docling_tweaks,
)
else: # chat
# Set auth context for chat tools and provide user_id

View file

@ -17,7 +17,9 @@ class TaskService:
def __init__(self, document_service=None, process_pool=None):
self.document_service = document_service
self.process_pool = process_pool
self.task_store: dict[str, dict[str, UploadTask]] = {} # user_id -> {task_id -> UploadTask}
self.task_store: dict[
str, dict[str, UploadTask]
] = {} # user_id -> {task_id -> UploadTask}
self.background_tasks = set()
if self.process_pool is None:
@ -122,18 +124,27 @@ class TaskService:
# Process files with limited concurrency to avoid overwhelming the system
max_workers = get_worker_count()
semaphore = asyncio.Semaphore(max_workers * 2) # Allow 2x process pool size for async I/O
semaphore = asyncio.Semaphore(
max_workers * 2
) # Allow 2x process pool size for async I/O
async def process_with_semaphore(file_path: str):
async with semaphore:
await self.document_service.process_single_file_task(upload_task, file_path)
await self.document_service.process_single_file_task(
upload_task, file_path
)
tasks = [process_with_semaphore(file_path) for file_path in upload_task.file_tasks.keys()]
tasks = [
process_with_semaphore(file_path)
for file_path in upload_task.file_tasks.keys()
]
await asyncio.gather(*tasks, return_exceptions=True)
except Exception as e:
logger.error("Background upload processor failed", task_id=task_id, error=str(e))
logger.error(
"Background upload processor failed", task_id=task_id, error=str(e)
)
import traceback
traceback.print_exc()
@ -141,7 +152,9 @@ class TaskService:
self.task_store[user_id][task_id].status = TaskStatus.FAILED
self.task_store[user_id][task_id].updated_at = time.time()
async def background_custom_processor(self, user_id: str, task_id: str, items: list) -> None:
async def background_custom_processor(
self, user_id: str, task_id: str, items: list
) -> None:
"""Background task to process items using custom processor"""
try:
upload_task = self.task_store[user_id][task_id]
@ -163,7 +176,9 @@ class TaskService:
try:
await processor.process_item(upload_task, item, file_task)
except Exception as e:
logger.error("Failed to process item", item=str(item), error=str(e))
logger.error(
"Failed to process item", item=str(item), error=str(e)
)
import traceback
traceback.print_exc()
@ -190,7 +205,9 @@ class TaskService:
pass
raise # Re-raise to properly handle cancellation
except Exception as e:
logger.error("Background custom processor failed", task_id=task_id, error=str(e))
logger.error(
"Background custom processor failed", task_id=task_id, error=str(e)
)
import traceback
traceback.print_exc()
@ -212,7 +229,10 @@ class TaskService:
upload_task = None
for candidate_user_id in candidate_user_ids:
if candidate_user_id in self.task_store and task_id in self.task_store[candidate_user_id]:
if (
candidate_user_id in self.task_store
and task_id in self.task_store[candidate_user_id]
):
upload_task = self.task_store[candidate_user_id][task_id]
break
@ -271,10 +291,23 @@ class TaskService:
if task_id in tasks_by_id:
continue
# Calculate running and pending counts
# Calculate running and pending counts and build file statuses
running_files_count = 0
pending_files_count = 0
for file_task in upload_task.file_tasks.values():
file_statuses = {}
for file_path, file_task in upload_task.file_tasks.items():
if file_task.status.value != "completed":
file_statuses[file_path] = {
"status": file_task.status.value,
"result": file_task.result,
"error": file_task.error,
"retry_count": file_task.retry_count,
"created_at": file_task.created_at,
"updated_at": file_task.updated_at,
"duration_seconds": file_task.duration_seconds,
}
if file_task.status.value == "running":
running_files_count += 1
elif file_task.status.value == "pending":
@ -292,6 +325,7 @@ class TaskService:
"created_at": upload_task.created_at,
"updated_at": upload_task.updated_at,
"duration_seconds": upload_task.duration_seconds,
"files": file_statuses,
}
# First, add user-owned tasks; then shared anonymous;
@ -312,7 +346,10 @@ class TaskService:
store_user_id = None
for candidate_user_id in candidate_user_ids:
if candidate_user_id in self.task_store and task_id in self.task_store[candidate_user_id]:
if (
candidate_user_id in self.task_store
and task_id in self.task_store[candidate_user_id]
):
store_user_id = candidate_user_id
break
@ -326,7 +363,10 @@ class TaskService:
return False
# Cancel the background task to stop scheduling new work
if hasattr(upload_task, "background_task") and not upload_task.background_task.done():
if (
hasattr(upload_task, "background_task")
and not upload_task.background_task.done()
):
upload_task.background_task.cancel()
# Mark task as failed (cancelled)