Merge branch 'main' into lfx-openrag-update-flows

This commit is contained in:
Edwin Jose 2025-09-24 11:39:09 -04:00
commit f710ae2137
26 changed files with 4435 additions and 1517 deletions

View file

@ -8,6 +8,8 @@ LANGFLOW_SECRET_KEY=
# flow ids for chat and ingestion flows # flow ids for chat and ingestion flows
LANGFLOW_CHAT_FLOW_ID=1098eea1-6649-4e1d-aed1-b77249fb8dd0 LANGFLOW_CHAT_FLOW_ID=1098eea1-6649-4e1d-aed1-b77249fb8dd0
LANGFLOW_INGEST_FLOW_ID=5488df7c-b93f-4f87-a446-b67028bc0813 LANGFLOW_INGEST_FLOW_ID=5488df7c-b93f-4f87-a446-b67028bc0813
# Ingest flow using docling
LANGFLOW_INGEST_FLOW_ID=1402618b-e6d1-4ff2-9a11-d6ce71186915
NUDGES_FLOW_ID=ebc01d31-1976-46ce-a385-b0240327226c NUDGES_FLOW_ID=ebc01d31-1976-46ce-a385-b0240327226c
# Set a strong admin password for OpenSearch; a bcrypt hash is generated at # Set a strong admin password for OpenSearch; a bcrypt hash is generated at

View file

@ -1,48 +0,0 @@
---
title: What is OpenRAG?
slug: /
---
# OpenRAG Introduction
Let's discover **Docusaurus in less than 5 minutes**.
## Getting Started
Get started by **creating a new site**.
Or **try Docusaurus immediately** with **[docusaurus.new](https://docusaurus.new)**.
### What you'll need
- [Node.js](https://nodejs.org/en/download/) version 18.0 or above:
- When installing Node.js, you are recommended to check all checkboxes related to dependencies.
## Generate a new site
Generate a new Docusaurus site using the **classic template**.
The classic template will automatically be added to your project after you run the command:
```bash
npm init docusaurus@latest my-website classic
```
You can type this command into Command Prompt, Powershell, Terminal, or any other integrated terminal of your code editor.
The command also installs all necessary dependencies you need to run Docusaurus.
## Start your site
Run the development server:
```bash
cd my-website
npm run start
```
The `cd` command changes the directory you're working with. In order to work with your newly created Docusaurus site, you'll need to navigate the terminal there.
The `npm run start` command builds your website locally and serves it through a development server, ready for you to view at http://localhost:3000/.
Open `docs/intro.md` (this page) and edit some lines: the site **reloads automatically** and displays your changes.

View file

@ -0,0 +1,19 @@
---
title: What is OpenRAG?
slug: /what-is-openrag
---
OpenRAG is an open-source package for building agentic RAG systems.
It supports integration with a wide range of orchestration tools, vector databases, and LLM providers.
OpenRAG connects and amplifies three popular, proven open-source projects into one powerful platform:
* [Langflow](https://docs.langflow.org) - Langflow is a powerful tool to build and deploy AI agents and MCP servers. It supports all major LLMs, vector databases and a growing library of AI tools.
* [OpenSearch](https://docs.opensearch.org/latest/) - OpenSearch is a community-driven, Apache 2.0-licensed open source search and analytics suite that makes it easy to ingest, search, visualize, and analyze data.
* [Docling](https://docling-project.github.io/docling/) - Docling simplifies document processing, parsing diverse formats — including advanced PDF understanding — and providing seamless integrations with the gen AI ecosystem.
OpenRAG builds on Langflow's familiar interface while adding OpenSearch for vector storage and Docling for simplified document parsing, with opinionated flows that serve as ready-to-use recipes for ingestion, retrieval, and generation from popular sources like OneDrive, Google Drive, and AWS. And don't fear: every part of the stack is swappable. Write your own custom components in Python, try different language models, and customize your flows to build an agentic RAG system that solves problems.
Ready to get started? Install OpenRAG and then run the Quickstart to create a powerful RAG pipeline.

View file

@ -71,7 +71,7 @@ const config = {
logo: { logo: {
alt: 'OpenRAG Logo', alt: 'OpenRAG Logo',
src: 'img/logo.svg', src: 'img/logo.svg',
href: '/', href: 'what-is-openrag',
}, },
items: [ items: [
{ {
@ -89,7 +89,7 @@ const config = {
items: [ items: [
{ {
label: 'Getting Started', label: 'Getting Started',
to: '/', to: 'what-is-openrag',
}, },
], ],
}, },

10
docs/package-lock.json generated
View file

@ -12,6 +12,7 @@
"@docusaurus/preset-classic": "3.8.1", "@docusaurus/preset-classic": "3.8.1",
"@mdx-js/react": "^3.0.0", "@mdx-js/react": "^3.0.0",
"clsx": "^2.0.0", "clsx": "^2.0.0",
"lucide-react": "^0.544.0",
"prism-react-renderer": "^2.3.0", "prism-react-renderer": "^2.3.0",
"react": "^19.0.0", "react": "^19.0.0",
"react-dom": "^19.0.0" "react-dom": "^19.0.0"
@ -9801,6 +9802,15 @@
"yallist": "^3.0.2" "yallist": "^3.0.2"
} }
}, },
"node_modules/lucide-react": {
"version": "0.544.0",
"resolved": "https://registry.npmjs.org/lucide-react/-/lucide-react-0.544.0.tgz",
"integrity": "sha512-t5tS44bqd825zAW45UQxpG2CvcC4urOwn2TrwSH8u+MjeE+1NnWl6QqeQ/6NdjMqdOygyiT9p3Ev0p1NJykxjw==",
"license": "ISC",
"peerDependencies": {
"react": "^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0"
}
},
"node_modules/markdown-extensions": { "node_modules/markdown-extensions": {
"version": "2.0.0", "version": "2.0.0",
"resolved": "https://registry.npmjs.org/markdown-extensions/-/markdown-extensions-2.0.0.tgz", "resolved": "https://registry.npmjs.org/markdown-extensions/-/markdown-extensions-2.0.0.tgz",

View file

@ -19,6 +19,7 @@
"@docusaurus/preset-classic": "3.8.1", "@docusaurus/preset-classic": "3.8.1",
"@mdx-js/react": "^3.0.0", "@mdx-js/react": "^3.0.0",
"clsx": "^2.0.0", "clsx": "^2.0.0",
"lucide-react": "^0.544.0",
"prism-react-renderer": "^2.3.0", "prism-react-renderer": "^2.3.0",
"react": "^19.0.0", "react": "^19.0.0",
"react-dom": "^19.0.0" "react-dom": "^19.0.0"

View file

@ -22,7 +22,7 @@ const sidebars = {
items: [ items: [
{ {
type: "doc", type: "doc",
id: "get-started/intro", id: "get-started/what-is-openrag",
label: "Introduction" label: "Introduction"
}, },
{ {

View file

@ -0,0 +1,19 @@
import React from "react";
import * as LucideIcons from "lucide-react";
/*
How to use this component:
import Icon from "@site/src/components/icon";
<Icon name="AlertCircle" size={24} color="red" />
*/
type IconProps = {
name: string;
};
export default function Icon({ name, ...props }: IconProps) {
const Icon = LucideIcons[name];
return Icon ? <Icon {...props} /> : null;
}

File diff suppressed because it is too large Load diff

File diff suppressed because one or more lines are too long

View file

@ -14,7 +14,7 @@ interface DeleteDocumentResponse {
} }
const deleteDocument = async ( const deleteDocument = async (
data: DeleteDocumentRequest data: DeleteDocumentRequest,
): Promise<DeleteDocumentResponse> => { ): Promise<DeleteDocumentResponse> => {
const response = await fetch("/api/documents/delete-by-filename", { const response = await fetch("/api/documents/delete-by-filename", {
method: "POST", method: "POST",
@ -37,9 +37,11 @@ export const useDeleteDocument = () => {
return useMutation({ return useMutation({
mutationFn: deleteDocument, mutationFn: deleteDocument,
onSuccess: () => { onSettled: () => {
// Invalidate and refetch search queries to update the UI // Invalidate and refetch search queries to update the UI
queryClient.invalidateQueries({ queryKey: ["search"] }); setTimeout(() => {
queryClient.invalidateQueries({ queryKey: ["search"] });
}, 1000);
}, },
}); });
}; };

View file

@ -54,7 +54,7 @@ export const useGetOpenAIModelsQuery = (
queryKey: ["models", "openai", params], queryKey: ["models", "openai", params],
queryFn: getOpenAIModels, queryFn: getOpenAIModels,
retry: 2, retry: 2,
enabled: options?.enabled !== false, // Allow enabling/disabling from options enabled: !!params?.apiKey,
staleTime: 0, // Always fetch fresh data staleTime: 0, // Always fetch fresh data
gcTime: 0, // Don't cache results gcTime: 0, // Don't cache results
...options, ...options,

View file

@ -34,21 +34,28 @@ export interface ChunkResult {
export interface File { export interface File {
filename: string; filename: string;
mimetype: string; mimetype: string;
chunkCount: number; chunkCount?: number;
avgScore: number; avgScore?: number;
source_url: string; source_url: string;
owner: string; owner?: string;
owner_name: string; owner_name?: string;
owner_email: string; owner_email?: string;
size: number; size: number;
connector_type: string; connector_type: string;
chunks: ChunkResult[]; status?:
| "processing"
| "active"
| "unavailable"
| "failed"
| "hidden"
| "sync";
chunks?: ChunkResult[];
} }
export const useGetSearchQuery = ( export const useGetSearchQuery = (
query: string, query: string,
queryData?: ParsedQueryData | null, queryData?: ParsedQueryData | null,
options?: Omit<UseQueryOptions, "queryKey" | "queryFn"> options?: Omit<UseQueryOptions, "queryKey" | "queryFn">,
) => { ) => {
const queryClient = useQueryClient(); const queryClient = useQueryClient();
@ -149,7 +156,7 @@ export const useGetSearchQuery = (
} }
}); });
const files: File[] = Array.from(fileMap.values()).map(file => ({ const files: File[] = Array.from(fileMap.values()).map((file) => ({
filename: file.filename, filename: file.filename,
mimetype: file.mimetype, mimetype: file.mimetype,
chunkCount: file.chunks.length, chunkCount: file.chunks.length,
@ -173,11 +180,11 @@ export const useGetSearchQuery = (
const queryResult = useQuery( const queryResult = useQuery(
{ {
queryKey: ["search", effectiveQuery], queryKey: ["search", effectiveQuery],
placeholderData: prev => prev, placeholderData: (prev) => prev,
queryFn: getFiles, queryFn: getFiles,
...options, ...options,
}, },
queryClient queryClient,
); );
return queryResult; return queryResult;

View file

@ -1,17 +1,14 @@
"use client"; "use client";
import { import {
Building2, ArrowLeft,
Cloud, Copy,
FileText, File as FileIcon,
HardDrive,
Loader2, Loader2,
Search, Search,
} from "lucide-react"; } from "lucide-react";
import { Suspense, useCallback, useEffect, useState } from "react"; import { Suspense, useCallback, useEffect, useMemo, useState } from "react";
import { useRouter, useSearchParams } from "next/navigation"; import { useRouter, useSearchParams } from "next/navigation";
import { SiGoogledrive } from "react-icons/si";
import { TbBrandOnedrive } from "react-icons/tb";
import { ProtectedRoute } from "@/components/protected-route"; import { ProtectedRoute } from "@/components/protected-route";
import { Button } from "@/components/ui/button"; import { Button } from "@/components/ui/button";
import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context"; import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context";
@ -21,22 +18,16 @@ import {
type File, type File,
useGetSearchQuery, useGetSearchQuery,
} from "../../api/queries/useGetSearchQuery"; } from "../../api/queries/useGetSearchQuery";
import { Label } from "@/components/ui/label";
import { Checkbox } from "@/components/ui/checkbox";
import { Input } from "@/components/ui/input";
// Function to get the appropriate icon for a connector type const getFileTypeLabel = (mimetype: string) => {
function getSourceIcon(connectorType?: string) { if (mimetype === "application/pdf") return "PDF";
switch (connectorType) { if (mimetype === "text/plain") return "Text";
case "google_drive": if (mimetype === "application/msword") return "Word Document";
return <SiGoogledrive className="h-4 w-4 text-foreground" />; return "Unknown";
case "onedrive": };
return <TbBrandOnedrive className="h-4 w-4 text-foreground" />;
case "sharepoint":
return <Building2 className="h-4 w-4 text-foreground" />;
case "s3":
return <Cloud className="h-4 w-4 text-foreground" />;
default:
return <HardDrive className="h-4 w-4 text-muted-foreground" />;
}
}
function ChunksPageContent() { function ChunksPageContent() {
const router = useRouter(); const router = useRouter();
@ -46,10 +37,47 @@ function ChunksPageContent() {
const filename = searchParams.get("filename"); const filename = searchParams.get("filename");
const [chunks, setChunks] = useState<ChunkResult[]>([]); const [chunks, setChunks] = useState<ChunkResult[]>([]);
const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState<
ChunkResult[]
>([]);
const [selectedChunks, setSelectedChunks] = useState<Set<number>>(new Set());
// Calculate average chunk length
const averageChunkLength = useMemo(
() =>
chunks.reduce((acc, chunk) => acc + chunk.text.length, 0) /
chunks.length || 0,
[chunks]
);
const [selectAll, setSelectAll] = useState(false);
const [queryInputText, setQueryInputText] = useState(
parsedFilterData?.query ?? ""
);
// Use the same search query as the knowledge page, but we'll filter for the specific file // Use the same search query as the knowledge page, but we'll filter for the specific file
const { data = [], isFetching } = useGetSearchQuery("*", parsedFilterData); const { data = [], isFetching } = useGetSearchQuery("*", parsedFilterData);
useEffect(() => {
if (queryInputText === "") {
setChunksFilteredByQuery(chunks);
} else {
setChunksFilteredByQuery(
chunks.filter((chunk) =>
chunk.text.toLowerCase().includes(queryInputText.toLowerCase())
)
);
}
}, [queryInputText, chunks]);
const handleCopy = useCallback((text: string) => {
navigator.clipboard.writeText(text);
}, []);
const fileData = (data as File[]).find(
(file: File) => file.filename === filename
);
// Extract chunks for the specific file // Extract chunks for the specific file
useEffect(() => { useEffect(() => {
if (!filename || !(data as File[]).length) { if (!filename || !(data as File[]).length) {
@ -57,16 +85,37 @@ function ChunksPageContent() {
return; return;
} }
const fileData = (data as File[]).find(
(file: File) => file.filename === filename
);
setChunks(fileData?.chunks || []); setChunks(fileData?.chunks || []);
}, [data, filename]); }, [data, filename]);
// Set selected state for all checkboxes when selectAll changes
useEffect(() => {
if (selectAll) {
setSelectedChunks(new Set(chunks.map((_, index) => index)));
} else {
setSelectedChunks(new Set());
}
}, [selectAll, setSelectedChunks, chunks]);
const handleBack = useCallback(() => { const handleBack = useCallback(() => {
router.back(); router.push("/knowledge");
}, [router]); }, [router]);
const handleChunkCardCheckboxChange = useCallback(
(index: number) => {
setSelectedChunks((prevSelected) => {
const newSelected = new Set(prevSelected);
if (newSelected.has(index)) {
newSelected.delete(index);
} else {
newSelected.add(index);
}
return newSelected;
});
},
[setSelectedChunks]
);
if (!filename) { if (!filename) {
return ( return (
<div className="flex items-center justify-center h-64"> <div className="flex items-center justify-center h-64">
@ -83,7 +132,7 @@ function ChunksPageContent() {
return ( return (
<div <div
className={`fixed inset-0 md:left-72 top-[53px] flex flex-col transition-all duration-300 ${ className={`fixed inset-0 md:left-72 top-[53px] flex flex-row transition-all duration-300 ${
isMenuOpen && isPanelOpen isMenuOpen && isPanelOpen
? "md:right-[704px]" ? "md:right-[704px]"
: // Both open: 384px (menu) + 320px (KF panel) : // Both open: 384px (menu) + 320px (KF panel)
@ -98,29 +147,47 @@ function ChunksPageContent() {
> >
<div className="flex-1 flex flex-col min-h-0 px-6 py-6"> <div className="flex-1 flex flex-col min-h-0 px-6 py-6">
{/* Header */} {/* Header */}
<div className="flex items-center justify-between mb-6"> <div className="flex flex-col mb-6">
<div className="flex items-center gap-3"> <div className="flex items-center gap-3 mb-2">
<Button <Button variant="ghost" onClick={handleBack}>
variant="ghost" <ArrowLeft size={18} />
size="sm" <FileIcon className="text-muted-foreground" size={18} />
onClick={handleBack} <h1 className="text-lg font-semibold">
className="text-muted-foreground hover:text-foreground px-2" {filename.replace(/\.[^/.]+$/, "")}
> </h1>
Back
</Button> </Button>
<div className="flex flex-col">
<h2 className="text-lg font-semibold">Document Chunks</h2>
<p className="text-sm text-muted-foreground truncate max-w-md">
{decodeURIComponent(filename)}
</p>
</div>
</div> </div>
<div className="text-sm text-muted-foreground"> <div className="flex items-center gap-3 pl-4 mt-2">
{!isFetching && chunks.length > 0 && ( <div className="flex items-center gap-2">
<span> <Checkbox
{chunks.length} chunk{chunks.length !== 1 ? "s" : ""} found id="selectAllChunks"
</span> checked={selectAll}
)} onCheckedChange={(handleSelectAll) =>
setSelectAll(!!handleSelectAll)
}
/>
<Label
htmlFor="selectAllChunks"
className="font-medium text-muted-foreground whitespace-nowrap cursor-pointer"
>
Select all
</Label>
</div>
<div className="flex-1 flex items-center gap-2">
<Input
name="search-query"
id="search-query"
type="text"
defaultValue={parsedFilterData?.query}
value={queryInputText}
onChange={(e) => setQueryInputText(e.target.value)}
placeholder="Search chunks..."
className="flex-1 bg-muted/20 rounded-lg border border-border/50 px-4 py-3 focus-visible:ring-1 focus-visible:ring-ring"
/>
<Button variant="outline" size="sm">
<Search />
</Button>
</div>
</div> </div>
</div> </div>
@ -147,41 +214,130 @@ function ChunksPageContent() {
</div> </div>
) : ( ) : (
<div className="space-y-4 pb-6"> <div className="space-y-4 pb-6">
{chunks.map((chunk, index) => ( {chunksFilteredByQuery.map((chunk, index) => (
<div <div
key={chunk.filename + index} key={chunk.filename + index}
className="bg-muted/20 rounded-lg p-4 border border-border/50" className="bg-muted rounded-lg p-4 border border-border/50"
> >
<div className="flex items-center justify-between mb-2"> <div className="flex items-center justify-between mb-2">
<div className="flex items-center gap-2"> <div className="flex items-center gap-3">
<FileText className="h-4 w-4 text-blue-400" /> <div>
<span className="font-medium truncate"> <Checkbox
{chunk.filename} checked={selectedChunks.has(index)}
onCheckedChange={() =>
handleChunkCardCheckboxChange(index)
}
/>
</div>
<span className="text-sm font-bold">
Chunk {chunk.page}
</span> </span>
{chunk.connector_type && ( <span className="bg-background p-1 rounded text-xs text-muted-foreground/70">
<div className="ml-2"> {chunk.text.length} chars
{getSourceIcon(chunk.connector_type)} </span>
</div> <div className="py-1">
)} <Button
className="p-1"
onClick={() => handleCopy(chunk.text)}
variant="ghost"
size="xs"
>
<Copy className="text-muted-foreground" />
</Button>
</div>
</div> </div>
<span className="text-xs text-green-400 bg-green-400/20 px-2 py-1 rounded">
{chunk.score.toFixed(2)} {/* TODO: Update to use active toggle */}
</span> {/* <span className="px-2 py-1 text-green-500">
<Switch
className="ml-2 bg-green-500"
checked={true}
/>
Active
</span> */}
</div> </div>
<div className="flex items-center gap-4 text-sm text-muted-foreground mb-3"> <blockquote className="text-sm text-muted-foreground leading-relaxed border-l-2 border-input ml-1.5 pl-4">
<span>{chunk.mimetype}</span>
<span>Page {chunk.page}</span>
{chunk.owner_name && <span>Owner: {chunk.owner_name}</span>}
</div>
<p className="text-sm text-foreground/90 leading-relaxed">
{chunk.text} {chunk.text}
</p> </blockquote>
</div> </div>
))} ))}
</div> </div>
)} )}
</div> </div>
</div> </div>
{/* Right panel - Summary (TODO), Technical details, */}
<div className="w-[320px] py-20 px-2">
<div className="mb-8">
<h2 className="text-xl font-semibold mt-3 mb-4">Technical details</h2>
<dl>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Total chunks</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{chunks.length}
</dd>
</div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Avg length</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{averageChunkLength.toFixed(0)} chars
</dd>
</div>
{/* TODO: Uncomment after data is available */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Process time</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
</dd>
</div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Model</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
</dd>
</div> */}
</dl>
</div>
<div className="mb-8">
<h2 className="text-xl font-semibold mt-2 mb-3">Original document</h2>
<dl>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Name</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData?.filename}
</dd>
</div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Type</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"}
</dd>
</div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Size</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData?.size
? `${Math.round(fileData.size / 1024)} KB`
: "Unknown"}
</dd>
</div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Uploaded</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
N/A
</dd>
</div>
{/* TODO: Uncomment after data is available */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Source</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"></dd>
</div> */}
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Updated</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
N/A
</dd>
</div>
</dl>
</div>
</div>
</div> </div>
); );
} }

View file

@ -1,16 +1,10 @@
"use client"; "use client";
import { import type { ColDef } from "ag-grid-community";
Building2, import { AgGridReact, type CustomCellRendererProps } from "ag-grid-react";
Cloud, import { Building2, Cloud, HardDrive, Search, Trash2, X } from "lucide-react";
HardDrive,
Search,
Trash2,
X,
} from "lucide-react";
import { AgGridReact, CustomCellRendererProps } from "ag-grid-react";
import { useCallback, useState, useRef, ChangeEvent } from "react";
import { useRouter } from "next/navigation"; import { useRouter } from "next/navigation";
import { type ChangeEvent, useCallback, useRef, useState } from "react";
import { SiGoogledrive } from "react-icons/si"; import { SiGoogledrive } from "react-icons/si";
import { TbBrandOnedrive } from "react-icons/tb"; import { TbBrandOnedrive } from "react-icons/tb";
import { KnowledgeDropdown } from "@/components/knowledge-dropdown"; import { KnowledgeDropdown } from "@/components/knowledge-dropdown";
@ -19,13 +13,13 @@ import { Button } from "@/components/ui/button";
import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context"; import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context";
import { useTask } from "@/contexts/task-context"; import { useTask } from "@/contexts/task-context";
import { type File, useGetSearchQuery } from "../api/queries/useGetSearchQuery"; import { type File, useGetSearchQuery } from "../api/queries/useGetSearchQuery";
import { ColDef } from "ag-grid-community";
import "@/components/AgGrid/registerAgGridModules"; import "@/components/AgGrid/registerAgGridModules";
import "@/components/AgGrid/agGridStyles.css"; import "@/components/AgGrid/agGridStyles.css";
import { toast } from "sonner";
import { KnowledgeActionsDropdown } from "@/components/knowledge-actions-dropdown"; import { KnowledgeActionsDropdown } from "@/components/knowledge-actions-dropdown";
import { StatusBadge } from "@/components/ui/status-badge";
import { DeleteConfirmationDialog } from "../../../components/confirmation-dialog"; import { DeleteConfirmationDialog } from "../../../components/confirmation-dialog";
import { useDeleteDocument } from "../api/mutations/useDeleteDocument"; import { useDeleteDocument } from "../api/mutations/useDeleteDocument";
import { toast } from "sonner";
// Function to get the appropriate icon for a connector type // Function to get the appropriate icon for a connector type
function getSourceIcon(connectorType?: string) { function getSourceIcon(connectorType?: string) {
@ -51,7 +45,7 @@ function getSourceIcon(connectorType?: string) {
function SearchPage() { function SearchPage() {
const router = useRouter(); const router = useRouter();
const { isMenuOpen } = useTask(); const { isMenuOpen, files: taskFiles } = useTask();
const { selectedFilter, setSelectedFilter, parsedFilterData, isPanelOpen } = const { selectedFilter, setSelectedFilter, parsedFilterData, isPanelOpen } =
useKnowledgeFilter(); useKnowledgeFilter();
const [selectedRows, setSelectedRows] = useState<File[]>([]); const [selectedRows, setSelectedRows] = useState<File[]>([]);
@ -61,14 +55,38 @@ function SearchPage() {
const { data = [], isFetching } = useGetSearchQuery( const { data = [], isFetching } = useGetSearchQuery(
parsedFilterData?.query || "*", parsedFilterData?.query || "*",
parsedFilterData parsedFilterData,
); );
const handleTableSearch = (e: ChangeEvent<HTMLInputElement>) => { const handleTableSearch = (e: ChangeEvent<HTMLInputElement>) => {
gridRef.current?.api.setGridOption("quickFilterText", e.target.value); gridRef.current?.api.setGridOption("quickFilterText", e.target.value);
}; };
const fileResults = data as File[]; // Convert TaskFiles to File format and merge with backend results
const taskFilesAsFiles: File[] = taskFiles.map((taskFile) => {
return {
filename: taskFile.filename,
mimetype: taskFile.mimetype,
source_url: taskFile.source_url,
size: taskFile.size,
connector_type: taskFile.connector_type,
status: taskFile.status,
};
});
const backendFiles = data as File[];
const filteredTaskFiles = taskFilesAsFiles.filter((taskFile) => {
return (
taskFile.status !== "active" &&
!backendFiles.some(
(backendFile) => backendFile.filename === taskFile.filename,
)
);
});
// Combine task files first, then backend files
const fileResults = [...backendFiles, ...filteredTaskFiles];
const gridRef = useRef<AgGridReact>(null); const gridRef = useRef<AgGridReact>(null);
@ -82,13 +100,14 @@ function SearchPage() {
minWidth: 220, minWidth: 220,
cellRenderer: ({ data, value }: CustomCellRendererProps<File>) => { cellRenderer: ({ data, value }: CustomCellRendererProps<File>) => {
return ( return (
<div <button
className="flex items-center gap-2 cursor-pointer hover:text-blue-600 transition-colors" type="button"
className="flex items-center gap-2 cursor-pointer hover:text-blue-600 transition-colors text-left w-full"
onClick={() => { onClick={() => {
router.push( router.push(
`/knowledge/chunks?filename=${encodeURIComponent( `/knowledge/chunks?filename=${encodeURIComponent(
data?.filename ?? "" data?.filename ?? "",
)}` )}`,
); );
}} }}
> >
@ -96,7 +115,7 @@ function SearchPage() {
<span className="font-medium text-foreground truncate"> <span className="font-medium text-foreground truncate">
{value} {value}
</span> </span>
</div> </button>
); );
}, },
}, },
@ -119,6 +138,7 @@ function SearchPage() {
{ {
field: "chunkCount", field: "chunkCount",
headerName: "Chunks", headerName: "Chunks",
valueFormatter: (params) => params.data?.chunkCount?.toString() || "-",
}, },
{ {
field: "avgScore", field: "avgScore",
@ -127,11 +147,20 @@ function SearchPage() {
cellRenderer: ({ value }: CustomCellRendererProps<File>) => { cellRenderer: ({ value }: CustomCellRendererProps<File>) => {
return ( return (
<span className="text-xs text-green-400 bg-green-400/20 px-2 py-1 rounded"> <span className="text-xs text-green-400 bg-green-400/20 px-2 py-1 rounded">
{value.toFixed(2)} {value?.toFixed(2) ?? "-"}
</span> </span>
); );
}, },
}, },
{
field: "status",
headerName: "Status",
cellRenderer: ({ data }: CustomCellRendererProps<File>) => {
// Default to 'active' status if no status is provided
const status = data?.status || "active";
return <StatusBadge status={status} />;
},
},
{ {
cellRenderer: ({ data }: CustomCellRendererProps<File>) => { cellRenderer: ({ data }: CustomCellRendererProps<File>) => {
return <KnowledgeActionsDropdown filename={data?.filename || ""} />; return <KnowledgeActionsDropdown filename={data?.filename || ""} />;
@ -172,7 +201,7 @@ function SearchPage() {
try { try {
// Delete each file individually since the API expects one filename at a time // Delete each file individually since the API expects one filename at a time
const deletePromises = selectedRows.map((row) => const deletePromises = selectedRows.map((row) =>
deleteDocumentMutation.mutateAsync({ filename: row.filename }) deleteDocumentMutation.mutateAsync({ filename: row.filename }),
); );
await Promise.all(deletePromises); await Promise.all(deletePromises);
@ -180,7 +209,7 @@ function SearchPage() {
toast.success( toast.success(
`Successfully deleted ${selectedRows.length} document${ `Successfully deleted ${selectedRows.length} document${
selectedRows.length > 1 ? "s" : "" selectedRows.length > 1 ? "s" : ""
}` }`,
); );
setSelectedRows([]); setSelectedRows([]);
setShowBulkDeleteDialog(false); setShowBulkDeleteDialog(false);
@ -193,7 +222,7 @@ function SearchPage() {
toast.error( toast.error(
error instanceof Error error instanceof Error
? error.message ? error.message
: "Failed to delete some documents" : "Failed to delete some documents",
); );
} }
}; };

View file

@ -4,11 +4,13 @@ import { Loader2, PlugZap, RefreshCw } from "lucide-react";
import { useSearchParams } from "next/navigation"; import { useSearchParams } from "next/navigation";
import { Suspense, useCallback, useEffect, useState } from "react"; import { Suspense, useCallback, useEffect, useState } from "react";
import { useUpdateFlowSettingMutation } from "@/app/api/mutations/useUpdateFlowSettingMutation"; import { useUpdateFlowSettingMutation } from "@/app/api/mutations/useUpdateFlowSettingMutation";
import {
useGetIBMModelsQuery,
useGetOllamaModelsQuery,
useGetOpenAIModelsQuery,
} from "@/app/api/queries/useGetModelsQuery";
import { useGetSettingsQuery } from "@/app/api/queries/useGetSettingsQuery"; import { useGetSettingsQuery } from "@/app/api/queries/useGetSettingsQuery";
import { useGetOpenAIModelsQuery, useGetOllamaModelsQuery, useGetIBMModelsQuery } from "@/app/api/queries/useGetModelsQuery";
import { ConfirmationDialog } from "@/components/confirmation-dialog"; import { ConfirmationDialog } from "@/components/confirmation-dialog";
import { ModelSelectItems } from "./helpers/model-select-item";
import { getFallbackModels, type ModelProvider } from "./helpers/model-helpers";
import { ProtectedRoute } from "@/components/protected-route"; import { ProtectedRoute } from "@/components/protected-route";
import { Badge } from "@/components/ui/badge"; import { Badge } from "@/components/ui/badge";
import { Button } from "@/components/ui/button"; import { Button } from "@/components/ui/button";
@ -33,6 +35,8 @@ import { Textarea } from "@/components/ui/textarea";
import { useAuth } from "@/contexts/auth-context"; import { useAuth } from "@/contexts/auth-context";
import { useTask } from "@/contexts/task-context"; import { useTask } from "@/contexts/task-context";
import { useDebounce } from "@/lib/debounce"; import { useDebounce } from "@/lib/debounce";
import { getFallbackModels, type ModelProvider } from "./helpers/model-helpers";
import { ModelSelectItems } from "./helpers/model-select-item";
const MAX_SYSTEM_PROMPT_CHARS = 2000; const MAX_SYSTEM_PROMPT_CHARS = 2000;
@ -105,42 +109,46 @@ function KnowledgeSourcesPage() {
// Fetch settings using React Query // Fetch settings using React Query
const { data: settings = {} } = useGetSettingsQuery({ const { data: settings = {} } = useGetSettingsQuery({
enabled: isAuthenticated, enabled: isAuthenticated || isNoAuthMode,
}); });
// Get the current provider from settings // Get the current provider from settings
const currentProvider = (settings.provider?.model_provider || 'openai') as ModelProvider; const currentProvider = (settings.provider?.model_provider ||
"openai") as ModelProvider;
// Fetch available models based on provider // Fetch available models based on provider
const { data: openaiModelsData } = useGetOpenAIModelsQuery( const { data: openaiModelsData } = useGetOpenAIModelsQuery(
undefined, // Let backend use stored API key from configuration undefined, // Let backend use stored API key from configuration
{ {
enabled: isAuthenticated && currentProvider === 'openai', enabled:
} (isAuthenticated || isNoAuthMode) && currentProvider === "openai",
},
); );
const { data: ollamaModelsData } = useGetOllamaModelsQuery( const { data: ollamaModelsData } = useGetOllamaModelsQuery(
undefined, // No params for now, could be extended later undefined, // No params for now, could be extended later
{ {
enabled: isAuthenticated && currentProvider === 'ollama', enabled:
} (isAuthenticated || isNoAuthMode) && currentProvider === "ollama",
},
); );
const { data: ibmModelsData } = useGetIBMModelsQuery( const { data: ibmModelsData } = useGetIBMModelsQuery(
undefined, // No params for now, could be extended later undefined, // No params for now, could be extended later
{ {
enabled: isAuthenticated && currentProvider === 'ibm', enabled: (isAuthenticated || isNoAuthMode) && currentProvider === "ibm",
} },
); );
// Select the appropriate models data based on provider // Select the appropriate models data based on provider
const modelsData = currentProvider === 'openai' const modelsData =
? openaiModelsData currentProvider === "openai"
: currentProvider === 'ollama' ? openaiModelsData
? ollamaModelsData : currentProvider === "ollama"
: currentProvider === 'ibm' ? ollamaModelsData
? ibmModelsData : currentProvider === "ibm"
: openaiModelsData; // fallback to openai ? ibmModelsData
: openaiModelsData; // fallback to openai
// Mutations // Mutations
const updateFlowSettingMutation = useUpdateFlowSettingMutation({ const updateFlowSettingMutation = useUpdateFlowSettingMutation({
@ -219,10 +227,10 @@ function KnowledgeSourcesPage() {
// Update processing mode // Update processing mode
const handleProcessingModeChange = (mode: string) => { const handleProcessingModeChange = (mode: string) => {
setProcessingMode(mode); setProcessingMode(mode);
// Update the configuration setting (backend will also update the flow automatically)
debouncedUpdate({ doclingPresets: mode }); debouncedUpdate({ doclingPresets: mode });
}; };
// Helper function to get connector icon // Helper function to get connector icon
const getConnectorIcon = useCallback((iconName: string) => { const getConnectorIcon = useCallback((iconName: string) => {
const iconMap: { [key: string]: React.ReactElement } = { const iconMap: { [key: string]: React.ReactElement } = {
@ -611,7 +619,11 @@ function KnowledgeSourcesPage() {
Language Model Language Model
</Label> </Label>
<Select <Select
value={settings.agent?.llm_model || modelsData?.language_models?.find(m => m.default)?.value || "gpt-4"} value={
settings.agent?.llm_model ||
modelsData?.language_models?.find((m) => m.default)?.value ||
"gpt-4"
}
onValueChange={handleModelChange} onValueChange={handleModelChange}
> >
<SelectTrigger id="model-select"> <SelectTrigger id="model-select">
@ -636,10 +648,20 @@ function KnowledgeSourcesPage() {
value={systemPrompt} value={systemPrompt}
onChange={(e) => setSystemPrompt(e.target.value)} onChange={(e) => setSystemPrompt(e.target.value)}
rows={6} rows={6}
className={`resize-none ${systemPrompt.length > MAX_SYSTEM_PROMPT_CHARS ? 'border-red-500 focus:border-red-500' : ''}`} className={`resize-none ${
systemPrompt.length > MAX_SYSTEM_PROMPT_CHARS
? "border-red-500 focus:border-red-500"
: ""
}`}
/> />
<div className="flex justify-start"> <div className="flex justify-start">
<span className={`text-xs ${systemPrompt.length > MAX_SYSTEM_PROMPT_CHARS ? 'text-red-500' : 'text-muted-foreground'}`}> <span
className={`text-xs ${
systemPrompt.length > MAX_SYSTEM_PROMPT_CHARS
? "text-red-500"
: "text-muted-foreground"
}`}
>
{systemPrompt.length}/{MAX_SYSTEM_PROMPT_CHARS} characters {systemPrompt.length}/{MAX_SYSTEM_PROMPT_CHARS} characters
</span> </span>
</div> </div>
@ -647,7 +669,10 @@ function KnowledgeSourcesPage() {
<div className="flex justify-end pt-2"> <div className="flex justify-end pt-2">
<Button <Button
onClick={handleSystemPromptSave} onClick={handleSystemPromptSave}
disabled={updateFlowSettingMutation.isPending || systemPrompt.length > MAX_SYSTEM_PROMPT_CHARS} disabled={
updateFlowSettingMutation.isPending ||
systemPrompt.length > MAX_SYSTEM_PROMPT_CHARS
}
className="min-w-[120px]" className="min-w-[120px]"
size="sm" size="sm"
variant="outline" variant="outline"
@ -734,7 +759,9 @@ function KnowledgeSourcesPage() {
</Label> </Label>
<Select <Select
value={ value={
settings.knowledge?.embedding_model || modelsData?.embedding_models?.find(m => m.default)?.value || "text-embedding-ada-002" settings.knowledge?.embedding_model ||
modelsData?.embedding_models?.find((m) => m.default)?.value ||
"text-embedding-ada-002"
} }
onValueChange={handleEmbeddingModelChange} onValueChange={handleEmbeddingModelChange}
> >
@ -744,7 +771,9 @@ function KnowledgeSourcesPage() {
<SelectContent> <SelectContent>
<ModelSelectItems <ModelSelectItems
models={modelsData?.embedding_models} models={modelsData?.embedding_models}
fallbackModels={getFallbackModels(currentProvider).embedding} fallbackModels={
getFallbackModels(currentProvider).embedding
}
provider={currentProvider} provider={currentProvider}
/> />
</SelectContent> </SelectContent>
@ -805,7 +834,10 @@ function KnowledgeSourcesPage() {
<div className="flex items-center space-x-3"> <div className="flex items-center space-x-3">
<RadioGroupItem value="standard" id="standard" /> <RadioGroupItem value="standard" id="standard" />
<div className="flex-1"> <div className="flex-1">
<Label htmlFor="standard" className="text-base font-medium cursor-pointer"> <Label
htmlFor="standard"
className="text-base font-medium cursor-pointer"
>
Standard Standard
</Label> </Label>
<div className="text-sm text-muted-foreground"> <div className="text-sm text-muted-foreground">
@ -816,18 +848,28 @@ function KnowledgeSourcesPage() {
<div className="flex items-center space-x-3"> <div className="flex items-center space-x-3">
<RadioGroupItem value="ocr" id="ocr" /> <RadioGroupItem value="ocr" id="ocr" />
<div className="flex-1"> <div className="flex-1">
<Label htmlFor="ocr" className="text-base font-medium cursor-pointer"> <Label
htmlFor="ocr"
className="text-base font-medium cursor-pointer"
>
Extract text from images Extract text from images
</Label> </Label>
<div className="text-sm text-muted-foreground"> <div className="text-sm text-muted-foreground">
Uses OCR to extract text from images/PDFs. Ingest is slower when enabled Uses OCR to extract text from images/PDFs. Ingest is
slower when enabled
</div> </div>
</div> </div>
</div> </div>
<div className="flex items-center space-x-3"> <div className="flex items-center space-x-3">
<RadioGroupItem value="picture_description" id="picture_description" /> <RadioGroupItem
value="picture_description"
id="picture_description"
/>
<div className="flex-1"> <div className="flex-1">
<Label htmlFor="picture_description" className="text-base font-medium cursor-pointer"> <Label
htmlFor="picture_description"
className="text-base font-medium cursor-pointer"
>
Generate Description Generate Description
</Label> </Label>
<div className="text-sm text-muted-foreground"> <div className="text-sm text-muted-foreground">
@ -838,11 +880,15 @@ function KnowledgeSourcesPage() {
<div className="flex items-center space-x-3"> <div className="flex items-center space-x-3">
<RadioGroupItem value="VLM" id="VLM" /> <RadioGroupItem value="VLM" id="VLM" />
<div className="flex-1"> <div className="flex-1">
<Label htmlFor="VLM" className="text-base font-medium cursor-pointer"> <Label
htmlFor="VLM"
className="text-base font-medium cursor-pointer"
>
AI Vision AI Vision
</Label> </Label>
<div className="text-sm text-muted-foreground"> <div className="text-sm text-muted-foreground">
Advanced processing with vision language models. Highest quality but most expensive Advanced processing with vision language models. Highest
quality but most expensive
</div> </div>
</div> </div>
</div> </div>

View file

@ -0,0 +1,49 @@
/** Props for {@link AnimatedProcessingIcon}. */
interface AnimatedProcessingIconProps {
  /** Extra CSS classes forwarded to the root `<svg>` element. */
  className?: string;
  /** Rendered height in px; width is derived at the fixed 6:10 ratio. */
  size?: number;
}

/**
 * Five-dot "processing" indicator. The dots pulse in a staggered wave
 * driven by inline CSS keyframes, so no external stylesheet is required.
 * Dots use `currentColor`, so the icon inherits the surrounding text color.
 */
export const AnimatedProcessingIcon = ({
  className = "",
  size = 10,
}: AnimatedProcessingIconProps) => {
  // Preserve the 6:10 aspect ratio of the fixed viewBox when scaling.
  const width = Math.round((size * 6) / 10);
  const height = size;

  return (
    <svg
      width={width}
      height={height}
      viewBox="0 0 6 10"
      fill="none"
      xmlns="http://www.w3.org/2000/svg"
      className={className}
    >
      {/* NOTE(review): this <style> is emitted once per rendered instance,
          so multiple icons duplicate the keyframe rules — harmless but
          redundant; consider hoisting to a shared stylesheet. */}
      <style>
        {`
        .dot-1 { animation: pulse-wave 1.5s infinite; animation-delay: 0s; }
        .dot-2 { animation: pulse-wave 1.5s infinite; animation-delay: 0.1s; }
        .dot-3 { animation: pulse-wave 1.5s infinite; animation-delay: 0.2s; }
        .dot-4 { animation: pulse-wave 1.5s infinite; animation-delay: 0.3s; }
        .dot-5 { animation: pulse-wave 1.5s infinite; animation-delay: 0.4s; }

        @keyframes pulse-wave {
          0%, 60%, 100% {
            opacity: 0.25;
            transform: scale(1);
          }
          30% {
            opacity: 1;
            transform: scale(1.2);
          }
        }
      `}
      </style>
      <circle className="dot-1" cx="1" cy="5" r="1" fill="currentColor" />
      <circle className="dot-2" cx="1" cy="9" r="1" fill="currentColor" />
      <circle className="dot-3" cx="5" cy="1" r="1" fill="currentColor" />
      <circle className="dot-4" cx="5" cy="5" r="1" fill="currentColor" />
      <circle className="dot-5" cx="5" cy="9" r="1" fill="currentColor" />
    </svg>
  );
};

View file

@ -0,0 +1,58 @@
import { AnimatedProcessingIcon } from "./animated-processing-icon";
/** Lifecycle states a knowledge-source file/connector can be in. */
export type Status =
  | "processing"
  | "active"
  | "unavailable"
  | "hidden"
  | "sync"
  | "failed";

interface StatusBadgeProps {
  /** Status to render; selects the label and color from `statusConfig`. */
  status: Status;
  /** Optional extra classes appended to the badge root element. */
  className?: string;
}

// Per-status display label and Tailwind color classes (light + dark variants).
// NOTE(review): several className strings carry a trailing space — it is
// harmless in a class attribute but looks accidental.
const statusConfig = {
  processing: {
    label: "Processing",
    className: "text-muted-foreground dark:text-muted-foreground ",
  },
  active: {
    label: "Active",
    className: "text-emerald-600 dark:text-emerald-400 ",
  },
  unavailable: {
    label: "Unavailable",
    className: "text-red-600 dark:text-red-400 ",
  },
  failed: {
    label: "Failed",
    className: "text-red-600 dark:text-red-400 ",
  },
  hidden: {
    label: "Hidden",
    className: "text-zinc-400 dark:text-zinc-500 ",
  },
  sync: {
    label: "Sync",
    className: "text-amber-700 dark:text-amber-300 underline",
  },
};
/**
 * Inline text badge showing a status label in its configured color.
 * The "processing" state additionally renders the animated dots icon.
 */
export const StatusBadge = ({ status, className }: StatusBadgeProps) => {
  const { label, className: statusClasses } = statusConfig[status];
  const extraClasses = className || "";
  const isProcessing = status === "processing";

  return (
    <div
      className={`inline-flex items-center gap-1 ${statusClasses} ${extraClasses}`}
    >
      {isProcessing && (
        <AnimatedProcessingIcon className="text-current mr-2" size={10} />
      )}
      {label}
    </div>
  );
};

View file

@ -35,9 +35,22 @@ export interface Task {
files?: Record<string, Record<string, unknown>>; files?: Record<string, Record<string, unknown>>;
} }
// Client-side record of a single file attached to an ingestion task.
export interface TaskFile {
  // Display name (last path segment of the backend file path).
  filename: string;
  // MIME type; empty string when the backend task payload omits it.
  mimetype: string;
  // Backend file path / object key, used to correlate status updates.
  source_url: string;
  // Size in bytes; 0 when unknown.
  size: number;
  // Origin of the file, e.g. "local" or "s3".
  connector_type: string;
  status: "active" | "failed" | "processing";
  // Ingestion task this file belongs to.
  task_id: string;
  // ISO-8601 timestamps.
  created_at: string;
  updated_at: string;
}
interface TaskContextType { interface TaskContextType {
tasks: Task[]; tasks: Task[];
files: TaskFile[];
addTask: (taskId: string) => void; addTask: (taskId: string) => void;
addFiles: (files: Partial<TaskFile>[], taskId: string) => void;
removeTask: (taskId: string) => void; removeTask: (taskId: string) => void;
refreshTasks: () => Promise<void>; refreshTasks: () => Promise<void>;
cancelTask: (taskId: string) => Promise<void>; cancelTask: (taskId: string) => Promise<void>;
@ -51,6 +64,7 @@ const TaskContext = createContext<TaskContextType | undefined>(undefined);
export function TaskProvider({ children }: { children: React.ReactNode }) { export function TaskProvider({ children }: { children: React.ReactNode }) {
const [tasks, setTasks] = useState<Task[]>([]); const [tasks, setTasks] = useState<Task[]>([]);
const [files, setFiles] = useState<TaskFile[]>([]);
const [isPolling, setIsPolling] = useState(false); const [isPolling, setIsPolling] = useState(false);
const [isFetching, setIsFetching] = useState(false); const [isFetching, setIsFetching] = useState(false);
const [isMenuOpen, setIsMenuOpen] = useState(false); const [isMenuOpen, setIsMenuOpen] = useState(false);
@ -58,12 +72,32 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
const queryClient = useQueryClient(); const queryClient = useQueryClient();
const refetchSearch = () => { const refetchSearch = useCallback(() => {
queryClient.invalidateQueries({ queryClient.invalidateQueries({
queryKey: ["search"], queryKey: ["search"],
exact: false, exact: false,
}); });
}; }, [queryClient]);
// Register a batch of files for a task, normalizing partial records into
// full TaskFile entries that start in the "processing" state.
const addFiles = useCallback(
  (newFiles: Partial<TaskFile>[], taskId: string) => {
    // All files in one batch share the same creation timestamp.
    const timestamp = new Date().toISOString();

    const normalized: TaskFile[] = newFiles.map((file) => ({
      filename: file.filename || "",
      mimetype: file.mimetype || "",
      source_url: file.source_url || "",
      size: file.size || 0,
      connector_type: file.connector_type || "local",
      status: "processing",
      task_id: taskId,
      created_at: timestamp,
      updated_at: timestamp,
    }));

    // Append without mutating the previous state array.
    setFiles((existing) => [...existing, ...normalized]);
  },
  [],
);
const fetchTasks = useCallback(async () => { const fetchTasks = useCallback(async () => {
if (!isAuthenticated && !isNoAuthMode) return; if (!isAuthenticated && !isNoAuthMode) return;
@ -76,13 +110,87 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
const newTasks = data.tasks || []; const newTasks = data.tasks || [];
// Update tasks and check for status changes in the same state update // Update tasks and check for status changes in the same state update
setTasks(prevTasks => { setTasks((prevTasks) => {
// Check for newly completed tasks to show toasts // Check for newly completed tasks to show toasts
if (prevTasks.length > 0) { if (prevTasks.length > 0) {
newTasks.forEach((newTask: Task) => { newTasks.forEach((newTask: Task) => {
const oldTask = prevTasks.find( const oldTask = prevTasks.find(
t => t.task_id === newTask.task_id (t) => t.task_id === newTask.task_id,
); );
// Update or add files from task.files if available.
// NOTE(review): this whole section runs inside the setTasks state-updater
// callback and calls setFiles — a side effect inside an updater, which React
// may invoke more than once in StrictMode; consider moving it to an effect.
if (newTask.files && typeof newTask.files === "object") {
  const taskFileEntries = Object.entries(newTask.files);
  const now = new Date().toISOString();

  taskFileEntries.forEach(([filePath, fileInfo]) => {
    if (typeof fileInfo === "object" && fileInfo) {
      // Display name is the last path segment; fall back to the full path.
      const fileName = filePath.split("/").pop() || filePath;
      const fileStatus = fileInfo.status as string;

      // Map backend file status to our TaskFile status
      // (unknown statuses are treated as still processing).
      let mappedStatus: TaskFile["status"];
      switch (fileStatus) {
        case "pending":
        case "running":
          mappedStatus = "processing";
          break;
        case "completed":
          mappedStatus = "active";
          break;
        case "failed":
          mappedStatus = "failed";
          break;
        default:
          mappedStatus = "processing";
      }

      setFiles((prevFiles) => {
        // A file is identified by its path within a specific task.
        const existingFileIndex = prevFiles.findIndex(
          (f) =>
            f.source_url === filePath &&
            f.task_id === newTask.task_id,
        );

        // Detect connector type based on file path or other indicators.
        // NOTE(review): heuristic — any relative path containing "/" is
        // assumed to be an S3 key; verify this holds for all connectors.
        let connectorType = "local";
        if (filePath.includes("/") && !filePath.startsWith("/")) {
          // Likely S3 key format (bucket/path/file.ext)
          connectorType = "s3";
        }

        const fileEntry: TaskFile = {
          filename: fileName,
          mimetype: "", // We don't have this info from the task
          source_url: filePath,
          size: 0, // We don't have this info from the task
          connector_type: connectorType,
          status: mappedStatus,
          task_id: newTask.task_id,
          // Prefer backend timestamps when present; otherwise stamp now.
          created_at:
            typeof fileInfo.created_at === "string"
              ? fileInfo.created_at
              : now,
          updated_at:
            typeof fileInfo.updated_at === "string"
              ? fileInfo.updated_at
              : now,
        };

        if (existingFileIndex >= 0) {
          // Update existing file (replace in a copied array — no mutation).
          const updatedFiles = [...prevFiles];
          updatedFiles[existingFileIndex] = fileEntry;
          return updatedFiles;
        } else {
          // Add new file
          return [...prevFiles, fileEntry];
        }
      });
    }
  });
}
if ( if (
oldTask && oldTask &&
oldTask.status !== "completed" && oldTask.status !== "completed" &&
@ -99,9 +207,14 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
refetchSearch(); refetchSearch();
// Dispatch knowledge updated event for all knowledge-related pages // Dispatch knowledge updated event for all knowledge-related pages
console.log( console.log(
"Task completed successfully, dispatching knowledgeUpdated event" "Task completed successfully, dispatching knowledgeUpdated event",
); );
window.dispatchEvent(new CustomEvent("knowledgeUpdated")); window.dispatchEvent(new CustomEvent("knowledgeUpdated"));
// Remove files for this completed task from the files list
setFiles((prevFiles) =>
prevFiles.filter((file) => file.task_id !== newTask.task_id),
);
} else if ( } else if (
oldTask && oldTask &&
oldTask.status !== "failed" && oldTask.status !== "failed" &&
@ -114,6 +227,8 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
newTask.error || "Unknown error" newTask.error || "Unknown error"
}`, }`,
}); });
// Files will be updated to failed status by the file parsing logic above
} }
}); });
} }
@ -126,7 +241,7 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
} finally { } finally {
setIsFetching(false); setIsFetching(false);
} }
}, [isAuthenticated, isNoAuthMode]); // Removed 'tasks' from dependencies to prevent infinite loop! }, [isAuthenticated, isNoAuthMode, refetchSearch]); // Removed 'tasks' from dependencies to prevent infinite loop!
const addTask = useCallback((taskId: string) => { const addTask = useCallback((taskId: string) => {
// Immediately start aggressive polling for the new task // Immediately start aggressive polling for the new task
@ -140,19 +255,21 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
const data = await response.json(); const data = await response.json();
const newTasks = data.tasks || []; const newTasks = data.tasks || [];
const foundTask = newTasks.find( const foundTask = newTasks.find(
(task: Task) => task.task_id === taskId (task: Task) => task.task_id === taskId,
); );
if (foundTask) { if (foundTask) {
// Task found! Update the tasks state // Task found! Update the tasks state
setTasks(prevTasks => { setTasks((prevTasks) => {
// Check if task is already in the list // Check if task is already in the list
const exists = prevTasks.some(t => t.task_id === taskId); const exists = prevTasks.some((t) => t.task_id === taskId);
if (!exists) { if (!exists) {
return [...prevTasks, foundTask]; return [...prevTasks, foundTask];
} }
// Update existing task // Update existing task
return prevTasks.map(t => (t.task_id === taskId ? foundTask : t)); return prevTasks.map((t) =>
t.task_id === taskId ? foundTask : t,
);
}); });
return; // Stop polling, we found it return; // Stop polling, we found it
} }
@ -177,7 +294,7 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
}, [fetchTasks]); }, [fetchTasks]);
const removeTask = useCallback((taskId: string) => { const removeTask = useCallback((taskId: string) => {
setTasks(prev => prev.filter(task => task.task_id !== taskId)); setTasks((prev) => prev.filter((task) => task.task_id !== taskId));
}, []); }, []);
const cancelTask = useCallback( const cancelTask = useCallback(
@ -204,11 +321,11 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
}); });
} }
}, },
[fetchTasks] [fetchTasks],
); );
const toggleMenu = useCallback(() => { const toggleMenu = useCallback(() => {
setIsMenuOpen(prev => !prev); setIsMenuOpen((prev) => !prev);
}, []); }, []);
// Periodic polling for task updates // Periodic polling for task updates
@ -231,7 +348,9 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
const value: TaskContextType = { const value: TaskContextType = {
tasks, tasks,
files,
addTask, addTask,
addFiles,
removeTask, removeTask,
refreshTasks, refreshTasks,
cancelTask, cancelTask,

View file

@ -106,7 +106,6 @@ async def async_response_stream(
model: str, model: str,
extra_headers: dict = None, extra_headers: dict = None,
previous_response_id: str = None, previous_response_id: str = None,
tweaks: dict = None,
log_prefix: str = "response", log_prefix: str = "response",
): ):
logger.info("User prompt received", prompt=prompt) logger.info("User prompt received", prompt=prompt)
@ -121,8 +120,6 @@ async def async_response_stream(
} }
if previous_response_id is not None: if previous_response_id is not None:
request_params["previous_response_id"] = previous_response_id request_params["previous_response_id"] = previous_response_id
if tweaks:
request_params["tweaks"] = tweaks
if "x-api-key" not in client.default_headers: if "x-api-key" not in client.default_headers:
if hasattr(client, "api_key") and extra_headers is not None: if hasattr(client, "api_key") and extra_headers is not None:
@ -199,7 +196,6 @@ async def async_response(
model: str, model: str,
extra_headers: dict = None, extra_headers: dict = None,
previous_response_id: str = None, previous_response_id: str = None,
tweaks: dict = None,
log_prefix: str = "response", log_prefix: str = "response",
): ):
try: try:
@ -214,8 +210,6 @@ async def async_response(
} }
if previous_response_id is not None: if previous_response_id is not None:
request_params["previous_response_id"] = previous_response_id request_params["previous_response_id"] = previous_response_id
if tweaks:
request_params["tweaks"] = tweaks
if extra_headers: if extra_headers:
request_params["extra_headers"] = extra_headers request_params["extra_headers"] = extra_headers
@ -249,7 +243,6 @@ async def async_stream(
model: str, model: str,
extra_headers: dict = None, extra_headers: dict = None,
previous_response_id: str = None, previous_response_id: str = None,
tweaks: dict = None,
log_prefix: str = "response", log_prefix: str = "response",
): ):
async for chunk in async_response_stream( async for chunk in async_response_stream(
@ -258,7 +251,6 @@ async def async_stream(
model, model,
extra_headers=extra_headers, extra_headers=extra_headers,
previous_response_id=previous_response_id, previous_response_id=previous_response_id,
tweaks=tweaks,
log_prefix=log_prefix, log_prefix=log_prefix,
): ):
yield chunk yield chunk
@ -271,7 +263,6 @@ async def async_langflow(
prompt: str, prompt: str,
extra_headers: dict = None, extra_headers: dict = None,
previous_response_id: str = None, previous_response_id: str = None,
tweaks: dict = None,
): ):
response_text, response_id, response_obj = await async_response( response_text, response_id, response_obj = await async_response(
langflow_client, langflow_client,
@ -279,7 +270,6 @@ async def async_langflow(
flow_id, flow_id,
extra_headers=extra_headers, extra_headers=extra_headers,
previous_response_id=previous_response_id, previous_response_id=previous_response_id,
tweaks=tweaks,
log_prefix="langflow", log_prefix="langflow",
) )
return response_text, response_id return response_text, response_id
@ -292,7 +282,6 @@ async def async_langflow_stream(
prompt: str, prompt: str,
extra_headers: dict = None, extra_headers: dict = None,
previous_response_id: str = None, previous_response_id: str = None,
tweaks: dict = None,
): ):
logger.debug("Starting langflow stream", prompt=prompt) logger.debug("Starting langflow stream", prompt=prompt)
try: try:
@ -302,8 +291,7 @@ async def async_langflow_stream(
flow_id, flow_id,
extra_headers=extra_headers, extra_headers=extra_headers,
previous_response_id=previous_response_id, previous_response_id=previous_response_id,
tweaks=tweaks, log_prefix="langflow",
log_prefix="langflow",
): ):
logger.debug( logger.debug(
"Yielding chunk from langflow stream", "Yielding chunk from langflow stream",
@ -463,7 +451,6 @@ async def async_langflow_chat(
user_id: str, user_id: str,
extra_headers: dict = None, extra_headers: dict = None,
previous_response_id: str = None, previous_response_id: str = None,
tweaks: dict = None,
store_conversation: bool = True, store_conversation: bool = True,
): ):
logger.debug( logger.debug(
@ -497,7 +484,6 @@ async def async_langflow_chat(
flow_id, flow_id,
extra_headers=extra_headers, extra_headers=extra_headers,
previous_response_id=previous_response_id, previous_response_id=previous_response_id,
tweaks=tweaks,
log_prefix="langflow", log_prefix="langflow",
) )
logger.debug( logger.debug(
@ -576,7 +562,6 @@ async def async_langflow_chat_stream(
user_id: str, user_id: str,
extra_headers: dict = None, extra_headers: dict = None,
previous_response_id: str = None, previous_response_id: str = None,
tweaks: dict = None,
): ):
logger.debug( logger.debug(
"async_langflow_chat_stream called", "async_langflow_chat_stream called",
@ -603,7 +588,6 @@ async def async_langflow_chat_stream(
flow_id, flow_id,
extra_headers=extra_headers, extra_headers=extra_headers,
previous_response_id=previous_response_id, previous_response_id=previous_response_id,
tweaks=tweaks,
log_prefix="langflow", log_prefix="langflow",
): ):
# Extract text content to build full response for history # Extract text content to build full response for history

View file

@ -17,14 +17,18 @@ async def get_openai_models(request, models_service, session_manager):
try: try:
config = get_openrag_config() config = get_openrag_config()
api_key = config.provider.api_key api_key = config.provider.api_key
logger.info(f"Retrieved API key from config: {'yes' if api_key else 'no'}") logger.info(
f"Retrieved API key from config: {'yes' if api_key else 'no'}"
)
except Exception as e: except Exception as e:
logger.error(f"Failed to get config: {e}") logger.error(f"Failed to get config: {e}")
if not api_key: if not api_key:
return JSONResponse( return JSONResponse(
{"error": "OpenAI API key is required either as query parameter or in configuration"}, {
status_code=400 "error": "OpenAI API key is required either as query parameter or in configuration"
},
status_code=400,
) )
models = await models_service.get_openai_models(api_key=api_key) models = await models_service.get_openai_models(api_key=api_key)
@ -32,8 +36,7 @@ async def get_openai_models(request, models_service, session_manager):
except Exception as e: except Exception as e:
logger.error(f"Failed to get OpenAI models: {str(e)}") logger.error(f"Failed to get OpenAI models: {str(e)}")
return JSONResponse( return JSONResponse(
{"error": f"Failed to retrieve OpenAI models: {str(e)}"}, {"error": f"Failed to retrieve OpenAI models: {str(e)}"}, status_code=500
status_code=500
) )
@ -44,13 +47,31 @@ async def get_ollama_models(request, models_service, session_manager):
query_params = dict(request.query_params) query_params = dict(request.query_params)
endpoint = query_params.get("endpoint") endpoint = query_params.get("endpoint")
# If no endpoint was provided as a query parameter, fall back to the one
# stored in the OpenRAG configuration. (Previous comment said "API key",
# but this code handles the Ollama endpoint URL.)
if not endpoint:
    try:
        config = get_openrag_config()
        endpoint = config.provider.endpoint
        logger.info(
            f"Retrieved endpoint from config: {'yes' if endpoint else 'no'}"
        )
    except Exception as e:
        # Best-effort: a config read failure just means we report the
        # missing-endpoint error below.
        logger.error(f"Failed to get config: {e}")

if not endpoint:
    return JSONResponse(
        {
            "error": "Endpoint is required either as query parameter or in configuration"
        },
        status_code=400,
    )
models = await models_service.get_ollama_models(endpoint=endpoint) models = await models_service.get_ollama_models(endpoint=endpoint)
return JSONResponse(models) return JSONResponse(models)
except Exception as e: except Exception as e:
logger.error(f"Failed to get Ollama models: {str(e)}") logger.error(f"Failed to get Ollama models: {str(e)}")
return JSONResponse( return JSONResponse(
{"error": f"Failed to retrieve Ollama models: {str(e)}"}, {"error": f"Failed to retrieve Ollama models: {str(e)}"}, status_code=500
status_code=500
) )
@ -63,15 +84,65 @@ async def get_ibm_models(request, models_service, session_manager):
api_key = query_params.get("api_key") api_key = query_params.get("api_key")
project_id = query_params.get("project_id") project_id = query_params.get("project_id")
config = get_openrag_config()

# If no API key was provided as a query parameter, fall back to the
# API key stored in the OpenRAG configuration.
if not api_key:
    try:
        api_key = config.provider.api_key
        logger.info(
            f"Retrieved API key from config: {'yes' if api_key else 'no'}"
        )
    except Exception as e:
        logger.error(f"Failed to get config: {e}")

if not api_key:
    # NOTE(review): this is the IBM models endpoint, but the message says
    # "OpenAI API key" — looks like a copy-paste from get_openai_models;
    # confirm and reword.
    return JSONResponse(
        {
            "error": "OpenAI API key is required either as query parameter or in configuration"
        },
        status_code=400,
    )

# Same fallback for the IBM service endpoint URL.
if not endpoint:
    try:
        endpoint = config.provider.endpoint
        logger.info(
            f"Retrieved endpoint from config: {'yes' if endpoint else 'no'}"
        )
    except Exception as e:
        logger.error(f"Failed to get config: {e}")

if not endpoint:
    return JSONResponse(
        {
            "error": "Endpoint is required either as query parameter or in configuration"
        },
        status_code=400,
    )

# Same fallback for the IBM project ID.
if not project_id:
    try:
        project_id = config.provider.project_id
        logger.info(
            f"Retrieved project ID from config: {'yes' if project_id else 'no'}"
        )
    except Exception as e:
        logger.error(f"Failed to get config: {e}")

if not project_id:
    return JSONResponse(
        {
            "error": "Project ID is required either as query parameter or in configuration"
        },
        status_code=400,
    )
models = await models_service.get_ibm_models( models = await models_service.get_ibm_models(
endpoint=endpoint, endpoint=endpoint, api_key=api_key, project_id=project_id
api_key=api_key,
project_id=project_id
) )
return JSONResponse(models) return JSONResponse(models)
except Exception as e: except Exception as e:
logger.error(f"Failed to get IBM models: {str(e)}") logger.error(f"Failed to get IBM models: {str(e)}")
return JSONResponse( return JSONResponse(
{"error": f"Failed to retrieve IBM models: {str(e)}"}, {"error": f"Failed to retrieve IBM models: {str(e)}"}, status_code=500
status_code=500 )
)

View file

@ -7,6 +7,7 @@ from config.settings import (
LANGFLOW_CHAT_FLOW_ID, LANGFLOW_CHAT_FLOW_ID,
LANGFLOW_INGEST_FLOW_ID, LANGFLOW_INGEST_FLOW_ID,
LANGFLOW_PUBLIC_URL, LANGFLOW_PUBLIC_URL,
DOCLING_COMPONENT_ID,
clients, clients,
get_openrag_config, get_openrag_config,
config_manager, config_manager,
@ -46,22 +47,7 @@ def get_docling_preset_configs():
} }
def get_docling_tweaks(docling_preset: str = None) -> dict:
"""Get Langflow tweaks for docling component based on preset"""
if not docling_preset:
# Get current preset from config
openrag_config = get_openrag_config()
docling_preset = openrag_config.knowledge.doclingPresets
preset_configs = get_docling_preset_configs()
if docling_preset not in preset_configs:
docling_preset = "standard" # fallback
preset_config = preset_configs[docling_preset]
docling_serve_opts = json.dumps(preset_config)
return {"DoclingRemote-ayRdw": {"docling_serve_opts": docling_serve_opts}}
async def get_settings(request, session_manager): async def get_settings(request, session_manager):
@ -234,6 +220,15 @@ async def update_settings(request, session_manager):
current_config.knowledge.doclingPresets = body["doclingPresets"] current_config.knowledge.doclingPresets = body["doclingPresets"]
config_updated = True config_updated = True
# Also update the flow with the new docling preset
try:
await _update_flow_docling_preset(body["doclingPresets"], preset_configs[body["doclingPresets"]])
logger.info(f"Successfully updated docling preset in flow to '{body['doclingPresets']}'")
except Exception as e:
logger.error(f"Failed to update docling preset in flow: {str(e)}")
# Don't fail the entire settings update if flow update fails
# The config will still be saved
if "chunk_size" in body: if "chunk_size" in body:
if not isinstance(body["chunk_size"], int) or body["chunk_size"] <= 0: if not isinstance(body["chunk_size"], int) or body["chunk_size"] <= 0:
return JSONResponse( return JSONResponse(
@ -527,3 +522,93 @@ async def onboarding(request, flows_service):
{"error": f"Failed to update onboarding settings: {str(e)}"}, {"error": f"Failed to update onboarding settings: {str(e)}"},
status_code=500, status_code=500,
) )
async def _update_flow_docling_preset(preset: str, preset_config: dict):
    """Rewrite the docling component's serve options inside the ingest flow.

    Fetches the ingest flow identified by ``LANGFLOW_INGEST_FLOW_ID`` from
    Langflow, locates the docling node (``DOCLING_COMPONENT_ID``), replaces
    its ``docling_serve_opts`` value with ``preset_config``, and PATCHes the
    modified flow back.

    Args:
        preset: Preset name (informational; the payload written is
            ``preset_config``).
        preset_config: Preset configuration stored into the node template's
            ``docling_serve_opts`` value.

    Raises:
        ValueError: If ``LANGFLOW_INGEST_FLOW_ID`` is not configured.
        RuntimeError: If the flow cannot be fetched or updated, or if the
            docling node / its ``docling_serve_opts`` field is missing.
    """
    if not LANGFLOW_INGEST_FLOW_ID:
        raise ValueError("LANGFLOW_INGEST_FLOW_ID is not configured")

    # Get the current flow definition from Langflow.
    response = await clients.langflow_request(
        "GET", f"/api/v1/flows/{LANGFLOW_INGEST_FLOW_ID}"
    )
    if response.status_code != 200:
        raise RuntimeError(
            f"Failed to get ingest flow: HTTP {response.status_code} - {response.text}"
        )
    flow_data = response.json()

    # Locate the docling node by its configured component id.
    nodes = flow_data.get("data", {}).get("nodes", [])
    target_node = next(
        (node for node in nodes if node.get("id") == DOCLING_COMPONENT_ID), None
    )
    if target_node is None:
        raise RuntimeError(
            f"Docling component '{DOCLING_COMPONENT_ID}' not found in ingest flow"
        )

    # Mutate docling_serve_opts in place; target_node is the same object that
    # lives inside flow_data, so the PATCH payload below carries the change.
    template = target_node.get("data", {}).get("node", {}).get("template", {})
    if not template.get("docling_serve_opts"):
        raise RuntimeError(
            f"docling_serve_opts field not found in node '{DOCLING_COMPONENT_ID}'"
        )
    template["docling_serve_opts"]["value"] = preset_config

    # Persist the modified flow back to Langflow.
    patch_response = await clients.langflow_request(
        "PATCH", f"/api/v1/flows/{LANGFLOW_INGEST_FLOW_ID}", json=flow_data
    )
    if patch_response.status_code != 200:
        raise RuntimeError(
            f"Failed to update ingest flow: HTTP {patch_response.status_code} - {patch_response.text}"
        )
async def update_docling_preset(request, session_manager):
    """HTTP handler: update the docling preset used by the ingest flow.

    Expects a JSON body of the form ``{"preset": "<name>"}``. Validates the
    preset against the known preset configurations, then rewrites the docling
    component options in the ingest flow via ``_update_flow_docling_preset``.

    Args:
        request: Incoming HTTP request carrying a JSON body.
        session_manager: Injected by routing; unused in this handler.

    Returns:
        JSONResponse: 200 on success, 400 for a malformed body / missing or
        unknown preset, 500 if the flow update itself fails.
    """
    try:
        # Parse request body; a malformed body is a client error (400),
        # not a server error — don't let it fall through to the 500 path.
        try:
            body = await request.json()
        except ValueError:
            return JSONResponse(
                {"error": "Request body must be valid JSON"},
                status_code=400,
            )

        # Validate preset parameter
        if "preset" not in body:
            return JSONResponse(
                {"error": "preset parameter is required"},
                status_code=400,
            )

        preset = body["preset"]
        preset_configs = get_docling_preset_configs()
        if preset not in preset_configs:
            valid_presets = list(preset_configs.keys())
            return JSONResponse(
                {
                    "error": f"Invalid preset '{preset}'. Valid presets: {', '.join(valid_presets)}"
                },
                status_code=400,
            )

        # Apply the preset configuration to the ingest flow.
        preset_config = preset_configs[preset]
        await _update_flow_docling_preset(preset, preset_config)

        logger.info(f"Successfully updated docling preset to '{preset}' in ingest flow")
        return JSONResponse(
            {
                "message": f"Successfully updated docling preset to '{preset}'",
                "preset": preset,
                "preset_config": preset_config,
            }
        )
    except Exception as e:
        logger.error("Failed to update docling preset", error=str(e))
        return JSONResponse(
            {"error": f"Failed to update docling preset: {str(e)}"},
            status_code=500,
        )

View file

@ -544,6 +544,9 @@ OLLAMA_LLM_TEXT_COMPONENT_ID = os.getenv(
"OLLAMA_LLM_TEXT_COMPONENT_ID", "OllamaModel-XDGqZ" "OLLAMA_LLM_TEXT_COMPONENT_ID", "OllamaModel-XDGqZ"
) )
# Docling component ID for ingest flow: the node id inside the Langflow
# ingest flow whose "docling_serve_opts" value is rewritten when the docling
# preset changes. Overridable via the DOCLING_COMPONENT_ID env var so custom
# flows with a different node id still work.
DOCLING_COMPONENT_ID = os.getenv("DOCLING_COMPONENT_ID", "DoclingRemote-78KoX")
# Global clients instance # Global clients instance
clients = AppClients() clients = AppClients()

View file

@ -971,12 +971,23 @@ async def create_app():
"/onboarding", "/onboarding",
require_auth(services["session_manager"])( require_auth(services["session_manager"])(
partial( partial(
settings.onboarding, settings.onboarding,
flows_service=services["flows_service"] flows_service=services["flows_service"]
) )
), ),
methods=["POST"], methods=["POST"],
), ),
# Docling preset update endpoint
Route(
"/settings/docling-preset",
require_auth(services["session_manager"])(
partial(
settings.update_docling_preset,
session_manager=services["session_manager"]
)
),
methods=["PATCH"],
),
Route( Route(
"/nudges", "/nudges",
require_auth(services["session_manager"])( require_auth(services["session_manager"])(

View file

@ -2,7 +2,6 @@ import json
from config.settings import NUDGES_FLOW_ID, clients, LANGFLOW_URL, LANGFLOW_CHAT_FLOW_ID from config.settings import NUDGES_FLOW_ID, clients, LANGFLOW_URL, LANGFLOW_CHAT_FLOW_ID
from agent import async_chat, async_langflow, async_chat_stream from agent import async_chat, async_langflow, async_chat_stream
from auth_context import set_auth_context from auth_context import set_auth_context
from api.settings import get_docling_tweaks
from utils.logging_config import get_logger from utils.logging_config import get_logger
logger = get_logger(__name__) logger = get_logger(__name__)
@ -127,8 +126,6 @@ class ChatService:
"Langflow client not initialized. Ensure LANGFLOW is reachable or set LANGFLOW_KEY." "Langflow client not initialized. Ensure LANGFLOW is reachable or set LANGFLOW_KEY."
) )
# Get docling tweaks based on current configuration
docling_tweaks = get_docling_tweaks()
if stream: if stream:
from agent import async_langflow_chat_stream from agent import async_langflow_chat_stream
@ -140,7 +137,6 @@ class ChatService:
user_id, user_id,
extra_headers=extra_headers, extra_headers=extra_headers,
previous_response_id=previous_response_id, previous_response_id=previous_response_id,
tweaks=docling_tweaks,
) )
else: else:
from agent import async_langflow_chat from agent import async_langflow_chat
@ -152,7 +148,6 @@ class ChatService:
user_id, user_id,
extra_headers=extra_headers, extra_headers=extra_headers,
previous_response_id=previous_response_id, previous_response_id=previous_response_id,
tweaks=docling_tweaks,
) )
response_data = {"response": response_text} response_data = {"response": response_text}
if response_id: if response_id:
@ -202,8 +197,6 @@ class ChatService:
from agent import async_langflow_chat from agent import async_langflow_chat
# Get docling tweaks (might not be used by nudges flow, but keeping consistent)
docling_tweaks = get_docling_tweaks()
response_text, response_id = await async_langflow_chat( response_text, response_id = await async_langflow_chat(
langflow_client, langflow_client,
@ -211,7 +204,6 @@ class ChatService:
prompt, prompt,
user_id, user_id,
extra_headers=extra_headers, extra_headers=extra_headers,
tweaks=docling_tweaks,
store_conversation=False, store_conversation=False,
) )
response_data = {"response": response_text} response_data = {"response": response_text}
@ -242,8 +234,6 @@ class ChatService:
raise ValueError( raise ValueError(
"Langflow client not initialized. Ensure LANGFLOW is reachable or set LANGFLOW_KEY." "Langflow client not initialized. Ensure LANGFLOW is reachable or set LANGFLOW_KEY."
) )
# Get docling tweaks based on current configuration
docling_tweaks = get_docling_tweaks()
response_text, response_id = await async_langflow( response_text, response_id = await async_langflow(
langflow_client=langflow_client, langflow_client=langflow_client,
@ -251,7 +241,6 @@ class ChatService:
prompt=document_prompt, prompt=document_prompt,
extra_headers=extra_headers, extra_headers=extra_headers,
previous_response_id=previous_response_id, previous_response_id=previous_response_id,
tweaks=docling_tweaks,
) )
else: # chat else: # chat
# Set auth context for chat tools and provide user_id # Set auth context for chat tools and provide user_id

View file

@ -17,7 +17,9 @@ class TaskService:
def __init__(self, document_service=None, process_pool=None): def __init__(self, document_service=None, process_pool=None):
self.document_service = document_service self.document_service = document_service
self.process_pool = process_pool self.process_pool = process_pool
self.task_store: dict[str, dict[str, UploadTask]] = {} # user_id -> {task_id -> UploadTask} self.task_store: dict[
str, dict[str, UploadTask]
] = {} # user_id -> {task_id -> UploadTask}
self.background_tasks = set() self.background_tasks = set()
if self.process_pool is None: if self.process_pool is None:
@ -122,18 +124,27 @@ class TaskService:
# Process files with limited concurrency to avoid overwhelming the system # Process files with limited concurrency to avoid overwhelming the system
max_workers = get_worker_count() max_workers = get_worker_count()
semaphore = asyncio.Semaphore(max_workers * 2) # Allow 2x process pool size for async I/O semaphore = asyncio.Semaphore(
max_workers * 2
) # Allow 2x process pool size for async I/O
async def process_with_semaphore(file_path: str): async def process_with_semaphore(file_path: str):
async with semaphore: async with semaphore:
await self.document_service.process_single_file_task(upload_task, file_path) await self.document_service.process_single_file_task(
upload_task, file_path
)
tasks = [process_with_semaphore(file_path) for file_path in upload_task.file_tasks.keys()] tasks = [
process_with_semaphore(file_path)
for file_path in upload_task.file_tasks.keys()
]
await asyncio.gather(*tasks, return_exceptions=True) await asyncio.gather(*tasks, return_exceptions=True)
except Exception as e: except Exception as e:
logger.error("Background upload processor failed", task_id=task_id, error=str(e)) logger.error(
"Background upload processor failed", task_id=task_id, error=str(e)
)
import traceback import traceback
traceback.print_exc() traceback.print_exc()
@ -141,7 +152,9 @@ class TaskService:
self.task_store[user_id][task_id].status = TaskStatus.FAILED self.task_store[user_id][task_id].status = TaskStatus.FAILED
self.task_store[user_id][task_id].updated_at = time.time() self.task_store[user_id][task_id].updated_at = time.time()
async def background_custom_processor(self, user_id: str, task_id: str, items: list) -> None: async def background_custom_processor(
self, user_id: str, task_id: str, items: list
) -> None:
"""Background task to process items using custom processor""" """Background task to process items using custom processor"""
try: try:
upload_task = self.task_store[user_id][task_id] upload_task = self.task_store[user_id][task_id]
@ -163,7 +176,9 @@ class TaskService:
try: try:
await processor.process_item(upload_task, item, file_task) await processor.process_item(upload_task, item, file_task)
except Exception as e: except Exception as e:
logger.error("Failed to process item", item=str(item), error=str(e)) logger.error(
"Failed to process item", item=str(item), error=str(e)
)
import traceback import traceback
traceback.print_exc() traceback.print_exc()
@ -190,7 +205,9 @@ class TaskService:
pass pass
raise # Re-raise to properly handle cancellation raise # Re-raise to properly handle cancellation
except Exception as e: except Exception as e:
logger.error("Background custom processor failed", task_id=task_id, error=str(e)) logger.error(
"Background custom processor failed", task_id=task_id, error=str(e)
)
import traceback import traceback
traceback.print_exc() traceback.print_exc()
@ -212,7 +229,10 @@ class TaskService:
upload_task = None upload_task = None
for candidate_user_id in candidate_user_ids: for candidate_user_id in candidate_user_ids:
if candidate_user_id in self.task_store and task_id in self.task_store[candidate_user_id]: if (
candidate_user_id in self.task_store
and task_id in self.task_store[candidate_user_id]
):
upload_task = self.task_store[candidate_user_id][task_id] upload_task = self.task_store[candidate_user_id][task_id]
break break
@ -271,10 +291,23 @@ class TaskService:
if task_id in tasks_by_id: if task_id in tasks_by_id:
continue continue
# Calculate running and pending counts # Calculate running and pending counts and build file statuses
running_files_count = 0 running_files_count = 0
pending_files_count = 0 pending_files_count = 0
for file_task in upload_task.file_tasks.values(): file_statuses = {}
for file_path, file_task in upload_task.file_tasks.items():
if file_task.status.value != "completed":
file_statuses[file_path] = {
"status": file_task.status.value,
"result": file_task.result,
"error": file_task.error,
"retry_count": file_task.retry_count,
"created_at": file_task.created_at,
"updated_at": file_task.updated_at,
"duration_seconds": file_task.duration_seconds,
}
if file_task.status.value == "running": if file_task.status.value == "running":
running_files_count += 1 running_files_count += 1
elif file_task.status.value == "pending": elif file_task.status.value == "pending":
@ -292,6 +325,7 @@ class TaskService:
"created_at": upload_task.created_at, "created_at": upload_task.created_at,
"updated_at": upload_task.updated_at, "updated_at": upload_task.updated_at,
"duration_seconds": upload_task.duration_seconds, "duration_seconds": upload_task.duration_seconds,
"files": file_statuses,
} }
# First, add user-owned tasks; then shared anonymous; # First, add user-owned tasks; then shared anonymous;
@ -312,7 +346,10 @@ class TaskService:
store_user_id = None store_user_id = None
for candidate_user_id in candidate_user_ids: for candidate_user_id in candidate_user_ids:
if candidate_user_id in self.task_store and task_id in self.task_store[candidate_user_id]: if (
candidate_user_id in self.task_store
and task_id in self.task_store[candidate_user_id]
):
store_user_id = candidate_user_id store_user_id = candidate_user_id
break break
@ -326,7 +363,10 @@ class TaskService:
return False return False
# Cancel the background task to stop scheduling new work # Cancel the background task to stop scheduling new work
if hasattr(upload_task, "background_task") and not upload_task.background_task.done(): if (
hasattr(upload_task, "background_task")
and not upload_task.background_task.done()
):
upload_task.background_task.cancel() upload_task.background_task.cancel()
# Mark task as failed (cancelled) # Mark task as failed (cancelled)