From 7dfe90c8a6cfb9236dc66bdd908af1345ef21050 Mon Sep 17 00:00:00 2001
From: Lucas Oliveira
Date: Thu, 2 Oct 2025 15:49:21 -0300
Subject: [PATCH 01/39] fixed ibm logo
---
frontend/components/logo/ibm-logo.tsx | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/frontend/components/logo/ibm-logo.tsx b/frontend/components/logo/ibm-logo.tsx
index 158ffa3b..e37adec1 100644
--- a/frontend/components/logo/ibm-logo.tsx
+++ b/frontend/components/logo/ibm-logo.tsx
@@ -9,7 +9,7 @@ export default function IBMLogo(props: React.SVGProps) {
{...props}
>
IBM watsonx.ai Logo
-
+
Date: Thu, 2 Oct 2025 16:50:34 -0300
Subject: [PATCH 02/39] implement tasks fetching and cancelling on useQuery
---
.../api/mutations/useCancelTaskMutation.ts | 47 +++++++++++
.../app/api/queries/useGetTaskStatusQuery.ts | 80 +++++++++++++++++++
.../src/app/api/queries/useGetTasksQuery.ts | 79 ++++++++++++++++++
3 files changed, 206 insertions(+)
create mode 100644 frontend/src/app/api/mutations/useCancelTaskMutation.ts
create mode 100644 frontend/src/app/api/queries/useGetTaskStatusQuery.ts
create mode 100644 frontend/src/app/api/queries/useGetTasksQuery.ts
diff --git a/frontend/src/app/api/mutations/useCancelTaskMutation.ts b/frontend/src/app/api/mutations/useCancelTaskMutation.ts
new file mode 100644
index 00000000..1bf2faed
--- /dev/null
+++ b/frontend/src/app/api/mutations/useCancelTaskMutation.ts
@@ -0,0 +1,47 @@
+import {
+ type UseMutationOptions,
+ useMutation,
+ useQueryClient,
+} from "@tanstack/react-query";
+
+export interface CancelTaskRequest {
+ taskId: string;
+}
+
+export interface CancelTaskResponse {
+ status: string;
+ task_id: string;
+}
+
+export const useCancelTaskMutation = (
+ options?: Omit<
+ UseMutationOptions,
+ "mutationFn"
+ >
+) => {
+ const queryClient = useQueryClient();
+
+ async function cancelTask(
+ variables: CancelTaskRequest,
+ ): Promise {
+ const response = await fetch(`/api/tasks/${variables.taskId}/cancel`, {
+ method: "POST",
+ });
+
+ if (!response.ok) {
+ const errorData = await response.json().catch(() => ({}));
+ throw new Error(errorData.error || "Failed to cancel task");
+ }
+
+ return response.json();
+ }
+
+ return useMutation({
+ mutationFn: cancelTask,
+ onSuccess: () => {
+ // Invalidate tasks query to refresh the list
+ queryClient.invalidateQueries({ queryKey: ["tasks"] });
+ },
+ ...options,
+ });
+};
diff --git a/frontend/src/app/api/queries/useGetTaskStatusQuery.ts b/frontend/src/app/api/queries/useGetTaskStatusQuery.ts
new file mode 100644
index 00000000..17cd2d16
--- /dev/null
+++ b/frontend/src/app/api/queries/useGetTaskStatusQuery.ts
@@ -0,0 +1,80 @@
+import {
+ type UseQueryOptions,
+ useQuery,
+ useQueryClient,
+} from "@tanstack/react-query";
+
+export interface TaskStatus {
+ task_id: string;
+ status:
+ | "pending"
+ | "running"
+ | "processing"
+ | "completed"
+ | "failed"
+ | "error";
+ total_files?: number;
+ processed_files?: number;
+ successful_files?: number;
+ failed_files?: number;
+ running_files?: number;
+ pending_files?: number;
+ created_at: string;
+ updated_at: string;
+ duration_seconds?: number;
+ result?: Record;
+ error?: string;
+ files?: Record>;
+}
+
+export const useGetTaskStatusQuery = (
+ taskId: string,
+ options?: Omit, "queryKey" | "queryFn">
+) => {
+ const queryClient = useQueryClient();
+
+ async function getTaskStatus(): Promise {
+ if (!taskId) {
+ return null;
+ }
+
+ const response = await fetch(`/api/tasks/${taskId}`);
+
+ if (!response.ok) {
+ if (response.status === 404) {
+ return null; // Task not found
+ }
+ throw new Error("Failed to fetch task status");
+ }
+
+ return response.json();
+ }
+
+ const queryResult = useQuery(
+ {
+ queryKey: ["task-status", taskId],
+ queryFn: getTaskStatus,
+ refetchInterval: (data) => {
+ // Only poll if the task is still active
+ if (!data) {
+ return false; // Stop polling if no data
+ }
+
+ const isActive =
+ data.status === "pending" ||
+ data.status === "running" ||
+ data.status === "processing";
+
+ return isActive ? 3000 : false; // Poll every 3 seconds if active
+ },
+ refetchIntervalInBackground: true,
+ staleTime: 0, // Always consider data stale to ensure fresh updates
+ gcTime: 5 * 60 * 1000, // Keep in cache for 5 minutes
+ enabled: !!taskId, // Only run if taskId is provided
+ ...options,
+ },
+ queryClient,
+ );
+
+ return queryResult;
+};
diff --git a/frontend/src/app/api/queries/useGetTasksQuery.ts b/frontend/src/app/api/queries/useGetTasksQuery.ts
new file mode 100644
index 00000000..1ea59d26
--- /dev/null
+++ b/frontend/src/app/api/queries/useGetTasksQuery.ts
@@ -0,0 +1,79 @@
+import {
+ type UseQueryOptions,
+ useQuery,
+ useQueryClient,
+} from "@tanstack/react-query";
+
+export interface Task {
+ task_id: string;
+ status:
+ | "pending"
+ | "running"
+ | "processing"
+ | "completed"
+ | "failed"
+ | "error";
+ total_files?: number;
+ processed_files?: number;
+ successful_files?: number;
+ failed_files?: number;
+ running_files?: number;
+ pending_files?: number;
+ created_at: string;
+ updated_at: string;
+ duration_seconds?: number;
+ result?: Record;
+ error?: string;
+ files?: Record>;
+}
+
+export interface TasksResponse {
+ tasks: Task[];
+}
+
+export const useGetTasksQuery = (
+ options?: Omit, "queryKey" | "queryFn">
+) => {
+ const queryClient = useQueryClient();
+
+ async function getTasks(): Promise {
+ const response = await fetch("/api/tasks");
+
+ if (!response.ok) {
+ throw new Error("Failed to fetch tasks");
+ }
+
+ const data: TasksResponse = await response.json();
+ return data.tasks || [];
+ }
+
+ const queryResult = useQuery(
+ {
+ queryKey: ["tasks"],
+ queryFn: getTasks,
+ refetchInterval: (query) => {
+ // Only poll if there are tasks with pending or running status
+ const data = query.state.data;
+ if (!data || data.length === 0) {
+ return false; // Stop polling if no tasks
+ }
+
+ const hasActiveTasks = data.some(
+ (task: Task) =>
+ task.status === "pending" ||
+ task.status === "running" ||
+ task.status === "processing"
+ );
+
+ return hasActiveTasks ? 3000 : false; // Poll every 3 seconds if active tasks exist
+ },
+ refetchIntervalInBackground: true,
+ staleTime: 0, // Always consider data stale to ensure fresh updates
+ gcTime: 5 * 60 * 1000, // Keep in cache for 5 minutes
+ ...options,
+ },
+ queryClient,
+ );
+
+ return queryResult;
+};
From aa61ba265c3a9744873c817c75779a15910fe9e7 Mon Sep 17 00:00:00 2001
From: Lucas Oliveira
Date: Thu, 2 Oct 2025 16:50:47 -0300
Subject: [PATCH 03/39] made files and toast appear just once, use new queries
---
frontend/components/knowledge-dropdown.tsx | 1117 ++++++++++----------
frontend/src/contexts/task-context.tsx | 427 ++++----
2 files changed, 733 insertions(+), 811 deletions(-)
diff --git a/frontend/components/knowledge-dropdown.tsx b/frontend/components/knowledge-dropdown.tsx
index ee49fc3a..9b71ee81 100644
--- a/frontend/components/knowledge-dropdown.tsx
+++ b/frontend/components/knowledge-dropdown.tsx
@@ -1,24 +1,25 @@
"use client";
import {
- ChevronDown,
- Cloud,
- FolderOpen,
- Loader2,
- PlugZap,
- Plus,
- Upload,
+ ChevronDown,
+ Cloud,
+ FolderOpen,
+ Loader2,
+ PlugZap,
+ Plus,
+ Upload,
} from "lucide-react";
import { useRouter } from "next/navigation";
import { useEffect, useRef, useState } from "react";
import { toast } from "sonner";
+import { useGetTasksQuery } from "@/app/api/queries/useGetTasksQuery";
import { Button } from "@/components/ui/button";
import {
- Dialog,
- DialogContent,
- DialogDescription,
- DialogHeader,
- DialogTitle,
+ Dialog,
+ DialogContent,
+ DialogDescription,
+ DialogHeader,
+ DialogTitle,
} from "@/components/ui/dialog";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
@@ -26,600 +27,590 @@ import { useTask } from "@/contexts/task-context";
import { cn } from "@/lib/utils";
interface KnowledgeDropdownProps {
- active?: boolean;
- variant?: "navigation" | "button";
+ active?: boolean;
+ variant?: "navigation" | "button";
}
export function KnowledgeDropdown({
- active,
- variant = "navigation",
+ active,
+ variant = "navigation",
}: KnowledgeDropdownProps) {
- const { addTask } = useTask();
- const router = useRouter();
- const [isOpen, setIsOpen] = useState(false);
- const [showFolderDialog, setShowFolderDialog] = useState(false);
- const [showS3Dialog, setShowS3Dialog] = useState(false);
- const [awsEnabled, setAwsEnabled] = useState(false);
- const [folderPath, setFolderPath] = useState("/app/documents/");
- const [bucketUrl, setBucketUrl] = useState("s3://");
- const [folderLoading, setFolderLoading] = useState(false);
- const [s3Loading, setS3Loading] = useState(false);
- const [fileUploading, setFileUploading] = useState(false);
- const [isNavigatingToCloud, setIsNavigatingToCloud] = useState(false);
- const [cloudConnectors, setCloudConnectors] = useState<{
- [key: string]: {
- name: string;
- available: boolean;
- connected: boolean;
- hasToken: boolean;
- };
- }>({});
- const fileInputRef = useRef(null);
- const dropdownRef = useRef(null);
+ const { addTask } = useTask();
+ const { refetch: refetchTasks } = useGetTasksQuery();
+ const router = useRouter();
+ const [isOpen, setIsOpen] = useState(false);
+ const [showFolderDialog, setShowFolderDialog] = useState(false);
+ const [showS3Dialog, setShowS3Dialog] = useState(false);
+ const [awsEnabled, setAwsEnabled] = useState(false);
+ const [folderPath, setFolderPath] = useState("/app/documents/");
+ const [bucketUrl, setBucketUrl] = useState("s3://");
+ const [folderLoading, setFolderLoading] = useState(false);
+ const [s3Loading, setS3Loading] = useState(false);
+ const [fileUploading, setFileUploading] = useState(false);
+ const [isNavigatingToCloud, setIsNavigatingToCloud] = useState(false);
+ const [cloudConnectors, setCloudConnectors] = useState<{
+ [key: string]: {
+ name: string;
+ available: boolean;
+ connected: boolean;
+ hasToken: boolean;
+ };
+ }>({});
+ const fileInputRef = useRef(null);
+ const dropdownRef = useRef(null);
- // Check AWS availability and cloud connectors on mount
- useEffect(() => {
- const checkAvailability = async () => {
- try {
- // Check AWS
- const awsRes = await fetch("/api/upload_options");
- if (awsRes.ok) {
- const awsData = await awsRes.json();
- setAwsEnabled(Boolean(awsData.aws));
- }
+ // Check AWS availability and cloud connectors on mount
+ useEffect(() => {
+ const checkAvailability = async () => {
+ try {
+ // Check AWS
+ const awsRes = await fetch("/api/upload_options");
+ if (awsRes.ok) {
+ const awsData = await awsRes.json();
+ setAwsEnabled(Boolean(awsData.aws));
+ }
- // Check cloud connectors
- const connectorsRes = await fetch("/api/connectors");
- if (connectorsRes.ok) {
- const connectorsResult = await connectorsRes.json();
- const cloudConnectorTypes = [
- "google_drive",
- "onedrive",
- "sharepoint",
- ];
- const connectorInfo: {
- [key: string]: {
- name: string;
- available: boolean;
- connected: boolean;
- hasToken: boolean;
- };
- } = {};
+ // Check cloud connectors
+ const connectorsRes = await fetch("/api/connectors");
+ if (connectorsRes.ok) {
+ const connectorsResult = await connectorsRes.json();
+ const cloudConnectorTypes = [
+ "google_drive",
+ "onedrive",
+ "sharepoint",
+ ];
+ const connectorInfo: {
+ [key: string]: {
+ name: string;
+ available: boolean;
+ connected: boolean;
+ hasToken: boolean;
+ };
+ } = {};
- for (const type of cloudConnectorTypes) {
- if (connectorsResult.connectors[type]) {
- connectorInfo[type] = {
- name: connectorsResult.connectors[type].name,
- available: connectorsResult.connectors[type].available,
- connected: false,
- hasToken: false,
- };
+ for (const type of cloudConnectorTypes) {
+ if (connectorsResult.connectors[type]) {
+ connectorInfo[type] = {
+ name: connectorsResult.connectors[type].name,
+ available: connectorsResult.connectors[type].available,
+ connected: false,
+ hasToken: false,
+ };
- // Check connection status
- try {
- const statusRes = await fetch(`/api/connectors/${type}/status`);
- if (statusRes.ok) {
- const statusData = await statusRes.json();
- const connections = statusData.connections || [];
- const activeConnection = connections.find(
- (conn: { is_active: boolean; connection_id: string }) =>
- conn.is_active
- );
- const isConnected = activeConnection !== undefined;
+ // Check connection status
+ try {
+ const statusRes = await fetch(`/api/connectors/${type}/status`);
+ if (statusRes.ok) {
+ const statusData = await statusRes.json();
+ const connections = statusData.connections || [];
+ const activeConnection = connections.find(
+ (conn: { is_active: boolean; connection_id: string }) =>
+ conn.is_active,
+ );
+ const isConnected = activeConnection !== undefined;
- if (isConnected && activeConnection) {
- connectorInfo[type].connected = true;
+ if (isConnected && activeConnection) {
+ connectorInfo[type].connected = true;
- // Check token availability
- try {
- const tokenRes = await fetch(
- `/api/connectors/${type}/token?connection_id=${activeConnection.connection_id}`
- );
- if (tokenRes.ok) {
- const tokenData = await tokenRes.json();
- if (tokenData.access_token) {
- connectorInfo[type].hasToken = true;
- }
- }
- } catch {
- // Token check failed
- }
- }
- }
- } catch {
- // Status check failed
- }
- }
- }
+ // Check token availability
+ try {
+ const tokenRes = await fetch(
+ `/api/connectors/${type}/token?connection_id=${activeConnection.connection_id}`,
+ );
+ if (tokenRes.ok) {
+ const tokenData = await tokenRes.json();
+ if (tokenData.access_token) {
+ connectorInfo[type].hasToken = true;
+ }
+ }
+ } catch {
+ // Token check failed
+ }
+ }
+ }
+ } catch {
+ // Status check failed
+ }
+ }
+ }
- setCloudConnectors(connectorInfo);
- }
- } catch (err) {
- console.error("Failed to check availability", err);
- }
- };
- checkAvailability();
- }, []);
+ setCloudConnectors(connectorInfo);
+ }
+ } catch (err) {
+ console.error("Failed to check availability", err);
+ }
+ };
+ checkAvailability();
+ }, []);
- // Handle click outside to close dropdown
- useEffect(() => {
- const handleClickOutside = (event: MouseEvent) => {
- if (
- dropdownRef.current &&
- !dropdownRef.current.contains(event.target as Node)
- ) {
- setIsOpen(false);
- }
- };
+ // Handle click outside to close dropdown
+ useEffect(() => {
+ const handleClickOutside = (event: MouseEvent) => {
+ if (
+ dropdownRef.current &&
+ !dropdownRef.current.contains(event.target as Node)
+ ) {
+ setIsOpen(false);
+ }
+ };
- if (isOpen) {
- document.addEventListener("mousedown", handleClickOutside);
- return () =>
- document.removeEventListener("mousedown", handleClickOutside);
- }
- }, [isOpen]);
+ if (isOpen) {
+ document.addEventListener("mousedown", handleClickOutside);
+ return () =>
+ document.removeEventListener("mousedown", handleClickOutside);
+ }
+ }, [isOpen]);
- const handleFileUpload = () => {
- fileInputRef.current?.click();
- };
+ const handleFileUpload = () => {
+ fileInputRef.current?.click();
+ };
- const handleFileChange = async (e: React.ChangeEvent) => {
- const files = e.target.files;
- if (files && files.length > 0) {
- // Close dropdown and disable button immediately after file selection
- setIsOpen(false);
- setFileUploading(true);
+ const handleFileChange = async (e: React.ChangeEvent) => {
+ const files = e.target.files;
+ if (files && files.length > 0) {
+ // Close dropdown and disable button immediately after file selection
+ setIsOpen(false);
+ setFileUploading(true);
- // Trigger the same file upload event as the chat page
- window.dispatchEvent(
- new CustomEvent("fileUploadStart", {
- detail: { filename: files[0].name },
- })
- );
+ // Trigger the same file upload event as the chat page
+ window.dispatchEvent(
+ new CustomEvent("fileUploadStart", {
+ detail: { filename: files[0].name },
+ }),
+ );
- try {
- const formData = new FormData();
- formData.append("file", files[0]);
+ try {
+ const formData = new FormData();
+ formData.append("file", files[0]);
- // Use router upload and ingest endpoint (automatically routes based on configuration)
- const uploadIngestRes = await fetch("/api/router/upload_ingest", {
- method: "POST",
- body: formData,
- });
+ // Use router upload and ingest endpoint (automatically routes based on configuration)
+ const uploadIngestRes = await fetch("/api/router/upload_ingest", {
+ method: "POST",
+ body: formData,
+ });
- const uploadIngestJson = await uploadIngestRes.json();
+ const uploadIngestJson = await uploadIngestRes.json();
- if (!uploadIngestRes.ok) {
- throw new Error(
- uploadIngestJson?.error || "Upload and ingest failed"
- );
- }
+ if (!uploadIngestRes.ok) {
+ throw new Error(
+ uploadIngestJson?.error || "Upload and ingest failed",
+ );
+ }
- // Extract results from the response - handle both unified and simple formats
- const fileId = uploadIngestJson?.upload?.id || uploadIngestJson?.id;
- const filePath =
- uploadIngestJson?.upload?.path ||
- uploadIngestJson?.path ||
- "uploaded";
- const runJson = uploadIngestJson?.ingestion;
- const deleteResult = uploadIngestJson?.deletion;
+ // Extract results from the response - handle both unified and simple formats
+ const fileId = uploadIngestJson?.upload?.id || uploadIngestJson?.id || uploadIngestJson?.task_id;
+ const filePath =
+ uploadIngestJson?.upload?.path ||
+ uploadIngestJson?.path ||
+ "uploaded";
+ const runJson = uploadIngestJson?.ingestion;
+ const deleteResult = uploadIngestJson?.deletion;
+ console.log("c", uploadIngestJson )
+ if (!fileId) {
+ throw new Error("Upload successful but no file id returned");
+ }
+ // Check if ingestion actually succeeded
+ if (
+ runJson &&
+ runJson.status !== "COMPLETED" &&
+ runJson.status !== "SUCCESS"
+ ) {
+ const errorMsg = runJson.error || "Ingestion pipeline failed";
+ throw new Error(
+ `Ingestion failed: ${errorMsg}. Try setting DISABLE_INGEST_WITH_LANGFLOW=true if you're experiencing Langflow component issues.`,
+ );
+ }
+ // Log deletion status if provided
+ if (deleteResult) {
+ if (deleteResult.status === "deleted") {
+ console.log(
+ "File successfully cleaned up from Langflow:",
+ deleteResult.file_id,
+ );
+ } else if (deleteResult.status === "delete_failed") {
+ console.warn(
+ "Failed to cleanup file from Langflow:",
+ deleteResult.error,
+ );
+ }
+ }
+ // Notify UI
+ window.dispatchEvent(
+ new CustomEvent("fileUploaded", {
+ detail: {
+ file: files[0],
+ result: {
+ file_id: fileId,
+ file_path: filePath,
+ run: runJson,
+ deletion: deleteResult,
+ unified: true,
+ },
+ },
+ }),
+ );
- if (!fileId) {
- throw new Error("Upload successful but no file id returned");
- }
+ refetchTasks();
+ } catch (error) {
+ window.dispatchEvent(
+ new CustomEvent("fileUploadError", {
+ detail: {
+ filename: files[0].name,
+ error: error instanceof Error ? error.message : "Upload failed",
+ },
+ }),
+ );
+ } finally {
+ window.dispatchEvent(new CustomEvent("fileUploadComplete"));
+ setFileUploading(false);
+ }
+ }
- // Check if ingestion actually succeeded
- if (
- runJson &&
- runJson.status !== "COMPLETED" &&
- runJson.status !== "SUCCESS"
- ) {
- const errorMsg = runJson.error || "Ingestion pipeline failed";
- throw new Error(
- `Ingestion failed: ${errorMsg}. Try setting DISABLE_INGEST_WITH_LANGFLOW=true if you're experiencing Langflow component issues.`
- );
- }
+ // Reset file input
+ if (fileInputRef.current) {
+ fileInputRef.current.value = "";
+ }
+ };
- // Log deletion status if provided
- if (deleteResult) {
- if (deleteResult.status === "deleted") {
- console.log(
- "File successfully cleaned up from Langflow:",
- deleteResult.file_id
- );
- } else if (deleteResult.status === "delete_failed") {
- console.warn(
- "Failed to cleanup file from Langflow:",
- deleteResult.error
- );
- }
- }
+ const handleFolderUpload = async () => {
+ if (!folderPath.trim()) return;
- // Notify UI
- window.dispatchEvent(
- new CustomEvent("fileUploaded", {
- detail: {
- file: files[0],
- result: {
- file_id: fileId,
- file_path: filePath,
- run: runJson,
- deletion: deleteResult,
- unified: true,
- },
- },
- })
- );
+ setFolderLoading(true);
+ setShowFolderDialog(false);
- // Trigger search refresh after successful ingestion
- window.dispatchEvent(new CustomEvent("knowledgeUpdated"));
- } catch (error) {
- window.dispatchEvent(
- new CustomEvent("fileUploadError", {
- detail: {
- filename: files[0].name,
- error: error instanceof Error ? error.message : "Upload failed",
- },
- })
- );
- } finally {
- window.dispatchEvent(new CustomEvent("fileUploadComplete"));
- setFileUploading(false);
- // Don't call refetchSearch() here - the knowledgeUpdated event will handle it
- }
- }
+ try {
+ const response = await fetch("/api/upload_path", {
+ method: "POST",
+ headers: {
+ "Content-Type": "application/json",
+ },
+ body: JSON.stringify({ path: folderPath }),
+ });
- // Reset file input
- if (fileInputRef.current) {
- fileInputRef.current.value = "";
- }
- };
+ const result = await response.json();
- const handleFolderUpload = async () => {
- if (!folderPath.trim()) return;
+ if (response.status === 201) {
+ const taskId = result.task_id || result.id;
- setFolderLoading(true);
- setShowFolderDialog(false);
+ if (!taskId) {
+ throw new Error("No task ID received from server");
+ }
- try {
- const response = await fetch("/api/upload_path", {
- method: "POST",
- headers: {
- "Content-Type": "application/json",
- },
- body: JSON.stringify({ path: folderPath }),
- });
+ addTask(taskId);
+ setFolderPath("");
+ // Refetch tasks to show the new task
+ refetchTasks();
+ } else if (response.ok) {
+ setFolderPath("");
+ // Refetch tasks even for direct uploads in case tasks were created
+ refetchTasks();
+ } else {
+ console.error("Folder upload failed:", result.error);
+ if (response.status === 400) {
+ toast.error("Upload failed", {
+ description: result.error || "Bad request",
+ });
+ }
+ }
+ } catch (error) {
+ console.error("Folder upload error:", error);
+ } finally {
+ setFolderLoading(false);
+ }
+ };
- const result = await response.json();
+ const handleS3Upload = async () => {
+ if (!bucketUrl.trim()) return;
- if (response.status === 201) {
- const taskId = result.task_id || result.id;
+ setS3Loading(true);
+ setShowS3Dialog(false);
- if (!taskId) {
- throw new Error("No task ID received from server");
- }
+ try {
+ const response = await fetch("/api/upload_bucket", {
+ method: "POST",
+ headers: {
+ "Content-Type": "application/json",
+ },
+ body: JSON.stringify({ s3_url: bucketUrl }),
+ });
- addTask(taskId);
- setFolderPath("");
- // Trigger search refresh after successful folder processing starts
- console.log(
- "Folder upload successful, dispatching knowledgeUpdated event"
- );
- window.dispatchEvent(new CustomEvent("knowledgeUpdated"));
- } else if (response.ok) {
- setFolderPath("");
- console.log(
- "Folder upload successful (direct), dispatching knowledgeUpdated event"
- );
- window.dispatchEvent(new CustomEvent("knowledgeUpdated"));
- } else {
- console.error("Folder upload failed:", result.error);
- if (response.status === 400) {
- toast.error("Upload failed", {
- description: result.error || "Bad request",
- });
- }
- }
- } catch (error) {
- console.error("Folder upload error:", error);
- } finally {
- setFolderLoading(false);
- // Don't call refetchSearch() here - the knowledgeUpdated event will handle it
- }
- };
+ const result = await response.json();
- const handleS3Upload = async () => {
- if (!bucketUrl.trim()) return;
+ if (response.status === 201) {
+ const taskId = result.task_id || result.id;
- setS3Loading(true);
- setShowS3Dialog(false);
+ if (!taskId) {
+ throw new Error("No task ID received from server");
+ }
- try {
- const response = await fetch("/api/upload_bucket", {
- method: "POST",
- headers: {
- "Content-Type": "application/json",
- },
- body: JSON.stringify({ s3_url: bucketUrl }),
- });
+ addTask(taskId);
+ setBucketUrl("s3://");
+ // Refetch tasks to show the new task
+ refetchTasks();
+ } else {
+ console.error("S3 upload failed:", result.error);
+ if (response.status === 400) {
+ toast.error("Upload failed", {
+ description: result.error || "Bad request",
+ });
+ }
+ }
+ } catch (error) {
+ console.error("S3 upload error:", error);
+ } finally {
+ setS3Loading(false);
+ }
+ };
- const result = await response.json();
+ const cloudConnectorItems = Object.entries(cloudConnectors)
+ .filter(([, info]) => info.available)
+ .map(([type, info]) => ({
+ label: info.name,
+ icon: PlugZap,
+ onClick: async () => {
+ setIsOpen(false);
+ if (info.connected && info.hasToken) {
+ setIsNavigatingToCloud(true);
+ try {
+ router.push(`/upload/${type}`);
+ // Keep loading state for a short time to show feedback
+ setTimeout(() => setIsNavigatingToCloud(false), 1000);
+ } catch {
+ setIsNavigatingToCloud(false);
+ }
+ } else {
+ router.push("/settings");
+ }
+ },
+ disabled: !info.connected || !info.hasToken,
+ tooltip: !info.connected
+ ? `Connect ${info.name} in Settings first`
+ : !info.hasToken
+ ? `Reconnect ${info.name} - access token required`
+ : undefined,
+ }));
- if (response.status === 201) {
- const taskId = result.task_id || result.id;
+ const menuItems = [
+ {
+ label: "Add File",
+ icon: Upload,
+ onClick: handleFileUpload,
+ },
+ {
+ label: "Process Folder",
+ icon: FolderOpen,
+ onClick: () => {
+ setIsOpen(false);
+ setShowFolderDialog(true);
+ },
+ },
+ ...(awsEnabled
+ ? [
+ {
+ label: "Process S3 Bucket",
+ icon: Cloud,
+ onClick: () => {
+ setIsOpen(false);
+ setShowS3Dialog(true);
+ },
+ },
+ ]
+ : []),
+ ...cloudConnectorItems,
+ ];
- if (!taskId) {
- throw new Error("No task ID received from server");
- }
+ // Comprehensive loading state
+ const isLoading =
+ fileUploading || folderLoading || s3Loading || isNavigatingToCloud;
- addTask(taskId);
- setBucketUrl("s3://");
- // Trigger search refresh after successful S3 processing starts
- console.log("S3 upload successful, dispatching knowledgeUpdated event");
- window.dispatchEvent(new CustomEvent("knowledgeUpdated"));
- } else {
- console.error("S3 upload failed:", result.error);
- if (response.status === 400) {
- toast.error("Upload failed", {
- description: result.error || "Bad request",
- });
- }
- }
- } catch (error) {
- console.error("S3 upload error:", error);
- } finally {
- setS3Loading(false);
- // Don't call refetchSearch() here - the knowledgeUpdated event will handle it
- }
- };
+ return (
+ <>
+
+
!isLoading && setIsOpen(!isOpen)}
+ disabled={isLoading}
+ className={cn(
+ variant === "button"
+ ? "rounded-lg h-12 px-4 flex items-center gap-2 bg-primary text-primary-foreground hover:bg-primary/90 transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
+ : "text-sm group flex p-3 w-full justify-start font-medium cursor-pointer hover:bg-accent hover:text-accent-foreground rounded-lg transition-all disabled:opacity-50 disabled:cursor-not-allowed",
+ variant === "navigation" && active
+ ? "bg-accent text-accent-foreground shadow-sm"
+ : variant === "navigation"
+ ? "text-foreground hover:text-accent-foreground"
+ : "",
+ )}
+ >
+ {variant === "button" ? (
+ <>
+ {isLoading ? (
+
+ ) : (
+
+ )}
+
+ {isLoading
+ ? fileUploading
+ ? "Uploading..."
+ : folderLoading
+ ? "Processing Folder..."
+ : s3Loading
+ ? "Processing S3..."
+ : isNavigatingToCloud
+ ? "Loading..."
+ : "Processing..."
+ : "Add Knowledge"}
+
+ {!isLoading && (
+
+ )}
+ >
+ ) : (
+ <>
+
+ {isLoading ? (
+
+ ) : (
+
+ )}
+ Knowledge
+
+ {!isLoading && (
+
+ )}
+ >
+ )}
+
- const cloudConnectorItems = Object.entries(cloudConnectors)
- .filter(([, info]) => info.available)
- .map(([type, info]) => ({
- label: info.name,
- icon: PlugZap,
- onClick: async () => {
- setIsOpen(false);
- if (info.connected && info.hasToken) {
- setIsNavigatingToCloud(true);
- try {
- router.push(`/upload/${type}`);
- // Keep loading state for a short time to show feedback
- setTimeout(() => setIsNavigatingToCloud(false), 1000);
- } catch {
- setIsNavigatingToCloud(false);
- }
- } else {
- router.push("/settings");
- }
- },
- disabled: !info.connected || !info.hasToken,
- tooltip: !info.connected
- ? `Connect ${info.name} in Settings first`
- : !info.hasToken
- ? `Reconnect ${info.name} - access token required`
- : undefined,
- }));
+ {isOpen && !isLoading && (
+
+
+ {menuItems.map((item, index) => (
+
+ {item.label}
+
+ ))}
+
+
+ )}
- const menuItems = [
- {
- label: "Add File",
- icon: Upload,
- onClick: handleFileUpload,
- },
- {
- label: "Process Folder",
- icon: FolderOpen,
- onClick: () => {
- setIsOpen(false);
- setShowFolderDialog(true);
- },
- },
- ...(awsEnabled
- ? [
- {
- label: "Process S3 Bucket",
- icon: Cloud,
- onClick: () => {
- setIsOpen(false);
- setShowS3Dialog(true);
- },
- },
- ]
- : []),
- ...cloudConnectorItems,
- ];
+
+
- // Comprehensive loading state
- const isLoading =
- fileUploading || folderLoading || s3Loading || isNavigatingToCloud;
+ {/* Process Folder Dialog */}
+
+
+
+
+
+ Process Folder
+
+
+ Process all documents in a folder path
+
+
+
+
+ Folder Path
+ setFolderPath(e.target.value)}
+ />
+
+
+ setShowFolderDialog(false)}
+ >
+ Cancel
+
+
+ {folderLoading ? "Processing..." : "Process Folder"}
+
+
+
+
+
- return (
- <>
-
-
!isLoading && setIsOpen(!isOpen)}
- disabled={isLoading}
- className={cn(
- variant === "button"
- ? "rounded-lg h-12 px-4 flex items-center gap-2 bg-primary text-primary-foreground hover:bg-primary/90 transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
- : "text-sm group flex p-3 w-full justify-start font-medium cursor-pointer hover:bg-accent hover:text-accent-foreground rounded-lg transition-all disabled:opacity-50 disabled:cursor-not-allowed",
- variant === "navigation" && active
- ? "bg-accent text-accent-foreground shadow-sm"
- : variant === "navigation"
- ? "text-foreground hover:text-accent-foreground"
- : ""
- )}
- >
- {variant === "button" ? (
- <>
- {isLoading ? (
-
- ) : (
-
- )}
-
- {isLoading
- ? fileUploading
- ? "Uploading..."
- : folderLoading
- ? "Processing Folder..."
- : s3Loading
- ? "Processing S3..."
- : isNavigatingToCloud
- ? "Loading..."
- : "Processing..."
- : "Add Knowledge"}
-
- {!isLoading && (
-
- )}
- >
- ) : (
- <>
-
- {isLoading ? (
-
- ) : (
-
- )}
- Knowledge
-
- {!isLoading && (
-
- )}
- >
- )}
-
-
- {isOpen && !isLoading && (
-
-
- {menuItems.map((item, index) => (
-
- {item.label}
-
- ))}
-
-
- )}
-
-
-
-
- {/* Process Folder Dialog */}
-
-
-
-
-
- Process Folder
-
-
- Process all documents in a folder path
-
-
-
-
- Folder Path
- setFolderPath(e.target.value)}
- />
-
-
- setShowFolderDialog(false)}
- >
- Cancel
-
-
- {folderLoading ? "Processing..." : "Process Folder"}
-
-
-
-
-
-
- {/* Process S3 Bucket Dialog */}
-
-
-
-
-
- Process S3 Bucket
-
-
- Process all documents from an S3 bucket. AWS credentials must be
- configured.
-
-
-
-
- S3 URL
- setBucketUrl(e.target.value)}
- />
-
-
- setShowS3Dialog(false)}>
- Cancel
-
-
- {s3Loading ? "Processing..." : "Process Bucket"}
-
-
-
-
-
- >
- );
+ {/* Process S3 Bucket Dialog */}
+
+
+
+
+
+ Process S3 Bucket
+
+
+ Process all documents from an S3 bucket. AWS credentials must be
+ configured.
+
+
+
+
+ S3 URL
+ setBucketUrl(e.target.value)}
+ />
+
+
+ setShowS3Dialog(false)}>
+ Cancel
+
+
+ {s3Loading ? "Processing..." : "Process Bucket"}
+
+
+
+
+
+ >
+ );
}
diff --git a/frontend/src/contexts/task-context.tsx b/frontend/src/contexts/task-context.tsx
index 5eb10ea9..b3275422 100644
--- a/frontend/src/contexts/task-context.tsx
+++ b/frontend/src/contexts/task-context.tsx
@@ -7,33 +7,18 @@ import {
useCallback,
useContext,
useEffect,
+ useRef,
useState,
} from "react";
import { toast } from "sonner";
+import { useCancelTaskMutation } from "@/app/api/mutations/useCancelTaskMutation";
+import {
+ type Task,
+ useGetTasksQuery,
+} from "@/app/api/queries/useGetTasksQuery";
import { useAuth } from "@/contexts/auth-context";
-export interface Task {
- task_id: string;
- status:
- | "pending"
- | "running"
- | "processing"
- | "completed"
- | "failed"
- | "error";
- total_files?: number;
- processed_files?: number;
- successful_files?: number;
- failed_files?: number;
- running_files?: number;
- pending_files?: number;
- created_at: string;
- updated_at: string;
- duration_seconds?: number;
- result?: Record;
- error?: string;
- files?: Record>;
-}
+// Task interface is now imported from useGetTasksQuery
export interface TaskFile {
filename: string;
@@ -58,20 +43,45 @@ interface TaskContextType {
isFetching: boolean;
isMenuOpen: boolean;
toggleMenu: () => void;
+ // React Query states
+ isLoading: boolean;
+ error: Error | null;
}
const TaskContext = createContext(undefined);
export function TaskProvider({ children }: { children: React.ReactNode }) {
- const [tasks, setTasks] = useState([]);
const [files, setFiles] = useState([]);
- const [isPolling, setIsPolling] = useState(false);
- const [isFetching, setIsFetching] = useState(false);
const [isMenuOpen, setIsMenuOpen] = useState(false);
+ const previousTasksRef = useRef([]);
const { isAuthenticated, isNoAuthMode } = useAuth();
const queryClient = useQueryClient();
+ // Use React Query hooks
+ const {
+ data: tasks = [],
+ isLoading,
+ error,
+ refetch: refetchTasks,
+ isFetching,
+ } = useGetTasksQuery({
+ enabled: isAuthenticated || isNoAuthMode,
+ });
+
+ const cancelTaskMutation = useCancelTaskMutation({
+ onSuccess: () => {
+ toast.success("Task cancelled", {
+ description: "Task has been cancelled successfully",
+ });
+ },
+ onError: (error) => {
+ toast.error("Failed to cancel task", {
+ description: error.message,
+ });
+ },
+ });
+
const refetchSearch = useCallback(() => {
queryClient.invalidateQueries({
queryKey: ["search"],
@@ -99,252 +109,171 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
[],
);
- const fetchTasks = useCallback(async () => {
- if (!isAuthenticated && !isNoAuthMode) return;
-
- setIsFetching(true);
- try {
- const response = await fetch("/api/tasks");
- if (response.ok) {
- const data = await response.json();
- const newTasks = data.tasks || [];
-
- // Update tasks and check for status changes in the same state update
- setTasks((prevTasks) => {
- // Check for newly completed tasks to show toasts
- if (prevTasks.length > 0) {
- newTasks.forEach((newTask: Task) => {
- const oldTask = prevTasks.find(
- (t) => t.task_id === newTask.task_id,
- );
-
- // Update or add files from task.files if available
- if (newTask.files && typeof newTask.files === "object") {
- const taskFileEntries = Object.entries(newTask.files);
- const now = new Date().toISOString();
-
- taskFileEntries.forEach(([filePath, fileInfo]) => {
- if (typeof fileInfo === "object" && fileInfo) {
- const fileName = filePath.split("/").pop() || filePath;
- const fileStatus = fileInfo.status as string;
-
- // Map backend file status to our TaskFile status
- let mappedStatus: TaskFile["status"];
- switch (fileStatus) {
- case "pending":
- case "running":
- mappedStatus = "processing";
- break;
- case "completed":
- mappedStatus = "active";
- break;
- case "failed":
- mappedStatus = "failed";
- break;
- default:
- mappedStatus = "processing";
- }
-
- setFiles((prevFiles) => {
- const existingFileIndex = prevFiles.findIndex(
- (f) =>
- f.source_url === filePath &&
- f.task_id === newTask.task_id,
- );
-
- // Detect connector type based on file path or other indicators
- let connectorType = "local";
- if (filePath.includes("/") && !filePath.startsWith("/")) {
- // Likely S3 key format (bucket/path/file.ext)
- connectorType = "s3";
- }
-
- const fileEntry: TaskFile = {
- filename: fileName,
- mimetype: "", // We don't have this info from the task
- source_url: filePath,
- size: 0, // We don't have this info from the task
- connector_type: connectorType,
- status: mappedStatus,
- task_id: newTask.task_id,
- created_at:
- typeof fileInfo.created_at === "string"
- ? fileInfo.created_at
- : now,
- updated_at:
- typeof fileInfo.updated_at === "string"
- ? fileInfo.updated_at
- : now,
- };
-
- if (existingFileIndex >= 0) {
- // Update existing file
- const updatedFiles = [...prevFiles];
- updatedFiles[existingFileIndex] = fileEntry;
- return updatedFiles;
- } else {
- // Add new file
- return [...prevFiles, fileEntry];
- }
- });
- }
- });
- }
-
- if (
- oldTask &&
- oldTask.status !== "completed" &&
- newTask.status === "completed"
- ) {
- // Task just completed - show success toast
- toast.success("Task completed successfully", {
- description: `Task ${newTask.task_id} has finished processing.`,
- action: {
- label: "View",
- onClick: () => console.log("View task", newTask.task_id),
- },
- });
- refetchSearch();
- // Dispatch knowledge updated event for all knowledge-related pages
- console.log(
- "Task completed successfully, dispatching knowledgeUpdated event",
- );
- window.dispatchEvent(new CustomEvent("knowledgeUpdated"));
-
- // Remove files for this completed task from the files list
- setFiles((prevFiles) =>
- prevFiles.filter((file) => file.task_id !== newTask.task_id),
- );
- } else if (
- oldTask &&
- oldTask.status !== "failed" &&
- oldTask.status !== "error" &&
- (newTask.status === "failed" || newTask.status === "error")
- ) {
- // Task just failed - show error toast
- toast.error("Task failed", {
- description: `Task ${newTask.task_id} failed: ${
- newTask.error || "Unknown error"
- }`,
- });
-
- // Files will be updated to failed status by the file parsing logic above
- }
- });
- }
-
- return newTasks;
- });
- }
- } catch (error) {
- console.error("Failed to fetch tasks:", error);
- } finally {
- setIsFetching(false);
+ // Handle task status changes and file updates
+ useEffect(() => {
+ if (tasks.length === 0) {
+ // Store current tasks as previous for next comparison
+ previousTasksRef.current = tasks;
+ return;
}
- }, [isAuthenticated, isNoAuthMode, refetchSearch]); // Removed 'tasks' from dependencies to prevent infinite loop!
+ console.log(tasks, previousTasksRef.current);
- const addTask = useCallback((taskId: string) => {
- // Immediately start aggressive polling for the new task
- let pollAttempts = 0;
- const maxPollAttempts = 30; // Poll for up to 30 seconds
+ // Check for task status changes by comparing with previous tasks
+ tasks.forEach((currentTask) => {
+ const previousTask = previousTasksRef.current.find(
+ (prev) => prev.task_id === currentTask.task_id,
+ );
- const aggressivePoll = async () => {
- try {
- const response = await fetch("/api/tasks");
- if (response.ok) {
- const data = await response.json();
- const newTasks = data.tasks || [];
- const foundTask = newTasks.find(
- (task: Task) => task.task_id === taskId,
- );
+ // Only show toasts if we have previous data and status has changed
+ if (((previousTask && previousTask.status !== currentTask.status) || (!previousTask && previousTasksRef.current.length !== 0))) {
+ console.log("task status changed", currentTask.status);
+ // Process files from failed task and add them to files list
+ if (currentTask.files && typeof currentTask.files === "object") {
+ console.log("processing files", currentTask.files);
+ const taskFileEntries = Object.entries(currentTask.files);
+ const now = new Date().toISOString();
- if (foundTask) {
- // Task found! Update the tasks state
- setTasks((prevTasks) => {
- // Check if task is already in the list
- const exists = prevTasks.some((t) => t.task_id === taskId);
- if (!exists) {
- return [...prevTasks, foundTask];
+ taskFileEntries.forEach(([filePath, fileInfo]) => {
+ if (typeof fileInfo === "object" && fileInfo) {
+ const fileName = filePath.split("/").pop() || filePath;
+ const fileStatus = fileInfo.status as string;
+
+ // Map backend file status to our TaskFile status
+ let mappedStatus: TaskFile["status"];
+ switch (fileStatus) {
+ case "pending":
+ case "running":
+ mappedStatus = "processing";
+ break;
+ case "completed":
+ mappedStatus = "active";
+ break;
+ case "failed":
+ mappedStatus = "failed";
+ break;
+ default:
+ mappedStatus = "processing";
}
- // Update existing task
- return prevTasks.map((t) =>
- t.task_id === taskId ? foundTask : t,
- );
- });
- return; // Stop polling, we found it
- }
+
+ setFiles((prevFiles) => {
+ const existingFileIndex = prevFiles.findIndex(
+ (f) =>
+ f.source_url === filePath &&
+ f.task_id === currentTask.task_id,
+ );
+
+ // Detect connector type based on file path or other indicators
+ let connectorType = "local";
+ if (filePath.includes("/") && !filePath.startsWith("/")) {
+ // Likely S3 key format (bucket/path/file.ext)
+ connectorType = "s3";
+ }
+
+ const fileEntry: TaskFile = {
+ filename: fileName,
+ mimetype: "", // We don't have this info from the task
+ source_url: filePath,
+ size: 0, // We don't have this info from the task
+ connector_type: connectorType,
+ status: mappedStatus,
+ task_id: currentTask.task_id,
+ created_at:
+ typeof fileInfo.created_at === "string"
+ ? fileInfo.created_at
+ : now,
+ updated_at:
+ typeof fileInfo.updated_at === "string"
+ ? fileInfo.updated_at
+ : now,
+ };
+
+ if (existingFileIndex >= 0) {
+ // Update existing file
+ const updatedFiles = [...prevFiles];
+ updatedFiles[existingFileIndex] = fileEntry;
+ return updatedFiles;
+ } else {
+ // Add new file
+ return [...prevFiles, fileEntry];
+ }
+ });
+ }
+ });
+ }
+ if (
+ previousTask && previousTask.status !== "completed" &&
+ currentTask.status === "completed"
+ ) {
+ // Task just completed - show success toast
+ toast.success("Task completed successfully", {
+ description: `Task ${currentTask.task_id} has finished processing.`,
+ action: {
+ label: "View",
+ onClick: () => console.log("View task", currentTask.task_id),
+ },
+ });
+ refetchSearch();
+ // Remove files for this completed task from the files list
+ // setFiles((prevFiles) =>
+ // prevFiles.filter((file) => file.task_id !== currentTask.task_id),
+ // );
+ } else if (
+ previousTask && previousTask.status !== "failed" &&
+ previousTask.status !== "error" &&
+ (currentTask.status === "failed" || currentTask.status === "error")
+ ) {
+ // Task just failed - show error toast
+ toast.error("Task failed", {
+ description: `Task ${currentTask.task_id} failed: ${
+ currentTask.error || "Unknown error"
+ }`,
+ });
}
- } catch (error) {
- console.error("Aggressive polling failed:", error);
}
+ });
- pollAttempts++;
- if (pollAttempts < maxPollAttempts) {
- // Continue polling every 1 second for new tasks
- setTimeout(aggressivePoll, 1000);
- }
- };
+ // Store current tasks as previous for next comparison
+ previousTasksRef.current = tasks;
+ }, [tasks, refetchSearch]);
- // Start aggressive polling after a short delay to allow backend to process
- setTimeout(aggressivePoll, 500);
- }, []);
+ const addTask = useCallback(
+ (_taskId: string) => {
+ // React Query will automatically handle polling when tasks are active
+ // Just trigger a refetch to get the latest data
+ refetchTasks();
+ },
+ [refetchTasks],
+ );
const refreshTasks = useCallback(async () => {
- await fetchTasks();
- }, [fetchTasks]);
+ await refetchTasks();
+ }, [refetchTasks]);
- const removeTask = useCallback((taskId: string) => {
- setTasks((prev) => prev.filter((task) => task.task_id !== taskId));
+ const removeTask = useCallback((_taskId: string) => {
+ // This is now handled by React Query automatically
+ // Tasks will be removed from the list when they're no longer returned by the API
}, []);
const cancelTask = useCallback(
async (taskId: string) => {
- try {
- const response = await fetch(`/api/tasks/${taskId}/cancel`, {
- method: "POST",
- });
-
- if (response.ok) {
- // Immediately refresh tasks to show the updated status
- await fetchTasks();
- toast.success("Task cancelled", {
- description: `Task ${taskId.substring(0, 8)}... has been cancelled`,
- });
- } else {
- const errorData = await response.json().catch(() => ({}));
- throw new Error(errorData.error || "Failed to cancel task");
- }
- } catch (error) {
- console.error("Failed to cancel task:", error);
- toast.error("Failed to cancel task", {
- description: error instanceof Error ? error.message : "Unknown error",
- });
- }
+ cancelTaskMutation.mutate({ taskId });
},
- [fetchTasks],
+ [cancelTaskMutation],
);
const toggleMenu = useCallback(() => {
setIsMenuOpen((prev) => !prev);
}, []);
- // Periodic polling for task updates
- useEffect(() => {
- if (!isAuthenticated && !isNoAuthMode) return;
-
- setIsPolling(true);
-
- // Initial fetch
- fetchTasks();
-
- // Set up polling interval - every 3 seconds (more responsive for active tasks)
- const interval = setInterval(fetchTasks, 3000);
-
- return () => {
- clearInterval(interval);
- setIsPolling(false);
- };
- }, [isAuthenticated, isNoAuthMode, fetchTasks]);
+ // Determine if we're polling based on React Query's refetch interval
+ const isPolling =
+ isFetching &&
+ tasks.some(
+ (task) =>
+ task.status === "pending" ||
+ task.status === "running" ||
+ task.status === "processing",
+ );
const value: TaskContextType = {
tasks,
@@ -358,6 +287,8 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
isFetching,
isMenuOpen,
toggleMenu,
+ isLoading,
+ error,
};
return {children} ;
From bf04d7ea600cc554a8b2151ce7e8848e824355e4 Mon Sep 17 00:00:00 2001
From: Brent O'Neill
Date: Thu, 2 Oct 2025 14:51:33 -0600
Subject: [PATCH 04/39] [design sweep]: update no auth cloud connector warning
message
---
frontend/src/app/settings/page.tsx | 46 +++++++++++++++++++++++-------
1 file changed, 35 insertions(+), 11 deletions(-)
diff --git a/frontend/src/app/settings/page.tsx b/frontend/src/app/settings/page.tsx
index 6017ab5b..aca2d293 100644
--- a/frontend/src/app/settings/page.tsx
+++ b/frontend/src/app/settings/page.tsx
@@ -628,25 +628,49 @@ function KnowledgeSourcesPage() {
{/* Conditional Sync Settings or No-Auth Message */}
{
- isNoAuthMode ? (
-
+ true ? (
+
-
- Cloud connectors are only available with auth mode enabled
+
+ Cloud connectors require authentication
- Please provide the following environment variables and
- restart:
+ Add the Google OAuth variables below to your .env{" "}
+ then restart the OpenRAG containers.
-
- # make here
- https://console.cloud.google.com/apis/credentials
+
+
+
+ 18
+
+ # Google OAuth
+
+
+
+ 19
+
+ # Create credentials here:
+
+
+
+ 20
+
+
+ # https://console.cloud.google.com/apis/credentials
+
+
+
+
+ 21
+ GOOGLE_OAUTH_CLIENT_ID=
+
+
+ 22
+ GOOGLE_OAUTH_CLIENT_SECRET=
-
GOOGLE_OAUTH_CLIENT_ID=
-
GOOGLE_OAUTH_CLIENT_SECRET=
From 892dcb3d74561671924c452d8439b8e89699e6df Mon Sep 17 00:00:00 2001
From: Brent O'Neill
Date: Thu, 2 Oct 2025 14:53:00 -0600
Subject: [PATCH 05/39] revert
---
frontend/src/app/settings/page.tsx | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/frontend/src/app/settings/page.tsx b/frontend/src/app/settings/page.tsx
index aca2d293..71c0c8e4 100644
--- a/frontend/src/app/settings/page.tsx
+++ b/frontend/src/app/settings/page.tsx
@@ -628,7 +628,7 @@ function KnowledgeSourcesPage() {
{/* Conditional Sync Settings or No-Auth Message */}
{
- true ? (
+ isNoAuthMode ? (
From db715c122bf42dfd9a049607eda247fd5f18ff25 Mon Sep 17 00:00:00 2001
From: Lucas Oliveira
Date: Thu, 2 Oct 2025 18:00:09 -0300
Subject: [PATCH 06/39] made failed and processing files not accessible and
deletable
---
frontend/src/app/knowledge/page.tsx | 604 ++++++++++++++--------------
1 file changed, 307 insertions(+), 297 deletions(-)
diff --git a/frontend/src/app/knowledge/page.tsx b/frontend/src/app/knowledge/page.tsx
index 1b8b60ef..41485289 100644
--- a/frontend/src/app/knowledge/page.tsx
+++ b/frontend/src/app/knowledge/page.tsx
@@ -17,270 +17,280 @@ import "@/components/AgGrid/registerAgGridModules";
import "@/components/AgGrid/agGridStyles.css";
import { toast } from "sonner";
import { KnowledgeActionsDropdown } from "@/components/knowledge-actions-dropdown";
+import { filterAccentClasses } from "@/components/knowledge-filter-panel";
import { StatusBadge } from "@/components/ui/status-badge";
import { DeleteConfirmationDialog } from "../../../components/confirmation-dialog";
import { useDeleteDocument } from "../api/mutations/useDeleteDocument";
-import { filterAccentClasses } from "@/components/knowledge-filter-panel";
// Function to get the appropriate icon for a connector type
function getSourceIcon(connectorType?: string) {
- switch (connectorType) {
- case "google_drive":
- return (
-
- );
- case "onedrive":
- return (
-
- );
- case "sharepoint":
- return ;
- case "s3":
- return ;
- default:
- return (
-
- );
- }
+ switch (connectorType) {
+ case "google_drive":
+ return (
+
+ );
+ case "onedrive":
+ return (
+
+ );
+ case "sharepoint":
+ return ;
+ case "s3":
+ return ;
+ default:
+ return (
+
+ );
+ }
}
function SearchPage() {
- const router = useRouter();
- const { isMenuOpen, files: taskFiles } = useTask();
- const { selectedFilter, setSelectedFilter, parsedFilterData, isPanelOpen } =
- useKnowledgeFilter();
- const [selectedRows, setSelectedRows] = useState([]);
- const [showBulkDeleteDialog, setShowBulkDeleteDialog] = useState(false);
+ const router = useRouter();
+ const { isMenuOpen, files: taskFiles } = useTask();
+ const { selectedFilter, setSelectedFilter, parsedFilterData, isPanelOpen } =
+ useKnowledgeFilter();
+ const [selectedRows, setSelectedRows] = useState([]);
+ const [showBulkDeleteDialog, setShowBulkDeleteDialog] = useState(false);
- const deleteDocumentMutation = useDeleteDocument();
+ const deleteDocumentMutation = useDeleteDocument();
- const { data = [], isFetching } = useGetSearchQuery(
- parsedFilterData?.query || "*",
- parsedFilterData
- );
+ const { data = [], isFetching } = useGetSearchQuery(
+ parsedFilterData?.query || "*",
+ parsedFilterData,
+ );
- const handleTableSearch = (e: ChangeEvent) => {
- gridRef.current?.api.setGridOption("quickFilterText", e.target.value);
- };
+ const handleTableSearch = (e: ChangeEvent) => {
+ gridRef.current?.api.setGridOption("quickFilterText", e.target.value);
+ };
- // Convert TaskFiles to File format and merge with backend results
- const taskFilesAsFiles: File[] = taskFiles.map((taskFile) => {
- return {
- filename: taskFile.filename,
- mimetype: taskFile.mimetype,
- source_url: taskFile.source_url,
- size: taskFile.size,
- connector_type: taskFile.connector_type,
- status: taskFile.status,
- };
- });
+ // Convert TaskFiles to File format and merge with backend results
+ const taskFilesAsFiles: File[] = taskFiles.map((taskFile) => {
+ return {
+ filename: taskFile.filename,
+ mimetype: taskFile.mimetype,
+ source_url: taskFile.source_url,
+ size: taskFile.size,
+ connector_type: taskFile.connector_type,
+ status: taskFile.status,
+ };
+ });
- const backendFiles = data as File[];
+ const backendFiles = data as File[];
- const filteredTaskFiles = taskFilesAsFiles.filter((taskFile) => {
- return (
- taskFile.status !== "active" &&
- !backendFiles.some(
- (backendFile) => backendFile.filename === taskFile.filename
- )
- );
- });
+ const filteredTaskFiles = taskFilesAsFiles.filter((taskFile) => {
+ return (
+ taskFile.status !== "active" &&
+ !backendFiles.some(
+ (backendFile) => backendFile.filename === taskFile.filename,
+ )
+ );
+ });
- // Combine task files first, then backend files
- const fileResults = [...backendFiles, ...filteredTaskFiles];
+ // Combine task files first, then backend files
+ const fileResults = [...backendFiles, ...filteredTaskFiles];
- const gridRef = useRef(null);
+ const gridRef = useRef(null);
- const [columnDefs] = useState[]>([
- {
- field: "filename",
- headerName: "Source",
- checkboxSelection: true,
- headerCheckboxSelection: true,
- initialFlex: 2,
- minWidth: 220,
- cellRenderer: ({ data, value }: CustomCellRendererProps) => {
- return (
- {
- router.push(
- `/knowledge/chunks?filename=${encodeURIComponent(
- data?.filename ?? ""
- )}`
- );
- }}
- >
- {getSourceIcon(data?.connector_type)}
-
- {value}
-
-
- );
- },
- },
- {
- field: "size",
- headerName: "Size",
- valueFormatter: (params) =>
- params.value ? `${Math.round(params.value / 1024)} KB` : "-",
- },
- {
- field: "mimetype",
- headerName: "Type",
- },
- {
- field: "owner",
- headerName: "Owner",
- valueFormatter: (params) =>
- params.data?.owner_name || params.data?.owner_email || "—",
- },
- {
- field: "chunkCount",
- headerName: "Chunks",
- valueFormatter: (params) => params.data?.chunkCount?.toString() || "-",
- },
- {
- field: "avgScore",
- headerName: "Avg score",
- initialFlex: 0.5,
- cellRenderer: ({ value }: CustomCellRendererProps) => {
- return (
-
- {value?.toFixed(2) ?? "-"}
-
- );
- },
- },
- {
- field: "status",
- headerName: "Status",
- cellRenderer: ({ data }: CustomCellRendererProps) => {
- // Default to 'active' status if no status is provided
- const status = data?.status || "active";
- return ;
- },
- },
- {
- cellRenderer: ({ data }: CustomCellRendererProps) => {
- return ;
- },
- cellStyle: {
- alignItems: "center",
- display: "flex",
- justifyContent: "center",
- padding: 0,
- },
- colId: "actions",
- filter: false,
- minWidth: 0,
- width: 40,
- resizable: false,
- sortable: false,
- initialFlex: 0,
- },
- ]);
+ const [columnDefs] = useState[]>([
+ {
+ field: "filename",
+ headerName: "Source",
+ checkboxSelection: (data) => (data?.data?.status || "active") === "active",
+ headerCheckboxSelection: true,
+ initialFlex: 2,
+ minWidth: 220,
+ cellRenderer: ({ data, value }: CustomCellRendererProps) => {
+ return (
+ {((data?.status || "active") !== "active") &&
+
+ }
+
{
+ if ((data?.status || "active") !== "active") {
+ return;
+ }
+ router.push(
+ `/knowledge/chunks?filename=${encodeURIComponent(
+ data?.filename ?? "",
+ )}`,
+ );
+ }}
+ >
+ {getSourceIcon(data?.connector_type)}
+
+ {value}
+
+
+ );
+ },
+ },
+ {
+ field: "size",
+ headerName: "Size",
+ valueFormatter: (params) =>
+ params.value ? `${Math.round(params.value / 1024)} KB` : "-",
+ },
+ {
+ field: "mimetype",
+ headerName: "Type",
+ },
+ {
+ field: "owner",
+ headerName: "Owner",
+ valueFormatter: (params) =>
+ params.data?.owner_name || params.data?.owner_email || "—",
+ },
+ {
+ field: "chunkCount",
+ headerName: "Chunks",
+ valueFormatter: (params) => params.data?.chunkCount?.toString() || "-",
+ },
+ {
+ field: "avgScore",
+ headerName: "Avg score",
+ initialFlex: 0.5,
+ cellRenderer: ({ value }: CustomCellRendererProps) => {
+ return (
+
+ {value?.toFixed(2) ?? "-"}
+
+ );
+ },
+ },
+ {
+ field: "status",
+ headerName: "Status",
+ cellRenderer: ({ data }: CustomCellRendererProps) => {
+ // Default to 'active' status if no status is provided
+ const status = data?.status || "active";
+ return ;
+ },
+ },
+ {
+ cellRenderer: ({ data }: CustomCellRendererProps) => {
+ const status = data?.status || "active";
+ if (status !== "active") {
+ return null;
+ }
+ return ;
+ },
+ cellStyle: {
+ alignItems: "center",
+ display: "flex",
+ justifyContent: "center",
+ padding: 0,
+ },
+ colId: "actions",
+ filter: false,
+ minWidth: 0,
+ width: 40,
+ resizable: false,
+ sortable: false,
+ initialFlex: 0,
+ },
+ ]);
- const defaultColDef: ColDef = {
- resizable: false,
- suppressMovable: true,
- initialFlex: 1,
- minWidth: 100,
- };
+ const defaultColDef: ColDef = {
+ resizable: false,
+ suppressMovable: true,
+ initialFlex: 1,
+ minWidth: 100,
+ };
- const onSelectionChanged = useCallback(() => {
- if (gridRef.current) {
- const selectedNodes = gridRef.current.api.getSelectedRows();
- setSelectedRows(selectedNodes);
- }
- }, []);
+ const onSelectionChanged = useCallback(() => {
+ if (gridRef.current) {
+ const selectedNodes = gridRef.current.api.getSelectedRows();
+ setSelectedRows(selectedNodes);
+ }
+ }, []);
- const handleBulkDelete = async () => {
- if (selectedRows.length === 0) return;
+ const handleBulkDelete = async () => {
+ if (selectedRows.length === 0) return;
- try {
- // Delete each file individually since the API expects one filename at a time
- const deletePromises = selectedRows.map((row) =>
- deleteDocumentMutation.mutateAsync({ filename: row.filename })
- );
+ try {
+ // Delete each file individually since the API expects one filename at a time
+ const deletePromises = selectedRows.map((row) =>
+ deleteDocumentMutation.mutateAsync({ filename: row.filename }),
+ );
- await Promise.all(deletePromises);
+ await Promise.all(deletePromises);
- toast.success(
- `Successfully deleted ${selectedRows.length} document${
- selectedRows.length > 1 ? "s" : ""
- }`
- );
- setSelectedRows([]);
- setShowBulkDeleteDialog(false);
+ toast.success(
+ `Successfully deleted ${selectedRows.length} document${
+ selectedRows.length > 1 ? "s" : ""
+ }`,
+ );
+ setSelectedRows([]);
+ setShowBulkDeleteDialog(false);
- // Clear selection in the grid
- if (gridRef.current) {
- gridRef.current.api.deselectAll();
- }
- } catch (error) {
- toast.error(
- error instanceof Error
- ? error.message
- : "Failed to delete some documents"
- );
- }
- };
+ // Clear selection in the grid
+ if (gridRef.current) {
+ gridRef.current.api.deselectAll();
+ }
+ } catch (error) {
+ toast.error(
+ error instanceof Error
+ ? error.message
+ : "Failed to delete some documents",
+ );
+ }
+ };
- return (
-
-
-
-
Project Knowledge
-
-
+ return (
+
+
+
+
Project Knowledge
+
+
- {/* Search Input Area */}
-
-
params.data.filename}
- domLayout="normal"
- onSelectionChanged={onSelectionChanged}
- noRowsOverlayComponent={() => (
-
-
- No knowledge
-
-
- Add files from local or your preferred cloud.
-
-
- )}
- />
-
+ {selectedRows.length > 0 && (
+
setShowBulkDeleteDialog(true)}
+ >
+ Delete
+
+ )}
+
+
+
params.data.filename}
+ domLayout="normal"
+ onSelectionChanged={onSelectionChanged}
+ noRowsOverlayComponent={() => (
+
+
+ No knowledge
+
+
+ Add files from local or your preferred cloud.
+
+
+ )}
+ />
+
- {/* Bulk Delete Confirmation Dialog */}
-
1 ? "s" : ""
- }? This will remove all chunks and data associated with these documents. This action cannot be undone.
+ {/* Bulk Delete Confirmation Dialog */}
+ 1 ? "s" : ""
+ }? This will remove all chunks and data associated with these documents. This action cannot be undone.
Documents to be deleted:
${selectedRows.map((row) => `• ${row.filename}`).join("\n")}`}
- confirmText="Delete All"
- onConfirm={handleBulkDelete}
- isLoading={deleteDocumentMutation.isPending}
- />
-
- );
+ confirmText="Delete All"
+ onConfirm={handleBulkDelete}
+ isLoading={deleteDocumentMutation.isPending}
+ />
+
+ );
}
export default function ProtectedSearchPage() {
- return (
-
-
-
- );
+ return (
+
+
+
+ );
}
From 581879f5f6ffc5807096f8d93c481e13c4787ee3 Mon Sep 17 00:00:00 2001
From: Lucas Oliveira
Date: Thu, 2 Oct 2025 18:04:45 -0300
Subject: [PATCH 07/39] fixed animated processing icon
---
.../ui/animated-processing-icon.tsx | 52 +++++++------------
frontend/src/components/ui/status-badge.tsx | 2 +-
2 files changed, 21 insertions(+), 33 deletions(-)
diff --git a/frontend/src/components/ui/animated-processing-icon.tsx b/frontend/src/components/ui/animated-processing-icon.tsx
index eb36b2ab..51815414 100644
--- a/frontend/src/components/ui/animated-processing-icon.tsx
+++ b/frontend/src/components/ui/animated-processing-icon.tsx
@@ -1,26 +1,16 @@
-interface AnimatedProcessingIconProps {
- className?: string;
- size?: number;
-}
+import type { SVGProps } from "react";
-export const AnimatedProcessingIcon = ({
- className = "",
- size = 10,
-}: AnimatedProcessingIconProps) => {
- const width = Math.round((size * 6) / 10);
- const height = size;
-
- return (
-
-
-
-
-
-
-
-
- );
+
+
+
+
+
+
+
+ );
};
diff --git a/frontend/src/components/ui/status-badge.tsx b/frontend/src/components/ui/status-badge.tsx
index d3b1a323..07dce58b 100644
--- a/frontend/src/components/ui/status-badge.tsx
+++ b/frontend/src/components/ui/status-badge.tsx
@@ -50,7 +50,7 @@ export const StatusBadge = ({ status, className }: StatusBadgeProps) => {
}`}
>
{status === "processing" && (
-
+
)}
{config.label}
From 7d6feef9992b6bfeaf78db085a961dd5c4e72c4e Mon Sep 17 00:00:00 2001
From: Lucas Oliveira
Date: Thu, 2 Oct 2025 18:09:02 -0300
Subject: [PATCH 08/39] fixed status badge size
---
frontend/src/components/ui/status-badge.tsx | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/frontend/src/components/ui/status-badge.tsx b/frontend/src/components/ui/status-badge.tsx
index 07dce58b..e57ad3b5 100644
--- a/frontend/src/components/ui/status-badge.tsx
+++ b/frontend/src/components/ui/status-badge.tsx
@@ -50,7 +50,7 @@ export const StatusBadge = ({ status, className }: StatusBadgeProps) => {
}`}
>
{status === "processing" && (
-
+
)}
{config.label}
From 179a7403ccaec179e2d0cad3e88c29a3deca08f1 Mon Sep 17 00:00:00 2001
From: Lucas Oliveira
Date: Thu, 2 Oct 2025 18:09:18 -0300
Subject: [PATCH 09/39] fixed router, langflow files and processors to not use
temp names
---
src/api/langflow_files.py | 15 ++++++++-------
src/api/router.py | 18 ++++++++++--------
src/models/processors.py | 8 ++------
3 files changed, 20 insertions(+), 21 deletions(-)
diff --git a/src/api/langflow_files.py b/src/api/langflow_files.py
index 1c83724d..ae36bf04 100644
--- a/src/api/langflow_files.py
+++ b/src/api/langflow_files.py
@@ -189,19 +189,20 @@ async def upload_and_ingest_user_file(
# Create temporary file for task processing
import tempfile
import os
-
+
# Read file content
content = await upload_file.read()
-
- # Create temporary file
+
+ # Create temporary file with the actual filename (not a temp prefix)
+ # Store in temp directory but use the real filename
+ temp_dir = tempfile.gettempdir()
safe_filename = upload_file.filename.replace(" ", "_").replace("/", "_")
- temp_fd, temp_path = tempfile.mkstemp(
- suffix=f"_{safe_filename}"
- )
+ temp_path = os.path.join(temp_dir, safe_filename)
+
try:
# Write content to temp file
- with os.fdopen(temp_fd, 'wb') as temp_file:
+ with open(temp_path, 'wb') as temp_file:
temp_file.write(content)
logger.debug("Created temporary file for task processing", temp_path=temp_path)
diff --git a/src/api/router.py b/src/api/router.py
index 56789d41..857914c0 100644
--- a/src/api/router.py
+++ b/src/api/router.py
@@ -114,20 +114,22 @@ async def langflow_upload_ingest_task(
temp_file_paths = []
try:
+ # Create temp directory reference once
+ temp_dir = tempfile.gettempdir()
+
for upload_file in upload_files:
# Read file content
content = await upload_file.read()
-
- # Create temporary file
+
+ # Create temporary file with the actual filename (not a temp prefix)
+ # Store in temp directory but use the real filename
safe_filename = upload_file.filename.replace(" ", "_").replace("/", "_")
- temp_fd, temp_path = tempfile.mkstemp(
- suffix=f"_{safe_filename}"
- )
-
+ temp_path = os.path.join(temp_dir, safe_filename)
+
# Write content to temp file
- with os.fdopen(temp_fd, 'wb') as temp_file:
+ with open(temp_path, 'wb') as temp_file:
temp_file.write(content)
-
+
temp_file_paths.append(temp_path)
logger.debug(
diff --git a/src/models/processors.py b/src/models/processors.py
index ecec9c49..0ce8e33d 100644
--- a/src/models/processors.py
+++ b/src/models/processors.py
@@ -574,12 +574,8 @@ class LangflowFileProcessor(TaskProcessor):
content = f.read()
# Create file tuple for upload
- temp_filename = os.path.basename(item)
- # Extract original filename from temp file suffix (remove tmp prefix)
- if "_" in temp_filename:
- filename = temp_filename.split("_", 1)[1] # Get everything after first _
- else:
- filename = temp_filename
+ # The temp file now has the actual filename, no need to extract it
+ filename = os.path.basename(item)
content_type, _ = mimetypes.guess_type(filename)
if not content_type:
content_type = 'application/octet-stream'
From 12a72b2cd98ec56b800301cfe81c400464ea2c77 Mon Sep 17 00:00:00 2001
From: Brent O'Neill
Date: Thu, 2 Oct 2025 16:30:22 -0600
Subject: [PATCH 10/39] remove unused fields
---
frontend/src/app/knowledge/chunks/page.tsx | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx
index cb96eddc..267d0afc 100644
--- a/frontend/src/app/knowledge/chunks/page.tsx
+++ b/frontend/src/app/knowledge/chunks/page.tsx
@@ -298,12 +298,12 @@ function ChunksPageContent() {
Original document
-
+ {/*
Name
{fileData?.filename}
-
+
*/}
Type
@@ -318,23 +318,23 @@ function ChunksPageContent() {
: "Unknown"}
- */}
{/* TODO: Uncomment after data is available */}
{/*
Source
*/}
- */}
From 042ffb421fc55c9500530646182887f85834c073 Mon Sep 17 00:00:00 2001
From: Brent O'Neill
Date: Thu, 2 Oct 2025 16:41:59 -0600
Subject: [PATCH 11/39] fix search
---
frontend/src/app/knowledge/chunks/page.tsx | 47 +++++++++++++++-------
1 file changed, 33 insertions(+), 14 deletions(-)
diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx
index 267d0afc..bc00f886 100644
--- a/frontend/src/app/knowledge/chunks/page.tsx
+++ b/frontend/src/app/knowledge/chunks/page.tsx
@@ -1,6 +1,6 @@
"use client";
-import { ArrowLeft, Check, Copy, Loader2, Search } from "lucide-react";
+import { ArrowLeft, Check, Copy, Loader2, Search, X } from "lucide-react";
import { Suspense, useCallback, useEffect, useMemo, useState } from "react";
import { useRouter, useSearchParams } from "next/navigation";
import { ProtectedRoute } from "@/components/protected-route";
@@ -14,7 +14,7 @@ import {
} from "../../api/queries/useGetSearchQuery";
import { Label } from "@/components/ui/label";
import { Checkbox } from "@/components/ui/checkbox";
-import { Input } from "@/components/ui/input";
+import { filterAccentClasses } from "@/components/knowledge-filter-panel";
const getFileTypeLabel = (mimetype: string) => {
if (mimetype === "application/pdf") return "PDF";
@@ -26,8 +26,9 @@ const getFileTypeLabel = (mimetype: string) => {
function ChunksPageContent() {
const router = useRouter();
const searchParams = useSearchParams();
+ const { selectedFilter, setSelectedFilter, parsedFilterData, isPanelOpen } =
+ useKnowledgeFilter();
const { isMenuOpen } = useTask();
- const { parsedFilterData, isPanelOpen } = useKnowledgeFilter();
const filename = searchParams.get("filename");
const [chunks, setChunks] = useState([]);
@@ -158,17 +159,35 @@ function ChunksPageContent() {
-
-
: null}
- id="search-query"
- type="text"
- defaultValue={parsedFilterData?.query}
- value={queryInputText}
- onChange={(e) => setQueryInputText(e.target.value)}
- placeholder="Search chunks..."
- />
+
+
+ {selectedFilter?.name && (
+
+ {selectedFilter?.name}
+ setSelectedFilter(null)}
+ />
+
+ )}
+
+
setQueryInputText(e.target.value)}
+ />
+
Date: Fri, 3 Oct 2025 10:11:39 -0300
Subject: [PATCH 12/39] fixed issues with already ingested files, fixed state
issues
---
frontend/src/app/knowledge/page.tsx | 11 +++++------
frontend/src/contexts/task-context.tsx | 9 +++++----
2 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/frontend/src/app/knowledge/page.tsx b/frontend/src/app/knowledge/page.tsx
index 41485289..c6d254c4 100644
--- a/frontend/src/app/knowledge/page.tsx
+++ b/frontend/src/app/knowledge/page.tsx
@@ -75,14 +75,13 @@ function SearchPage() {
};
});
- const backendFiles = data as File[];
+ const backendFiles = (data as File[]).filter((file) => !taskFilesAsFiles.some((taskFile) => taskFile.filename === file.filename && taskFile.status === "processing"));
const filteredTaskFiles = taskFilesAsFiles.filter((taskFile) => {
return (
taskFile.status !== "active" &&
!backendFiles.some(
- (backendFile) => backendFile.filename === taskFile.filename,
- )
+ (backendFile) => backendFile.filename === taskFile.filename,)
);
});
@@ -91,7 +90,7 @@ function SearchPage() {
const gridRef = useRef(null);
- const [columnDefs] = useState[]>([
+ const columnDefs = [
{
field: "filename",
headerName: "Source",
@@ -189,8 +188,8 @@ function SearchPage() {
resizable: false,
sortable: false,
initialFlex: 0,
- },
- ]);
+ }
+ ];
const defaultColDef: ColDef = {
resizable: false,
diff --git a/frontend/src/contexts/task-context.tsx b/frontend/src/contexts/task-context.tsx
index b3275422..8928ae8f 100644
--- a/frontend/src/contexts/task-context.tsx
+++ b/frontend/src/contexts/task-context.tsx
@@ -212,11 +212,12 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
onClick: () => console.log("View task", currentTask.task_id),
},
});
+ setTimeout(() => {
refetchSearch();
- // Remove files for this completed task from the files list
- // setFiles((prevFiles) =>
- // prevFiles.filter((file) => file.task_id !== currentTask.task_id),
- // );
+ setFiles((prevFiles) =>
+ prevFiles.filter((file) => file.task_id !== currentTask.task_id && file.status !== "failed"),
+ );
+ }, 500);
} else if (
previousTask && previousTask.status !== "failed" &&
previousTask.status !== "error" &&
From 7b860b3f6f55c7c3a2d09b392f37a4c39a0d51ed Mon Sep 17 00:00:00 2001
From: Brent O'Neill
Date: Fri, 3 Oct 2025 09:08:33 -0600
Subject: [PATCH 13/39] fix frontend ID
---
frontend/src/app/api/queries/useGetSearchQuery.ts | 5 +++--
frontend/src/app/knowledge/chunks/page.tsx | 6 ++++--
2 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/frontend/src/app/api/queries/useGetSearchQuery.ts b/frontend/src/app/api/queries/useGetSearchQuery.ts
index 5383178d..d0c1a3a9 100644
--- a/frontend/src/app/api/queries/useGetSearchQuery.ts
+++ b/frontend/src/app/api/queries/useGetSearchQuery.ts
@@ -29,6 +29,7 @@ export interface ChunkResult {
owner_email?: string;
file_size?: number;
connector_type?: string;
+ index?: number;
}
export interface File {
@@ -55,7 +56,7 @@ export interface File {
export const useGetSearchQuery = (
query: string,
queryData?: ParsedQueryData | null,
- options?: Omit,
+ options?: Omit
) => {
const queryClient = useQueryClient();
@@ -184,7 +185,7 @@ export const useGetSearchQuery = (
queryFn: getFiles,
...options,
},
- queryClient,
+ queryClient
);
return queryResult;
diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx
index bc00f886..9b02506b 100644
--- a/frontend/src/app/knowledge/chunks/page.tsx
+++ b/frontend/src/app/knowledge/chunks/page.tsx
@@ -86,7 +86,9 @@ function ChunksPageContent() {
return;
}
- setChunks(fileData?.chunks || []);
+ setChunks(
+ fileData?.chunks?.map((chunk, i) => ({ ...chunk, index: i + 1 })) || []
+ );
}, [data, filename]);
// Set selected state for all checkboxes when selectAll changes
@@ -246,7 +248,7 @@ function ChunksPageContent() {
/>
- Chunk {chunk.page}
+ Chunk {chunk.index}
{chunk.text.length} chars
From ac333b99d7317b8b3400e280d836454ffae64676 Mon Sep 17 00:00:00 2001
From: Brent O'Neill
Date: Fri, 3 Oct 2025 09:12:35 -0600
Subject: [PATCH 14/39] added value
---
frontend/src/app/knowledge/chunks/page.tsx | 1 +
1 file changed, 1 insertion(+)
diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx
index 9b02506b..bbb7f019 100644
--- a/frontend/src/app/knowledge/chunks/page.tsx
+++ b/frontend/src/app/knowledge/chunks/page.tsx
@@ -188,6 +188,7 @@ function ChunksPageContent() {
type="text"
placeholder="Search your documents..."
onChange={(e) => setQueryInputText(e.target.value)}
+ value={queryInputText}
/>
From 11a6f0e8d415ce989929c49e566da75d474b11bf Mon Sep 17 00:00:00 2001
From: Brent O'Neill
Date: Fri, 3 Oct 2025 10:47:31 -0600
Subject: [PATCH 15/39] updated header and cloud picker header
---
frontend/src/app/upload/[provider]/page.tsx | 659 +++++++++---------
.../components/cloud-picker/picker-header.tsx | 8 +-
2 files changed, 332 insertions(+), 335 deletions(-)
diff --git a/frontend/src/app/upload/[provider]/page.tsx b/frontend/src/app/upload/[provider]/page.tsx
index 7c72ec3d..8e0a306c 100644
--- a/frontend/src/app/upload/[provider]/page.tsx
+++ b/frontend/src/app/upload/[provider]/page.tsx
@@ -12,367 +12,370 @@ import { useTask } from "@/contexts/task-context";
// CloudFile interface is now imported from the unified cloud picker
interface CloudConnector {
- id: string;
- name: string;
- description: string;
- status: "not_connected" | "connecting" | "connected" | "error";
- type: string;
- connectionId?: string;
- clientId: string;
- hasAccessToken: boolean;
- accessTokenError?: string;
+ id: string;
+ name: string;
+ description: string;
+ status: "not_connected" | "connecting" | "connected" | "error";
+ type: string;
+ connectionId?: string;
+ clientId: string;
+ hasAccessToken: boolean;
+ accessTokenError?: string;
}
export default function UploadProviderPage() {
- const params = useParams();
- const router = useRouter();
- const provider = params.provider as string;
- const { addTask, tasks } = useTask();
+ const params = useParams();
+ const router = useRouter();
+ const provider = params.provider as string;
+ const { addTask, tasks } = useTask();
- const [connector, setConnector] = useState(null);
- const [isLoading, setIsLoading] = useState(true);
- const [error, setError] = useState(null);
- const [accessToken, setAccessToken] = useState(null);
- const [selectedFiles, setSelectedFiles] = useState([]);
- const [isIngesting, setIsIngesting] = useState(false);
- const [currentSyncTaskId, setCurrentSyncTaskId] = useState(
- null,
- );
- const [ingestSettings, setIngestSettings] = useState({
- chunkSize: 1000,
- chunkOverlap: 200,
- ocr: false,
- pictureDescriptions: false,
- embeddingModel: "text-embedding-3-small",
- });
+ const [connector, setConnector] = useState(null);
+ const [isLoading, setIsLoading] = useState(true);
+ const [error, setError] = useState(null);
+ const [accessToken, setAccessToken] = useState(null);
+ const [selectedFiles, setSelectedFiles] = useState([]);
+ const [isIngesting, setIsIngesting] = useState(false);
+ const [currentSyncTaskId, setCurrentSyncTaskId] = useState(
+ null
+ );
+ const [ingestSettings, setIngestSettings] = useState({
+ chunkSize: 1000,
+ chunkOverlap: 200,
+ ocr: false,
+ pictureDescriptions: false,
+ embeddingModel: "text-embedding-3-small",
+ });
- useEffect(() => {
- const fetchConnectorInfo = async () => {
- setIsLoading(true);
- setError(null);
+ useEffect(() => {
+ const fetchConnectorInfo = async () => {
+ setIsLoading(true);
+ setError(null);
- try {
- // Fetch available connectors to validate the provider
- const connectorsResponse = await fetch("/api/connectors");
- if (!connectorsResponse.ok) {
- throw new Error("Failed to load connectors");
- }
+ try {
+ // Fetch available connectors to validate the provider
+ const connectorsResponse = await fetch("/api/connectors");
+ if (!connectorsResponse.ok) {
+ throw new Error("Failed to load connectors");
+ }
- const connectorsResult = await connectorsResponse.json();
- const providerInfo = connectorsResult.connectors[provider];
+ const connectorsResult = await connectorsResponse.json();
+ const providerInfo = connectorsResult.connectors[provider];
- if (!providerInfo || !providerInfo.available) {
- setError(
- `Cloud provider "${provider}" is not available or configured.`,
- );
- return;
- }
+ if (!providerInfo || !providerInfo.available) {
+ setError(
+ `Cloud provider "${provider}" is not available or configured.`
+ );
+ return;
+ }
- // Check connector status
- const statusResponse = await fetch(
- `/api/connectors/${provider}/status`,
- );
- if (!statusResponse.ok) {
- throw new Error(`Failed to check ${provider} status`);
- }
+ // Check connector status
+ const statusResponse = await fetch(
+ `/api/connectors/${provider}/status`
+ );
+ if (!statusResponse.ok) {
+ throw new Error(`Failed to check ${provider} status`);
+ }
- const statusData = await statusResponse.json();
- const connections = statusData.connections || [];
- const activeConnection = connections.find(
- (conn: { is_active: boolean; connection_id: string }) =>
- conn.is_active,
- );
- const isConnected = activeConnection !== undefined;
+ const statusData = await statusResponse.json();
+ const connections = statusData.connections || [];
+ const activeConnection = connections.find(
+ (conn: { is_active: boolean; connection_id: string }) =>
+ conn.is_active
+ );
+ const isConnected = activeConnection !== undefined;
- let hasAccessToken = false;
- let accessTokenError: string | undefined;
+ let hasAccessToken = false;
+ let accessTokenError: string | undefined;
- // Try to get access token for connected connectors
- if (isConnected && activeConnection) {
- try {
- const tokenResponse = await fetch(
- `/api/connectors/${provider}/token?connection_id=${activeConnection.connection_id}`,
- );
- if (tokenResponse.ok) {
- const tokenData = await tokenResponse.json();
- if (tokenData.access_token) {
- hasAccessToken = true;
- setAccessToken(tokenData.access_token);
- }
- } else {
- const errorData = await tokenResponse
- .json()
- .catch(() => ({ error: "Token unavailable" }));
- accessTokenError = errorData.error || "Access token unavailable";
- }
- } catch {
- accessTokenError = "Failed to fetch access token";
- }
- }
+ // Try to get access token for connected connectors
+ if (isConnected && activeConnection) {
+ try {
+ const tokenResponse = await fetch(
+ `/api/connectors/${provider}/token?connection_id=${activeConnection.connection_id}`
+ );
+ if (tokenResponse.ok) {
+ const tokenData = await tokenResponse.json();
+ if (tokenData.access_token) {
+ hasAccessToken = true;
+ setAccessToken(tokenData.access_token);
+ }
+ } else {
+ const errorData = await tokenResponse
+ .json()
+ .catch(() => ({ error: "Token unavailable" }));
+ accessTokenError = errorData.error || "Access token unavailable";
+ }
+ } catch {
+ accessTokenError = "Failed to fetch access token";
+ }
+ }
- setConnector({
- id: provider,
- name: providerInfo.name,
- description: providerInfo.description,
- status: isConnected ? "connected" : "not_connected",
- type: provider,
- connectionId: activeConnection?.connection_id,
- clientId: activeConnection?.client_id,
- hasAccessToken,
- accessTokenError,
- });
- } catch (error) {
- console.error("Failed to load connector info:", error);
- setError(
- error instanceof Error
- ? error.message
- : "Failed to load connector information",
- );
- } finally {
- setIsLoading(false);
- }
- };
+ setConnector({
+ id: provider,
+ name: providerInfo.name,
+ description: providerInfo.description,
+ status: isConnected ? "connected" : "not_connected",
+ type: provider,
+ connectionId: activeConnection?.connection_id,
+ clientId: activeConnection?.client_id,
+ hasAccessToken,
+ accessTokenError,
+ });
+ } catch (error) {
+ console.error("Failed to load connector info:", error);
+ setError(
+ error instanceof Error
+ ? error.message
+ : "Failed to load connector information"
+ );
+ } finally {
+ setIsLoading(false);
+ }
+ };
- if (provider) {
- fetchConnectorInfo();
- }
- }, [provider]);
+ if (provider) {
+ fetchConnectorInfo();
+ }
+ }, [provider]);
- // Watch for sync task completion and redirect
- useEffect(() => {
- if (!currentSyncTaskId) return;
+ // Watch for sync task completion and redirect
+ useEffect(() => {
+ if (!currentSyncTaskId) return;
- const currentTask = tasks.find(
- (task) => task.task_id === currentSyncTaskId,
- );
+ const currentTask = tasks.find(
+ (task) => task.task_id === currentSyncTaskId
+ );
- if (currentTask && currentTask.status === "completed") {
- // Task completed successfully, show toast and redirect
- setIsIngesting(false);
- setTimeout(() => {
- router.push("/knowledge");
- }, 2000); // 2 second delay to let user see toast
- } else if (currentTask && currentTask.status === "failed") {
- // Task failed, clear the tracking but don't redirect
- setIsIngesting(false);
- setCurrentSyncTaskId(null);
- }
- }, [tasks, currentSyncTaskId, router]);
+ if (currentTask && currentTask.status === "completed") {
+ // Task completed successfully, show toast and redirect
+ setIsIngesting(false);
+ setTimeout(() => {
+ router.push("/knowledge");
+ }, 2000); // 2 second delay to let user see toast
+ } else if (currentTask && currentTask.status === "failed") {
+ // Task failed, clear the tracking but don't redirect
+ setIsIngesting(false);
+ setCurrentSyncTaskId(null);
+ }
+ }, [tasks, currentSyncTaskId, router]);
- const handleFileSelected = (files: CloudFile[]) => {
- setSelectedFiles(files);
- console.log(`Selected ${files.length} files from ${provider}:`, files);
- // You can add additional handling here like triggering sync, etc.
- };
+ const handleFileSelected = (files: CloudFile[]) => {
+ setSelectedFiles(files);
+ console.log(`Selected ${files.length} files from ${provider}:`, files);
+ // You can add additional handling here like triggering sync, etc.
+ };
- const handleSync = async (connector: CloudConnector) => {
- if (!connector.connectionId || selectedFiles.length === 0) return;
+ const handleSync = async (connector: CloudConnector) => {
+ if (!connector.connectionId || selectedFiles.length === 0) return;
- setIsIngesting(true);
+ setIsIngesting(true);
- try {
- const syncBody: {
- connection_id: string;
- max_files?: number;
- selected_files?: string[];
- settings?: IngestSettings;
- } = {
- connection_id: connector.connectionId,
- selected_files: selectedFiles.map((file) => file.id),
- settings: ingestSettings,
- };
+ try {
+ const syncBody: {
+ connection_id: string;
+ max_files?: number;
+ selected_files?: string[];
+ settings?: IngestSettings;
+ } = {
+ connection_id: connector.connectionId,
+ selected_files: selectedFiles.map((file) => file.id),
+ settings: ingestSettings,
+ };
- const response = await fetch(`/api/connectors/${connector.type}/sync`, {
- method: "POST",
- headers: {
- "Content-Type": "application/json",
- },
- body: JSON.stringify(syncBody),
- });
+ const response = await fetch(`/api/connectors/${connector.type}/sync`, {
+ method: "POST",
+ headers: {
+ "Content-Type": "application/json",
+ },
+ body: JSON.stringify(syncBody),
+ });
- const result = await response.json();
+ const result = await response.json();
- if (response.status === 201) {
- const taskIds = result.task_ids;
- if (taskIds && taskIds.length > 0) {
- const taskId = taskIds[0]; // Use the first task ID
- addTask(taskId);
- setCurrentSyncTaskId(taskId);
- }
- } else {
- console.error("Sync failed:", result.error);
- }
- } catch (error) {
- console.error("Sync error:", error);
- setIsIngesting(false);
- }
- };
+ if (response.status === 201) {
+ const taskIds = result.task_ids;
+ if (taskIds && taskIds.length > 0) {
+ const taskId = taskIds[0]; // Use the first task ID
+ addTask(taskId);
+ setCurrentSyncTaskId(taskId);
+ }
+ } else {
+ console.error("Sync failed:", result.error);
+ }
+ } catch (error) {
+ console.error("Sync error:", error);
+ setIsIngesting(false);
+ }
+ };
- const getProviderDisplayName = () => {
- const nameMap: { [key: string]: string } = {
- google_drive: "Google Drive",
- onedrive: "OneDrive",
- sharepoint: "SharePoint",
- };
- return nameMap[provider] || provider;
- };
+ const getProviderDisplayName = () => {
+ const nameMap: { [key: string]: string } = {
+ google_drive: "Google Drive",
+ onedrive: "OneDrive",
+ sharepoint: "SharePoint",
+ };
+ return nameMap[provider] || provider;
+ };
- if (isLoading) {
- return (
-
-
-
-
-
Loading {getProviderDisplayName()} connector...
-
-
-
- );
- }
+ if (isLoading) {
+ return (
+
+
+
+
+
Loading {getProviderDisplayName()} connector...
+
+
+
+ );
+ }
- if (error || !connector) {
- return (
-
-
-
router.back()}
- className="mb-4"
- >
-
- Back
-
-
+ if (error || !connector) {
+ return (
+
+
+
router.back()}
+ className="mb-4"
+ >
+
+ Back
+
+
-
-
-
-
- Provider Not Available
-
-
{error}
-
router.push("/settings")}>
- Configure Connectors
-
-
-
-
- );
- }
+
+
+
+
+ Provider Not Available
+
+
{error}
+
router.push("/settings")}>
+ Configure Connectors
+
+
+
+
+ );
+ }
- if (connector.status !== "connected") {
- return (
-
-
-
router.back()}
- className="mb-4"
- >
-
- Back
-
-
+ if (connector.status !== "connected") {
+ return (
+
+
+
router.back()}
+ className="mb-4"
+ >
+
+ Back
+
+
-
-
-
-
- {connector.name} Not Connected
-
-
- You need to connect your {connector.name} account before you can
- select files.
-
-
router.push("/settings")}>
- Connect {connector.name}
-
-
-
-
- );
- }
+
+
+
+
+ {connector.name} Not Connected
+
+
+ You need to connect your {connector.name} account before you can
+ select files.
+
+
router.push("/settings")}>
+ Connect {connector.name}
+
+
+
+
+ );
+ }
- if (!connector.hasAccessToken) {
- return (
-
-
-
router.back()}
- className="mb-4"
- >
-
- Back
-
-
+ if (!connector.hasAccessToken) {
+ return (
+
+
+
router.back()}
+ className="mb-4"
+ >
+
+ Back
+
+
-
-
-
-
- Access Token Required
-
-
- {connector.accessTokenError ||
- `Unable to get access token for ${connector.name}. Try reconnecting your account.`}
-
-
router.push("/settings")}>
- Reconnect {connector.name}
-
-
-
-
- );
- }
+
+
+
+
+ Access Token Required
+
+
+ {connector.accessTokenError ||
+ `Unable to get access token for ${connector.name}. Try reconnecting your account.`}
+
+
router.push("/settings")}>
+ Reconnect {connector.name}
+
+
+
+
+ );
+ }
- return (
-
-
-
router.back()}>
-
-
-
- Add from {getProviderDisplayName()}
-
-
+ return (
+
+
+
router.back()} size="icon">
+
+
+
+ Add from {getProviderDisplayName()}
+
+
-
-
-
+
+
+
-
-
- router.back()}
- >
- Back
-
- handleSync(connector)}
- disabled={selectedFiles.length === 0 || isIngesting}
- >
- {isIngesting ? (
- <>Ingesting {selectedFiles.length} Files...>
- ) : (
- <>Start ingest>
- )}
-
-
-
-
- );
+
+
+ router.back()}
+ >
+ Back
+
+ handleSync(connector)}
+ disabled={selectedFiles.length === 0 || isIngesting}
+ >
+ {isIngesting ? (
+ <>
+ Ingesting {selectedFiles.length} file
+ {selectedFiles.length > 1 ? "s" : ""}...
+ >
+ ) : (
+ <>Ingest files>
+ )}
+
+
+
+
+ );
}
diff --git a/frontend/src/components/cloud-picker/picker-header.tsx b/frontend/src/components/cloud-picker/picker-header.tsx
index 05dcaebd..e0d9cfa4 100644
--- a/frontend/src/components/cloud-picker/picker-header.tsx
+++ b/frontend/src/components/cloud-picker/picker-header.tsx
@@ -51,19 +51,13 @@ export const PickerHeader = ({
Select files from {getProviderName(provider)} to ingest.
- {isPickerOpen ? "Opening Picker..." : "Add Files"}
+ {isPickerOpen ? "Opening picker..." : "Add files"}
-
- csv, json, pdf,{" "}
-
+16 more {" "}
-
150 MB max
-
);
From ae02fc00e8b90c95e3dbcdb0dd0a4d205aa26f6d Mon Sep 17 00:00:00 2001
From: Brent O'Neill
Date: Fri, 3 Oct 2025 10:48:42 -0600
Subject: [PATCH 16/39] updated numbers to match env.example
---
frontend/src/app/settings/page.tsx | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/frontend/src/app/settings/page.tsx b/frontend/src/app/settings/page.tsx
index 71c0c8e4..07aeb5f8 100644
--- a/frontend/src/app/settings/page.tsx
+++ b/frontend/src/app/settings/page.tsx
@@ -644,19 +644,19 @@ function KnowledgeSourcesPage() {
- 18
+ 27
# Google OAuth
- 19
+ 28
# Create credentials here:
- 20
+ 29
# https://console.cloud.google.com/apis/credentials
@@ -664,11 +664,11 @@ function KnowledgeSourcesPage() {
- 21
+ 30
GOOGLE_OAUTH_CLIENT_ID=
- 22
+ 31
GOOGLE_OAUTH_CLIENT_SECRET=
From 088ed2697b5eadf5802cf8b39e2e8c71c93c03e1 Mon Sep 17 00:00:00 2001
From: Brent O'Neill
Date: Fri, 3 Oct 2025 12:00:13 -0600
Subject: [PATCH 17/39] updates from sync
---
frontend/src/app/knowledge/chunks/page.tsx | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx
index bbb7f019..85a4e3f2 100644
--- a/frontend/src/app/knowledge/chunks/page.tsx
+++ b/frontend/src/app/knowledge/chunks/page.tsx
@@ -161,7 +161,7 @@ function ChunksPageContent() {
-
+
{selectedFilter?.name && (
setQueryInputText(e.target.value)}
value={queryInputText}
/>
-
*/}
@@ -269,6 +269,10 @@ function ChunksPageContent() {
+
+ {chunk.score.toFixed(2)} score
+
+
{/* TODO: Update to use active toggle */}
{/*
Date: Fri, 3 Oct 2025 12:12:47 -0600
Subject: [PATCH 18/39] remove checkboxes
---
frontend/src/app/knowledge/chunks/page.tsx | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx
index 85a4e3f2..cde2c3e8 100644
--- a/frontend/src/app/knowledge/chunks/page.tsx
+++ b/frontend/src/app/knowledge/chunks/page.tsx
@@ -240,14 +240,14 @@ function ChunksPageContent() {
>
-
+ {/*
handleChunkCardCheckboxChange(index)
}
/>
-
+
*/}
Chunk {chunk.index}
@@ -282,7 +282,7 @@ function ChunksPageContent() {
Active
*/}
-
+
{chunk.text}
From 589839fedc1199e6060e0b57e9bd9ee08d8081b1 Mon Sep 17 00:00:00 2001
From: Lucas Oliveira
Date: Fri, 3 Oct 2025 15:34:54 -0300
Subject: [PATCH 19/39] implement duplicate handling on backend and frontend
---
.../components/duplicate-handling-dialog.tsx | 71 ++++++
frontend/components/knowledge-dropdown.tsx | 233 ++++++++++++------
frontend/src/contexts/task-context.tsx | 3 +-
src/api/documents.py | 92 +++++--
src/api/router.py | 13 +-
src/main.py | 11 +
src/models/processors.py | 139 +++++++++--
src/models/tasks.py | 3 +-
src/services/task_service.py | 23 +-
9 files changed, 470 insertions(+), 118 deletions(-)
create mode 100644 frontend/components/duplicate-handling-dialog.tsx
diff --git a/frontend/components/duplicate-handling-dialog.tsx b/frontend/components/duplicate-handling-dialog.tsx
new file mode 100644
index 00000000..2f92ea50
--- /dev/null
+++ b/frontend/components/duplicate-handling-dialog.tsx
@@ -0,0 +1,71 @@
+"use client";
+
+import { RotateCcw } from "lucide-react";
+import type React from "react";
+import { Button } from "./ui/button";
+import {
+ Dialog,
+ DialogContent,
+ DialogDescription,
+ DialogFooter,
+ DialogHeader,
+ DialogTitle,
+} from "./ui/dialog";
+
+interface DuplicateHandlingDialogProps {
+ open: boolean;
+ onOpenChange: (open: boolean) => void;
+ filename: string;
+ onOverwrite: () => void | Promise;
+ isLoading?: boolean;
+}
+
+export const DuplicateHandlingDialog: React.FC<
+ DuplicateHandlingDialogProps
+> = ({ open, onOpenChange, filename, onOverwrite, isLoading = false }) => {
+ const handleOverwrite = async () => {
+ try {
+ await onOverwrite();
+ onOpenChange(false);
+ } catch (error) {
+ // Error handling is done by the parent component
+ }
+ };
+
+ return (
+
+
+
+ Overwrite document
+
+ Overwriting will replace the existing document with another version.
+ This can't be undone.
+
+
+
+
+ onOpenChange(false)}
+ disabled={isLoading}
+ size="sm"
+ >
+ Cancel
+
+
+
+ Overwrite
+
+
+
+
+ );
+};
diff --git a/frontend/components/knowledge-dropdown.tsx b/frontend/components/knowledge-dropdown.tsx
index 9b71ee81..0b106360 100644
--- a/frontend/components/knowledge-dropdown.tsx
+++ b/frontend/components/knowledge-dropdown.tsx
@@ -13,6 +13,7 @@ import { useRouter } from "next/navigation";
import { useEffect, useRef, useState } from "react";
import { toast } from "sonner";
import { useGetTasksQuery } from "@/app/api/queries/useGetTasksQuery";
+import { DuplicateHandlingDialog } from "@/components/duplicate-handling-dialog";
import { Button } from "@/components/ui/button";
import {
Dialog,
@@ -41,6 +42,7 @@ export function KnowledgeDropdown({
const [isOpen, setIsOpen] = useState(false);
const [showFolderDialog, setShowFolderDialog] = useState(false);
const [showS3Dialog, setShowS3Dialog] = useState(false);
+ const [showDuplicateDialog, setShowDuplicateDialog] = useState(false);
const [awsEnabled, setAwsEnabled] = useState(false);
const [folderPath, setFolderPath] = useState("/app/documents/");
const [bucketUrl, setBucketUrl] = useState("s3://");
@@ -48,6 +50,8 @@ export function KnowledgeDropdown({
const [s3Loading, setS3Loading] = useState(false);
const [fileUploading, setFileUploading] = useState(false);
const [isNavigatingToCloud, setIsNavigatingToCloud] = useState(false);
+ const [pendingFile, setPendingFile] = useState(null);
+ const [duplicateFilename, setDuplicateFilename] = useState("");
const [cloudConnectors, setCloudConnectors] = useState<{
[key: string]: {
name: string;
@@ -168,101 +172,52 @@ export function KnowledgeDropdown({
const handleFileChange = async (e: React.ChangeEvent) => {
const files = e.target.files;
if (files && files.length > 0) {
- // Close dropdown and disable button immediately after file selection
- setIsOpen(false);
- setFileUploading(true);
+ const file = files[0];
- // Trigger the same file upload event as the chat page
- window.dispatchEvent(
- new CustomEvent("fileUploadStart", {
- detail: { filename: files[0].name },
- }),
- );
+ // Close dropdown immediately after file selection
+ setIsOpen(false);
try {
- const formData = new FormData();
- formData.append("file", files[0]);
+ // Check if filename already exists (using ORIGINAL filename)
+ console.log("[Duplicate Check] Checking file:", file.name);
+ const checkResponse = await fetch(
+ `/api/documents/check-filename?filename=${encodeURIComponent(file.name)}`,
+ );
- // Use router upload and ingest endpoint (automatically routes based on configuration)
- const uploadIngestRes = await fetch("/api/router/upload_ingest", {
- method: "POST",
- body: formData,
- });
+ console.log("[Duplicate Check] Response status:", checkResponse.status);
- const uploadIngestJson = await uploadIngestRes.json();
-
- if (!uploadIngestRes.ok) {
+ if (!checkResponse.ok) {
+ const errorText = await checkResponse.text();
+ console.error("[Duplicate Check] Error response:", errorText);
throw new Error(
- uploadIngestJson?.error || "Upload and ingest failed",
+ `Failed to check duplicates: ${checkResponse.statusText}`,
);
}
- // Extract results from the response - handle both unified and simple formats
- const fileId = uploadIngestJson?.upload?.id || uploadIngestJson?.id || uploadIngestJson?.task_id;
- const filePath =
- uploadIngestJson?.upload?.path ||
- uploadIngestJson?.path ||
- "uploaded";
- const runJson = uploadIngestJson?.ingestion;
- const deleteResult = uploadIngestJson?.deletion;
- console.log("c", uploadIngestJson )
- if (!fileId) {
- throw new Error("Upload successful but no file id returned");
- }
- // Check if ingestion actually succeeded
- if (
- runJson &&
- runJson.status !== "COMPLETED" &&
- runJson.status !== "SUCCESS"
- ) {
- const errorMsg = runJson.error || "Ingestion pipeline failed";
- throw new Error(
- `Ingestion failed: ${errorMsg}. Try setting DISABLE_INGEST_WITH_LANGFLOW=true if you're experiencing Langflow component issues.`,
- );
- }
- // Log deletion status if provided
- if (deleteResult) {
- if (deleteResult.status === "deleted") {
- console.log(
- "File successfully cleaned up from Langflow:",
- deleteResult.file_id,
- );
- } else if (deleteResult.status === "delete_failed") {
- console.warn(
- "Failed to cleanup file from Langflow:",
- deleteResult.error,
- );
+ const checkData = await checkResponse.json();
+ console.log("[Duplicate Check] Result:", checkData);
+
+ if (checkData.exists) {
+ // Show duplicate handling dialog
+ console.log("[Duplicate Check] Duplicate detected, showing dialog");
+ setPendingFile(file);
+ setDuplicateFilename(file.name);
+ setShowDuplicateDialog(true);
+ // Reset file input
+ if (fileInputRef.current) {
+ fileInputRef.current.value = "";
}
+ return;
}
- // Notify UI
- window.dispatchEvent(
- new CustomEvent("fileUploaded", {
- detail: {
- file: files[0],
- result: {
- file_id: fileId,
- file_path: filePath,
- run: runJson,
- deletion: deleteResult,
- unified: true,
- },
- },
- }),
- );
- refetchTasks();
+ // No duplicate, proceed with upload
+ console.log("[Duplicate Check] No duplicate, proceeding with upload");
+ await uploadFile(file, false);
} catch (error) {
- window.dispatchEvent(
- new CustomEvent("fileUploadError", {
- detail: {
- filename: files[0].name,
- error: error instanceof Error ? error.message : "Upload failed",
- },
- }),
- );
- } finally {
- window.dispatchEvent(new CustomEvent("fileUploadComplete"));
- setFileUploading(false);
+ console.error("[Duplicate Check] Exception:", error);
+ toast.error("Failed to check for duplicates", {
+ description: error instanceof Error ? error.message : "Unknown error",
+ });
}
}
@@ -272,6 +227,111 @@ export function KnowledgeDropdown({
}
};
+ const uploadFile = async (file: File, replace: boolean) => {
+ setFileUploading(true);
+
+ // Trigger the same file upload event as the chat page
+ window.dispatchEvent(
+ new CustomEvent("fileUploadStart", {
+ detail: { filename: file.name },
+ }),
+ );
+
+ try {
+ const formData = new FormData();
+ formData.append("file", file);
+ formData.append("replace_duplicates", replace.toString());
+
+ // Use router upload and ingest endpoint (automatically routes based on configuration)
+ const uploadIngestRes = await fetch("/api/router/upload_ingest", {
+ method: "POST",
+ body: formData,
+ });
+
+ const uploadIngestJson = await uploadIngestRes.json();
+
+ if (!uploadIngestRes.ok) {
+ throw new Error(uploadIngestJson?.error || "Upload and ingest failed");
+ }
+
+ // Extract results from the response - handle both unified and simple formats
+ const fileId =
+ uploadIngestJson?.upload?.id ||
+ uploadIngestJson?.id ||
+ uploadIngestJson?.task_id;
+ const filePath =
+ uploadIngestJson?.upload?.path || uploadIngestJson?.path || "uploaded";
+ const runJson = uploadIngestJson?.ingestion;
+ const deleteResult = uploadIngestJson?.deletion;
+ console.log("c", uploadIngestJson);
+ if (!fileId) {
+ throw new Error("Upload successful but no file id returned");
+ }
+ // Check if ingestion actually succeeded
+ if (
+ runJson &&
+ runJson.status !== "COMPLETED" &&
+ runJson.status !== "SUCCESS"
+ ) {
+ const errorMsg = runJson.error || "Ingestion pipeline failed";
+ throw new Error(
+ `Ingestion failed: ${errorMsg}. Try setting DISABLE_INGEST_WITH_LANGFLOW=true if you're experiencing Langflow component issues.`,
+ );
+ }
+ // Log deletion status if provided
+ if (deleteResult) {
+ if (deleteResult.status === "deleted") {
+ console.log(
+ "File successfully cleaned up from Langflow:",
+ deleteResult.file_id,
+ );
+ } else if (deleteResult.status === "delete_failed") {
+ console.warn(
+ "Failed to cleanup file from Langflow:",
+ deleteResult.error,
+ );
+ }
+ }
+ // Notify UI
+ window.dispatchEvent(
+ new CustomEvent("fileUploaded", {
+ detail: {
+ file: file,
+ result: {
+ file_id: fileId,
+ file_path: filePath,
+ run: runJson,
+ deletion: deleteResult,
+ unified: true,
+ },
+ },
+ }),
+ );
+
+ refetchTasks();
+ } catch (error) {
+ window.dispatchEvent(
+ new CustomEvent("fileUploadError", {
+ detail: {
+ filename: file.name,
+ error: error instanceof Error ? error.message : "Upload failed",
+ },
+ }),
+ );
+ } finally {
+ window.dispatchEvent(new CustomEvent("fileUploadComplete"));
+ setFileUploading(false);
+ }
+ };
+
+ const handleOverwriteFile = async () => {
+ if (pendingFile) {
+ await uploadFile(pendingFile, true);
+ setPendingFile(null);
+ setDuplicateFilename("");
+ }
+ };
+
const handleFolderUpload = async () => {
if (!folderPath.trim()) return;
@@ -611,6 +671,15 @@ export function KnowledgeDropdown({
+
+ {/* Duplicate Handling Dialog */}
+
>
);
}
diff --git a/frontend/src/contexts/task-context.tsx b/frontend/src/contexts/task-context.tsx
index 8928ae8f..26e8ca00 100644
--- a/frontend/src/contexts/task-context.tsx
+++ b/frontend/src/contexts/task-context.tsx
@@ -135,7 +135,8 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
taskFileEntries.forEach(([filePath, fileInfo]) => {
if (typeof fileInfo === "object" && fileInfo) {
- const fileName = filePath.split("/").pop() || filePath;
+ // Use the filename from backend if available, otherwise extract from path
+ const fileName = (fileInfo as any).filename || filePath.split("/").pop() || filePath;
const fileStatus = fileInfo.status as string;
// Map backend file status to our TaskFile status
diff --git a/src/api/documents.py b/src/api/documents.py
index 82afb349..a367c9fe 100644
--- a/src/api/documents.py
+++ b/src/api/documents.py
@@ -6,14 +6,13 @@ from config.settings import INDEX_NAME
logger = get_logger(__name__)
-async def delete_documents_by_filename(request: Request, document_service, session_manager):
- """Delete all documents with a specific filename"""
- data = await request.json()
- filename = data.get("filename")
-
+async def check_filename_exists(request: Request, document_service, session_manager):
+ """Check if a document with a specific filename already exists"""
+ filename = request.query_params.get("filename")
+
if not filename:
- return JSONResponse({"error": "filename is required"}, status_code=400)
-
+ return JSONResponse({"error": "filename parameter is required"}, status_code=400)
+
user = request.state.user
jwt_token = session_manager.get_effective_jwt_token(user.user_id, request.state.jwt_token)
@@ -22,34 +21,99 @@ async def delete_documents_by_filename(request: Request, document_service, sessi
opensearch_client = session_manager.get_user_opensearch_client(
user.user_id, jwt_token
)
-
+
+ # Search for any document with this exact filename
+ # Try both .keyword (exact match) and regular field (analyzed match)
+ search_body = {
+ "query": {
+ "bool": {
+ "should": [
+ {"term": {"filename.keyword": filename}},
+ {"term": {"filename": filename}}
+ ],
+ "minimum_should_match": 1
+ }
+ },
+ "size": 1,
+ "_source": ["filename"]
+ }
+
+ logger.debug(f"Checking filename existence: {filename}")
+
+ response = await opensearch_client.search(
+ index=INDEX_NAME,
+ body=search_body
+ )
+
+ # Check if any hits were found
+ hits = response.get("hits", {}).get("hits", [])
+ exists = len(hits) > 0
+
+ logger.debug(f"Filename check result - exists: {exists}, hits: {len(hits)}")
+
+ return JSONResponse({
+ "exists": exists,
+ "filename": filename
+ }, status_code=200)
+
+ except Exception as e:
+ logger.error("Error checking filename existence", filename=filename, error=str(e))
+ error_str = str(e)
+ if "AuthenticationException" in error_str:
+ return JSONResponse({"error": "Access denied: insufficient permissions"}, status_code=403)
+ else:
+ return JSONResponse({"error": str(e)}, status_code=500)
+
+
+async def delete_documents_by_filename(request: Request, document_service, session_manager):
+ """Delete all documents with a specific filename"""
+ data = await request.json()
+ filename = data.get("filename")
+
+ if not filename:
+ return JSONResponse({"error": "filename is required"}, status_code=400)
+
+ user = request.state.user
+ jwt_token = session_manager.get_effective_jwt_token(user.user_id, request.state.jwt_token)
+
+ try:
+ # Get user's OpenSearch client
+ opensearch_client = session_manager.get_user_opensearch_client(
+ user.user_id, jwt_token
+ )
+
# Delete by query to remove all chunks of this document
+ # Use both .keyword and regular field to ensure we catch all variations
delete_query = {
"query": {
"bool": {
- "must": [
+ "should": [
+ {"term": {"filename.keyword": filename}},
{"term": {"filename": filename}}
- ]
+ ],
+ "minimum_should_match": 1
}
}
}
-
+
+ logger.debug(f"Deleting documents with filename: {filename}")
+
result = await opensearch_client.delete_by_query(
index=INDEX_NAME,
body=delete_query,
conflicts="proceed"
)
-
+
deleted_count = result.get("deleted", 0)
logger.info(f"Deleted {deleted_count} chunks for filename {filename}", user_id=user.user_id)
-
+
return JSONResponse({
"success": True,
"deleted_chunks": deleted_count,
"filename": filename,
"message": f"All documents with filename '{filename}' deleted successfully"
}, status_code=200)
-
+
except Exception as e:
logger.error("Error deleting documents by filename", filename=filename, error=str(e))
error_str = str(e)
diff --git a/src/api/router.py b/src/api/router.py
index 23ce5bdf..327757be 100644
--- a/src/api/router.py
+++ b/src/api/router.py
@@ -77,6 +77,7 @@ async def langflow_upload_ingest_task(
settings_json = form.get("settings")
tweaks_json = form.get("tweaks")
delete_after_ingest = form.get("delete_after_ingest", "true").lower() == "true"
+ replace_duplicates = form.get("replace_duplicates", "false").lower() == "true"
# Parse JSON fields if provided
settings = None
@@ -112,7 +113,8 @@ async def langflow_upload_ingest_task(
import tempfile
import os
temp_file_paths = []
-
+ original_filenames = []
+
try:
# Create temp directory reference once
temp_dir = tempfile.gettempdir()
@@ -121,8 +123,11 @@ async def langflow_upload_ingest_task(
# Read file content
content = await upload_file.read()
- # Create temporary file with the actual filename (not a temp prefix)
- # Store in temp directory but use the real filename
+ # Store ORIGINAL filename (not transformed)
+ original_filenames.append(upload_file.filename)
+
+ # Create temporary file with TRANSFORMED filename for filesystem safety
+ # Transform: spaces and / to underscore
safe_filename = upload_file.filename.replace(" ", "_").replace("/", "_")
temp_path = os.path.join(temp_dir, safe_filename)
@@ -153,6 +158,7 @@ async def langflow_upload_ingest_task(
task_id = await task_service.create_langflow_upload_task(
user_id=user_id,
file_paths=temp_file_paths,
+ original_filenames=original_filenames,
langflow_file_service=langflow_file_service,
session_manager=session_manager,
jwt_token=jwt_token,
@@ -162,6 +168,7 @@ async def langflow_upload_ingest_task(
tweaks=tweaks,
settings=settings,
delete_after_ingest=delete_after_ingest,
+ replace_duplicates=replace_duplicates,
)
logger.debug("Langflow upload task created successfully", task_id=task_id)
diff --git a/src/main.py b/src/main.py
index 230ded79..bf6da342 100644
--- a/src/main.py
+++ b/src/main.py
@@ -953,6 +953,17 @@ async def create_app():
methods=["POST", "GET"],
),
# Document endpoints
+ Route(
+ "/documents/check-filename",
+ require_auth(services["session_manager"])(
+ partial(
+ documents.check_filename_exists,
+ document_service=services["document_service"],
+ session_manager=services["session_manager"],
+ )
+ ),
+ methods=["GET"],
+ ),
Route(
"/documents/delete-by-filename",
require_auth(services["session_manager"])(
diff --git a/src/models/processors.py b/src/models/processors.py
index a1d72777..6d7b74b4 100644
--- a/src/models/processors.py
+++ b/src/models/processors.py
@@ -55,6 +55,108 @@ class TaskProcessor:
await asyncio.sleep(retry_delay)
retry_delay *= 2 # Exponential backoff
+ async def check_filename_exists(
+ self,
+ filename: str,
+ opensearch_client,
+ ) -> bool:
+ """
+ Check if a document with the given filename already exists in OpenSearch.
+ Returns True if any chunks with this filename exist.
+ """
+ from config.settings import INDEX_NAME
+ import asyncio
+
+ max_retries = 3
+ retry_delay = 1.0
+
+ for attempt in range(max_retries):
+ try:
+ # Search for any document with this exact filename
+ search_body = {
+ "query": {
+ "term": {
+ "filename.keyword": filename
+ }
+ },
+ "size": 1,
+ "_source": False
+ }
+
+ response = await opensearch_client.search(
+ index=INDEX_NAME,
+ body=search_body
+ )
+
+ # Check if any hits were found
+ hits = response.get("hits", {}).get("hits", [])
+ return len(hits) > 0
+
+ except (asyncio.TimeoutError, Exception) as e:
+ if attempt == max_retries - 1:
+ logger.error(
+ "OpenSearch filename check failed after retries",
+ filename=filename,
+ error=str(e),
+ attempt=attempt + 1
+ )
+ # On final failure, assume document doesn't exist (safer to reprocess than skip)
+ logger.warning(
+ "Assuming filename doesn't exist due to connection issues",
+ filename=filename
+ )
+ return False
+ else:
+ logger.warning(
+ "OpenSearch filename check failed, retrying",
+ filename=filename,
+ error=str(e),
+ attempt=attempt + 1,
+ retry_in=retry_delay
+ )
+ await asyncio.sleep(retry_delay)
+ retry_delay *= 2 # Exponential backoff
+
+ async def delete_document_by_filename(
+ self,
+ filename: str,
+ opensearch_client,
+ ) -> None:
+ """
+ Delete all chunks of a document with the given filename from OpenSearch.
+ """
+ from config.settings import INDEX_NAME
+
+ try:
+ # Delete all documents with this filename
+ delete_body = {
+ "query": {
+ "term": {
+ "filename.keyword": filename
+ }
+ }
+ }
+
+ response = await opensearch_client.delete_by_query(
+ index=INDEX_NAME,
+ body=delete_body
+ )
+
+ deleted_count = response.get("deleted", 0)
+ logger.info(
+ "Deleted existing document chunks",
+ filename=filename,
+ deleted_count=deleted_count
+ )
+
+ except Exception as e:
+ logger.error(
+ "Failed to delete existing document",
+ filename=filename,
+ error=str(e)
+ )
+ raise
+
async def process_document_standard(
self,
file_path: str,
@@ -527,6 +629,7 @@ class LangflowFileProcessor(TaskProcessor):
tweaks: dict = None,
settings: dict = None,
delete_after_ingest: bool = True,
+ replace_duplicates: bool = False,
):
super().__init__()
self.langflow_file_service = langflow_file_service
@@ -539,6 +642,7 @@ class LangflowFileProcessor(TaskProcessor):
self.tweaks = tweaks or {}
self.settings = settings
self.delete_after_ingest = delete_after_ingest
+ self.replace_duplicates = replace_duplicates
async def process_item(
self, upload_task: UploadTask, item: str, file_task: FileTask
@@ -554,33 +658,40 @@ class LangflowFileProcessor(TaskProcessor):
file_task.updated_at = time.time()
try:
- # Compute hash and check if already exists
- from utils.hash_utils import hash_id
- file_hash = hash_id(item)
+ # Use the ORIGINAL filename stored in file_task (not the transformed temp path)
+ # This ensures we check/store the original filename with spaces, etc.
+ original_filename = file_task.filename or os.path.basename(item)
- # Check if document already exists
+ # Check if document with same filename already exists
opensearch_client = self.session_manager.get_user_opensearch_client(
self.owner_user_id, self.jwt_token
)
- if await self.check_document_exists(file_hash, opensearch_client):
- file_task.status = TaskStatus.COMPLETED
- file_task.result = {"status": "unchanged", "id": file_hash}
+
+ filename_exists = await self.check_filename_exists(original_filename, opensearch_client)
+
+ if filename_exists and not self.replace_duplicates:
+ # Duplicate exists and user hasn't confirmed replacement
+ file_task.status = TaskStatus.FAILED
+ file_task.error = f"File with name '{original_filename}' already exists"
file_task.updated_at = time.time()
- upload_task.successful_files += 1
+ upload_task.failed_files += 1
return
+ elif filename_exists and self.replace_duplicates:
+ # Delete existing document before uploading new one
+ logger.info(f"Replacing existing document: {original_filename}")
+ await self.delete_document_by_filename(original_filename, opensearch_client)
# Read file content for processing
with open(item, 'rb') as f:
content = f.read()
- # Create file tuple for upload
- # The temp file now has the actual filename, no need to extract it
- filename = os.path.basename(item)
- content_type, _ = mimetypes.guess_type(filename)
+ # Create file tuple for upload using ORIGINAL filename
+ # This ensures the document is indexed with the original name
+ content_type, _ = mimetypes.guess_type(original_filename)
if not content_type:
content_type = 'application/octet-stream'
-
- file_tuple = (filename, content, content_type)
+
+ file_tuple = (original_filename, content, content_type)
# Get JWT token using same logic as DocumentFileProcessor
# This will handle anonymous JWT creation if needed
diff --git a/src/models/tasks.py b/src/models/tasks.py
index 236927ab..253cabb5 100644
--- a/src/models/tasks.py
+++ b/src/models/tasks.py
@@ -20,7 +20,8 @@ class FileTask:
retry_count: int = 0
created_at: float = field(default_factory=time.time)
updated_at: float = field(default_factory=time.time)
-
+ filename: Optional[str] = None # Original filename for display
+
@property
def duration_seconds(self) -> float:
"""Duration in seconds from creation to last update"""
diff --git a/src/services/task_service.py b/src/services/task_service.py
index be5312a0..eb5825c0 100644
--- a/src/services/task_service.py
+++ b/src/services/task_service.py
@@ -59,6 +59,7 @@ class TaskService:
file_paths: list,
langflow_file_service,
session_manager,
+ original_filenames: list = None,
jwt_token: str = None,
owner_name: str = None,
owner_email: str = None,
@@ -66,6 +67,7 @@ class TaskService:
tweaks: dict = None,
settings: dict = None,
delete_after_ingest: bool = True,
+ replace_duplicates: bool = False,
) -> str:
"""Create a new upload task for Langflow file processing with upload and ingest"""
# Use LangflowFileProcessor with user context
@@ -82,18 +84,31 @@ class TaskService:
tweaks=tweaks,
settings=settings,
delete_after_ingest=delete_after_ingest,
+ replace_duplicates=replace_duplicates,
)
- return await self.create_custom_task(user_id, file_paths, processor)
+ return await self.create_custom_task(user_id, file_paths, processor, original_filenames)
- async def create_custom_task(self, user_id: str, items: list, processor) -> str:
+ async def create_custom_task(self, user_id: str, items: list, processor, original_filenames: list = None) -> str:
"""Create a new task with custom processor for any type of items"""
+ import os
# Store anonymous tasks under a stable key so they can be retrieved later
store_user_id = user_id or AnonymousUser().user_id
task_id = str(uuid.uuid4())
+
+ # Create file tasks with original filenames if provided
+ file_tasks = {}
+ for i, item in enumerate(items):
+ if original_filenames and i < len(original_filenames):
+ filename = original_filenames[i]
+ else:
+ filename = os.path.basename(str(item))
+
+ file_tasks[str(item)] = FileTask(file_path=str(item), filename=filename)
+
upload_task = UploadTask(
task_id=task_id,
total_files=len(items),
- file_tasks={str(item): FileTask(file_path=str(item)) for item in items},
+ file_tasks=file_tasks,
)
# Attach the custom processor to the task
@@ -268,6 +283,7 @@ class TaskService:
"created_at": file_task.created_at,
"updated_at": file_task.updated_at,
"duration_seconds": file_task.duration_seconds,
+ "filename": file_task.filename,
}
# Count running and pending files
@@ -322,6 +338,7 @@ class TaskService:
"created_at": file_task.created_at,
"updated_at": file_task.updated_at,
"duration_seconds": file_task.duration_seconds,
+ "filename": file_task.filename,
}
if file_task.status.value == "running":
From fb29f72598aad139e4c0623d861094a18e7b8fba Mon Sep 17 00:00:00 2001
From: Lucas Oliveira
Date: Fri, 3 Oct 2025 15:39:51 -0300
Subject: [PATCH 20/39] update task toast
---
frontend/src/contexts/task-context.tsx | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/frontend/src/contexts/task-context.tsx b/frontend/src/contexts/task-context.tsx
index 26e8ca00..d25130f7 100644
--- a/frontend/src/contexts/task-context.tsx
+++ b/frontend/src/contexts/task-context.tsx
@@ -205,9 +205,19 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
previousTask && previousTask.status !== "completed" &&
currentTask.status === "completed"
) {
- // Task just completed - show success toast
- toast.success("Task completed successfully", {
- description: `Task ${currentTask.task_id} has finished processing.`,
+ // Task just completed - show success toast with file counts
+ const successfulFiles = currentTask.successful_files || 0;
+ const failedFiles = currentTask.failed_files || 0;
+
+ let description = "";
+ if (failedFiles > 0) {
+ description = `${successfulFiles} file${successfulFiles !== 1 ? 's' : ''} uploaded successfully, ${failedFiles} file${failedFiles !== 1 ? 's' : ''} failed`;
+ } else {
+ description = `${successfulFiles} file${successfulFiles !== 1 ? 's' : ''} uploaded successfully`;
+ }
+
+ toast.success("Task completed", {
+ description,
action: {
label: "View",
onClick: () => console.log("View task", currentTask.task_id),
From 9336aa287e87032bd7385e3824e548df5de26f0e Mon Sep 17 00:00:00 2001
From: Lucas Oliveira
Date: Fri, 3 Oct 2025 16:12:09 -0300
Subject: [PATCH 21/39] fixed lint
---
frontend/src/app/knowledge/page.tsx | 110 ++++++++++++++++++----------
1 file changed, 71 insertions(+), 39 deletions(-)
diff --git a/frontend/src/app/knowledge/page.tsx b/frontend/src/app/knowledge/page.tsx
index c6d254c4..9e933903 100644
--- a/frontend/src/app/knowledge/page.tsx
+++ b/frontend/src/app/knowledge/page.tsx
@@ -1,10 +1,15 @@
"use client";
-import type { ColDef } from "ag-grid-community";
+import type { ColDef, GetRowIdParams } from "ag-grid-community";
import { AgGridReact, type CustomCellRendererProps } from "ag-grid-react";
import { Building2, Cloud, HardDrive, Search, Trash2, X } from "lucide-react";
import { useRouter } from "next/navigation";
-import { type ChangeEvent, useCallback, useRef, useState } from "react";
+import {
+ type ChangeEvent,
+ useCallback,
+ useRef,
+ useState,
+} from "react";
import { SiGoogledrive } from "react-icons/si";
import { TbBrandOnedrive } from "react-icons/tb";
import { KnowledgeDropdown } from "@/components/knowledge-dropdown";
@@ -54,15 +59,10 @@ function SearchPage() {
const deleteDocumentMutation = useDeleteDocument();
- const { data = [], isFetching } = useGetSearchQuery(
+ const { data: searchData = [], isFetching } = useGetSearchQuery(
parsedFilterData?.query || "*",
parsedFilterData,
);
-
- const handleTableSearch = (e: ChangeEvent) => {
- gridRef.current?.api.setGridOption("quickFilterText", e.target.value);
- };
-
// Convert TaskFiles to File format and merge with backend results
const taskFilesAsFiles: File[] = taskFiles.map((taskFile) => {
return {
@@ -75,60 +75,91 @@ function SearchPage() {
};
});
- const backendFiles = (data as File[]).filter((file) => !taskFilesAsFiles.some((taskFile) => taskFile.filename === file.filename && taskFile.status === "processing"));
+ // Create a map of task files by filename for quick lookup
+ const taskFileMap = new Map(
+ taskFilesAsFiles.map((file) => [file.filename, file]),
+ );
+
+ // Override backend files with task file status if they exist
+ const backendFiles = (searchData as File[])
+ .map((file) => {
+ const taskFile = taskFileMap.get(file.filename);
+ if (taskFile) {
+ // Override backend file with task file data (includes status)
+ return { ...file, ...taskFile };
+ }
+ return file;
+ })
+ .filter((file) => {
+ // Only filter out files that are currently processing AND in taskFiles
+ const taskFile = taskFileMap.get(file.filename);
+ return !taskFile || taskFile.status !== "processing";
+ });
const filteredTaskFiles = taskFilesAsFiles.filter((taskFile) => {
return (
taskFile.status !== "active" &&
!backendFiles.some(
- (backendFile) => backendFile.filename === taskFile.filename,)
+ (backendFile) => backendFile.filename === taskFile.filename,
+ )
);
});
// Combine task files first, then backend files
const fileResults = [...backendFiles, ...filteredTaskFiles];
+ const handleTableSearch = (e: ChangeEvent) => {
+ gridRef.current?.api.setGridOption("quickFilterText", e.target.value);
+ };
+
const gridRef = useRef(null);
const columnDefs = [
{
field: "filename",
headerName: "Source",
- checkboxSelection: (data) => (data?.data?.status || "active") === "active",
+ checkboxSelection: (params: CustomCellRendererProps) =>
+ (params?.data?.status || "active") === "active",
headerCheckboxSelection: true,
initialFlex: 2,
minWidth: 220,
cellRenderer: ({ data, value }: CustomCellRendererProps) => {
+ // Read status directly from data on each render
+ const status = data?.status || "active";
+ const isActive = status === "active";
+ console.log(data?.filename, status, "a");
return (
- {((data?.status || "active") !== "active") &&
-
- }
-
{
- if ((data?.status || "active") !== "active") {
- return;
- }
- router.push(
- `/knowledge/chunks?filename=${encodeURIComponent(
- data?.filename ?? "",
- )}`,
- );
- }}
- >
- {getSourceIcon(data?.connector_type)}
-
- {value}
-
-
+
+
+
{
+ if (!isActive) {
+ return;
+ }
+ router.push(
+ `/knowledge/chunks?filename=${encodeURIComponent(
+ data?.filename ?? "",
+ )}`,
+ );
+ }}
+ >
+ {getSourceIcon(data?.connector_type)}
+
+ {value}
+
+
+
);
},
},
{
field: "size",
headerName: "Size",
- valueFormatter: (params) =>
+ valueFormatter: (params: CustomCellRendererProps) =>
params.value ? `${Math.round(params.value / 1024)} KB` : "-",
},
{
@@ -138,13 +169,13 @@ function SearchPage() {
{
field: "owner",
headerName: "Owner",
- valueFormatter: (params) =>
+ valueFormatter: (params: CustomCellRendererProps) =>
params.data?.owner_name || params.data?.owner_email || "—",
},
{
field: "chunkCount",
headerName: "Chunks",
- valueFormatter: (params) => params.data?.chunkCount?.toString() || "-",
+ valueFormatter: (params: CustomCellRendererProps) => params.data?.chunkCount?.toString() || "-",
},
{
field: "avgScore",
@@ -162,6 +193,7 @@ function SearchPage() {
field: "status",
headerName: "Status",
cellRenderer: ({ data }: CustomCellRendererProps) => {
+ console.log(data?.filename, data?.status, "b");
// Default to 'active' status if no status is provided
const status = data?.status || "active";
return ;
@@ -188,7 +220,7 @@ function SearchPage() {
resizable: false,
sortable: false,
initialFlex: 0,
- }
+ },
];
const defaultColDef: ColDef = {
@@ -323,7 +355,7 @@ function SearchPage() {
[]}
defaultColDef={defaultColDef}
loading={isFetching}
ref={gridRef}
@@ -331,7 +363,7 @@ function SearchPage() {
rowSelection="multiple"
rowMultiSelectWithClick={false}
suppressRowClickSelection={true}
- getRowId={(params) => params.data.filename}
+ getRowId={(params: GetRowIdParams) => params.data?.filename}
domLayout="normal"
onSelectionChanged={onSelectionChanged}
noRowsOverlayComponent={() => (
From ca9bee8222bd615501c90e8e7f01913243c3d08d Mon Sep 17 00:00:00 2001
From: Lucas Oliveira
Date: Fri, 3 Oct 2025 16:12:18 -0300
Subject: [PATCH 22/39] removed file from query when overwriting
---
frontend/components/knowledge-dropdown.tsx | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/frontend/components/knowledge-dropdown.tsx b/frontend/components/knowledge-dropdown.tsx
index 0b106360..7fe84259 100644
--- a/frontend/components/knowledge-dropdown.tsx
+++ b/frontend/components/knowledge-dropdown.tsx
@@ -1,5 +1,6 @@
"use client";
+import { useQueryClient } from "@tanstack/react-query";
import {
ChevronDown,
Cloud,
@@ -26,6 +27,7 @@ import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { useTask } from "@/contexts/task-context";
import { cn } from "@/lib/utils";
+import type { File as SearchFile } from "@/src/app/api/queries/useGetSearchQuery";
interface KnowledgeDropdownProps {
active?: boolean;
@@ -38,6 +40,7 @@ export function KnowledgeDropdown({
}: KnowledgeDropdownProps) {
const { addTask } = useTask();
const { refetch: refetchTasks } = useGetTasksQuery();
+ const queryClient = useQueryClient();
const router = useRouter();
const [isOpen, setIsOpen] = useState(false);
const [showFolderDialog, setShowFolderDialog] = useState(false);
@@ -326,6 +329,15 @@ export function KnowledgeDropdown({
const handleOverwriteFile = async () => {
if (pendingFile) {
+ // Remove the old file from all search query caches before overwriting
+ queryClient.setQueriesData({ queryKey: ["search"] }, (oldData: []) => {
+ if (!oldData) return oldData;
+ // Filter out the file that's being overwritten
+ return oldData.filter(
+ (file: SearchFile) => file.filename !== pendingFile.name,
+ );
+ });
+
await uploadFile(pendingFile, true);
setPendingFile(null);
setDuplicateFilename("");
@@ -676,7 +688,6 @@ export function KnowledgeDropdown({
From 4de9a0d085848abbab76db05b9f06c9d3d81198f Mon Sep 17 00:00:00 2001
From: Lucas Oliveira
Date: Fri, 3 Oct 2025 16:12:24 -0300
Subject: [PATCH 23/39] removed unused prop
---
frontend/components/duplicate-handling-dialog.tsx | 11 +++--------
1 file changed, 3 insertions(+), 8 deletions(-)
diff --git a/frontend/components/duplicate-handling-dialog.tsx b/frontend/components/duplicate-handling-dialog.tsx
index 2f92ea50..d5cb2edf 100644
--- a/frontend/components/duplicate-handling-dialog.tsx
+++ b/frontend/components/duplicate-handling-dialog.tsx
@@ -15,21 +15,16 @@ import {
interface DuplicateHandlingDialogProps {
open: boolean;
onOpenChange: (open: boolean) => void;
- filename: string;
onOverwrite: () => void | Promise;
isLoading?: boolean;
}
export const DuplicateHandlingDialog: React.FC<
DuplicateHandlingDialogProps
-> = ({ open, onOpenChange, filename, onOverwrite, isLoading = false }) => {
+> = ({ open, onOpenChange, onOverwrite, isLoading = false }) => {
const handleOverwrite = async () => {
- try {
- await onOverwrite();
- onOpenChange(false);
- } catch (error) {
- // Error handling is done by the parent component
- }
+ await onOverwrite();
+ onOpenChange(false);
};
return (
From 1fdb251a47a7168f9431e00c8f714a9e088db09a Mon Sep 17 00:00:00 2001
From: Lucas Oliveira
Date: Fri, 3 Oct 2025 16:15:32 -0300
Subject: [PATCH 24/39] UI minor tweaks
---
frontend/components/ui/input.tsx | 2 +-
frontend/src/app/knowledge/chunks/page.tsx | 626 +++++++++++----------
frontend/src/app/knowledge/page.tsx | 3 +-
3 files changed, 316 insertions(+), 315 deletions(-)
diff --git a/frontend/components/ui/input.tsx b/frontend/components/ui/input.tsx
index ffcda454..86e638a1 100644
--- a/frontend/components/ui/input.tsx
+++ b/frontend/components/ui/input.tsx
@@ -44,7 +44,7 @@ const Input = React.forwardRef(
placeholder={placeholder}
className={cn(
"primary-input",
- icon && "pl-9",
+ icon && "!pl-9",
type === "password" && "!pr-8",
icon ? inputClassName : className
)}
diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx
index cb96eddc..7d49a56e 100644
--- a/frontend/src/app/knowledge/chunks/page.tsx
+++ b/frontend/src/app/knowledge/chunks/page.tsx
@@ -1,289 +1,291 @@
"use client";
import { ArrowLeft, Check, Copy, Loader2, Search } from "lucide-react";
-import { Suspense, useCallback, useEffect, useMemo, useState } from "react";
import { useRouter, useSearchParams } from "next/navigation";
+import { Suspense, useCallback, useEffect, useMemo, useState } from "react";
import { ProtectedRoute } from "@/components/protected-route";
import { Button } from "@/components/ui/button";
+import { Checkbox } from "@/components/ui/checkbox";
+import { Input } from "@/components/ui/input";
+import { Label } from "@/components/ui/label";
import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context";
import { useTask } from "@/contexts/task-context";
import {
- type ChunkResult,
- type File,
- useGetSearchQuery,
+ type ChunkResult,
+ type File,
+ useGetSearchQuery,
} from "../../api/queries/useGetSearchQuery";
-import { Label } from "@/components/ui/label";
-import { Checkbox } from "@/components/ui/checkbox";
-import { Input } from "@/components/ui/input";
const getFileTypeLabel = (mimetype: string) => {
- if (mimetype === "application/pdf") return "PDF";
- if (mimetype === "text/plain") return "Text";
- if (mimetype === "application/msword") return "Word Document";
- return "Unknown";
+ if (mimetype === "application/pdf") return "PDF";
+ if (mimetype === "text/plain") return "Text";
+ if (mimetype === "application/msword") return "Word Document";
+ return "Unknown";
};
function ChunksPageContent() {
- const router = useRouter();
- const searchParams = useSearchParams();
- const { isMenuOpen } = useTask();
- const { parsedFilterData, isPanelOpen } = useKnowledgeFilter();
+ const router = useRouter();
+ const searchParams = useSearchParams();
+ const { isMenuOpen } = useTask();
+ const { parsedFilterData, isPanelOpen } = useKnowledgeFilter();
- const filename = searchParams.get("filename");
- const [chunks, setChunks] = useState([]);
- const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState<
- ChunkResult[]
- >([]);
- const [selectedChunks, setSelectedChunks] = useState>(new Set());
- const [activeCopiedChunkIndex, setActiveCopiedChunkIndex] = useState<
- number | null
- >(null);
+ const filename = searchParams.get("filename");
+ const [chunks, setChunks] = useState([]);
+ const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState<
+ ChunkResult[]
+ >([]);
+ const [selectedChunks, setSelectedChunks] = useState>(new Set());
+ const [activeCopiedChunkIndex, setActiveCopiedChunkIndex] = useState<
+ number | null
+ >(null);
- // Calculate average chunk length
- const averageChunkLength = useMemo(
- () =>
- chunks.reduce((acc, chunk) => acc + chunk.text.length, 0) /
- chunks.length || 0,
- [chunks]
- );
+ // Calculate average chunk length
+ const averageChunkLength = useMemo(
+ () =>
+ chunks.reduce((acc, chunk) => acc + chunk.text.length, 0) /
+ chunks.length || 0,
+ [chunks],
+ );
- const [selectAll, setSelectAll] = useState(false);
- const [queryInputText, setQueryInputText] = useState(
- parsedFilterData?.query ?? ""
- );
+ const [selectAll, setSelectAll] = useState(false);
+ const [queryInputText, setQueryInputText] = useState(
+ parsedFilterData?.query ?? "",
+ );
- // Use the same search query as the knowledge page, but we'll filter for the specific file
- const { data = [], isFetching } = useGetSearchQuery("*", parsedFilterData);
+ // Use the same search query as the knowledge page, but we'll filter for the specific file
+ const { data = [], isFetching } = useGetSearchQuery("*", parsedFilterData);
- useEffect(() => {
- if (queryInputText === "") {
- setChunksFilteredByQuery(chunks);
- } else {
- setChunksFilteredByQuery(
- chunks.filter((chunk) =>
- chunk.text.toLowerCase().includes(queryInputText.toLowerCase())
- )
- );
- }
- }, [queryInputText, chunks]);
+ useEffect(() => {
+ if (queryInputText === "") {
+ setChunksFilteredByQuery(chunks);
+ } else {
+ setChunksFilteredByQuery(
+ chunks.filter((chunk) =>
+ chunk.text.toLowerCase().includes(queryInputText.toLowerCase()),
+ ),
+ );
+ }
+ }, [queryInputText, chunks]);
- const handleCopy = useCallback((text: string, index: number) => {
- // Trim whitespace and remove new lines/tabs for cleaner copy
- navigator.clipboard.writeText(text.trim().replace(/[\n\r\t]/gm, ""));
- setActiveCopiedChunkIndex(index);
- setTimeout(() => setActiveCopiedChunkIndex(null), 10 * 1000); // 10 seconds
- }, []);
+ const handleCopy = useCallback((text: string, index: number) => {
+ // Trim whitespace and remove new lines/tabs for cleaner copy
+ navigator.clipboard.writeText(text.trim().replace(/[\n\r\t]/gm, ""));
+ setActiveCopiedChunkIndex(index);
+ setTimeout(() => setActiveCopiedChunkIndex(null), 10 * 1000); // 10 seconds
+ }, []);
- const fileData = (data as File[]).find(
- (file: File) => file.filename === filename
- );
+ const fileData = (data as File[]).find(
+ (file: File) => file.filename === filename,
+ );
- // Extract chunks for the specific file
- useEffect(() => {
- if (!filename || !(data as File[]).length) {
- setChunks([]);
- return;
- }
+ // Extract chunks for the specific file
+ useEffect(() => {
+ if (!filename || !(data as File[]).length) {
+ setChunks([]);
+ return;
+ }
- setChunks(fileData?.chunks || []);
- }, [data, filename]);
+ setChunks(fileData?.chunks || []);
+ }, [data, filename]);
- // Set selected state for all checkboxes when selectAll changes
- useEffect(() => {
- if (selectAll) {
- setSelectedChunks(new Set(chunks.map((_, index) => index)));
- } else {
- setSelectedChunks(new Set());
- }
- }, [selectAll, setSelectedChunks, chunks]);
+ // Set selected state for all checkboxes when selectAll changes
+ useEffect(() => {
+ if (selectAll) {
+ setSelectedChunks(new Set(chunks.map((_, index) => index)));
+ } else {
+ setSelectedChunks(new Set());
+ }
+ }, [selectAll, setSelectedChunks, chunks]);
- const handleBack = useCallback(() => {
- router.push("/knowledge");
- }, [router]);
+ const handleBack = useCallback(() => {
+ router.push("/knowledge");
+ }, [router]);
- const handleChunkCardCheckboxChange = useCallback(
- (index: number) => {
- setSelectedChunks((prevSelected) => {
- const newSelected = new Set(prevSelected);
- if (newSelected.has(index)) {
- newSelected.delete(index);
- } else {
- newSelected.add(index);
- }
- return newSelected;
- });
- },
- [setSelectedChunks]
- );
+ const handleChunkCardCheckboxChange = useCallback(
+ (index: number) => {
+ setSelectedChunks((prevSelected) => {
+ const newSelected = new Set(prevSelected);
+ if (newSelected.has(index)) {
+ newSelected.delete(index);
+ } else {
+ newSelected.add(index);
+ }
+ return newSelected;
+ });
+ },
+ [setSelectedChunks],
+ );
- if (!filename) {
- return (
-
-
-
-
No file specified
-
- Please select a file from the knowledge page
-
-
-
- );
- }
+ if (!filename) {
+ return (
+
+
+
+
No file specified
+
+ Please select a file from the knowledge page
+
+
+
+ );
+ }
- return (
-
-
- {/* Header */}
-
-
-
-
-
-
- {/* Removes file extension from filename */}
- {filename.replace(/\.[^/.]+$/, "")}
-
-
-
-
- : null}
- id="search-query"
- type="text"
- defaultValue={parsedFilterData?.query}
- value={queryInputText}
- onChange={(e) => setQueryInputText(e.target.value)}
- placeholder="Search chunks..."
- />
-
-
-
- setSelectAll(!!handleSelectAll)
- }
- />
-
- Select all
-
-
-
-
+ return (
+
+
+ {/* Header */}
+
+
+
+
+
+
+ {/* Removes file extension from filename */}
+ {filename.replace(/\.[^/.]+$/, "")}
+
+
+
+
+ : null
+ }
+ id="search-query"
+ type="text"
+ defaultValue={parsedFilterData?.query}
+ value={queryInputText}
+ onChange={(e) => setQueryInputText(e.target.value)}
+ placeholder="Search chunks..."
+ />
+
+
+
+ setSelectAll(!!handleSelectAll)
+ }
+ />
+
+ Select all
+
+
+
+
- {/* Content Area - matches knowledge page structure */}
-
- {isFetching ? (
-
-
-
-
- Loading chunks...
-
-
-
- ) : chunks.length === 0 ? (
-
-
-
-
No chunks found
-
- This file may not have been indexed yet
-
-
-
- ) : (
-
- {chunksFilteredByQuery.map((chunk, index) => (
-
-
-
-
-
- handleChunkCardCheckboxChange(index)
- }
- />
-
-
- Chunk {chunk.page}
-
-
- {chunk.text.length} chars
-
-
- handleCopy(chunk.text, index)}
- variant="ghost"
- size="sm"
- >
- {activeCopiedChunkIndex === index ? (
-
- ) : (
-
- )}
-
-
-
+ {/* Content Area - matches knowledge page structure */}
+
+ {isFetching ? (
+
+
+
+
+ Loading chunks...
+
+
+
+ ) : chunks.length === 0 ? (
+
+
+
+
No chunks found
+
+ This file may not have been indexed yet
+
+
+
+ ) : (
+
+ {chunksFilteredByQuery.map((chunk, index) => (
+
+
+
+
+
+ handleChunkCardCheckboxChange(index)
+ }
+ />
+
+
+ Chunk {chunk.page}
+
+
+ {chunk.text.length} chars
+
+
+ handleCopy(chunk.text, index)}
+ variant="ghost"
+ size="sm"
+ >
+ {activeCopiedChunkIndex === index ? (
+
+ ) : (
+
+ )}
+
+
+
- {/* TODO: Update to use active toggle */}
- {/*
+ {/* TODO: Update to use active toggle */}
+ {/*
Active
*/}
-
-
- {chunk.text}
-
-
- ))}
-
- )}
-
-
- {/* Right panel - Summary (TODO), Technical details, */}
-
-
-
Technical details
-
-
-
Total chunks
-
- {chunks.length}
-
-
-
-
Avg length
-
- {averageChunkLength.toFixed(0)} chars
-
-
- {/* TODO: Uncomment after data is available */}
- {/*
+
+
+ {chunk.text}
+
+
+ ))}
+
+ )}
+
+
+ {/* Right panel - Summary (TODO), Technical details, */}
+
+
+
Technical details
+
+
+
Total chunks
+
+ {chunks.length}
+
+
+
+
Avg length
+
+ {averageChunkLength.toFixed(0)} chars
+
+
+ {/* TODO: Uncomment after data is available */}
+ {/*
Process time
@@ -293,76 +295,76 @@ function ChunksPageContent() {
*/}
-
-
-
-
Original document
-
-
-
Name
-
- {fileData?.filename}
-
-
-
-
Type
-
- {fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"}
-
-
-
-
Size
-
- {fileData?.size
- ? `${Math.round(fileData.size / 1024)} KB`
- : "Unknown"}
-
-
-
-
Uploaded
-
- N/A
-
-
- {/* TODO: Uncomment after data is available */}
- {/*
+
+
+
+
Original document
+
+
+
Name
+
+ {fileData?.filename}
+
+
+
+
Type
+
+ {fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"}
+
+
+
+
Size
+
+ {fileData?.size
+ ? `${Math.round(fileData.size / 1024)} KB`
+ : "Unknown"}
+
+
+
+
Uploaded
+
+ N/A
+
+
+ {/* TODO: Uncomment after data is available */}
+ {/*
Source
*/}
-
-
Updated
-
- N/A
-
-
-
-
-
-
- );
+
+
Updated
+
+ N/A
+
+
+
+
+
+
+ );
}
function ChunksPage() {
- return (
-
-
-
- }
- >
-
-
- );
+ return (
+
+
+
+ }
+ >
+
+
+ );
}
export default function ProtectedChunksPage() {
- return (
-
-
-
- );
+ return (
+
+
+
+ );
}
diff --git a/frontend/src/app/knowledge/page.tsx b/frontend/src/app/knowledge/page.tsx
index 9e933903..5395d85d 100644
--- a/frontend/src/app/knowledge/page.tsx
+++ b/frontend/src/app/knowledge/page.tsx
@@ -310,14 +310,13 @@ function SearchPage() {
)}
From 99cc806a559f526e9e4a837c41096eef19fd3116 Mon Sep 17 00:00:00 2001
From: Brent O'Neill
Date: Fri, 3 Oct 2025 13:53:39 -0600
Subject: [PATCH 25/39] fix empty state
---
frontend/src/app/knowledge/chunks/page.tsx | 103 +++++++++++----------
1 file changed, 55 insertions(+), 48 deletions(-)
diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx
index cde2c3e8..31b741a4 100644
--- a/frontend/src/app/knowledge/chunks/page.tsx
+++ b/frontend/src/app/knowledge/chunks/page.tsx
@@ -224,10 +224,9 @@ function ChunksPageContent() {
) : chunks.length === 0 ? (
-
-
No chunks found
-
- This file may not have been indexed yet
+
No knowledge
+
+ Clear the knowledge filter or return to the knowledge page
@@ -292,24 +291,29 @@ function ChunksPageContent() {
{/* Right panel - Summary (TODO), Technical details, */}
-
-
-
Technical details
-
-
-
Total chunks
-
- {chunks.length}
-
-
-
-
Avg length
-
- {averageChunkLength.toFixed(0)} chars
-
-
- {/* TODO: Uncomment after data is available */}
- {/*
+ {chunks.length > 0 && (
+
+
+
+ Technical details
+
+
+
+
+ Total chunks
+
+
+ {chunks.length}
+
+
+
+
Avg length
+
+ {averageChunkLength.toFixed(0)} chars
+
+
+ {/* TODO: Uncomment after data is available */}
+ {/*
Process time
@@ -319,51 +323,54 @@ function ChunksPageContent() {
*/}
-
-
-
-
Original document
-
- {/*
+
+
+
+
+ Original document
+
+
+ {/*
Name
{fileData?.filename}
*/}
-
-
Type
-
- {fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"}
-
-
-
-
Size
-
- {fileData?.size
- ? `${Math.round(fileData.size / 1024)} KB`
- : "Unknown"}
-
-
- {/*
+
+
Type
+
+ {fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"}
+
+
+
+
Size
+
+ {fileData?.size
+ ? `${Math.round(fileData.size / 1024)} KB`
+ : "Unknown"}
+
+
+ {/*
Uploaded
N/A
*/}
- {/* TODO: Uncomment after data is available */}
- {/*
+ {/* TODO: Uncomment after data is available */}
+ {/*
Source
*/}
- {/*
+ {/*
Updated
N/A
*/}
-
+
+
-
+ )}
);
}
From 946d3edc89d09af8fabd5e6b1981466d76fae7ee Mon Sep 17 00:00:00 2001
From: Lucas Oliveira
Date: Fri, 3 Oct 2025 16:54:40 -0300
Subject: [PATCH 26/39] refresh tasks on entering page, make failed files
persist
---
frontend/src/app/knowledge/page.tsx | 7 ++++++-
frontend/src/contexts/task-context.tsx | 3 ++-
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/frontend/src/app/knowledge/page.tsx b/frontend/src/app/knowledge/page.tsx
index 5395d85d..73cc2c7d 100644
--- a/frontend/src/app/knowledge/page.tsx
+++ b/frontend/src/app/knowledge/page.tsx
@@ -7,6 +7,7 @@ import { useRouter } from "next/navigation";
import {
type ChangeEvent,
useCallback,
+ useEffect,
useRef,
useState,
} from "react";
@@ -51,7 +52,7 @@ function getSourceIcon(connectorType?: string) {
function SearchPage() {
const router = useRouter();
- const { isMenuOpen, files: taskFiles } = useTask();
+ const { isMenuOpen, files: taskFiles, refreshTasks } = useTask();
const { selectedFilter, setSelectedFilter, parsedFilterData, isPanelOpen } =
useKnowledgeFilter();
const [selectedRows, setSelectedRows] = useState([]);
@@ -59,6 +60,10 @@ function SearchPage() {
const deleteDocumentMutation = useDeleteDocument();
+ useEffect(() => {
+ refreshTasks();
+ }, [refreshTasks]);
+
const { data: searchData = [], isFetching } = useGetSearchQuery(
parsedFilterData?.query || "*",
parsedFilterData,
diff --git a/frontend/src/contexts/task-context.tsx b/frontend/src/contexts/task-context.tsx
index d25130f7..69a1214f 100644
--- a/frontend/src/contexts/task-context.tsx
+++ b/frontend/src/contexts/task-context.tsx
@@ -226,7 +226,7 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
setTimeout(() => {
refetchSearch();
setFiles((prevFiles) =>
- prevFiles.filter((file) => file.task_id !== currentTask.task_id && file.status !== "failed"),
+ prevFiles.filter((file) => file.task_id !== currentTask.task_id || file.status === "failed"),
);
}, 500);
} else if (
@@ -258,6 +258,7 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
);
const refreshTasks = useCallback(async () => {
+ setFiles([]);
await refetchTasks();
}, [refetchTasks]);
From 509b6c613228fa5ec592533d6a5e69b8bfa76aa2 Mon Sep 17 00:00:00 2001
From: Lucas Oliveira
Date: Fri, 3 Oct 2025 17:06:07 -0300
Subject: [PATCH 27/39] make view button open menu
---
frontend/src/contexts/task-context.tsx | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/frontend/src/contexts/task-context.tsx b/frontend/src/contexts/task-context.tsx
index 69a1214f..a8693526 100644
--- a/frontend/src/contexts/task-context.tsx
+++ b/frontend/src/contexts/task-context.tsx
@@ -220,7 +220,7 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
description,
action: {
label: "View",
- onClick: () => console.log("View task", currentTask.task_id),
+ onClick: () => setIsMenuOpen(true),
},
});
setTimeout(() => {
@@ -252,7 +252,9 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
(_taskId: string) => {
// React Query will automatically handle polling when tasks are active
// Just trigger a refetch to get the latest data
- refetchTasks();
+ setTimeout(() => {
+ refetchTasks();
+ }, 500);
},
[refetchTasks],
);
From 7201a914be856775a0bafc7ef445a8882766d3ae Mon Sep 17 00:00:00 2001
From: Lucas Oliveira
Date: Fri, 3 Oct 2025 17:12:36 -0300
Subject: [PATCH 28/39] open tasks menu when clicking view
---
frontend/src/components/task-notification-menu.tsx | 11 +++++++++--
frontend/src/contexts/task-context.tsx | 10 +++++++++-
2 files changed, 18 insertions(+), 3 deletions(-)
diff --git a/frontend/src/components/task-notification-menu.tsx b/frontend/src/components/task-notification-menu.tsx
index e17f9579..fed7e6f1 100644
--- a/frontend/src/components/task-notification-menu.tsx
+++ b/frontend/src/components/task-notification-menu.tsx
@@ -1,6 +1,6 @@
"use client"
-import { useState } from 'react'
+import { useEffect, useState } from 'react'
import { Bell, CheckCircle, XCircle, Clock, Loader2, ChevronDown, ChevronUp, X } from 'lucide-react'
import { Button } from '@/components/ui/button'
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card'
@@ -8,9 +8,16 @@ import { Badge } from '@/components/ui/badge'
import { useTask, Task } from '@/contexts/task-context'
export function TaskNotificationMenu() {
- const { tasks, isFetching, isMenuOpen, cancelTask } = useTask()
+ const { tasks, isFetching, isMenuOpen, isRecentTasksExpanded, cancelTask } = useTask()
const [isExpanded, setIsExpanded] = useState(false)
+ // Sync local state with context state
+ useEffect(() => {
+ if (isRecentTasksExpanded) {
+ setIsExpanded(true)
+ }
+ }, [isRecentTasksExpanded])
+
// Don't render if menu is closed
if (!isMenuOpen) return null
diff --git a/frontend/src/contexts/task-context.tsx b/frontend/src/contexts/task-context.tsx
index a8693526..f5d0c0c4 100644
--- a/frontend/src/contexts/task-context.tsx
+++ b/frontend/src/contexts/task-context.tsx
@@ -43,6 +43,8 @@ interface TaskContextType {
isFetching: boolean;
isMenuOpen: boolean;
toggleMenu: () => void;
+ isRecentTasksExpanded: boolean;
+ setRecentTasksExpanded: (expanded: boolean) => void;
// React Query states
isLoading: boolean;
error: Error | null;
@@ -53,6 +55,7 @@ const TaskContext = createContext(undefined);
export function TaskProvider({ children }: { children: React.ReactNode }) {
const [files, setFiles] = useState([]);
const [isMenuOpen, setIsMenuOpen] = useState(false);
+ const [isRecentTasksExpanded, setIsRecentTasksExpanded] = useState(false);
const previousTasksRef = useRef([]);
const { isAuthenticated, isNoAuthMode } = useAuth();
@@ -220,7 +223,10 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
description,
action: {
label: "View",
- onClick: () => setIsMenuOpen(true),
+ onClick: () => {
+ setIsMenuOpen(true);
+ setIsRecentTasksExpanded(true);
+ },
},
});
setTimeout(() => {
@@ -302,6 +308,8 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
isFetching,
isMenuOpen,
toggleMenu,
+ isRecentTasksExpanded,
+ setRecentTasksExpanded: setIsRecentTasksExpanded,
isLoading,
error,
};
From 5ea70cc7797b652eb829617d466d18e706a8adae Mon Sep 17 00:00:00 2001
From: Edwin Jose
Date: Fri, 3 Oct 2025 16:17:05 -0400
Subject: [PATCH 29/39] Refactor file upload task to use filename mapping
(#195)
Changed the handling of original filenames in Langflow upload tasks to use a mapping from file paths to original filenames instead of a list. Updated both the API router and TaskService to support this change, improving reliability when associating uploaded files with their original names.
---
src/api/router.py | 83 ++++++++++++++++++++----------------
src/services/task_service.py | 25 ++++++-----
2 files changed, 60 insertions(+), 48 deletions(-)
diff --git a/src/api/router.py b/src/api/router.py
index 327757be..15a9b116 100644
--- a/src/api/router.py
+++ b/src/api/router.py
@@ -13,27 +13,27 @@ logger = get_logger(__name__)
async def upload_ingest_router(
- request: Request,
- document_service=None,
- langflow_file_service=None,
+ request: Request,
+ document_service=None,
+ langflow_file_service=None,
session_manager=None,
- task_service=None
+ task_service=None,
):
"""
Router endpoint that automatically routes upload requests based on configuration.
-
+
- If DISABLE_INGEST_WITH_LANGFLOW is True: uses traditional OpenRAG upload (/upload)
- If DISABLE_INGEST_WITH_LANGFLOW is False (default): uses Langflow upload-ingest via task service
-
+
This provides a single endpoint that users can call regardless of backend configuration.
All langflow uploads are processed as background tasks for better scalability.
"""
try:
logger.debug(
- "Router upload_ingest endpoint called",
- disable_langflow_ingest=DISABLE_INGEST_WITH_LANGFLOW
+ "Router upload_ingest endpoint called",
+ disable_langflow_ingest=DISABLE_INGEST_WITH_LANGFLOW,
)
-
+
# Route based on configuration
if DISABLE_INGEST_WITH_LANGFLOW:
# Route to traditional OpenRAG upload
@@ -42,8 +42,10 @@ async def upload_ingest_router(
else:
# Route to Langflow upload and ingest using task service
logger.debug("Routing to Langflow upload-ingest pipeline via task service")
- return await langflow_upload_ingest_task(request, langflow_file_service, session_manager, task_service)
-
+ return await langflow_upload_ingest_task(
+ request, langflow_file_service, session_manager, task_service
+ )
+
except Exception as e:
logger.error("Error in upload_ingest_router", error=str(e))
error_msg = str(e)
@@ -57,17 +59,14 @@ async def upload_ingest_router(
async def langflow_upload_ingest_task(
- request: Request,
- langflow_file_service,
- session_manager,
- task_service
+ request: Request, langflow_file_service, session_manager, task_service
):
"""Task-based langflow upload and ingest for single/multiple files"""
try:
logger.debug("Task-based langflow upload_ingest endpoint called")
form = await request.form()
upload_files = form.getlist("file")
-
+
if not upload_files or len(upload_files) == 0:
logger.error("No files provided in task-based upload request")
return JSONResponse({"error": "Missing files"}, status_code=400)
@@ -82,10 +81,11 @@ async def langflow_upload_ingest_task(
# Parse JSON fields if provided
settings = None
tweaks = None
-
+
if settings_json:
try:
import json
+
settings = json.loads(settings_json)
except json.JSONDecodeError as e:
logger.error("Invalid settings JSON", error=str(e))
@@ -94,6 +94,7 @@ async def langflow_upload_ingest_task(
if tweaks_json:
try:
import json
+
tweaks = json.loads(tweaks_json)
except json.JSONDecodeError as e:
logger.error("Invalid tweaks JSON", error=str(e))
@@ -107,11 +108,14 @@ async def langflow_upload_ingest_task(
jwt_token = getattr(request.state, "jwt_token", None)
if not user_id:
- return JSONResponse({"error": "User authentication required"}, status_code=401)
+ return JSONResponse(
+ {"error": "User authentication required"}, status_code=401
+ )
# Create temporary files for task processing
import tempfile
import os
+
temp_file_paths = []
original_filenames = []
@@ -132,7 +136,7 @@ async def langflow_upload_ingest_task(
temp_path = os.path.join(temp_dir, safe_filename)
# Write content to temp file
- with open(temp_path, 'wb') as temp_file:
+ with open(temp_path, "wb") as temp_file:
temp_file.write(content)
temp_file_paths.append(temp_path)
@@ -143,22 +147,22 @@ async def langflow_upload_ingest_task(
user_id=user_id,
has_settings=bool(settings),
has_tweaks=bool(tweaks),
- delete_after_ingest=delete_after_ingest
+ delete_after_ingest=delete_after_ingest,
)
# Create langflow upload task
- print(f"tweaks: {tweaks}")
- print(f"settings: {settings}")
- print(f"jwt_token: {jwt_token}")
- print(f"user_name: {user_name}")
- print(f"user_email: {user_email}")
- print(f"session_id: {session_id}")
- print(f"delete_after_ingest: {delete_after_ingest}")
- print(f"temp_file_paths: {temp_file_paths}")
+ logger.debug(
+ f"Preparing to create langflow upload task: tweaks={tweaks}, settings={settings}, jwt_token={jwt_token}, user_name={user_name}, user_email={user_email}, session_id={session_id}, delete_after_ingest={delete_after_ingest}, temp_file_paths={temp_file_paths}",
+ )
+ # Create a map between temp_file_paths and original_filenames
+ file_path_to_original_filename = dict(zip(temp_file_paths, original_filenames))
+ logger.debug(
+ f"File path to original filename map: {file_path_to_original_filename}",
+ )
task_id = await task_service.create_langflow_upload_task(
user_id=user_id,
file_paths=temp_file_paths,
- original_filenames=original_filenames,
+ original_filenames=file_path_to_original_filename,
langflow_file_service=langflow_file_service,
session_manager=session_manager,
jwt_token=jwt_token,
@@ -172,20 +176,24 @@ async def langflow_upload_ingest_task(
)
logger.debug("Langflow upload task created successfully", task_id=task_id)
-
- return JSONResponse({
- "task_id": task_id,
- "message": f"Langflow upload task created for {len(upload_files)} file(s)",
- "file_count": len(upload_files)
- }, status_code=202) # 202 Accepted for async processing
-
+
+ return JSONResponse(
+ {
+ "task_id": task_id,
+ "message": f"Langflow upload task created for {len(upload_files)} file(s)",
+ "file_count": len(upload_files),
+ },
+ status_code=202,
+ ) # 202 Accepted for async processing
+
except Exception:
# Clean up temp files on error
from utils.file_utils import safe_unlink
+
for temp_path in temp_file_paths:
safe_unlink(temp_path)
raise
-
+
except Exception as e:
logger.error(
"Task-based langflow upload_ingest endpoint failed",
@@ -193,5 +201,6 @@ async def langflow_upload_ingest_task(
error=str(e),
)
import traceback
+
logger.error("Full traceback", traceback=traceback.format_exc())
return JSONResponse({"error": str(e)}, status_code=500)
diff --git a/src/services/task_service.py b/src/services/task_service.py
index eb5825c0..735ad483 100644
--- a/src/services/task_service.py
+++ b/src/services/task_service.py
@@ -1,6 +1,5 @@
import asyncio
import random
-from typing import Dict, Optional
import time
import uuid
@@ -59,7 +58,7 @@ class TaskService:
file_paths: list,
langflow_file_service,
session_manager,
- original_filenames: list = None,
+ original_filenames: dict | None = None,
jwt_token: str = None,
owner_name: str = None,
owner_email: str = None,
@@ -88,7 +87,7 @@ class TaskService:
)
return await self.create_custom_task(user_id, file_paths, processor, original_filenames)
- async def create_custom_task(self, user_id: str, items: list, processor, original_filenames: list = None) -> str:
+ async def create_custom_task(self, user_id: str, items: list, processor, original_filenames: dict | None = None) -> str:
"""Create a new task with custom processor for any type of items"""
import os
# Store anonymous tasks under a stable key so they can be retrieved later
@@ -96,14 +95,18 @@ class TaskService:
task_id = str(uuid.uuid4())
# Create file tasks with original filenames if provided
- file_tasks = {}
- for i, item in enumerate(items):
- if original_filenames and i < len(original_filenames):
- filename = original_filenames[i]
- else:
- filename = os.path.basename(str(item))
-
- file_tasks[str(item)] = FileTask(file_path=str(item), filename=filename)
+ normalized_originals = (
+ {str(k): v for k, v in original_filenames.items()} if original_filenames else {}
+ )
+ file_tasks = {
+ str(item): FileTask(
+ file_path=str(item),
+ filename=normalized_originals.get(
+ str(item), os.path.basename(str(item))
+ ),
+ )
+ for item in items
+ }
upload_task = UploadTask(
task_id=task_id,
From eef9e380dc3f76956155fc3cdad80d48a2a80cbc Mon Sep 17 00:00:00 2001
From: Lucas Oliveira
Date: Mon, 6 Oct 2025 09:45:21 -0300
Subject: [PATCH 30/39] Added opensearch queries
---
src/utils/opensearch_queries.py | 55 +++++++++++++++++++++++++++++++++
1 file changed, 55 insertions(+)
create mode 100644 src/utils/opensearch_queries.py
diff --git a/src/utils/opensearch_queries.py b/src/utils/opensearch_queries.py
new file mode 100644
index 00000000..f29c6283
--- /dev/null
+++ b/src/utils/opensearch_queries.py
@@ -0,0 +1,55 @@
+"""
+Utility functions for constructing OpenSearch queries consistently.
+"""
+from typing import Union, List
+
+
+def build_filename_query(filename: str) -> dict:
+ """
+ Build a standardized query for finding documents by filename.
+
+ Args:
+ filename: The exact filename to search for
+
+ Returns:
+ A dict containing the OpenSearch query body
+ """
+ return {
+ "term": {
+ "filename": filename
+ }
+ }
+
+
+def build_filename_search_body(filename: str, size: int = 1, source: Union[bool, List[str]] = False) -> dict:
+ """
+ Build a complete search body for checking if a filename exists.
+
+ Args:
+ filename: The exact filename to search for
+ size: Number of results to return (default: 1)
+ source: Whether to include source fields, or list of specific fields to include (default: False)
+
+ Returns:
+ A dict containing the complete OpenSearch search body
+ """
+ return {
+ "query": build_filename_query(filename),
+ "size": size,
+ "_source": source
+ }
+
+
+def build_filename_delete_body(filename: str) -> dict:
+ """
+ Build a delete-by-query body for removing all documents with a filename.
+
+ Args:
+ filename: The exact filename to delete
+
+ Returns:
+ A dict containing the OpenSearch delete-by-query body
+ """
+ return {
+ "query": build_filename_query(filename)
+ }
From 95bdc59af0bad389fca636b92f30621cbe2961b3 Mon Sep 17 00:00:00 2001
From: Lucas Oliveira
Date: Mon, 6 Oct 2025 09:45:33 -0300
Subject: [PATCH 31/39] Use opensearch queries on documents and processors
---
src/api/documents.py | 32 ++++++--------------------------
src/models/processors.py | 20 ++++----------------
2 files changed, 10 insertions(+), 42 deletions(-)
diff --git a/src/api/documents.py b/src/api/documents.py
index a367c9fe..048f746a 100644
--- a/src/api/documents.py
+++ b/src/api/documents.py
@@ -23,20 +23,9 @@ async def check_filename_exists(request: Request, document_service, session_mana
)
# Search for any document with this exact filename
- # Try both .keyword (exact match) and regular field (analyzed match)
- search_body = {
- "query": {
- "bool": {
- "should": [
- {"term": {"filename.keyword": filename}},
- {"term": {"filename": filename}}
- ],
- "minimum_should_match": 1
- }
- },
- "size": 1,
- "_source": ["filename"]
- }
+ from utils.opensearch_queries import build_filename_search_body
+
+ search_body = build_filename_search_body(filename, size=1, source=["filename"])
logger.debug(f"Checking filename existence: {filename}")
@@ -83,18 +72,9 @@ async def delete_documents_by_filename(request: Request, document_service, sessi
)
# Delete by query to remove all chunks of this document
- # Use both .keyword and regular field to ensure we catch all variations
- delete_query = {
- "query": {
- "bool": {
- "should": [
- {"term": {"filename.keyword": filename}},
- {"term": {"filename": filename}}
- ],
- "minimum_should_match": 1
- }
- }
- }
+ from utils.opensearch_queries import build_filename_delete_body
+
+ delete_query = build_filename_delete_body(filename)
logger.debug(f"Deleting documents with filename: {filename}")
diff --git a/src/models/processors.py b/src/models/processors.py
index 6d7b74b4..4a5d96b5 100644
--- a/src/models/processors.py
+++ b/src/models/processors.py
@@ -65,6 +65,7 @@ class TaskProcessor:
Returns True if any chunks with this filename exist.
"""
from config.settings import INDEX_NAME
+ from utils.opensearch_queries import build_filename_search_body
import asyncio
max_retries = 3
@@ -73,15 +74,7 @@ class TaskProcessor:
for attempt in range(max_retries):
try:
# Search for any document with this exact filename
- search_body = {
- "query": {
- "term": {
- "filename.keyword": filename
- }
- },
- "size": 1,
- "_source": False
- }
+ search_body = build_filename_search_body(filename, size=1, source=False)
response = await opensearch_client.search(
index=INDEX_NAME,
@@ -126,16 +119,11 @@ class TaskProcessor:
Delete all chunks of a document with the given filename from OpenSearch.
"""
from config.settings import INDEX_NAME
+ from utils.opensearch_queries import build_filename_delete_body
try:
# Delete all documents with this filename
- delete_body = {
- "query": {
- "term": {
- "filename.keyword": filename
- }
- }
- }
+ delete_body = build_filename_delete_body(filename)
response = await opensearch_client.delete_by_query(
index=INDEX_NAME,
From 1e3f28f9ed1295f0fd737131f7e3b5f4e0fbd61e Mon Sep 17 00:00:00 2001
From: Lucas Oliveira
Date: Mon, 6 Oct 2025 11:51:13 -0300
Subject: [PATCH 32/39] removed console logs
---
frontend/src/contexts/task-context.tsx | 39 ++++++++++++++++++--------
1 file changed, 27 insertions(+), 12 deletions(-)
diff --git a/frontend/src/contexts/task-context.tsx b/frontend/src/contexts/task-context.tsx
index f5d0c0c4..b3d1f647 100644
--- a/frontend/src/contexts/task-context.tsx
+++ b/frontend/src/contexts/task-context.tsx
@@ -119,7 +119,6 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
previousTasksRef.current = tasks;
return;
}
- console.log(tasks, previousTasksRef.current);
// Check for task status changes by comparing with previous tasks
tasks.forEach((currentTask) => {
@@ -128,18 +127,22 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
);
// Only show toasts if we have previous data and status has changed
- if (((previousTask && previousTask.status !== currentTask.status) || (!previousTask && previousTasksRef.current.length !== 0))) {
- console.log("task status changed", currentTask.status);
+ if (
+ (previousTask && previousTask.status !== currentTask.status) ||
+ (!previousTask && previousTasksRef.current.length !== 0)
+ ) {
// Process files from failed task and add them to files list
if (currentTask.files && typeof currentTask.files === "object") {
- console.log("processing files", currentTask.files);
const taskFileEntries = Object.entries(currentTask.files);
const now = new Date().toISOString();
taskFileEntries.forEach(([filePath, fileInfo]) => {
if (typeof fileInfo === "object" && fileInfo) {
// Use the filename from backend if available, otherwise extract from path
- const fileName = (fileInfo as any).filename || filePath.split("/").pop() || filePath;
+ const fileName =
+ (fileInfo as any).filename ||
+ filePath.split("/").pop() ||
+ filePath;
const fileStatus = fileInfo.status as string;
// Map backend file status to our TaskFile status
@@ -205,7 +208,8 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
});
}
if (
- previousTask && previousTask.status !== "completed" &&
+ previousTask &&
+ previousTask.status !== "completed" &&
currentTask.status === "completed"
) {
// Task just completed - show success toast with file counts
@@ -214,9 +218,15 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
let description = "";
if (failedFiles > 0) {
- description = `${successfulFiles} file${successfulFiles !== 1 ? 's' : ''} uploaded successfully, ${failedFiles} file${failedFiles !== 1 ? 's' : ''} failed`;
+ description = `${successfulFiles} file${
+ successfulFiles !== 1 ? "s" : ""
+ } uploaded successfully, ${failedFiles} file${
+ failedFiles !== 1 ? "s" : ""
+ } failed`;
} else {
- description = `${successfulFiles} file${successfulFiles !== 1 ? 's' : ''} uploaded successfully`;
+ description = `${successfulFiles} file${
+ successfulFiles !== 1 ? "s" : ""
+ } uploaded successfully`;
}
toast.success("Task completed", {
@@ -230,13 +240,18 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
},
});
setTimeout(() => {
- refetchSearch();
- setFiles((prevFiles) =>
- prevFiles.filter((file) => file.task_id !== currentTask.task_id || file.status === "failed"),
+ setFiles((prevFiles) =>
+ prevFiles.filter(
+ (file) =>
+ file.task_id !== currentTask.task_id ||
+ file.status === "failed",
+ ),
);
+ refetchSearch();
}, 500);
} else if (
- previousTask && previousTask.status !== "failed" &&
+ previousTask &&
+ previousTask.status !== "failed" &&
previousTask.status !== "error" &&
(currentTask.status === "failed" || currentTask.status === "error")
) {
From 640dbcee20b46d9d1eeaa7dc9227ecfc4e2b16e3 Mon Sep 17 00:00:00 2001
From: Lucas Oliveira
Date: Mon, 6 Oct 2025 11:52:18 -0300
Subject: [PATCH 33/39] remove unused function
---
frontend/src/contexts/task-context.tsx | 6 ------
1 file changed, 6 deletions(-)
diff --git a/frontend/src/contexts/task-context.tsx b/frontend/src/contexts/task-context.tsx
index b3d1f647..12ad3c24 100644
--- a/frontend/src/contexts/task-context.tsx
+++ b/frontend/src/contexts/task-context.tsx
@@ -36,7 +36,6 @@ interface TaskContextType {
files: TaskFile[];
addTask: (taskId: string) => void;
addFiles: (files: Partial[], taskId: string) => void;
- removeTask: (taskId: string) => void;
refreshTasks: () => Promise;
cancelTask: (taskId: string) => Promise;
isPolling: boolean;
@@ -285,10 +284,6 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
await refetchTasks();
}, [refetchTasks]);
- const removeTask = useCallback((_taskId: string) => {
- // This is now handled by React Query automatically
- // Tasks will be removed from the list when they're no longer returned by the API
- }, []);
const cancelTask = useCallback(
async (taskId: string) => {
@@ -316,7 +311,6 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
files,
addTask,
addFiles,
- removeTask,
refreshTasks,
cancelTask,
isPolling,
From 8213f0f6136242e160df49597064d050b30db5e3 Mon Sep 17 00:00:00 2001
From: Brent O'Neill
Date: Mon, 6 Oct 2025 08:53:09 -0600
Subject: [PATCH 34/39] revert
---
frontend/src/components/cloud-picker/picker-header.tsx | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/frontend/src/components/cloud-picker/picker-header.tsx b/frontend/src/components/cloud-picker/picker-header.tsx
index e0d9cfa4..54407aa7 100644
--- a/frontend/src/components/cloud-picker/picker-header.tsx
+++ b/frontend/src/components/cloud-picker/picker-header.tsx
@@ -51,13 +51,19 @@ export const PickerHeader = ({
Select files from {getProviderName(provider)} to ingest.
- {isPickerOpen ? "Opening picker..." : "Add files"}
+ Add Files
+
+ csv, json, pdf,{" "}
+
+16 more {" "}
+
150 MB max
+
);
From c2f50fe139563453b8edd5cf311095d0ab71a9e3 Mon Sep 17 00:00:00 2001
From: Brent O'Neill
Date: Mon, 6 Oct 2025 08:53:39 -0600
Subject: [PATCH 35/39] revert
---
frontend/src/components/cloud-picker/picker-header.tsx | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/frontend/src/components/cloud-picker/picker-header.tsx b/frontend/src/components/cloud-picker/picker-header.tsx
index 54407aa7..05dcaebd 100644
--- a/frontend/src/components/cloud-picker/picker-header.tsx
+++ b/frontend/src/components/cloud-picker/picker-header.tsx
@@ -57,7 +57,7 @@ export const PickerHeader = ({
className="bg-foreground text-background hover:bg-foreground/90 font-semibold"
>
- Add Files
+ {isPickerOpen ? "Opening Picker..." : "Add Files"}
csv, json, pdf,{" "}
From 47b84a3d8a2e74d7db88ae578a328a6e60d10214 Mon Sep 17 00:00:00 2001
From: Brent O'Neill
Date: Mon, 6 Oct 2025 08:55:12 -0600
Subject: [PATCH 36/39] revert
---
frontend/src/app/upload/[provider]/page.tsx | 659 ++++++++++----------
1 file changed, 328 insertions(+), 331 deletions(-)
diff --git a/frontend/src/app/upload/[provider]/page.tsx b/frontend/src/app/upload/[provider]/page.tsx
index 8e0a306c..7c72ec3d 100644
--- a/frontend/src/app/upload/[provider]/page.tsx
+++ b/frontend/src/app/upload/[provider]/page.tsx
@@ -12,370 +12,367 @@ import { useTask } from "@/contexts/task-context";
// CloudFile interface is now imported from the unified cloud picker
interface CloudConnector {
- id: string;
- name: string;
- description: string;
- status: "not_connected" | "connecting" | "connected" | "error";
- type: string;
- connectionId?: string;
- clientId: string;
- hasAccessToken: boolean;
- accessTokenError?: string;
+ id: string;
+ name: string;
+ description: string;
+ status: "not_connected" | "connecting" | "connected" | "error";
+ type: string;
+ connectionId?: string;
+ clientId: string;
+ hasAccessToken: boolean;
+ accessTokenError?: string;
}
export default function UploadProviderPage() {
- const params = useParams();
- const router = useRouter();
- const provider = params.provider as string;
- const { addTask, tasks } = useTask();
+ const params = useParams();
+ const router = useRouter();
+ const provider = params.provider as string;
+ const { addTask, tasks } = useTask();
- const [connector, setConnector] = useState(null);
- const [isLoading, setIsLoading] = useState(true);
- const [error, setError] = useState(null);
- const [accessToken, setAccessToken] = useState(null);
- const [selectedFiles, setSelectedFiles] = useState([]);
- const [isIngesting, setIsIngesting] = useState(false);
- const [currentSyncTaskId, setCurrentSyncTaskId] = useState(
- null
- );
- const [ingestSettings, setIngestSettings] = useState({
- chunkSize: 1000,
- chunkOverlap: 200,
- ocr: false,
- pictureDescriptions: false,
- embeddingModel: "text-embedding-3-small",
- });
+ const [connector, setConnector] = useState(null);
+ const [isLoading, setIsLoading] = useState(true);
+ const [error, setError] = useState(null);
+ const [accessToken, setAccessToken] = useState(null);
+ const [selectedFiles, setSelectedFiles] = useState([]);
+ const [isIngesting, setIsIngesting] = useState(false);
+ const [currentSyncTaskId, setCurrentSyncTaskId] = useState(
+ null,
+ );
+ const [ingestSettings, setIngestSettings] = useState({
+ chunkSize: 1000,
+ chunkOverlap: 200,
+ ocr: false,
+ pictureDescriptions: false,
+ embeddingModel: "text-embedding-3-small",
+ });
- useEffect(() => {
- const fetchConnectorInfo = async () => {
- setIsLoading(true);
- setError(null);
+ useEffect(() => {
+ const fetchConnectorInfo = async () => {
+ setIsLoading(true);
+ setError(null);
- try {
- // Fetch available connectors to validate the provider
- const connectorsResponse = await fetch("/api/connectors");
- if (!connectorsResponse.ok) {
- throw new Error("Failed to load connectors");
- }
+ try {
+ // Fetch available connectors to validate the provider
+ const connectorsResponse = await fetch("/api/connectors");
+ if (!connectorsResponse.ok) {
+ throw new Error("Failed to load connectors");
+ }
- const connectorsResult = await connectorsResponse.json();
- const providerInfo = connectorsResult.connectors[provider];
+ const connectorsResult = await connectorsResponse.json();
+ const providerInfo = connectorsResult.connectors[provider];
- if (!providerInfo || !providerInfo.available) {
- setError(
- `Cloud provider "${provider}" is not available or configured.`
- );
- return;
- }
+ if (!providerInfo || !providerInfo.available) {
+ setError(
+ `Cloud provider "${provider}" is not available or configured.`,
+ );
+ return;
+ }
- // Check connector status
- const statusResponse = await fetch(
- `/api/connectors/${provider}/status`
- );
- if (!statusResponse.ok) {
- throw new Error(`Failed to check ${provider} status`);
- }
+ // Check connector status
+ const statusResponse = await fetch(
+ `/api/connectors/${provider}/status`,
+ );
+ if (!statusResponse.ok) {
+ throw new Error(`Failed to check ${provider} status`);
+ }
- const statusData = await statusResponse.json();
- const connections = statusData.connections || [];
- const activeConnection = connections.find(
- (conn: { is_active: boolean; connection_id: string }) =>
- conn.is_active
- );
- const isConnected = activeConnection !== undefined;
+ const statusData = await statusResponse.json();
+ const connections = statusData.connections || [];
+ const activeConnection = connections.find(
+ (conn: { is_active: boolean; connection_id: string }) =>
+ conn.is_active,
+ );
+ const isConnected = activeConnection !== undefined;
- let hasAccessToken = false;
- let accessTokenError: string | undefined;
+ let hasAccessToken = false;
+ let accessTokenError: string | undefined;
- // Try to get access token for connected connectors
- if (isConnected && activeConnection) {
- try {
- const tokenResponse = await fetch(
- `/api/connectors/${provider}/token?connection_id=${activeConnection.connection_id}`
- );
- if (tokenResponse.ok) {
- const tokenData = await tokenResponse.json();
- if (tokenData.access_token) {
- hasAccessToken = true;
- setAccessToken(tokenData.access_token);
- }
- } else {
- const errorData = await tokenResponse
- .json()
- .catch(() => ({ error: "Token unavailable" }));
- accessTokenError = errorData.error || "Access token unavailable";
- }
- } catch {
- accessTokenError = "Failed to fetch access token";
- }
- }
+ // Try to get access token for connected connectors
+ if (isConnected && activeConnection) {
+ try {
+ const tokenResponse = await fetch(
+ `/api/connectors/${provider}/token?connection_id=${activeConnection.connection_id}`,
+ );
+ if (tokenResponse.ok) {
+ const tokenData = await tokenResponse.json();
+ if (tokenData.access_token) {
+ hasAccessToken = true;
+ setAccessToken(tokenData.access_token);
+ }
+ } else {
+ const errorData = await tokenResponse
+ .json()
+ .catch(() => ({ error: "Token unavailable" }));
+ accessTokenError = errorData.error || "Access token unavailable";
+ }
+ } catch {
+ accessTokenError = "Failed to fetch access token";
+ }
+ }
- setConnector({
- id: provider,
- name: providerInfo.name,
- description: providerInfo.description,
- status: isConnected ? "connected" : "not_connected",
- type: provider,
- connectionId: activeConnection?.connection_id,
- clientId: activeConnection?.client_id,
- hasAccessToken,
- accessTokenError,
- });
- } catch (error) {
- console.error("Failed to load connector info:", error);
- setError(
- error instanceof Error
- ? error.message
- : "Failed to load connector information"
- );
- } finally {
- setIsLoading(false);
- }
- };
+ setConnector({
+ id: provider,
+ name: providerInfo.name,
+ description: providerInfo.description,
+ status: isConnected ? "connected" : "not_connected",
+ type: provider,
+ connectionId: activeConnection?.connection_id,
+ clientId: activeConnection?.client_id,
+ hasAccessToken,
+ accessTokenError,
+ });
+ } catch (error) {
+ console.error("Failed to load connector info:", error);
+ setError(
+ error instanceof Error
+ ? error.message
+ : "Failed to load connector information",
+ );
+ } finally {
+ setIsLoading(false);
+ }
+ };
- if (provider) {
- fetchConnectorInfo();
- }
- }, [provider]);
+ if (provider) {
+ fetchConnectorInfo();
+ }
+ }, [provider]);
- // Watch for sync task completion and redirect
- useEffect(() => {
- if (!currentSyncTaskId) return;
+ // Watch for sync task completion and redirect
+ useEffect(() => {
+ if (!currentSyncTaskId) return;
- const currentTask = tasks.find(
- (task) => task.task_id === currentSyncTaskId
- );
+ const currentTask = tasks.find(
+ (task) => task.task_id === currentSyncTaskId,
+ );
- if (currentTask && currentTask.status === "completed") {
- // Task completed successfully, show toast and redirect
- setIsIngesting(false);
- setTimeout(() => {
- router.push("/knowledge");
- }, 2000); // 2 second delay to let user see toast
- } else if (currentTask && currentTask.status === "failed") {
- // Task failed, clear the tracking but don't redirect
- setIsIngesting(false);
- setCurrentSyncTaskId(null);
- }
- }, [tasks, currentSyncTaskId, router]);
+ if (currentTask && currentTask.status === "completed") {
+ // Task completed successfully, show toast and redirect
+ setIsIngesting(false);
+ setTimeout(() => {
+ router.push("/knowledge");
+ }, 2000); // 2 second delay to let user see toast
+ } else if (currentTask && currentTask.status === "failed") {
+ // Task failed, clear the tracking but don't redirect
+ setIsIngesting(false);
+ setCurrentSyncTaskId(null);
+ }
+ }, [tasks, currentSyncTaskId, router]);
- const handleFileSelected = (files: CloudFile[]) => {
- setSelectedFiles(files);
- console.log(`Selected ${files.length} files from ${provider}:`, files);
- // You can add additional handling here like triggering sync, etc.
- };
+ const handleFileSelected = (files: CloudFile[]) => {
+ setSelectedFiles(files);
+ console.log(`Selected ${files.length} files from ${provider}:`, files);
+ // You can add additional handling here like triggering sync, etc.
+ };
- const handleSync = async (connector: CloudConnector) => {
- if (!connector.connectionId || selectedFiles.length === 0) return;
+ const handleSync = async (connector: CloudConnector) => {
+ if (!connector.connectionId || selectedFiles.length === 0) return;
- setIsIngesting(true);
+ setIsIngesting(true);
- try {
- const syncBody: {
- connection_id: string;
- max_files?: number;
- selected_files?: string[];
- settings?: IngestSettings;
- } = {
- connection_id: connector.connectionId,
- selected_files: selectedFiles.map((file) => file.id),
- settings: ingestSettings,
- };
+ try {
+ const syncBody: {
+ connection_id: string;
+ max_files?: number;
+ selected_files?: string[];
+ settings?: IngestSettings;
+ } = {
+ connection_id: connector.connectionId,
+ selected_files: selectedFiles.map((file) => file.id),
+ settings: ingestSettings,
+ };
- const response = await fetch(`/api/connectors/${connector.type}/sync`, {
- method: "POST",
- headers: {
- "Content-Type": "application/json",
- },
- body: JSON.stringify(syncBody),
- });
+ const response = await fetch(`/api/connectors/${connector.type}/sync`, {
+ method: "POST",
+ headers: {
+ "Content-Type": "application/json",
+ },
+ body: JSON.stringify(syncBody),
+ });
- const result = await response.json();
+ const result = await response.json();
- if (response.status === 201) {
- const taskIds = result.task_ids;
- if (taskIds && taskIds.length > 0) {
- const taskId = taskIds[0]; // Use the first task ID
- addTask(taskId);
- setCurrentSyncTaskId(taskId);
- }
- } else {
- console.error("Sync failed:", result.error);
- }
- } catch (error) {
- console.error("Sync error:", error);
- setIsIngesting(false);
- }
- };
+ if (response.status === 201) {
+ const taskIds = result.task_ids;
+ if (taskIds && taskIds.length > 0) {
+ const taskId = taskIds[0]; // Use the first task ID
+ addTask(taskId);
+ setCurrentSyncTaskId(taskId);
+ }
+ } else {
+ console.error("Sync failed:", result.error);
+ }
+ } catch (error) {
+ console.error("Sync error:", error);
+ setIsIngesting(false);
+ }
+ };
- const getProviderDisplayName = () => {
- const nameMap: { [key: string]: string } = {
- google_drive: "Google Drive",
- onedrive: "OneDrive",
- sharepoint: "SharePoint",
- };
- return nameMap[provider] || provider;
- };
+ const getProviderDisplayName = () => {
+ const nameMap: { [key: string]: string } = {
+ google_drive: "Google Drive",
+ onedrive: "OneDrive",
+ sharepoint: "SharePoint",
+ };
+ return nameMap[provider] || provider;
+ };
- if (isLoading) {
- return (
-
-
-
-
-
Loading {getProviderDisplayName()} connector...
-
-
-
- );
- }
+ if (isLoading) {
+ return (
+
+
+
+
+
Loading {getProviderDisplayName()} connector...
+
+
+
+ );
+ }
- if (error || !connector) {
- return (
-
-
-
router.back()}
- className="mb-4"
- >
-
- Back
-
-
+ if (error || !connector) {
+ return (
+
+
+
router.back()}
+ className="mb-4"
+ >
+
+ Back
+
+
-
-
-
-
- Provider Not Available
-
-
{error}
-
router.push("/settings")}>
- Configure Connectors
-
-
-
-
- );
- }
+
+
+
+
+ Provider Not Available
+
+
{error}
+
router.push("/settings")}>
+ Configure Connectors
+
+
+
+
+ );
+ }
- if (connector.status !== "connected") {
- return (
-
-
-
router.back()}
- className="mb-4"
- >
-
- Back
-
-
+ if (connector.status !== "connected") {
+ return (
+
+
+
router.back()}
+ className="mb-4"
+ >
+
+ Back
+
+
-
-
-
-
- {connector.name} Not Connected
-
-
- You need to connect your {connector.name} account before you can
- select files.
-
-
router.push("/settings")}>
- Connect {connector.name}
-
-
-
-
- );
- }
+
+
+
+
+ {connector.name} Not Connected
+
+
+ You need to connect your {connector.name} account before you can
+ select files.
+
+
router.push("/settings")}>
+ Connect {connector.name}
+
+
+
+
+ );
+ }
- if (!connector.hasAccessToken) {
- return (
-
-
-
router.back()}
- className="mb-4"
- >
-
- Back
-
-
+ if (!connector.hasAccessToken) {
+ return (
+
+
+
router.back()}
+ className="mb-4"
+ >
+
+ Back
+
+
-
-
-
-
- Access Token Required
-
-
- {connector.accessTokenError ||
- `Unable to get access token for ${connector.name}. Try reconnecting your account.`}
-
-
router.push("/settings")}>
- Reconnect {connector.name}
-
-
-
-
- );
- }
+
+
+
+
+ Access Token Required
+
+
+ {connector.accessTokenError ||
+ `Unable to get access token for ${connector.name}. Try reconnecting your account.`}
+
+
router.push("/settings")}>
+ Reconnect {connector.name}
+
+
+
+
+ );
+ }
- return (
-
-
-
router.back()} size="icon">
-
-
-
- Add from {getProviderDisplayName()}
-
-
+ return (
+
+
+
router.back()}>
+
+
+
+ Add from {getProviderDisplayName()}
+
+
-
-
-
+
+
+
-
-
- router.back()}
- >
- Back
-
- handleSync(connector)}
- disabled={selectedFiles.length === 0 || isIngesting}
- >
- {isIngesting ? (
- <>
- Ingesting {selectedFiles.length} file
- {selectedFiles.length > 1 ? "s" : ""}...
- >
- ) : (
- <>Ingest files>
- )}
-
-
-
-
- );
+
+
+ router.back()}
+ >
+ Back
+
+ handleSync(connector)}
+ disabled={selectedFiles.length === 0 || isIngesting}
+ >
+ {isIngesting ? (
+ <>Ingesting {selectedFiles.length} Files...>
+ ) : (
+ <>Start ingest>
+ )}
+
+
+
+
+ );
}
From 641a24aa0664cf3a1f33f98da2ecabd85127f2ed Mon Sep 17 00:00:00 2001
From: Lucas Oliveira
Date: Mon, 6 Oct 2025 11:57:51 -0300
Subject: [PATCH 37/39] removed unused file
---
.../app/api/queries/useGetTaskStatusQuery.ts | 80 -------------------
1 file changed, 80 deletions(-)
delete mode 100644 frontend/src/app/api/queries/useGetTaskStatusQuery.ts
diff --git a/frontend/src/app/api/queries/useGetTaskStatusQuery.ts b/frontend/src/app/api/queries/useGetTaskStatusQuery.ts
deleted file mode 100644
index 17cd2d16..00000000
--- a/frontend/src/app/api/queries/useGetTaskStatusQuery.ts
+++ /dev/null
@@ -1,80 +0,0 @@
-import {
- type UseQueryOptions,
- useQuery,
- useQueryClient,
-} from "@tanstack/react-query";
-
-export interface TaskStatus {
- task_id: string;
- status:
- | "pending"
- | "running"
- | "processing"
- | "completed"
- | "failed"
- | "error";
- total_files?: number;
- processed_files?: number;
- successful_files?: number;
- failed_files?: number;
- running_files?: number;
- pending_files?: number;
- created_at: string;
- updated_at: string;
- duration_seconds?: number;
- result?: Record;
- error?: string;
- files?: Record>;
-}
-
-export const useGetTaskStatusQuery = (
- taskId: string,
- options?: Omit, "queryKey" | "queryFn">
-) => {
- const queryClient = useQueryClient();
-
- async function getTaskStatus(): Promise {
- if (!taskId) {
- return null;
- }
-
- const response = await fetch(`/api/tasks/${taskId}`);
-
- if (!response.ok) {
- if (response.status === 404) {
- return null; // Task not found
- }
- throw new Error("Failed to fetch task status");
- }
-
- return response.json();
- }
-
- const queryResult = useQuery(
- {
- queryKey: ["task-status", taskId],
- queryFn: getTaskStatus,
- refetchInterval: (data) => {
- // Only poll if the task is still active
- if (!data) {
- return false; // Stop polling if no data
- }
-
- const isActive =
- data.status === "pending" ||
- data.status === "running" ||
- data.status === "processing";
-
- return isActive ? 3000 : false; // Poll every 3 seconds if active
- },
- refetchIntervalInBackground: true,
- staleTime: 0, // Always consider data stale to ensure fresh updates
- gcTime: 5 * 60 * 1000, // Keep in cache for 5 minutes
- enabled: !!taskId, // Only run if taskId is provided
- ...options,
- },
- queryClient,
- );
-
- return queryResult;
-};
From 1643e10d5f215443c9039c1e07d8024613f96546 Mon Sep 17 00:00:00 2001
From: Brent O'Neill
Date: Mon, 6 Oct 2025 09:53:14 -0600
Subject: [PATCH 38/39] Cleanup commented code
---
frontend/src/app/knowledge/chunks/page.tsx | 32 +++++++++++-----------
1 file changed, 16 insertions(+), 16 deletions(-)
diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx
index 9ae3619c..6288bcfc 100644
--- a/frontend/src/app/knowledge/chunks/page.tsx
+++ b/frontend/src/app/knowledge/chunks/page.tsx
@@ -13,8 +13,8 @@ import {
type File,
useGetSearchQuery,
} from "../../api/queries/useGetSearchQuery";
-import { Label } from "@/components/ui/label";
-import { Checkbox } from "@/components/ui/checkbox";
+// import { Label } from "@/components/ui/label";
+// import { Checkbox } from "@/components/ui/checkbox";
import { filterAccentClasses } from "@/components/knowledge-filter-panel";
const getFileTypeLabel = (mimetype: string) => {
@@ -106,20 +106,20 @@ function ChunksPageContent() {
router.push("/knowledge");
}, [router]);
- const handleChunkCardCheckboxChange = useCallback(
- (index: number) => {
- setSelectedChunks((prevSelected) => {
- const newSelected = new Set(prevSelected);
- if (newSelected.has(index)) {
- newSelected.delete(index);
- } else {
- newSelected.add(index);
- }
- return newSelected;
- });
- },
- [setSelectedChunks]
- );
+ // const handleChunkCardCheckboxChange = useCallback(
+ // (index: number) => {
+ // setSelectedChunks((prevSelected) => {
+ // const newSelected = new Set(prevSelected);
+ // if (newSelected.has(index)) {
+ // newSelected.delete(index);
+ // } else {
+ // newSelected.add(index);
+ // }
+ // return newSelected;
+ // });
+ // },
+ // [setSelectedChunks]
+ // );
if (!filename) {
return (
From 41e2d39dd061d5582466ad1340755da255008ff6 Mon Sep 17 00:00:00 2001
From: Edwin Jose
Date: Mon, 6 Oct 2025 14:36:17 -0400
Subject: [PATCH 39/39] feat: Show globe icon for HTML documents in knowledge
page (#188)
* Show globe icon for HTML documents in knowledge page
Updated getSourceIcon to display a globe icon for documents with 'text/html' mimetype, regardless of connector type. This improves visual identification of web-based documents in the grid.
* changed page to get connector type as url
* removed wrong docs
* fix formatting
* format
---------
Co-authored-by: Lucas Oliveira
---
frontend/src/app/knowledge/page.tsx | 651 ++++++++++++++--------------
1 file changed, 331 insertions(+), 320 deletions(-)
diff --git a/frontend/src/app/knowledge/page.tsx b/frontend/src/app/knowledge/page.tsx
index 64eeb49c..2cd6f382 100644
--- a/frontend/src/app/knowledge/page.tsx
+++ b/frontend/src/app/knowledge/page.tsx
@@ -2,14 +2,22 @@
import type { ColDef, GetRowIdParams } from "ag-grid-community";
import { AgGridReact, type CustomCellRendererProps } from "ag-grid-react";
-import { Building2, Cloud, HardDrive, Search, Trash2, X } from "lucide-react";
+import {
+ Building2,
+ Cloud,
+ Globe,
+ HardDrive,
+ Search,
+ Trash2,
+ X,
+} from "lucide-react";
import { useRouter } from "next/navigation";
import {
- type ChangeEvent,
- useCallback,
- useEffect,
- useRef,
- useState,
+ type ChangeEvent,
+ useCallback,
+ useEffect,
+ useRef,
+ useState,
} from "react";
import { SiGoogledrive } from "react-icons/si";
import { TbBrandOnedrive } from "react-icons/tb";
@@ -31,250 +39,255 @@ import { useDeleteDocument } from "../api/mutations/useDeleteDocument";
// Function to get the appropriate icon for a connector type
function getSourceIcon(connectorType?: string) {
- switch (connectorType) {
- case "google_drive":
- return (
-
- );
- case "onedrive":
- return (
-
- );
- case "sharepoint":
- return ;
- case "s3":
- return ;
- default:
- return (
-
- );
- }
+ switch (connectorType) {
+ case "url":
+ return ;
+ case "google_drive":
+ return (
+
+ );
+ case "onedrive":
+ return (
+
+ );
+ case "sharepoint":
+ return ;
+ case "s3":
+ return ;
+ default:
+ return (
+
+ );
+ }
}
function SearchPage() {
- const router = useRouter();
- const { isMenuOpen, files: taskFiles, refreshTasks } = useTask();
+ const router = useRouter();
+ const { isMenuOpen, files: taskFiles, refreshTasks } = useTask();
const { totalTopOffset } = useLayout();
- const { selectedFilter, setSelectedFilter, parsedFilterData, isPanelOpen } =
- useKnowledgeFilter();
- const [selectedRows, setSelectedRows] = useState([]);
- const [showBulkDeleteDialog, setShowBulkDeleteDialog] = useState(false);
+ const { selectedFilter, setSelectedFilter, parsedFilterData, isPanelOpen } =
+ useKnowledgeFilter();
+ const [selectedRows, setSelectedRows] = useState([]);
+ const [showBulkDeleteDialog, setShowBulkDeleteDialog] = useState(false);
- const deleteDocumentMutation = useDeleteDocument();
+ const deleteDocumentMutation = useDeleteDocument();
- useEffect(() => {
- refreshTasks();
- }, [refreshTasks]);
+ useEffect(() => {
+ refreshTasks();
+ }, [refreshTasks]);
- const { data: searchData = [], isFetching } = useGetSearchQuery(
- parsedFilterData?.query || "*",
- parsedFilterData,
- );
- // Convert TaskFiles to File format and merge with backend results
- const taskFilesAsFiles: File[] = taskFiles.map((taskFile) => {
- return {
- filename: taskFile.filename,
- mimetype: taskFile.mimetype,
- source_url: taskFile.source_url,
- size: taskFile.size,
- connector_type: taskFile.connector_type,
- status: taskFile.status,
- };
- });
+ const { data: searchData = [], isFetching } = useGetSearchQuery(
+ parsedFilterData?.query || "*",
+ parsedFilterData,
+ );
+ // Convert TaskFiles to File format and merge with backend results
+ const taskFilesAsFiles: File[] = taskFiles.map((taskFile) => {
+ return {
+ filename: taskFile.filename,
+ mimetype: taskFile.mimetype,
+ source_url: taskFile.source_url,
+ size: taskFile.size,
+ connector_type: taskFile.connector_type,
+ status: taskFile.status,
+ };
+ });
- // Create a map of task files by filename for quick lookup
- const taskFileMap = new Map(
- taskFilesAsFiles.map((file) => [file.filename, file]),
- );
+ // Create a map of task files by filename for quick lookup
+ const taskFileMap = new Map(
+ taskFilesAsFiles.map((file) => [file.filename, file]),
+ );
- // Override backend files with task file status if they exist
- const backendFiles = (searchData as File[])
- .map((file) => {
- const taskFile = taskFileMap.get(file.filename);
- if (taskFile) {
- // Override backend file with task file data (includes status)
- return { ...file, ...taskFile };
- }
- return file;
- })
- .filter((file) => {
- // Only filter out files that are currently processing AND in taskFiles
- const taskFile = taskFileMap.get(file.filename);
- return !taskFile || taskFile.status !== "processing";
- });
+ // Override backend files with task file status if they exist
+ const backendFiles = (searchData as File[])
+ .map((file) => {
+ const taskFile = taskFileMap.get(file.filename);
+ if (taskFile) {
+ // Override backend file with task file data (includes status)
+ return { ...file, ...taskFile };
+ }
+ return file;
+ })
+ .filter((file) => {
+ // Only filter out files that are currently processing AND in taskFiles
+ const taskFile = taskFileMap.get(file.filename);
+ return !taskFile || taskFile.status !== "processing";
+ });
- const filteredTaskFiles = taskFilesAsFiles.filter((taskFile) => {
- return (
- taskFile.status !== "active" &&
- !backendFiles.some(
- (backendFile) => backendFile.filename === taskFile.filename,
- )
- );
- });
+ const filteredTaskFiles = taskFilesAsFiles.filter((taskFile) => {
+ return (
+ taskFile.status !== "active" &&
+ !backendFiles.some(
+ (backendFile) => backendFile.filename === taskFile.filename,
+ )
+ );
+ });
- // Combine task files first, then backend files
- const fileResults = [...backendFiles, ...filteredTaskFiles];
+ // Combine task files first, then backend files
+ const fileResults = [...backendFiles, ...filteredTaskFiles];
- const handleTableSearch = (e: ChangeEvent) => {
- gridRef.current?.api.setGridOption("quickFilterText", e.target.value);
- };
+ const handleTableSearch = (e: ChangeEvent) => {
+ gridRef.current?.api.setGridOption("quickFilterText", e.target.value);
+ };
- const gridRef = useRef(null);
+ const gridRef = useRef(null);
- const columnDefs = [
- {
- field: "filename",
- headerName: "Source",
- checkboxSelection: (params: CustomCellRendererProps) =>
- (params?.data?.status || "active") === "active",
- headerCheckboxSelection: true,
- initialFlex: 2,
- minWidth: 220,
- cellRenderer: ({ data, value }: CustomCellRendererProps) => {
- // Read status directly from data on each render
- const status = data?.status || "active";
- const isActive = status === "active";
- console.log(data?.filename, status, "a");
- return (
-
-
-
{
- if (!isActive) {
- return;
- }
- router.push(
- `/knowledge/chunks?filename=${encodeURIComponent(
- data?.filename ?? "",
- )}`,
- );
- }}
- >
- {getSourceIcon(data?.connector_type)}
-
- {value}
-
-
-
- );
- },
- },
- {
- field: "size",
- headerName: "Size",
- valueFormatter: (params: CustomCellRendererProps) =>
- params.value ? `${Math.round(params.value / 1024)} KB` : "-",
- },
- {
- field: "mimetype",
- headerName: "Type",
- },
- {
- field: "owner",
- headerName: "Owner",
- valueFormatter: (params: CustomCellRendererProps) =>
- params.data?.owner_name || params.data?.owner_email || "—",
- },
- {
- field: "chunkCount",
- headerName: "Chunks",
- valueFormatter: (params: CustomCellRendererProps) => params.data?.chunkCount?.toString() || "-",
- },
- {
- field: "avgScore",
- headerName: "Avg score",
- initialFlex: 0.5,
- cellRenderer: ({ value }: CustomCellRendererProps) => {
- return (
-
- {value?.toFixed(2) ?? "-"}
-
- );
- },
- },
- {
- field: "status",
- headerName: "Status",
- cellRenderer: ({ data }: CustomCellRendererProps) => {
- console.log(data?.filename, data?.status, "b");
- // Default to 'active' status if no status is provided
- const status = data?.status || "active";
- return ;
- },
- },
- {
- cellRenderer: ({ data }: CustomCellRendererProps) => {
- const status = data?.status || "active";
- if (status !== "active") {
- return null;
- }
- return ;
- },
- cellStyle: {
- alignItems: "center",
- display: "flex",
- justifyContent: "center",
- padding: 0,
- },
- colId: "actions",
- filter: false,
- minWidth: 0,
- width: 40,
- resizable: false,
- sortable: false,
- initialFlex: 0,
- },
- ];
+ const columnDefs = [
+ {
+ field: "filename",
+ headerName: "Source",
+ checkboxSelection: (params: CustomCellRendererProps) =>
+ (params?.data?.status || "active") === "active",
+ headerCheckboxSelection: true,
+ initialFlex: 2,
+ minWidth: 220,
+ cellRenderer: ({ data, value }: CustomCellRendererProps) => {
+ // Read status directly from data on each render
+ const status = data?.status || "active";
+ const isActive = status === "active";
+ console.log(data?.filename, status, "a");
+ return (
+
+
+
{
+ if (!isActive) {
+ return;
+ }
+ router.push(
+ `/knowledge/chunks?filename=${encodeURIComponent(
+ data?.filename ?? "",
+ )}`,
+ );
+ }}
+ >
+ {getSourceIcon(data?.connector_type)}
+
+ {value}
+
+
+
+ );
+ },
+ },
+ {
+ field: "size",
+ headerName: "Size",
+ valueFormatter: (params: CustomCellRendererProps) =>
+ params.value ? `${Math.round(params.value / 1024)} KB` : "-",
+ },
+ {
+ field: "mimetype",
+ headerName: "Type",
+ },
+ {
+ field: "owner",
+ headerName: "Owner",
+ valueFormatter: (params: CustomCellRendererProps) =>
+ params.data?.owner_name || params.data?.owner_email || "—",
+ },
+ {
+ field: "chunkCount",
+ headerName: "Chunks",
+ valueFormatter: (params: CustomCellRendererProps) =>
+ params.data?.chunkCount?.toString() || "-",
+ },
+ {
+ field: "avgScore",
+ headerName: "Avg score",
+ initialFlex: 0.5,
+ cellRenderer: ({ value }: CustomCellRendererProps) => {
+ return (
+
+ {value?.toFixed(2) ?? "-"}
+
+ );
+ },
+ },
+ {
+ field: "status",
+ headerName: "Status",
+ cellRenderer: ({ data }: CustomCellRendererProps) => {
+ console.log(data?.filename, data?.status, "b");
+ // Default to 'active' status if no status is provided
+ const status = data?.status || "active";
+ return ;
+ },
+ },
+ {
+ cellRenderer: ({ data }: CustomCellRendererProps) => {
+ const status = data?.status || "active";
+ if (status !== "active") {
+ return null;
+ }
+ return ;
+ },
+ cellStyle: {
+ alignItems: "center",
+ display: "flex",
+ justifyContent: "center",
+ padding: 0,
+ },
+ colId: "actions",
+ filter: false,
+ minWidth: 0,
+ width: 40,
+ resizable: false,
+ sortable: false,
+ initialFlex: 0,
+ },
+ ];
- const defaultColDef: ColDef = {
- resizable: false,
- suppressMovable: true,
- initialFlex: 1,
- minWidth: 100,
- };
+ const defaultColDef: ColDef = {
+ resizable: false,
+ suppressMovable: true,
+ initialFlex: 1,
+ minWidth: 100,
+ };
- const onSelectionChanged = useCallback(() => {
- if (gridRef.current) {
- const selectedNodes = gridRef.current.api.getSelectedRows();
- setSelectedRows(selectedNodes);
- }
- }, []);
+ const onSelectionChanged = useCallback(() => {
+ if (gridRef.current) {
+ const selectedNodes = gridRef.current.api.getSelectedRows();
+ setSelectedRows(selectedNodes);
+ }
+ }, []);
- const handleBulkDelete = async () => {
- if (selectedRows.length === 0) return;
+ const handleBulkDelete = async () => {
+ if (selectedRows.length === 0) return;
- try {
- // Delete each file individually since the API expects one filename at a time
- const deletePromises = selectedRows.map((row) =>
- deleteDocumentMutation.mutateAsync({ filename: row.filename }),
- );
+ try {
+ // Delete each file individually since the API expects one filename at a time
+ const deletePromises = selectedRows.map((row) =>
+ deleteDocumentMutation.mutateAsync({ filename: row.filename }),
+ );
- await Promise.all(deletePromises);
+ await Promise.all(deletePromises);
- toast.success(
- `Successfully deleted ${selectedRows.length} document${
- selectedRows.length > 1 ? "s" : ""
- }`,
- );
- setSelectedRows([]);
- setShowBulkDeleteDialog(false);
+ toast.success(
+ `Successfully deleted ${selectedRows.length} document${
+ selectedRows.length > 1 ? "s" : ""
+ }`,
+ );
+ setSelectedRows([]);
+ setShowBulkDeleteDialog(false);
- // Clear selection in the grid
- if (gridRef.current) {
- gridRef.current.api.deselectAll();
- }
- } catch (error) {
- toast.error(
- error instanceof Error
- ? error.message
- : "Failed to delete some documents",
- );
- }
- };
+ // Clear selection in the grid
+ if (gridRef.current) {
+ gridRef.current.api.deselectAll();
+ }
+ } catch (error) {
+ toast.error(
+ error instanceof Error
+ ? error.message
+ : "Failed to delete some documents",
+ );
+ }
+ };
return (
- {/* Search Input Area */}
-
-
-
- {selectedFilter?.name && (
-
- {selectedFilter?.name}
- setSelectedFilter(null)}
- />
-
- )}
-
-
-
- {/*
+
+
+ {selectedFilter?.name && (
+
+ {selectedFilter?.name}
+ setSelectedFilter(null)}
+ />
+
+ )}
+
+
+
+ {/*
)}
*/}
- {/* //TODO: Implement sync button */}
- {/*
Sync
*/}
- {selectedRows.length > 0 && (
- setShowBulkDeleteDialog(true)}
- >
- Delete
-
- )}
-
-
- []}
- defaultColDef={defaultColDef}
- loading={isFetching}
- ref={gridRef}
- rowData={fileResults}
- rowSelection="multiple"
- rowMultiSelectWithClick={false}
- suppressRowClickSelection={true}
- getRowId={(params: GetRowIdParams) => params.data?.filename}
- domLayout="normal"
- onSelectionChanged={onSelectionChanged}
- noRowsOverlayComponent={() => (
-
-
- No knowledge
-
-
- Add files from local or your preferred cloud.
-
-
- )}
- />
-
+ {selectedRows.length > 0 && (
+
setShowBulkDeleteDialog(true)}
+ >
+ Delete
+
+ )}
+
+
+
[]}
+ defaultColDef={defaultColDef}
+ loading={isFetching}
+ ref={gridRef}
+ rowData={fileResults}
+ rowSelection="multiple"
+ rowMultiSelectWithClick={false}
+ suppressRowClickSelection={true}
+ getRowId={(params: GetRowIdParams) => params.data?.filename}
+ domLayout="normal"
+ onSelectionChanged={onSelectionChanged}
+ noRowsOverlayComponent={() => (
+
+
+ No knowledge
+
+
+ Add files from local or your preferred cloud.
+
+
+ )}
+ />
+
- {/* Bulk Delete Confirmation Dialog */}
- 1 ? "s" : ""
- }? This will remove all chunks and data associated with these documents. This action cannot be undone.
+ {/* Bulk Delete Confirmation Dialog */}
+ 1 ? "s" : ""
+ }? This will remove all chunks and data associated with these documents. This action cannot be undone.
Documents to be deleted:
${selectedRows.map((row) => `• ${row.filename}`).join("\n")}`}
- confirmText="Delete All"
- onConfirm={handleBulkDelete}
- isLoading={deleteDocumentMutation.isPending}
- />
-
- );
+ confirmText="Delete All"
+ onConfirm={handleBulkDelete}
+ isLoading={deleteDocumentMutation.isPending}
+ />
+
+ );
}
export default function ProtectedSearchPage() {
- return (
-
-
-
- );
+ return (
+
+
+
+ );
}