Merge branch 'main' into knowledge-status
This commit is contained in:
commit
9a14192fd3
9 changed files with 2565 additions and 113 deletions
|
|
@ -8,6 +8,8 @@ LANGFLOW_SECRET_KEY=
|
||||||
# flow ids for chat and ingestion flows
|
# flow ids for chat and ingestion flows
|
||||||
LANGFLOW_CHAT_FLOW_ID=1098eea1-6649-4e1d-aed1-b77249fb8dd0
|
LANGFLOW_CHAT_FLOW_ID=1098eea1-6649-4e1d-aed1-b77249fb8dd0
|
||||||
LANGFLOW_INGEST_FLOW_ID=5488df7c-b93f-4f87-a446-b67028bc0813
|
LANGFLOW_INGEST_FLOW_ID=5488df7c-b93f-4f87-a446-b67028bc0813
|
||||||
|
# Ingest flow using docling
|
||||||
|
LANGFLOW_INGEST_FLOW_ID=1402618b-e6d1-4ff2-9a11-d6ce71186915
|
||||||
NUDGES_FLOW_ID=ebc01d31-1976-46ce-a385-b0240327226c
|
NUDGES_FLOW_ID=ebc01d31-1976-46ce-a385-b0240327226c
|
||||||
|
|
||||||
# Set a strong admin password for OpenSearch; a bcrypt hash is generated at
|
# Set a strong admin password for OpenSearch; a bcrypt hash is generated at
|
||||||
|
|
|
||||||
2220
flows/openrag_ingest_docling.json
Normal file
2220
flows/openrag_ingest_docling.json
Normal file
File diff suppressed because one or more lines are too long
|
|
@ -1,17 +1,14 @@
|
||||||
"use client";
|
"use client";
|
||||||
|
|
||||||
import {
|
import {
|
||||||
Building2,
|
ArrowLeft,
|
||||||
Cloud,
|
Copy,
|
||||||
FileText,
|
File as FileIcon,
|
||||||
HardDrive,
|
|
||||||
Loader2,
|
Loader2,
|
||||||
Search,
|
Search,
|
||||||
} from "lucide-react";
|
} from "lucide-react";
|
||||||
import { Suspense, useCallback, useEffect, useState } from "react";
|
import { Suspense, useCallback, useEffect, useMemo, useState } from "react";
|
||||||
import { useRouter, useSearchParams } from "next/navigation";
|
import { useRouter, useSearchParams } from "next/navigation";
|
||||||
import { SiGoogledrive } from "react-icons/si";
|
|
||||||
import { TbBrandOnedrive } from "react-icons/tb";
|
|
||||||
import { ProtectedRoute } from "@/components/protected-route";
|
import { ProtectedRoute } from "@/components/protected-route";
|
||||||
import { Button } from "@/components/ui/button";
|
import { Button } from "@/components/ui/button";
|
||||||
import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context";
|
import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context";
|
||||||
|
|
@ -21,22 +18,16 @@ import {
|
||||||
type File,
|
type File,
|
||||||
useGetSearchQuery,
|
useGetSearchQuery,
|
||||||
} from "../../api/queries/useGetSearchQuery";
|
} from "../../api/queries/useGetSearchQuery";
|
||||||
|
import { Label } from "@/components/ui/label";
|
||||||
|
import { Checkbox } from "@/components/ui/checkbox";
|
||||||
|
import { Input } from "@/components/ui/input";
|
||||||
|
|
||||||
// Function to get the appropriate icon for a connector type
|
const getFileTypeLabel = (mimetype: string) => {
|
||||||
function getSourceIcon(connectorType?: string) {
|
if (mimetype === "application/pdf") return "PDF";
|
||||||
switch (connectorType) {
|
if (mimetype === "text/plain") return "Text";
|
||||||
case "google_drive":
|
if (mimetype === "application/msword") return "Word Document";
|
||||||
return <SiGoogledrive className="h-4 w-4 text-foreground" />;
|
return "Unknown";
|
||||||
case "onedrive":
|
};
|
||||||
return <TbBrandOnedrive className="h-4 w-4 text-foreground" />;
|
|
||||||
case "sharepoint":
|
|
||||||
return <Building2 className="h-4 w-4 text-foreground" />;
|
|
||||||
case "s3":
|
|
||||||
return <Cloud className="h-4 w-4 text-foreground" />;
|
|
||||||
default:
|
|
||||||
return <HardDrive className="h-4 w-4 text-muted-foreground" />;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function ChunksPageContent() {
|
function ChunksPageContent() {
|
||||||
const router = useRouter();
|
const router = useRouter();
|
||||||
|
|
@ -46,10 +37,47 @@ function ChunksPageContent() {
|
||||||
|
|
||||||
const filename = searchParams.get("filename");
|
const filename = searchParams.get("filename");
|
||||||
const [chunks, setChunks] = useState<ChunkResult[]>([]);
|
const [chunks, setChunks] = useState<ChunkResult[]>([]);
|
||||||
|
const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState<
|
||||||
|
ChunkResult[]
|
||||||
|
>([]);
|
||||||
|
const [selectedChunks, setSelectedChunks] = useState<Set<number>>(new Set());
|
||||||
|
|
||||||
|
// Calculate average chunk length
|
||||||
|
const averageChunkLength = useMemo(
|
||||||
|
() =>
|
||||||
|
chunks.reduce((acc, chunk) => acc + chunk.text.length, 0) /
|
||||||
|
chunks.length || 0,
|
||||||
|
[chunks]
|
||||||
|
);
|
||||||
|
|
||||||
|
const [selectAll, setSelectAll] = useState(false);
|
||||||
|
const [queryInputText, setQueryInputText] = useState(
|
||||||
|
parsedFilterData?.query ?? ""
|
||||||
|
);
|
||||||
|
|
||||||
// Use the same search query as the knowledge page, but we'll filter for the specific file
|
// Use the same search query as the knowledge page, but we'll filter for the specific file
|
||||||
const { data = [], isFetching } = useGetSearchQuery("*", parsedFilterData);
|
const { data = [], isFetching } = useGetSearchQuery("*", parsedFilterData);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
if (queryInputText === "") {
|
||||||
|
setChunksFilteredByQuery(chunks);
|
||||||
|
} else {
|
||||||
|
setChunksFilteredByQuery(
|
||||||
|
chunks.filter((chunk) =>
|
||||||
|
chunk.text.toLowerCase().includes(queryInputText.toLowerCase())
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}, [queryInputText, chunks]);
|
||||||
|
|
||||||
|
const handleCopy = useCallback((text: string) => {
|
||||||
|
navigator.clipboard.writeText(text);
|
||||||
|
}, []);
|
||||||
|
|
||||||
|
const fileData = (data as File[]).find(
|
||||||
|
(file: File) => file.filename === filename
|
||||||
|
);
|
||||||
|
|
||||||
// Extract chunks for the specific file
|
// Extract chunks for the specific file
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (!filename || !(data as File[]).length) {
|
if (!filename || !(data as File[]).length) {
|
||||||
|
|
@ -57,16 +85,37 @@ function ChunksPageContent() {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const fileData = (data as File[]).find(
|
|
||||||
(file: File) => file.filename === filename
|
|
||||||
);
|
|
||||||
setChunks(fileData?.chunks || []);
|
setChunks(fileData?.chunks || []);
|
||||||
}, [data, filename]);
|
}, [data, filename]);
|
||||||
|
|
||||||
|
// Set selected state for all checkboxes when selectAll changes
|
||||||
|
useEffect(() => {
|
||||||
|
if (selectAll) {
|
||||||
|
setSelectedChunks(new Set(chunks.map((_, index) => index)));
|
||||||
|
} else {
|
||||||
|
setSelectedChunks(new Set());
|
||||||
|
}
|
||||||
|
}, [selectAll, setSelectedChunks, chunks]);
|
||||||
|
|
||||||
const handleBack = useCallback(() => {
|
const handleBack = useCallback(() => {
|
||||||
router.back();
|
router.push("/knowledge");
|
||||||
}, [router]);
|
}, [router]);
|
||||||
|
|
||||||
|
const handleChunkCardCheckboxChange = useCallback(
|
||||||
|
(index: number) => {
|
||||||
|
setSelectedChunks((prevSelected) => {
|
||||||
|
const newSelected = new Set(prevSelected);
|
||||||
|
if (newSelected.has(index)) {
|
||||||
|
newSelected.delete(index);
|
||||||
|
} else {
|
||||||
|
newSelected.add(index);
|
||||||
|
}
|
||||||
|
return newSelected;
|
||||||
|
});
|
||||||
|
},
|
||||||
|
[setSelectedChunks]
|
||||||
|
);
|
||||||
|
|
||||||
if (!filename) {
|
if (!filename) {
|
||||||
return (
|
return (
|
||||||
<div className="flex items-center justify-center h-64">
|
<div className="flex items-center justify-center h-64">
|
||||||
|
|
@ -83,7 +132,7 @@ function ChunksPageContent() {
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div
|
<div
|
||||||
className={`fixed inset-0 md:left-72 top-[53px] flex flex-col transition-all duration-300 ${
|
className={`fixed inset-0 md:left-72 top-[53px] flex flex-row transition-all duration-300 ${
|
||||||
isMenuOpen && isPanelOpen
|
isMenuOpen && isPanelOpen
|
||||||
? "md:right-[704px]"
|
? "md:right-[704px]"
|
||||||
: // Both open: 384px (menu) + 320px (KF panel)
|
: // Both open: 384px (menu) + 320px (KF panel)
|
||||||
|
|
@ -98,29 +147,47 @@ function ChunksPageContent() {
|
||||||
>
|
>
|
||||||
<div className="flex-1 flex flex-col min-h-0 px-6 py-6">
|
<div className="flex-1 flex flex-col min-h-0 px-6 py-6">
|
||||||
{/* Header */}
|
{/* Header */}
|
||||||
<div className="flex items-center justify-between mb-6">
|
<div className="flex flex-col mb-6">
|
||||||
<div className="flex items-center gap-3">
|
<div className="flex items-center gap-3 mb-2">
|
||||||
<Button
|
<Button variant="ghost" onClick={handleBack}>
|
||||||
variant="ghost"
|
<ArrowLeft size={18} />
|
||||||
size="sm"
|
<FileIcon className="text-muted-foreground" size={18} />
|
||||||
onClick={handleBack}
|
<h1 className="text-lg font-semibold">
|
||||||
className="text-muted-foreground hover:text-foreground px-2"
|
{filename.replace(/\.[^/.]+$/, "")}
|
||||||
>
|
</h1>
|
||||||
← Back
|
|
||||||
</Button>
|
</Button>
|
||||||
<div className="flex flex-col">
|
|
||||||
<h2 className="text-lg font-semibold">Document Chunks</h2>
|
|
||||||
<p className="text-sm text-muted-foreground truncate max-w-md">
|
|
||||||
{decodeURIComponent(filename)}
|
|
||||||
</p>
|
|
||||||
</div>
|
|
||||||
</div>
|
</div>
|
||||||
<div className="text-sm text-muted-foreground">
|
<div className="flex items-center gap-3 pl-4 mt-2">
|
||||||
{!isFetching && chunks.length > 0 && (
|
<div className="flex items-center gap-2">
|
||||||
<span>
|
<Checkbox
|
||||||
{chunks.length} chunk{chunks.length !== 1 ? "s" : ""} found
|
id="selectAllChunks"
|
||||||
</span>
|
checked={selectAll}
|
||||||
)}
|
onCheckedChange={(handleSelectAll) =>
|
||||||
|
setSelectAll(!!handleSelectAll)
|
||||||
|
}
|
||||||
|
/>
|
||||||
|
<Label
|
||||||
|
htmlFor="selectAllChunks"
|
||||||
|
className="font-medium text-muted-foreground whitespace-nowrap cursor-pointer"
|
||||||
|
>
|
||||||
|
Select all
|
||||||
|
</Label>
|
||||||
|
</div>
|
||||||
|
<div className="flex-1 flex items-center gap-2">
|
||||||
|
<Input
|
||||||
|
name="search-query"
|
||||||
|
id="search-query"
|
||||||
|
type="text"
|
||||||
|
defaultValue={parsedFilterData?.query}
|
||||||
|
value={queryInputText}
|
||||||
|
onChange={(e) => setQueryInputText(e.target.value)}
|
||||||
|
placeholder="Search chunks..."
|
||||||
|
className="flex-1 bg-muted/20 rounded-lg border border-border/50 px-4 py-3 focus-visible:ring-1 focus-visible:ring-ring"
|
||||||
|
/>
|
||||||
|
<Button variant="outline" size="sm">
|
||||||
|
<Search />
|
||||||
|
</Button>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|
@ -147,41 +214,130 @@ function ChunksPageContent() {
|
||||||
</div>
|
</div>
|
||||||
) : (
|
) : (
|
||||||
<div className="space-y-4 pb-6">
|
<div className="space-y-4 pb-6">
|
||||||
{chunks.map((chunk, index) => (
|
{chunksFilteredByQuery.map((chunk, index) => (
|
||||||
<div
|
<div
|
||||||
key={chunk.filename + index}
|
key={chunk.filename + index}
|
||||||
className="bg-muted/20 rounded-lg p-4 border border-border/50"
|
className="bg-muted rounded-lg p-4 border border-border/50"
|
||||||
>
|
>
|
||||||
<div className="flex items-center justify-between mb-2">
|
<div className="flex items-center justify-between mb-2">
|
||||||
<div className="flex items-center gap-2">
|
<div className="flex items-center gap-3">
|
||||||
<FileText className="h-4 w-4 text-blue-400" />
|
<div>
|
||||||
<span className="font-medium truncate">
|
<Checkbox
|
||||||
{chunk.filename}
|
checked={selectedChunks.has(index)}
|
||||||
|
onCheckedChange={() =>
|
||||||
|
handleChunkCardCheckboxChange(index)
|
||||||
|
}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<span className="text-sm font-bold">
|
||||||
|
Chunk {chunk.page}
|
||||||
</span>
|
</span>
|
||||||
{chunk.connector_type && (
|
<span className="bg-background p-1 rounded text-xs text-muted-foreground/70">
|
||||||
<div className="ml-2">
|
{chunk.text.length} chars
|
||||||
{getSourceIcon(chunk.connector_type)}
|
</span>
|
||||||
</div>
|
<div className="py-1">
|
||||||
)}
|
<Button
|
||||||
|
className="p-1"
|
||||||
|
onClick={() => handleCopy(chunk.text)}
|
||||||
|
variant="ghost"
|
||||||
|
size="xs"
|
||||||
|
>
|
||||||
|
<Copy className="text-muted-foreground" />
|
||||||
|
</Button>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<span className="text-xs text-green-400 bg-green-400/20 px-2 py-1 rounded">
|
|
||||||
{chunk.score.toFixed(2)}
|
{/* TODO: Update to use active toggle */}
|
||||||
</span>
|
{/* <span className="px-2 py-1 text-green-500">
|
||||||
|
<Switch
|
||||||
|
className="ml-2 bg-green-500"
|
||||||
|
checked={true}
|
||||||
|
/>
|
||||||
|
Active
|
||||||
|
</span> */}
|
||||||
</div>
|
</div>
|
||||||
<div className="flex items-center gap-4 text-sm text-muted-foreground mb-3">
|
<blockquote className="text-sm text-muted-foreground leading-relaxed border-l-2 border-input ml-1.5 pl-4">
|
||||||
<span>{chunk.mimetype}</span>
|
|
||||||
<span>Page {chunk.page}</span>
|
|
||||||
{chunk.owner_name && <span>Owner: {chunk.owner_name}</span>}
|
|
||||||
</div>
|
|
||||||
<p className="text-sm text-foreground/90 leading-relaxed">
|
|
||||||
{chunk.text}
|
{chunk.text}
|
||||||
</p>
|
</blockquote>
|
||||||
</div>
|
</div>
|
||||||
))}
|
))}
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
{/* Right panel - Summary (TODO), Technical details, */}
|
||||||
|
<div className="w-[320px] py-20 px-2">
|
||||||
|
<div className="mb-8">
|
||||||
|
<h2 className="text-xl font-semibold mt-3 mb-4">Technical details</h2>
|
||||||
|
<dl>
|
||||||
|
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
|
||||||
|
<dt className="text-sm/6 text-muted-foreground">Total chunks</dt>
|
||||||
|
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
|
||||||
|
{chunks.length}
|
||||||
|
</dd>
|
||||||
|
</div>
|
||||||
|
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
|
||||||
|
<dt className="text-sm/6 text-muted-foreground">Avg length</dt>
|
||||||
|
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
|
||||||
|
{averageChunkLength.toFixed(0)} chars
|
||||||
|
</dd>
|
||||||
|
</div>
|
||||||
|
{/* TODO: Uncomment after data is available */}
|
||||||
|
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
|
||||||
|
<dt className="text-sm/6 text-muted-foreground">Process time</dt>
|
||||||
|
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
|
||||||
|
</dd>
|
||||||
|
</div>
|
||||||
|
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
|
||||||
|
<dt className="text-sm/6 text-muted-foreground">Model</dt>
|
||||||
|
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
|
||||||
|
</dd>
|
||||||
|
</div> */}
|
||||||
|
</dl>
|
||||||
|
</div>
|
||||||
|
<div className="mb-8">
|
||||||
|
<h2 className="text-xl font-semibold mt-2 mb-3">Original document</h2>
|
||||||
|
<dl>
|
||||||
|
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
|
||||||
|
<dt className="text-sm/6 text-muted-foreground">Name</dt>
|
||||||
|
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
|
||||||
|
{fileData?.filename}
|
||||||
|
</dd>
|
||||||
|
</div>
|
||||||
|
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
|
||||||
|
<dt className="text-sm/6 text-muted-foreground">Type</dt>
|
||||||
|
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
|
||||||
|
{fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"}
|
||||||
|
</dd>
|
||||||
|
</div>
|
||||||
|
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
|
||||||
|
<dt className="text-sm/6 text-muted-foreground">Size</dt>
|
||||||
|
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
|
||||||
|
{fileData?.size
|
||||||
|
? `${Math.round(fileData.size / 1024)} KB`
|
||||||
|
: "Unknown"}
|
||||||
|
</dd>
|
||||||
|
</div>
|
||||||
|
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
|
||||||
|
<dt className="text-sm/6 text-muted-foreground">Uploaded</dt>
|
||||||
|
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
|
||||||
|
N/A
|
||||||
|
</dd>
|
||||||
|
</div>
|
||||||
|
{/* TODO: Uncomment after data is available */}
|
||||||
|
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
|
||||||
|
<dt className="text-sm/6 text-muted-foreground">Source</dt>
|
||||||
|
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"></dd>
|
||||||
|
</div> */}
|
||||||
|
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
|
||||||
|
<dt className="text-sm/6 text-muted-foreground">Updated</dt>
|
||||||
|
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
|
||||||
|
N/A
|
||||||
|
</dd>
|
||||||
|
</div>
|
||||||
|
</dl>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -152,6 +152,7 @@ function KnowledgeSourcesPage() {
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
// Debounced update function
|
// Debounced update function
|
||||||
const debouncedUpdate = useDebounce(
|
const debouncedUpdate = useDebounce(
|
||||||
(variables: Parameters<typeof updateFlowSettingMutation.mutate>[0]) => {
|
(variables: Parameters<typeof updateFlowSettingMutation.mutate>[0]) => {
|
||||||
|
|
@ -219,6 +220,7 @@ function KnowledgeSourcesPage() {
|
||||||
// Update processing mode
|
// Update processing mode
|
||||||
const handleProcessingModeChange = (mode: string) => {
|
const handleProcessingModeChange = (mode: string) => {
|
||||||
setProcessingMode(mode);
|
setProcessingMode(mode);
|
||||||
|
// Update the configuration setting (backend will also update the flow automatically)
|
||||||
debouncedUpdate({ doclingPresets: mode });
|
debouncedUpdate({ doclingPresets: mode });
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
18
src/agent.py
18
src/agent.py
|
|
@ -106,7 +106,6 @@ async def async_response_stream(
|
||||||
model: str,
|
model: str,
|
||||||
extra_headers: dict = None,
|
extra_headers: dict = None,
|
||||||
previous_response_id: str = None,
|
previous_response_id: str = None,
|
||||||
tweaks: dict = None,
|
|
||||||
log_prefix: str = "response",
|
log_prefix: str = "response",
|
||||||
):
|
):
|
||||||
logger.info("User prompt received", prompt=prompt)
|
logger.info("User prompt received", prompt=prompt)
|
||||||
|
|
@ -121,8 +120,6 @@ async def async_response_stream(
|
||||||
}
|
}
|
||||||
if previous_response_id is not None:
|
if previous_response_id is not None:
|
||||||
request_params["previous_response_id"] = previous_response_id
|
request_params["previous_response_id"] = previous_response_id
|
||||||
if tweaks:
|
|
||||||
request_params["tweaks"] = tweaks
|
|
||||||
|
|
||||||
if "x-api-key" not in client.default_headers:
|
if "x-api-key" not in client.default_headers:
|
||||||
if hasattr(client, "api_key") and extra_headers is not None:
|
if hasattr(client, "api_key") and extra_headers is not None:
|
||||||
|
|
@ -199,7 +196,6 @@ async def async_response(
|
||||||
model: str,
|
model: str,
|
||||||
extra_headers: dict = None,
|
extra_headers: dict = None,
|
||||||
previous_response_id: str = None,
|
previous_response_id: str = None,
|
||||||
tweaks: dict = None,
|
|
||||||
log_prefix: str = "response",
|
log_prefix: str = "response",
|
||||||
):
|
):
|
||||||
try:
|
try:
|
||||||
|
|
@ -214,8 +210,6 @@ async def async_response(
|
||||||
}
|
}
|
||||||
if previous_response_id is not None:
|
if previous_response_id is not None:
|
||||||
request_params["previous_response_id"] = previous_response_id
|
request_params["previous_response_id"] = previous_response_id
|
||||||
if tweaks:
|
|
||||||
request_params["tweaks"] = tweaks
|
|
||||||
if extra_headers:
|
if extra_headers:
|
||||||
request_params["extra_headers"] = extra_headers
|
request_params["extra_headers"] = extra_headers
|
||||||
|
|
||||||
|
|
@ -249,7 +243,6 @@ async def async_stream(
|
||||||
model: str,
|
model: str,
|
||||||
extra_headers: dict = None,
|
extra_headers: dict = None,
|
||||||
previous_response_id: str = None,
|
previous_response_id: str = None,
|
||||||
tweaks: dict = None,
|
|
||||||
log_prefix: str = "response",
|
log_prefix: str = "response",
|
||||||
):
|
):
|
||||||
async for chunk in async_response_stream(
|
async for chunk in async_response_stream(
|
||||||
|
|
@ -258,7 +251,6 @@ async def async_stream(
|
||||||
model,
|
model,
|
||||||
extra_headers=extra_headers,
|
extra_headers=extra_headers,
|
||||||
previous_response_id=previous_response_id,
|
previous_response_id=previous_response_id,
|
||||||
tweaks=tweaks,
|
|
||||||
log_prefix=log_prefix,
|
log_prefix=log_prefix,
|
||||||
):
|
):
|
||||||
yield chunk
|
yield chunk
|
||||||
|
|
@ -271,7 +263,6 @@ async def async_langflow(
|
||||||
prompt: str,
|
prompt: str,
|
||||||
extra_headers: dict = None,
|
extra_headers: dict = None,
|
||||||
previous_response_id: str = None,
|
previous_response_id: str = None,
|
||||||
tweaks: dict = None,
|
|
||||||
):
|
):
|
||||||
response_text, response_id, response_obj = await async_response(
|
response_text, response_id, response_obj = await async_response(
|
||||||
langflow_client,
|
langflow_client,
|
||||||
|
|
@ -279,7 +270,6 @@ async def async_langflow(
|
||||||
flow_id,
|
flow_id,
|
||||||
extra_headers=extra_headers,
|
extra_headers=extra_headers,
|
||||||
previous_response_id=previous_response_id,
|
previous_response_id=previous_response_id,
|
||||||
tweaks=tweaks,
|
|
||||||
log_prefix="langflow",
|
log_prefix="langflow",
|
||||||
)
|
)
|
||||||
return response_text, response_id
|
return response_text, response_id
|
||||||
|
|
@ -292,7 +282,6 @@ async def async_langflow_stream(
|
||||||
prompt: str,
|
prompt: str,
|
||||||
extra_headers: dict = None,
|
extra_headers: dict = None,
|
||||||
previous_response_id: str = None,
|
previous_response_id: str = None,
|
||||||
tweaks: dict = None,
|
|
||||||
):
|
):
|
||||||
logger.debug("Starting langflow stream", prompt=prompt)
|
logger.debug("Starting langflow stream", prompt=prompt)
|
||||||
try:
|
try:
|
||||||
|
|
@ -302,8 +291,7 @@ async def async_langflow_stream(
|
||||||
flow_id,
|
flow_id,
|
||||||
extra_headers=extra_headers,
|
extra_headers=extra_headers,
|
||||||
previous_response_id=previous_response_id,
|
previous_response_id=previous_response_id,
|
||||||
tweaks=tweaks,
|
log_prefix="langflow",
|
||||||
log_prefix="langflow",
|
|
||||||
):
|
):
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"Yielding chunk from langflow stream",
|
"Yielding chunk from langflow stream",
|
||||||
|
|
@ -463,7 +451,6 @@ async def async_langflow_chat(
|
||||||
user_id: str,
|
user_id: str,
|
||||||
extra_headers: dict = None,
|
extra_headers: dict = None,
|
||||||
previous_response_id: str = None,
|
previous_response_id: str = None,
|
||||||
tweaks: dict = None,
|
|
||||||
store_conversation: bool = True,
|
store_conversation: bool = True,
|
||||||
):
|
):
|
||||||
logger.debug(
|
logger.debug(
|
||||||
|
|
@ -497,7 +484,6 @@ async def async_langflow_chat(
|
||||||
flow_id,
|
flow_id,
|
||||||
extra_headers=extra_headers,
|
extra_headers=extra_headers,
|
||||||
previous_response_id=previous_response_id,
|
previous_response_id=previous_response_id,
|
||||||
tweaks=tweaks,
|
|
||||||
log_prefix="langflow",
|
log_prefix="langflow",
|
||||||
)
|
)
|
||||||
logger.debug(
|
logger.debug(
|
||||||
|
|
@ -576,7 +562,6 @@ async def async_langflow_chat_stream(
|
||||||
user_id: str,
|
user_id: str,
|
||||||
extra_headers: dict = None,
|
extra_headers: dict = None,
|
||||||
previous_response_id: str = None,
|
previous_response_id: str = None,
|
||||||
tweaks: dict = None,
|
|
||||||
):
|
):
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"async_langflow_chat_stream called",
|
"async_langflow_chat_stream called",
|
||||||
|
|
@ -603,7 +588,6 @@ async def async_langflow_chat_stream(
|
||||||
flow_id,
|
flow_id,
|
||||||
extra_headers=extra_headers,
|
extra_headers=extra_headers,
|
||||||
previous_response_id=previous_response_id,
|
previous_response_id=previous_response_id,
|
||||||
tweaks=tweaks,
|
|
||||||
log_prefix="langflow",
|
log_prefix="langflow",
|
||||||
):
|
):
|
||||||
# Extract text content to build full response for history
|
# Extract text content to build full response for history
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ from config.settings import (
|
||||||
LANGFLOW_CHAT_FLOW_ID,
|
LANGFLOW_CHAT_FLOW_ID,
|
||||||
LANGFLOW_INGEST_FLOW_ID,
|
LANGFLOW_INGEST_FLOW_ID,
|
||||||
LANGFLOW_PUBLIC_URL,
|
LANGFLOW_PUBLIC_URL,
|
||||||
|
DOCLING_COMPONENT_ID,
|
||||||
clients,
|
clients,
|
||||||
get_openrag_config,
|
get_openrag_config,
|
||||||
config_manager,
|
config_manager,
|
||||||
|
|
@ -46,22 +47,7 @@ def get_docling_preset_configs():
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def get_docling_tweaks(docling_preset: str = None) -> dict:
|
|
||||||
"""Get Langflow tweaks for docling component based on preset"""
|
|
||||||
if not docling_preset:
|
|
||||||
# Get current preset from config
|
|
||||||
openrag_config = get_openrag_config()
|
|
||||||
docling_preset = openrag_config.knowledge.doclingPresets
|
|
||||||
|
|
||||||
preset_configs = get_docling_preset_configs()
|
|
||||||
|
|
||||||
if docling_preset not in preset_configs:
|
|
||||||
docling_preset = "standard" # fallback
|
|
||||||
|
|
||||||
preset_config = preset_configs[docling_preset]
|
|
||||||
docling_serve_opts = json.dumps(preset_config)
|
|
||||||
|
|
||||||
return {"DoclingRemote-ayRdw": {"docling_serve_opts": docling_serve_opts}}
|
|
||||||
|
|
||||||
|
|
||||||
async def get_settings(request, session_manager):
|
async def get_settings(request, session_manager):
|
||||||
|
|
@ -234,6 +220,15 @@ async def update_settings(request, session_manager):
|
||||||
current_config.knowledge.doclingPresets = body["doclingPresets"]
|
current_config.knowledge.doclingPresets = body["doclingPresets"]
|
||||||
config_updated = True
|
config_updated = True
|
||||||
|
|
||||||
|
# Also update the flow with the new docling preset
|
||||||
|
try:
|
||||||
|
await _update_flow_docling_preset(body["doclingPresets"], preset_configs[body["doclingPresets"]])
|
||||||
|
logger.info(f"Successfully updated docling preset in flow to '{body['doclingPresets']}'")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to update docling preset in flow: {str(e)}")
|
||||||
|
# Don't fail the entire settings update if flow update fails
|
||||||
|
# The config will still be saved
|
||||||
|
|
||||||
if "chunk_size" in body:
|
if "chunk_size" in body:
|
||||||
if not isinstance(body["chunk_size"], int) or body["chunk_size"] <= 0:
|
if not isinstance(body["chunk_size"], int) or body["chunk_size"] <= 0:
|
||||||
return JSONResponse(
|
return JSONResponse(
|
||||||
|
|
@ -527,3 +522,93 @@ async def onboarding(request, flows_service):
|
||||||
{"error": f"Failed to update onboarding settings: {str(e)}"},
|
{"error": f"Failed to update onboarding settings: {str(e)}"},
|
||||||
status_code=500,
|
status_code=500,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def _update_flow_docling_preset(preset: str, preset_config: dict):
    """Store a docling preset configuration in the Langflow ingest flow.

    Fetches the flow identified by LANGFLOW_INGEST_FLOW_ID, finds the node
    whose id equals DOCLING_COMPONENT_ID, sets ``preset_config`` as the
    ``value`` of that node's ``docling_serve_opts`` template field, and sends
    the modified flow back with a PATCH request.

    Args:
        preset: Name of the preset being applied (not read by this helper).
        preset_config: Options written to ``docling_serve_opts["value"]``.

    Raises:
        ValueError: If LANGFLOW_INGEST_FLOW_ID is empty or unset.
        Exception: If the GET or PATCH request returns a non-200 status, the
            docling node is not in the flow, or the node has no
            ``docling_serve_opts`` template field.
    """
    if not LANGFLOW_INGEST_FLOW_ID:
        raise ValueError("LANGFLOW_INGEST_FLOW_ID is not configured")

    flow_endpoint = f"/api/v1/flows/{LANGFLOW_INGEST_FLOW_ID}"

    # Pull the current flow definition so it can be edited in place.
    fetched = await clients.langflow_request("GET", flow_endpoint)
    if fetched.status_code != 200:
        raise Exception(f"Failed to get ingest flow: HTTP {fetched.status_code} - {fetched.text}")

    flow_data = fetched.json()

    # Locate the docling node by the configured component id.
    flow_nodes = flow_data.get("data", {}).get("nodes", [])
    docling_node = next(
        (
            candidate
            for candidate in flow_nodes
            if candidate.get("id") == DOCLING_COMPONENT_ID
        ),
        None,
    )
    if docling_node is None:
        raise Exception(f"Docling component '{DOCLING_COMPONENT_ID}' not found in ingest flow")

    # The field object lives inside flow_data, so mutating it here updates
    # the payload that is sent back below.
    opts_field = (
        docling_node.get("data", {})
        .get("node", {})
        .get("template", {})
        .get("docling_serve_opts")
    )
    if not opts_field:
        raise Exception(f"docling_serve_opts field not found in node '{DOCLING_COMPONENT_ID}'")

    # NOTE(review): the preset is stored as a dict, not a JSON string —
    # confirm the component's docling_serve_opts field expects that.
    opts_field["value"] = preset_config

    # Push the whole modified flow back to Langflow.
    patched = await clients.langflow_request("PATCH", flow_endpoint, json=flow_data)
    if patched.status_code != 200:
        raise Exception(f"Failed to update ingest flow: HTTP {patched.status_code} - {patched.text}")
|
||||||
|
|
||||||
|
|
||||||
|
async def update_docling_preset(request, session_manager):
    """Handle a request to change the docling preset used by the ingest flow.

    Expects a JSON object body with a ``preset`` key naming one of the
    presets returned by ``get_docling_preset_configs()``.

    Args:
        request: Incoming request; its JSON body is read with ``request.json()``.
        session_manager: Supplied by the route wiring; not used by this handler.

    Returns:
        JSONResponse: status 200 with the applied preset and its
        configuration; status 400 when the body is not a JSON object, has no
        ``preset`` key, or ``preset`` does not name a known preset; status
        500 when reading the body or updating the flow raises.
    """
    try:
        # Parse request body
        body = await request.json()

        # Validate preset parameter. A non-object body (number, string, ...)
        # cannot carry a "preset" key, so report it as a client error
        # instead of letting the membership test raise.
        if not isinstance(body, dict) or "preset" not in body:
            return JSONResponse(
                {"error": "preset parameter is required"},
                status_code=400
            )

        preset = body["preset"]
        preset_configs = get_docling_preset_configs()

        # Non-string values such as lists or objects are unhashable and would
        # make the dict membership test raise TypeError, turning a bad
        # request into a 500. Reject them as invalid presets.
        if not isinstance(preset, str) or preset not in preset_configs:
            valid_presets = list(preset_configs.keys())
            return JSONResponse(
                {"error": f"Invalid preset '{preset}'. Valid presets: {', '.join(valid_presets)}"},
                status_code=400
            )

        # Get the preset configuration
        preset_config = preset_configs[preset]

        # Use the helper function to update the flow
        await _update_flow_docling_preset(preset, preset_config)

        logger.info(f"Successfully updated docling preset to '{preset}' in ingest flow")

        return JSONResponse({
            "message": f"Successfully updated docling preset to '{preset}'",
            "preset": preset,
            "preset_config": preset_config
        })

    except Exception as e:
        # Flow lookup/update failures and body parsing errors end up here.
        logger.error("Failed to update docling preset", error=str(e))
        return JSONResponse(
            {"error": f"Failed to update docling preset: {str(e)}"},
            status_code=500
        )
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -544,6 +544,9 @@ OLLAMA_LLM_TEXT_COMPONENT_ID = os.getenv(
|
||||||
"OLLAMA_LLM_TEXT_COMPONENT_ID", "OllamaModel-XDGqZ"
|
"OLLAMA_LLM_TEXT_COMPONENT_ID", "OllamaModel-XDGqZ"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Docling component ID for ingest flow
|
||||||
|
DOCLING_COMPONENT_ID = os.getenv("DOCLING_COMPONENT_ID", "DoclingRemote-78KoX")
|
||||||
|
|
||||||
# Global clients instance
|
# Global clients instance
|
||||||
clients = AppClients()
|
clients = AppClients()
|
||||||
|
|
||||||
|
|
|
||||||
11
src/main.py
11
src/main.py
|
|
@ -977,6 +977,17 @@ async def create_app():
|
||||||
),
|
),
|
||||||
methods=["POST"],
|
methods=["POST"],
|
||||||
),
|
),
|
||||||
|
# Docling preset update endpoint
|
||||||
|
Route(
|
||||||
|
"/settings/docling-preset",
|
||||||
|
require_auth(services["session_manager"])(
|
||||||
|
partial(
|
||||||
|
settings.update_docling_preset,
|
||||||
|
session_manager=services["session_manager"]
|
||||||
|
)
|
||||||
|
),
|
||||||
|
methods=["PATCH"],
|
||||||
|
),
|
||||||
Route(
|
Route(
|
||||||
"/nudges",
|
"/nudges",
|
||||||
require_auth(services["session_manager"])(
|
require_auth(services["session_manager"])(
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,6 @@ import json
|
||||||
from config.settings import NUDGES_FLOW_ID, clients, LANGFLOW_URL, LANGFLOW_CHAT_FLOW_ID
|
from config.settings import NUDGES_FLOW_ID, clients, LANGFLOW_URL, LANGFLOW_CHAT_FLOW_ID
|
||||||
from agent import async_chat, async_langflow, async_chat_stream
|
from agent import async_chat, async_langflow, async_chat_stream
|
||||||
from auth_context import set_auth_context
|
from auth_context import set_auth_context
|
||||||
from api.settings import get_docling_tweaks
|
|
||||||
from utils.logging_config import get_logger
|
from utils.logging_config import get_logger
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
|
|
@ -127,8 +126,6 @@ class ChatService:
|
||||||
"Langflow client not initialized. Ensure LANGFLOW is reachable or set LANGFLOW_KEY."
|
"Langflow client not initialized. Ensure LANGFLOW is reachable or set LANGFLOW_KEY."
|
||||||
)
|
)
|
||||||
|
|
||||||
# Get docling tweaks based on current configuration
|
|
||||||
docling_tweaks = get_docling_tweaks()
|
|
||||||
|
|
||||||
if stream:
|
if stream:
|
||||||
from agent import async_langflow_chat_stream
|
from agent import async_langflow_chat_stream
|
||||||
|
|
@ -140,7 +137,6 @@ class ChatService:
|
||||||
user_id,
|
user_id,
|
||||||
extra_headers=extra_headers,
|
extra_headers=extra_headers,
|
||||||
previous_response_id=previous_response_id,
|
previous_response_id=previous_response_id,
|
||||||
tweaks=docling_tweaks,
|
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
from agent import async_langflow_chat
|
from agent import async_langflow_chat
|
||||||
|
|
@ -152,7 +148,6 @@ class ChatService:
|
||||||
user_id,
|
user_id,
|
||||||
extra_headers=extra_headers,
|
extra_headers=extra_headers,
|
||||||
previous_response_id=previous_response_id,
|
previous_response_id=previous_response_id,
|
||||||
tweaks=docling_tweaks,
|
|
||||||
)
|
)
|
||||||
response_data = {"response": response_text}
|
response_data = {"response": response_text}
|
||||||
if response_id:
|
if response_id:
|
||||||
|
|
@ -202,8 +197,6 @@ class ChatService:
|
||||||
|
|
||||||
from agent import async_langflow_chat
|
from agent import async_langflow_chat
|
||||||
|
|
||||||
# Get docling tweaks (might not be used by nudges flow, but keeping consistent)
|
|
||||||
docling_tweaks = get_docling_tweaks()
|
|
||||||
|
|
||||||
response_text, response_id = await async_langflow_chat(
|
response_text, response_id = await async_langflow_chat(
|
||||||
langflow_client,
|
langflow_client,
|
||||||
|
|
@ -211,7 +204,6 @@ class ChatService:
|
||||||
prompt,
|
prompt,
|
||||||
user_id,
|
user_id,
|
||||||
extra_headers=extra_headers,
|
extra_headers=extra_headers,
|
||||||
tweaks=docling_tweaks,
|
|
||||||
store_conversation=False,
|
store_conversation=False,
|
||||||
)
|
)
|
||||||
response_data = {"response": response_text}
|
response_data = {"response": response_text}
|
||||||
|
|
@ -242,8 +234,6 @@ class ChatService:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"Langflow client not initialized. Ensure LANGFLOW is reachable or set LANGFLOW_KEY."
|
"Langflow client not initialized. Ensure LANGFLOW is reachable or set LANGFLOW_KEY."
|
||||||
)
|
)
|
||||||
# Get docling tweaks based on current configuration
|
|
||||||
docling_tweaks = get_docling_tweaks()
|
|
||||||
|
|
||||||
response_text, response_id = await async_langflow(
|
response_text, response_id = await async_langflow(
|
||||||
langflow_client=langflow_client,
|
langflow_client=langflow_client,
|
||||||
|
|
@ -251,7 +241,6 @@ class ChatService:
|
||||||
prompt=document_prompt,
|
prompt=document_prompt,
|
||||||
extra_headers=extra_headers,
|
extra_headers=extra_headers,
|
||||||
previous_response_id=previous_response_id,
|
previous_response_id=previous_response_id,
|
||||||
tweaks=docling_tweaks,
|
|
||||||
)
|
)
|
||||||
else: # chat
|
else: # chat
|
||||||
# Set auth context for chat tools and provide user_id
|
# Set auth context for chat tools and provide user_id
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue