Merge branch 'main' into feat-centralized-storage

This commit is contained in:
Eric Hare 2025-12-12 08:23:44 -08:00 committed by GitHub
commit c002fbabc6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
36 changed files with 1007 additions and 5373 deletions

View file

@ -2,6 +2,14 @@
# Set to true to disable Langflow ingestion and use traditional OpenRAG processor
# If unset or false, Langflow pipeline will be used (default: upload -> ingest -> delete)
DISABLE_INGEST_WITH_LANGFLOW=false
# Langflow HTTP timeout configuration (in seconds)
# For large documents (300+ pages), ingestion can take 30+ minutes
# Increase these values if you experience timeouts with very large PDFs
# Default: 2400 seconds (40 minutes) total timeout, 30 seconds connection timeout
# LANGFLOW_TIMEOUT=2400
# LANGFLOW_CONNECT_TIMEOUT=30
# Generate a secret key as described at https://docs.langflow.org/api-keys-and-authentication#langflow-secret-key
LANGFLOW_SECRET_KEY=

11
.github/dependabot.yml vendored Normal file
View file

@ -0,0 +1,11 @@
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "monthly"
commit-message:
prefix: "build(deps):"
include: scope

7
.pre-commit-config.yaml Normal file
View file

@ -0,0 +1,7 @@
repos:
- repo: https://github.com/Yelp/detect-secrets
rev: v1.5.0
hooks:
- id: detect-secrets
args: ["--baseline", ".secrets.baseline", "--exclude-lines", "code_hash"]

180
.secrets.baseline Normal file
View file

@ -0,0 +1,180 @@
{
"version": "1.5.0",
"plugins_used": [
{
"name": "ArtifactoryDetector"
},
{
"name": "AWSKeyDetector"
},
{
"name": "AzureStorageKeyDetector"
},
{
"name": "Base64HighEntropyString",
"limit": 4.5
},
{
"name": "BasicAuthDetector"
},
{
"name": "CloudantDetector"
},
{
"name": "DiscordBotTokenDetector"
},
{
"name": "GitHubTokenDetector"
},
{
"name": "GitLabTokenDetector"
},
{
"name": "HexHighEntropyString",
"limit": 3.0
},
{
"name": "IbmCloudIamDetector"
},
{
"name": "IbmCosHmacDetector"
},
{
"name": "IPPublicDetector"
},
{
"name": "JwtTokenDetector"
},
{
"name": "KeywordDetector",
"keyword_exclude": ""
},
{
"name": "MailchimpDetector"
},
{
"name": "NpmDetector"
},
{
"name": "OpenAIDetector"
},
{
"name": "PrivateKeyDetector"
},
{
"name": "PypiTokenDetector"
},
{
"name": "SendGridDetector"
},
{
"name": "SlackDetector"
},
{
"name": "SoftlayerDetector"
},
{
"name": "SquareOAuthDetector"
},
{
"name": "StripeDetector"
},
{
"name": "TelegramBotTokenDetector"
},
{
"name": "TwilioKeyDetector"
}
],
"filters_used": [
{
"path": "detect_secrets.filters.allowlist.is_line_allowlisted"
},
{
"path": "detect_secrets.filters.common.is_baseline_file",
"filename": ".secrets.baseline"
},
{
"path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies",
"min_level": 2
},
{
"path": "detect_secrets.filters.heuristic.is_indirect_reference"
},
{
"path": "detect_secrets.filters.heuristic.is_likely_id_string"
},
{
"path": "detect_secrets.filters.heuristic.is_lock_file"
},
{
"path": "detect_secrets.filters.heuristic.is_not_alphanumeric_string"
},
{
"path": "detect_secrets.filters.heuristic.is_potential_uuid"
},
{
"path": "detect_secrets.filters.heuristic.is_prefixed_with_dollar_sign"
},
{
"path": "detect_secrets.filters.heuristic.is_sequential_string"
},
{
"path": "detect_secrets.filters.heuristic.is_swagger_file"
},
{
"path": "detect_secrets.filters.heuristic.is_templated_secret"
},
{
"path": "detect_secrets.filters.regex.should_exclude_file",
"pattern": [
"flows/.*\\.json$"
]
},
{
"path": "detect_secrets.filters.regex.should_exclude_line",
"pattern": [
"code_hash"
]
}
],
"results": {
"docs/docs/_partial-integrate-chat.mdx": [
{
"type": "Secret Keyword",
"filename": "docs/docs/_partial-integrate-chat.mdx",
"hashed_secret": "e42fd8b9ad15d8fa5f4718cad7cf19b522807996",
"is_verified": false,
"line_number": 30
}
],
"src/main.py": [
{
"type": "Base64 High Entropy String",
"filename": "src/main.py",
"hashed_secret": "131a83e9ef8660d7dd0771da7ce5954d9ea801ee",
"is_verified": false,
"line_number": 404
}
],
"src/models/processors.py": [
{
"type": "Base64 High Entropy String",
"filename": "src/models/processors.py",
"hashed_secret": "131a83e9ef8660d7dd0771da7ce5954d9ea801ee",
"is_verified": false,
"line_number": 763
}
],
"src/services/langflow_file_service.py": [
{
"type": "Base64 High Entropy String",
"filename": "src/services/langflow_file_service.py",
"hashed_secret": "131a83e9ef8660d7dd0771da7ce5954d9ea801ee",
"is_verified": false,
"line_number": 97
}
]
},
"generated_at": "2025-12-09T20:33:13Z"
}

View file

@ -4787,7 +4787,7 @@
"is_component": false,
"locked": true,
"last_tested_version": "1.7.0.dev21",
"name": "OpenRAG OpenSearch Agent",
"name": "OpenRAG OpenSearch Agent Flow",
"tags": [
"assistants",
"agents"

View file

@ -4114,7 +4114,7 @@
"is_component": false,
"locked": true,
"last_tested_version": "1.7.0.dev21",
"name": "OpenRAG OpenSearch Nudges",
"name": "OpenRAG OpenSearch Nudges Flow",
"tags": [
"assistants",
"agents"

View file

@ -1,47 +0,0 @@
"use client";
import * as React from "react";
import { cn } from "@/lib/utils";
import { useDiscordMembers } from "@/hooks/use-discord-members";
import { formatCount } from "@/lib/format-count";
/** Props accepted by `DiscordLink`. */
interface DiscordLinkProps {
  /** Invite code appended to `https://discord.gg/` and passed to the member-count hook. */
  inviteCode?: string;
  /** Extra class names merged after the built-in link styles. */
  className?: string;
}

/**
 * Link to a Discord invite showing the Discord glyph and, from the `sm`
 * breakpoint up, a member-count label: "..." while loading, "--" when the
 * lookup failed or returned nothing, otherwise the formatted count.
 */
const DiscordLink = React.forwardRef<HTMLAnchorElement, DiscordLinkProps>(
  (props, forwardedRef) => {
    const { inviteCode = "EqksyE2EX9", className } = props;
    const {
      data: invite,
      isLoading: pending,
      error: failure,
    } = useDiscordMembers(inviteCode);

    // Resolve the label up front instead of nesting ternaries in the markup.
    let memberLabel: React.ReactNode = "--";
    if (pending) {
      memberLabel = "...";
    } else if (!failure && invite) {
      memberLabel = formatCount(invite.approximate_member_count);
    }

    const linkClassName = cn(
      "inline-flex h-8 items-center justify-center rounded-md px-2 text-sm font-medium text-muted-foreground shadow-sm transition-colors hover:bg-accent hover:text-accent-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2",
      className,
    );

    return (
      <a
        ref={forwardedRef}
        href={`https://discord.gg/${inviteCode}`}
        target="_blank"
        rel="noopener noreferrer"
        className={linkClassName}
      >
        <svg className="h-4 w-4" viewBox="0 0 24 24" fill="currentColor">
          <path d="M20.317 4.37a19.791 19.791 0 0 0-4.885-1.515.074.074 0 0 0-.079.037c-.21.375-.444.864-.608 1.25a18.27 18.27 0 0 0-5.487 0 12.64 12.64 0 0 0-.617-1.25.077.077 0 0 0-.079-.037A19.736 19.736 0 0 0 3.677 4.37a.07.07 0 0 0-.032.027C.533 9.046-.32 13.58.099 18.057a.082.082 0 0 0 .031.057 19.9 19.9 0 0 0 5.993 3.03.078.078 0 0 0 .084-.028c.462-.63.874-1.295 1.226-1.994a.076.076 0 0 0-.041-.106 13.107 13.107 0 0 1-1.872-.892.077.077 0 0 1-.008-.128 10.2 10.2 0 0 0 .372-.292.074.074 0 0 1 .077-.01c3.928 1.793 8.18 1.793 12.062 0a.074.074 0 0 1 .078.01c.120.098.246.198.373.292a.077.077 0 0 1-.006.127 12.299 12.299 0 0 1-1.873.892.077.077 0 0 0-.041.107c.36.698.772 1.362 1.225 1.993a.076.076 0 0 0 .084.028 19.839 19.839 0 0 0 6.002-3.03.077.077 0 0 0 .032-.054c.5-5.177-.838-9.674-3.549-13.66a.061.061 0 0 0-.031-.03zM8.02 15.33c-1.183 0-2.157-1.085-2.157-2.419 0-1.333.956-2.419 2.157-2.419 1.21 0 2.176 1.096 2.157 2.42 0 1.333-.956 2.418-2.157 2.418zm7.975 0c-1.183 0-2.157-1.085-2.157-2.419 0-1.333.955-2.419 2.157-2.419 1.21 0 2.176 1.096 2.157 2.42 0 1.333-.946 2.418-2.157 2.418z" />
        </svg>
        <span className="hidden sm:inline ml-2">{memberLabel}</span>
      </a>
    );
  },
);

DiscordLink.displayName = "DiscordLink";

export { DiscordLink };

View file

@ -1,103 +0,0 @@
"use client";
import * as React from "react";
import { cn } from "@/lib/utils";
import { Button } from "@/components/ui/button";
import { Loader2 } from "lucide-react";
/** Props accepted by `FileUploadArea`. */
interface FileUploadAreaProps {
  /** Called with the first file the user drops or picks; extra files are ignored. */
  onFileSelected?: (file: File) => void;
  /** While true the area shows a spinner and ignores clicks, drops, and picks. */
  isLoading?: boolean;
  /** Extra class names merged after the built-in styles. */
  className?: string;
}

/**
 * Click-or-drop upload target that reports a single selected file through
 * `onFileSelected`.
 */
const FileUploadArea = React.forwardRef<HTMLDivElement, FileUploadAreaProps>(
  ({ onFileSelected, isLoading = false, className }, ref) => {
    const [isDragging, setIsDragging] = React.useState(false);
    const fileInputRef = React.useRef<HTMLInputElement>(null);

    // Hands the first file of a selection to the caller.
    const forwardFirstFile = (files: FileList | null) => {
      const first = files?.[0];
      if (first && onFileSelected) {
        onFileSelected(first);
      }
    };

    const handleDragOver = (e: React.DragEvent) => {
      // preventDefault is what allows a drop on this element at all.
      e.preventDefault();
      // Do not advertise a drop target while a file is being processed.
      if (!isLoading) {
        setIsDragging(true);
      }
    };

    const handleDragLeave = (e: React.DragEvent) => {
      e.preventDefault();
      // dragleave also fires when the pointer moves onto a child element;
      // keep the highlight while the pointer is still inside the area.
      const next = e.relatedTarget;
      if (next instanceof Node && e.currentTarget.contains(next)) {
        return;
      }
      setIsDragging(false);
    };

    const handleDrop = (e: React.DragEvent) => {
      // Always prevent the default so the browser does not open the file.
      e.preventDefault();
      setIsDragging(false);
      // Match the click guard and the disabled input: no new file while loading.
      if (isLoading) {
        return;
      }
      forwardFirstFile(e.dataTransfer.files);
    };

    const handleFileSelect = (e: React.ChangeEvent<HTMLInputElement>) => {
      forwardFirstFile(e.target.files);
      // Clear the input so choosing the same file again fires `change` again.
      e.target.value = "";
    };

    const handleClick = () => {
      if (!isLoading) {
        fileInputRef.current?.click();
      }
    };

    return (
      <div
        ref={ref}
        className={cn(
          "relative flex min-h-[150px] w-full cursor-pointer flex-col items-center justify-center rounded-lg border-2 border-dashed border-border bg-background p-6 text-center transition-colors hover:bg-muted/50",
          isDragging && "border-primary bg-primary/5",
          isLoading && "cursor-not-allowed opacity-50",
          className,
        )}
        onDragOver={handleDragOver}
        onDragLeave={handleDragLeave}
        onDrop={handleDrop}
        onClick={handleClick}
      >
        <input
          ref={fileInputRef}
          type="file"
          onChange={handleFileSelect}
          className="hidden"
          disabled={isLoading}
        />
        <div className="flex flex-col items-center gap-4">
          {isLoading && (
            <div className="rounded-full bg-muted p-4">
              <Loader2 className="h-8 w-8 animate-spin text-muted-foreground" />
            </div>
          )}
          <div className="space-y-2">
            <h3 className="text-lg font-medium text-foreground">
              {isLoading
                ? "Processing file..."
                : "Drop files here or click to upload"}
            </h3>
            <p className="text-sm text-muted-foreground">
              {isLoading
                ? "Please wait while your file is being processed"
                : ""}
            </p>
          </div>
          {/* The click bubbles to the wrapper's onClick, which opens the picker. */}
          {!isLoading && <Button size="sm">+ Upload</Button>}
        </div>
      </div>
    );
  },
);

FileUploadArea.displayName = "FileUploadArea";

export { FileUploadArea };

View file

@ -1,47 +0,0 @@
"use client";
import * as React from "react";
import { cn } from "@/lib/utils";
import { Github } from "lucide-react";
import { useGitHubStars } from "@/hooks/use-github-stars";
import { formatCount } from "@/lib/format-count";
/** Props accepted by `GitHubStarButton`. */
interface GitHubStarButtonProps {
  /** Repository in `owner/name` form; appended to `https://github.com/`. */
  repo?: string;
  /** Extra class names merged after the built-in link styles. */
  className?: string;
}

/**
 * Link to a GitHub repository showing the GitHub icon and, from the `sm`
 * breakpoint up, a star-count label: "..." while loading, "--" when the lookup
 * failed or returned nothing, otherwise the formatted count.
 */
const GitHubStarButton = React.forwardRef<
  HTMLAnchorElement,
  GitHubStarButtonProps
>((props, forwardedRef) => {
  const { repo = "phact/openrag", className } = props;
  const {
    data: repoInfo,
    isLoading: pending,
    error: failure,
  } = useGitHubStars(repo);

  // Resolve the label up front instead of nesting ternaries in the markup.
  let starLabel: React.ReactNode = "--";
  if (pending) {
    starLabel = "...";
  } else if (!failure && repoInfo) {
    starLabel = formatCount(repoInfo.stargazers_count);
  }

  const linkClassName = cn(
    "inline-flex h-8 items-center justify-center rounded-md px-2 text-sm font-medium text-muted-foreground shadow-sm transition-colors hover:bg-accent hover:text-accent-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2",
    className,
  );

  return (
    <a
      ref={forwardedRef}
      href={`https://github.com/${repo}`}
      target="_blank"
      rel="noopener noreferrer"
      className={linkClassName}
    >
      <Github className="h-4 w-4" />
      <span className="hidden sm:inline ml-2">{starLabel}</span>
    </a>
  );
});

GitHubStarButton.displayName = "GitHubStarButton";

export { GitHubStarButton };

View file

@ -1,458 +0,0 @@
"use client";
import { useState, useEffect, useRef } from "react";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { Card, CardContent } from "@/components/ui/card";
import { Label } from "@/components/ui/label";
import { Textarea } from "@/components/ui/textarea";
import {
ChevronDown,
Filter,
Search,
X,
Loader2,
Plus,
Save,
} from "lucide-react";
import { cn } from "@/lib/utils";
/** A saved knowledge filter as held in the dropdown's local state. */
interface KnowledgeFilter {
id: string;
name: string;
description: string;
/** JSON string; the dropdown parses it as `ParsedQueryData` to build its summary line. */
query_data: string;
owner: string;
created_at: string;
updated_at: string;
}
/** Shape the dropdown expects after `JSON.parse` of `KnowledgeFilter.query_data`. */
interface ParsedQueryData {
query: string;
/** Newly created filters store `["*"]` in each list (commented in the create handler as "match everything"). */
filters: {
data_sources: string[];
document_types: string[];
owners: string[];
};
limit: number;
scoreThreshold: number;
}
/** Props accepted by `KnowledgeFilterDropdown`. */
interface KnowledgeFilterDropdownProps {
/** Currently applied filter; `null` makes the trigger read "All Knowledge". */
selectedFilter: KnowledgeFilter | null;
/** Called with the chosen filter, or with `null` to clear the selection. */
onFilterSelect: (filter: KnowledgeFilter | null) => void;
}
export function KnowledgeFilterDropdown({
selectedFilter,
onFilterSelect,
}: KnowledgeFilterDropdownProps) {
const [isOpen, setIsOpen] = useState(false);
const [filters, setFilters] = useState<KnowledgeFilter[]>([]);
const [loading, setLoading] = useState(false);
const [searchQuery, setSearchQuery] = useState("");
const [showCreateModal, setShowCreateModal] = useState(false);
const [createName, setCreateName] = useState("");
const [createDescription, setCreateDescription] = useState("");
const [creating, setCreating] = useState(false);
const dropdownRef = useRef<HTMLDivElement>(null);
/**
 * Fetches up to 20 saved knowledge filters matching `query` and stores them
 * in state. Any failure (network error, non-OK status, `success: false`, or a
 * body without a `filters` array) leaves the list empty; errors are logged.
 */
const loadFilters = async (query = "") => {
  setLoading(true);
  try {
    const response = await fetch("/api/knowledge-filter/search", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        query,
        limit: 20, // Limit for dropdown
      }),
    });
    const result = await response.json();
    if (response.ok && result.success) {
      // The render path calls filters.length / filters.map, so never store a
      // non-array here even if the payload omits `filters`.
      setFilters(Array.isArray(result.filters) ? result.filters : []);
    } else {
      console.error("Failed to load knowledge filters:", result.error);
      setFilters([]);
    }
  } catch (error) {
    console.error("Error loading knowledge filters:", error);
    setFilters([]);
  } finally {
    setLoading(false);
  }
};
/**
 * Deletes a saved filter on the server and, on success, removes it from the
 * local list and clears the selection if it was the selected filter.
 *
 * @param filterId - Id of the filter to delete.
 * @param e - Click event from the delete button; propagation is stopped so
 *   the surrounding row's click handler does not also select the filter.
 */
const deleteFilter = async (filterId: string, e: React.MouseEvent) => {
  e.stopPropagation();
  try {
    // Encode the id so characters such as "/", "?" or "#" cannot alter the path.
    const response = await fetch(
      `/api/knowledge-filter/${encodeURIComponent(filterId)}`,
      {
        method: "DELETE",
      },
    );
    if (response.ok) {
      // Remove from local state
      setFilters((prev) => prev.filter((f) => f.id !== filterId));
      // If this was the selected filter, clear selection
      if (selectedFilter?.id === filterId) {
        onFilterSelect(null);
      }
    } else {
      console.error("Failed to delete knowledge filter");
    }
  } catch (error) {
    console.error("Error deleting knowledge filter:", error);
  }
};
/** Applies the clicked filter and collapses the dropdown. */
function handleFilterSelect(filter: KnowledgeFilter) {
  onFilterSelect(filter);
  setIsOpen(false);
}

/** Clears the active filter ("All Knowledge") and collapses the dropdown. */
function handleClearFilter() {
  onFilterSelect(null);
  setIsOpen(false);
}

/** Collapses the dropdown and opens the create-filter modal. */
function handleCreateNew() {
  setIsOpen(false);
  setShowCreateModal(true);
}
/** Closes the create-filter modal and resets both of its form fields. */
const handleCancelCreate = () => {
  setShowCreateModal(false);
  setCreateName("");
  setCreateDescription("");
};

/**
 * Creates a filter from the modal's name/description with wildcard query
 * data, then prepends it to the local list, selects it, and closes the modal.
 * Does nothing when the trimmed name is empty; failures are only logged.
 */
const handleCreateFilter = async () => {
  const name = createName.trim();
  if (!name) return;
  const description = createDescription.trim();

  setCreating(true);
  try {
    // Create a basic filter with wildcards (match everything by default)
    const queryData = JSON.stringify({
      query: "",
      filters: {
        data_sources: ["*"],
        document_types: ["*"],
        owners: ["*"],
      },
      limit: 10,
      scoreThreshold: 0,
    });

    const response = await fetch("/api/knowledge-filter", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({ name, description, queryData }),
    });
    const result = await response.json();

    if (!response.ok || !result.success) {
      console.error("Failed to create knowledge filter:", result.error);
      return;
    }

    // Server supplies id/owner/timestamps; the rest mirrors what was submitted.
    const created = result.filter;
    const newFilter: KnowledgeFilter = {
      id: created.id,
      name,
      description,
      query_data: queryData,
      owner: created.owner,
      created_at: created.created_at,
      updated_at: created.updated_at,
    };

    setFilters((prev) => [newFilter, ...prev]);
    onFilterSelect(newFilter);
    // Close modal and reset form
    handleCancelCreate();
  } catch (error) {
    console.error("Error creating knowledge filter:", error);
  } finally {
    setCreating(false);
  }
};
/**
 * Builds the one-line summary shown under a filter's name, e.g.
 * `"report" • 1 source • 2 types`.
 *
 * @param filter - Filter whose `query_data` JSON string is summarized.
 * @returns The parts joined by " • ", "No filters" when there is nothing to
 *   list, or "Invalid filter" when `query_data` cannot be parsed or lacks the
 *   expected `filters` lists.
 */
const getFilterSummary = (filter: KnowledgeFilter): string => {
  // "1 source" / "2 sources": pluralize by count instead of always adding "s".
  const countLabel = (count: number, noun: string): string =>
    `${count} ${noun}${count === 1 ? "" : "s"}`;

  try {
    const parsed = JSON.parse(filter.query_data) as ParsedQueryData;
    const parts: string[] = [];
    if (parsed.query) parts.push(`"${parsed.query}"`);
    // A missing `filters` object or list throws here and is reported as invalid.
    const { data_sources, document_types, owners } = parsed.filters;
    if (data_sources.length > 0)
      parts.push(countLabel(data_sources.length, "source"));
    if (document_types.length > 0)
      parts.push(countLabel(document_types.length, "type"));
    if (owners.length > 0) parts.push(countLabel(owners.length, "owner"));
    return parts.join(" • ") || "No filters";
  } catch {
    return "Invalid filter";
  }
};
// Tracks whether the list has been requested since the dropdown was opened,
// so the first request is immediate and later query edits are debounced.
const hasLoadedSinceOpenRef = useRef(false);

// Load filters while the dropdown is open. One request is sent on open, using
// the current search text; each later edit of the search text waits 300 ms
// before reloading. Replaces two effects that both fired on open (an immediate
// unfiltered load plus a debounced filtered one).
useEffect(() => {
  if (!isOpen) {
    hasLoadedSinceOpenRef.current = false;
    return;
  }
  const delay = hasLoadedSinceOpenRef.current ? 300 : 0;
  const timeoutId = setTimeout(() => {
    hasLoadedSinceOpenRef.current = true;
    loadFilters(searchQuery);
  }, delay);
  // Cancels the pending load when the query changes again or the dropdown closes.
  return () => clearTimeout(timeoutId);
  // loadFilters is re-created every render and is intentionally not a dependency.
}, [searchQuery, isOpen]);

// Close dropdown when clicking outside
useEffect(() => {
  const handleClickOutside = (event: MouseEvent) => {
    if (
      dropdownRef.current &&
      !dropdownRef.current.contains(event.target as Node)
    ) {
      setIsOpen(false);
    }
  };
  document.addEventListener("mousedown", handleClickOutside);
  return () => document.removeEventListener("mousedown", handleClickOutside);
}, []);
// Renders the trigger button, the dropdown card while `isOpen` (search box,
// "All Knowledge" row, filter list, create option, selected-filter details),
// and the create-filter modal while `showCreateModal`. Everything sits inside
// the `dropdownRef` wrapper that the outside-click handler checks against.
return (
<div className="relative" ref={dropdownRef}>
<Button
variant={selectedFilter ? "default" : "outline"}
size="sm"
onClick={() => setIsOpen(!isOpen)}
className={cn(
"flex items-center gap-2 h-8 px-3",
selectedFilter
? "hover:bg-primary hover:text-primary-foreground"
: "hover:bg-transparent hover:text-foreground hover:border-border",
)}
>
<Filter className="h-3 w-3" />
{selectedFilter ? (
<span className="max-w-32 truncate">{selectedFilter.name}</span>
) : (
<span>All Knowledge</span>
)}
<ChevronDown
className={cn("h-3 w-3 transition-transform", isOpen && "rotate-180")}
/>
</Button>
{isOpen && (
<Card className="absolute right-0 top-full mt-1 w-80 max-h-96 overflow-hidden z-50 shadow-lg border-border/50 bg-card/95 backdrop-blur-sm">
<CardContent className="p-0">
{/* Search Header */}
<div className="p-3 border-b border-border/50">
<div className="relative">
<Search className="absolute left-3 top-1/2 transform -translate-y-1/2 h-3 w-3 text-muted-foreground" />
<Input
placeholder="Search filters..."
value={searchQuery}
onChange={(e) => setSearchQuery(e.target.value)}
className="pl-9 h-8 text-sm"
/>
</div>
</div>
{/* Filter List */}
<div className="max-h-64 overflow-y-auto">
{/* Clear filter option */}
<div
onClick={handleClearFilter}
className={cn(
"flex items-center gap-3 p-3 hover:bg-accent hover:text-accent-foreground cursor-pointer border-b border-border/30 transition-colors",
!selectedFilter && "bg-accent text-accent-foreground",
)}
>
<div className="flex items-center gap-2 flex-1">
<Filter className="h-4 w-4 text-muted-foreground" />
<div>
<div className="text-sm font-medium">All Knowledge</div>
<div className="text-xs text-muted-foreground">
No filters applied
</div>
</div>
</div>
</div>
{loading ? (
<div className="flex items-center justify-center p-4">
<Loader2 className="h-4 w-4 animate-spin" />
<span className="ml-2 text-sm text-muted-foreground">
Loading...
</span>
</div>
) : filters.length === 0 ? (
<div className="p-4 text-center text-sm text-muted-foreground">
{searchQuery ? "No filters found" : "No saved filters"}
</div>
) : (
filters.map((filter) => (
<div
key={filter.id}
onClick={() => handleFilterSelect(filter)}
className={cn(
"flex items-center gap-3 p-3 hover:bg-accent hover:text-accent-foreground cursor-pointer group transition-colors",
selectedFilter?.id === filter.id &&
"bg-accent text-accent-foreground",
)}
>
<div className="flex items-center gap-2 flex-1 min-w-0">
<Filter className="h-4 w-4 text-muted-foreground group-hover:text-accent-foreground flex-shrink-0" />
<div className="min-w-0 flex-1">
<div className="text-sm font-medium truncate group-hover:text-accent-foreground">
{filter.name}
</div>
<div className="text-xs text-muted-foreground group-hover:text-accent-foreground/70 truncate">
{getFilterSummary(filter)}
</div>
</div>
</div>
<Button
variant="ghost"
size="sm"
onClick={(e) => deleteFilter(filter.id, e)}
className="opacity-0 group-hover:opacity-100 h-6 w-6 p-0 bg-transparent hover:bg-gray-700 hover:text-white transition-all duration-200 border border-transparent hover:border-gray-600"
>
<X className="h-3 w-3 text-gray-400 hover:text-white" />
</Button>
</div>
))
)}
</div>
{/* Create New Filter Option */}
<div className="border-t border-border/50">
<div
onClick={handleCreateNew}
className="flex items-center gap-3 p-3 hover:bg-accent hover:text-accent-foreground cursor-pointer transition-colors"
>
<Plus className="h-4 w-4 text-green-500" />
<div>
<div className="text-sm font-medium text-green-600">
Create New Filter
</div>
<div className="text-xs text-muted-foreground">
Save current search as filter
</div>
</div>
</div>
</div>
{/* Selected Filter Details */}
{selectedFilter && (
<div className="border-t border-border/50 p-3 bg-muted/20">
<div className="text-xs text-muted-foreground">
<strong>Selected:</strong> {selectedFilter.name}
</div>
{selectedFilter.description && (
<div className="text-xs text-muted-foreground mt-1 line-clamp-2">
{selectedFilter.description}
</div>
)}
</div>
)}
</CardContent>
</Card>
)}
{/* Create Filter Modal */}
{showCreateModal && (
<div className="fixed inset-0 bg-black/50 flex items-center justify-center z-50">
<div className="bg-card border border-border rounded-lg p-6 w-full max-w-md mx-4">
<h3 className="text-lg font-semibold mb-4">
Create New Knowledge Filter
</h3>
<div className="space-y-4">
<div>
<Label htmlFor="filter-name" className="font-medium">
Name <span className="text-red-400">*</span>
</Label>
<Input
id="filter-name"
type="text"
placeholder="Enter filter name"
value={createName}
onChange={(e) => setCreateName(e.target.value)}
className="mt-1"
/>
</div>
<div>
<Label htmlFor="filter-description" className="font-medium">
Description (optional)
</Label>
<Textarea
id="filter-description"
placeholder="Brief description of this filter"
value={createDescription}
onChange={(e) => setCreateDescription(e.target.value)}
className="mt-1"
rows={3}
/>
</div>
</div>
<div className="flex justify-end gap-2 mt-6">
<Button
variant="outline"
onClick={handleCancelCreate}
disabled={creating}
>
Cancel
</Button>
<Button
onClick={handleCreateFilter}
disabled={!createName.trim() || creating}
className="flex items-center gap-2"
>
{creating ? (
<>
<Loader2 className="h-4 w-4 animate-spin" />
Creating...
</>
) : (
<>
<Save className="h-4 w-4" />
Create Filter
</>
)}
</Button>
</div>
</div>
</div>
)}
</div>
);
}

View file

@ -15,6 +15,7 @@ import {
} from "@/components/provider-health-banner";
import { TaskNotificationMenu } from "@/components/task-notification-menu";
import { useAuth } from "@/contexts/auth-context";
import { useChat } from "@/contexts/chat-context";
import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context";
import { useTask } from "@/contexts/task-context";
import { cn } from "@/lib/utils";
@ -27,6 +28,7 @@ export function LayoutWrapper({ children }: { children: React.ReactNode }) {
const { isMenuOpen } = useTask();
const { isPanelOpen } = useKnowledgeFilter();
const { isLoading, isAuthenticated, isNoAuthMode } = useAuth();
const { isOnboardingComplete } = useChat();
// List of paths that should not show navigation
const authPaths = ["/login", "/auth/callback"];
@ -91,17 +93,17 @@ export function LayoutWrapper({ children }: { children: React.ReactNode }) {
isOpen={isDoclingUnhealthy}
className="w-full"
>
<DoclingHealthBanner />
<DoclingHealthBanner />
</AnimatedConditional>
{settings?.edited && isOnboardingComplete && (
<AnimatedConditional
vertical
isOpen={isProviderUnhealthy}
className="w-full"
>
<ProviderHealthBanner />
</AnimatedConditional>
{settings?.edited && (
<AnimatedConditional
vertical
isOpen={isProviderUnhealthy}
className="w-full"
>
<ProviderHealthBanner />
</AnimatedConditional>
)}
)}
</div>
<ChatRenderer settings={settings}>{children}</ChatRenderer>

View file

@ -1,48 +0,0 @@
"use client";
import {
Card,
CardContent,
CardDescription,
CardHeader,
CardTitle,
} from "@/components/ui/card";
import { Button } from "@/components/ui/button";
import { useAuth } from "@/contexts/auth-context";
import { Lock, LogIn } from "lucide-react";
/** Props accepted by `LoginRequired`. */
interface LoginRequiredProps {
  /** Card heading. */
  title?: string;
  /** Body text used when `feature` is not given. */
  description?: string;
  /** When set, the body reads "You need to sign in to access <feature>" instead of `description`. */
  feature?: string;
}

/**
 * Centered card asking the user to sign in; the button calls `login` from the
 * auth context.
 */
export function LoginRequired(props: LoginRequiredProps) {
  const {
    title = "Authentication Required",
    description = "You need to sign in to access this feature",
    feature,
  } = props;
  const { login } = useAuth();

  // A named feature takes precedence over the generic description.
  let message = description;
  if (feature) {
    message = `You need to sign in to access ${feature}`;
  }

  return (
    <div className="flex items-center justify-center min-h-[400px]">
      <Card className="max-w-md mx-auto">
        <CardHeader className="text-center">
          <div className="flex items-center justify-center w-12 h-12 bg-primary/10 rounded-full mx-auto mb-4">
            <Lock className="h-6 w-6 text-primary" />
          </div>
          <CardTitle>{title}</CardTitle>
          <CardDescription>{message}</CardDescription>
        </CardHeader>
        <CardContent className="text-center">
          <Button onClick={login} className="w-full">
            <LogIn className="h-4 w-4 mr-2" />
            Sign In with Google
          </Button>
        </CardContent>
      </Card>
    </div>
  );
}

View file

@ -1,23 +0,0 @@
"use client";
import * as React from "react";
import { Moon, Sun } from "lucide-react";
import { useTheme } from "next-themes";
import { Button } from "@/components/ui/button";
/**
 * Icon button that flips between the light and dark themes.
 *
 * The toggle is based on `resolvedTheme` rather than `theme`: `theme` can be
 * "system", in which case comparing it with "light" made the first click set
 * "light" even when the system already resolved to light, so nothing changed.
 */
export function ModeToggle() {
  const { resolvedTheme, setTheme } = useTheme();
  return (
    <Button
      variant="ghost"
      size="icon"
      onClick={() => setTheme(resolvedTheme === "dark" ? "light" : "dark")}
    >
      {/* Both icons are always rendered; the `dark:` classes decide which one is visible. */}
      <Sun className="h-[1.2rem] w-[1.2rem] rotate-0 scale-100 transition-all dark:-rotate-90 dark:scale-0" />
      <Moon className="absolute h-[1.2rem] w-[1.2rem] rotate-90 scale-0 transition-all dark:rotate-0 dark:scale-100" />
      <span className="sr-only">Toggle theme</span>
    </Button>
  );
}

View file

@ -1,81 +0,0 @@
"use client";
import { usePathname } from "next/navigation";
import {
useGetConversationsQuery,
type ChatConversation,
} from "@/app/api/queries/useGetConversationsQuery";
import { KnowledgeFilterDropdown } from "@/components/knowledge-filter-dropdown";
import { ModeToggle } from "@/components/mode-toggle";
import { Navigation } from "@/components/navigation";
import { useAuth } from "@/contexts/auth-context";
import { useChat } from "@/contexts/chat-context";
import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context";
/** Props accepted by `NavigationLayout`. */
interface NavigationLayoutProps {
  /** Page content rendered in the main column. */
  children: React.ReactNode;
}

/**
 * Page shell with a fixed sidebar (`Navigation`) from the `md` breakpoint up,
 * a sticky header holding the knowledge-filter dropdown and theme toggle, and
 * the page content below it.
 */
export function NavigationLayout({ children }: NavigationLayoutProps) {
  const { selectedFilter, setSelectedFilter } = useKnowledgeFilter();
  const pathname = usePathname();
  const { isAuthenticated, isNoAuthMode } = useAuth();
  const {
    endpoint,
    refreshTrigger,
    refreshConversations,
    startNewConversation,
  } = useChat();

  // Only fetch conversations on chat page, and only for a signed-in user or
  // when running in no-auth mode.
  const onChatRoute = pathname === "/" || pathname === "/chat";
  const queryEnabled = onChatRoute && (isAuthenticated || isNoAuthMode);
  const conversationsQuery = useGetConversationsQuery(
    endpoint,
    refreshTrigger,
    { enabled: queryEnabled },
  ) as { data: ChatConversation[]; isLoading: boolean };
  const { data: conversations = [], isLoading: isConversationsLoading } =
    conversationsQuery;

  /** Refreshes the conversation list, then starts a fresh conversation. */
  function handleNewConversation() {
    refreshConversations();
    startNewConversation();
  }

  return (
    <div className="h-full relative">
      <div className="hidden h-full md:flex md:w-72 md:flex-col md:fixed md:inset-y-0 z-[80] border-r border-border/40">
        <Navigation
          conversations={conversations}
          isConversationsLoading={isConversationsLoading}
          onNewConversation={handleNewConversation}
        />
      </div>
      <main className="md:pl-72">
        <div className="flex flex-col min-h-screen">
          <header className="sticky top-0 z-40 w-full border-b border-border/40 bg-background">
            <div className="container flex h-14 max-w-screen-2xl items-center">
              <div className="mr-4 hidden md:flex">
                <h1 className="text-lg font-semibold tracking-tight">
                  OpenRAG
                </h1>
              </div>
              <div className="flex flex-1 items-center justify-between space-x-2 md:justify-end">
                <div className="w-full flex-1 md:w-auto md:flex-none">
                  {/* Search component could go here */}
                </div>
                <nav className="flex items-center space-x-2">
                  <KnowledgeFilterDropdown
                    selectedFilter={selectedFilter}
                    onFilterSelect={setSelectedFilter}
                  />
                  <ModeToggle />
                </nav>
              </div>
            </div>
          </header>
          <div className="flex-1">
            <div className="container py-6 lg:py-8">{children}</div>
          </div>
        </div>
      </main>
    </div>
  );
}

View file

@ -1,30 +0,0 @@
"use client";
import * as React from "react";
import * as CheckboxPrimitive from "@radix-ui/react-checkbox";
import { Check } from "lucide-react";
import { cn } from "@/lib/utils";
/**
 * Styled wrapper around the Radix checkbox root. Accepts every prop of
 * `CheckboxPrimitive.Root`; `className` is merged after the built-in styles
 * and the ref is forwarded to the root element.
 */
const Checkbox = React.forwardRef<
  React.ElementRef<typeof CheckboxPrimitive.Root>,
  React.ComponentPropsWithoutRef<typeof CheckboxPrimitive.Root>
>((allProps, forwardedRef) => {
  const { className, ...rootProps } = allProps;
  const rootClassName = cn(
    "peer h-4 w-4 shrink-0 rounded-sm border border-primary ring-offset-background focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50 data-[state=checked]:bg-primary data-[state=checked]:text-primary-foreground",
    className,
  );
  const indicatorClassName = cn("flex items-center justify-center text-current");

  return (
    <CheckboxPrimitive.Root
      ref={forwardedRef}
      className={rootClassName}
      {...rootProps}
    >
      <CheckboxPrimitive.Indicator className={indicatorClassName}>
        <Check className="h-4 w-4" />
      </CheckboxPrimitive.Indicator>
    </CheckboxPrimitive.Root>
  );
});

Checkbox.displayName = CheckboxPrimitive.Root.displayName;

export { Checkbox };

View file

@ -1,158 +0,0 @@
"use client";
import { motion } from "motion/react";
import type React from "react";
import { useEffect, useId, useRef, useState } from "react";
import { cn } from "@/lib/utils";
/** Props accepted by `DotPattern`; any other prop is forwarded to the `<svg>`. */
interface DotPatternProps extends React.SVGProps<SVGSVGElement> {
  /** Horizontal spacing between dots (default 16). Non-positive values render no dots. */
  width?: number;
  /** Vertical spacing between dots (default 16). Non-positive values render no dots. */
  height?: number;
  /** X-offset of the entire pattern (default 0). */
  x?: number;
  /** Y-offset of the entire pattern (default 0). */
  y?: number;
  /** X-offset of each dot inside its grid cell (default 1). */
  cx?: number;
  /** Y-offset of each dot inside its grid cell (default 1). */
  cy?: number;
  /** Radius of each dot (default 1). */
  cr?: number;
  /** Additional CSS classes applied to the SVG container. */
  className?: string;
  /** Whether dots pulse with a glowing animation (default false). */
  glow?: boolean;
  [key: string]: unknown;
}

/**
 * Animated or static dot-grid background drawn as one SVG circle per dot.
 *
 * The grid is sized from the SVG's bounding box, measured on mount and on
 * every window `resize` event. Dot color follows `currentColor`, so it can be
 * controlled with text color utility classes.
 *
 * @example
 * // Basic usage
 * <DotPattern />
 *
 * // With glowing effect and custom spacing
 * <DotPattern
 *   width={20}
 *   height={20}
 *   glow={true}
 *   className="opacity-50"
 * />
 *
 * @remarks
 * - Client-side only ("use client"); uses Motion for the animation.
 * - Each dot's animation delay and duration are drawn with `Math.random()`
 *   during render, so they are re-rolled whenever the component re-renders.
 */
export function DotPattern({
  width = 16,
  height = 16,
  x = 0,
  y = 0,
  cx = 1,
  cy = 1,
  cr = 1,
  className,
  glow = false,
  ...props
}: DotPatternProps) {
  const id = useId();
  const containerRef = useRef<SVGSVGElement>(null);
  const [dimensions, setDimensions] = useState({ width: 0, height: 0 });

  useEffect(() => {
    const updateDimensions = () => {
      if (containerRef.current) {
        const rect = containerRef.current.getBoundingClientRect();
        setDimensions({ width: rect.width, height: rect.height });
      }
    };
    updateDimensions();
    window.addEventListener("resize", updateDimensions);
    return () => window.removeEventListener("resize", updateDimensions);
  }, []);

  // Zero, negative, or NaN spacing would yield an infinite or meaningless grid
  // size (and make Array.from throw), so such values produce an empty grid.
  const columns = width > 0 ? Math.ceil(dimensions.width / width) : 0;
  const rows = height > 0 ? Math.ceil(dimensions.height / height) : 0;

  const dots = Array.from({ length: columns * rows }, (_, i) => {
    const col = i % columns;
    const row = Math.floor(i / columns);
    return {
      // Pattern offset (x/y) + grid cell origin + per-dot offset (cx/cy).
      x: x + col * width + cx,
      y: y + row * height + cy,
      delay: Math.random() * 5,
      duration: Math.random() * 3 + 2,
    };
  });

  return (
    <svg
      ref={containerRef}
      aria-hidden="true"
      className={cn(
        "pointer-events-none absolute inset-0 h-full w-full text-neutral-400/80",
        className,
      )}
      {...props}
    >
      <defs>
        <radialGradient id={`${id}-gradient`}>
          <stop offset="0%" stopColor="currentColor" stopOpacity="1" />
          <stop offset="100%" stopColor="currentColor" stopOpacity="0" />
        </radialGradient>
      </defs>
      {dots.map((dot) => (
        <motion.circle
          key={`${dot.x}-${dot.y}`}
          cx={dot.x}
          cy={dot.y}
          r={cr}
          fill={glow ? `url(#${id}-gradient)` : "currentColor"}
          initial={glow ? { opacity: 0.4, scale: 1 } : {}}
          animate={
            glow
              ? {
                  opacity: [0.4, 1, 0.4],
                  scale: [1, 1.5, 1],
                }
              : {}
          }
          transition={
            glow
              ? {
                  duration: dot.duration,
                  repeat: Infinity,
                  repeatType: "reverse",
                  delay: dot.delay,
                  ease: "easeInOut",
                }
              : {}
          }
        />
      ))}
    </svg>
  );
}

View file

@ -1,67 +0,0 @@
import {
Select,
SelectContent,
SelectTrigger,
SelectValue,
} from "@radix-ui/react-select";
import {
Tooltip,
TooltipContent,
TooltipTrigger,
} from "@radix-ui/react-tooltip";
import type { ModelOption } from "@/app/api/queries/useGetModelsQuery";
import {
getFallbackModels,
type ModelProvider,
} from "@/app/settings/_helpers/model-helpers";
import { ModelSelectItems } from "@/app/settings/_helpers/model-select-item";
import { LabelWrapper } from "@/components/label-wrapper";
/** Props accepted by the EmbeddingModelInput component. */
interface EmbeddingModelInputProps {
/** When truthy, the select and its trigger are disabled and the tooltip explains the lock. */
disabled?: boolean;
/** Current selection, passed to the select as its controlled value. */
value: string;
/** Called with the newly selected value. */
onChange: (value: string) => void;
/** Optional models payload; its `embedding_models` list populates the options. */
modelsData?: {
embedding_models: ModelOption[];
};
/** Provider used to look up fallback models; the component defaults it to "openai". */
currentProvider?: ModelProvider;
}
/**
 * Labelled select for picking the embedding model.
 *
 * Options come from `modelsData.embedding_models`, with the provider's
 * fallback embedding models supplied alongside. The trigger carries a tooltip
 * whose text depends on whether the input is disabled.
 */
export const EmbeddingModelInput = ({
  disabled,
  value,
  onChange,
  modelsData,
  currentProvider = "openai",
}: EmbeddingModelInputProps) => {
  const locked = Boolean(disabled);

  let tooltipMessage = "Choose the embedding model for ingest and retrieval";
  if (locked) {
    tooltipMessage = "Locked to keep embeddings consistent";
  }

  const availableModels = modelsData?.embedding_models || [];
  const fallbackEmbeddingModels =
    getFallbackModels(currentProvider).embedding || [];

  return (
    <LabelWrapper
      helperText="Model used for knowledge ingest and retrieval"
      id="embedding-model-select"
      label="Embedding model"
    >
      <Select disabled={locked} value={value} onValueChange={onChange}>
        <Tooltip delayDuration={0}>
          <TooltipTrigger asChild>
            <SelectTrigger disabled={locked} id="embedding-model-select">
              <SelectValue placeholder="Select an embedding model" />
            </SelectTrigger>
          </TooltipTrigger>
          <TooltipContent>{tooltipMessage}</TooltipContent>
        </Tooltip>
        <SelectContent>
          <ModelSelectItems
            models={availableModels}
            fallbackModels={fallbackEmbeddingModels}
            provider={currentProvider}
          />
        </SelectContent>
      </Select>
    </LabelWrapper>
  );
};

View file

@ -1,128 +0,0 @@
import * as React from "react";
import * as NavigationMenuPrimitive from "@radix-ui/react-navigation-menu";
import { cva } from "class-variance-authority";
import { ChevronDown } from "lucide-react";
import { cn } from "@/lib/utils";
/**
 * Root of the navigation menu: wraps the Radix root, merges the caller's
 * `className` with the base layout classes, and always renders the viewport
 * after its children.
 */
const NavigationMenu = React.forwardRef<
  React.ElementRef<typeof NavigationMenuPrimitive.Root>,
  React.ComponentPropsWithoutRef<typeof NavigationMenuPrimitive.Root>
>(({ className, children, ...rest }, forwardedRef) => {
  const rootClasses = cn(
    "relative z-10 flex max-w-max flex-1 items-center justify-center",
    className,
  );

  return (
    <NavigationMenuPrimitive.Root
      ref={forwardedRef}
      className={rootClasses}
      {...rest}
    >
      {children}
      <NavigationMenuViewport />
    </NavigationMenuPrimitive.Root>
  );
});
NavigationMenu.displayName = NavigationMenuPrimitive.Root.displayName;
/** Styled wrapper around the Radix navigation menu list; forwards its ref and remaining props. */
const NavigationMenuList = React.forwardRef<
  React.ElementRef<typeof NavigationMenuPrimitive.List>,
  React.ComponentPropsWithoutRef<typeof NavigationMenuPrimitive.List>
>(({ className, ...rest }, forwardedRef) => {
  const listClasses = cn(
    "group flex flex-1 list-none items-center justify-center space-x-1",
    className,
  );

  return (
    <NavigationMenuPrimitive.List
      ref={forwardedRef}
      className={listClasses}
      {...rest}
    />
  );
});
NavigationMenuList.displayName = NavigationMenuPrimitive.List.displayName;
// Direct alias of the Radix item primitive; no extra styling is applied here.
const NavigationMenuItem = NavigationMenuPrimitive.Item;
// Base class list for menu triggers, built with cva and exported so callers
// can reuse the trigger styling; NavigationMenuTrigger calls it with no arguments.
const navigationMenuTriggerStyle = cva(
"group inline-flex h-10 w-max items-center justify-center rounded-md bg-background px-4 py-2 text-sm font-medium transition-colors hover:bg-accent hover:text-accent-foreground focus:bg-accent focus:text-accent-foreground focus:outline-none disabled:pointer-events-none disabled:opacity-50 data-[state=open]:text-accent-foreground data-[state=open]:bg-accent/50 data-[state=open]:hover:bg-accent data-[state=open]:focus:bg-accent",
);
/**
 * Menu trigger: renders its children followed by a chevron icon that rotates
 * when the trigger's `data-state` is `open`.
 */
const NavigationMenuTrigger = React.forwardRef<
  React.ElementRef<typeof NavigationMenuPrimitive.Trigger>,
  React.ComponentPropsWithoutRef<typeof NavigationMenuPrimitive.Trigger>
>(({ className, children, ...rest }, forwardedRef) => {
  const triggerClasses = cn(navigationMenuTriggerStyle(), "group", className);

  return (
    <NavigationMenuPrimitive.Trigger
      ref={forwardedRef}
      className={triggerClasses}
      {...rest}
    >
      {children}{" "}
      <ChevronDown
        className="relative top-[1px] ml-1 h-3 w-3 transition duration-200 group-data-[state=open]:rotate-180"
        aria-hidden="true"
      />
    </NavigationMenuPrimitive.Trigger>
  );
});
NavigationMenuTrigger.displayName = NavigationMenuPrimitive.Trigger.displayName;
/** Styled wrapper around the Radix content primitive, with enter/exit animation classes keyed on `data-motion`. */
const NavigationMenuContent = React.forwardRef<
  React.ElementRef<typeof NavigationMenuPrimitive.Content>,
  React.ComponentPropsWithoutRef<typeof NavigationMenuPrimitive.Content>
>(({ className, ...rest }, forwardedRef) => {
  const contentClasses = cn(
    "left-0 top-0 w-full data-[motion^=from-]:animate-in data-[motion^=to-]:animate-out data-[motion^=from-]:fade-in data-[motion^=to-]:fade-out data-[motion=from-end]:slide-in-from-right-52 data-[motion=from-start]:slide-in-from-left-52 data-[motion=to-end]:slide-out-to-right-52 data-[motion=to-start]:slide-out-to-left-52 md:absolute md:w-auto ",
    className,
  );

  return (
    <NavigationMenuPrimitive.Content
      ref={forwardedRef}
      className={contentClasses}
      {...rest}
    />
  );
});
NavigationMenuContent.displayName = NavigationMenuPrimitive.Content.displayName;
// Direct alias of the Radix link primitive; no extra styling is applied here.
const NavigationMenuLink = NavigationMenuPrimitive.Link;
/**
 * Viewport wrapper: an absolutely positioned container placed below the menu
 * holding the Radix viewport, which receives the forwarded ref and props.
 */
const NavigationMenuViewport = React.forwardRef<
  React.ElementRef<typeof NavigationMenuPrimitive.Viewport>,
  React.ComponentPropsWithoutRef<typeof NavigationMenuPrimitive.Viewport>
>(({ className, ...rest }, forwardedRef) => {
  const wrapperClasses = cn("absolute left-0 top-full flex justify-center");
  const viewportClasses = cn(
    "origin-top-center relative mt-1.5 h-[var(--radix-navigation-menu-viewport-height)] w-full overflow-hidden rounded-md border bg-popover text-popover-foreground shadow-lg data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-90 md:w-[var(--radix-navigation-menu-viewport-width)]",
    className,
  );

  return (
    <div className={wrapperClasses}>
      <NavigationMenuPrimitive.Viewport
        className={viewportClasses}
        ref={forwardedRef}
        {...rest}
      />
    </div>
  );
});
NavigationMenuViewport.displayName =
  NavigationMenuPrimitive.Viewport.displayName;
/** Styled Radix indicator that renders a small rotated square as the arrow. */
const NavigationMenuIndicator = React.forwardRef<
  React.ElementRef<typeof NavigationMenuPrimitive.Indicator>,
  React.ComponentPropsWithoutRef<typeof NavigationMenuPrimitive.Indicator>
>(({ className, ...rest }, forwardedRef) => {
  const indicatorClasses = cn(
    "top-full z-[1] flex h-1.5 items-end justify-center overflow-hidden data-[state=visible]:animate-in data-[state=hidden]:animate-out data-[state=hidden]:fade-out data-[state=visible]:fade-in",
    className,
  );

  return (
    <NavigationMenuPrimitive.Indicator
      ref={forwardedRef}
      className={indicatorClasses}
      {...rest}
    >
      <div className="relative top-[60%] h-2 w-2 rotate-45 rounded-tl-sm bg-border shadow-md" />
    </NavigationMenuPrimitive.Indicator>
  );
});
NavigationMenuIndicator.displayName =
  NavigationMenuPrimitive.Indicator.displayName;
export {
navigationMenuTriggerStyle,
NavigationMenu,
NavigationMenuList,
NavigationMenuItem,
NavigationMenuContent,
NavigationMenuTrigger,
NavigationMenuLink,
NavigationMenuIndicator,
NavigationMenuViewport,
};

View file

@ -1,44 +0,0 @@
"use client";
import * as React from "react";
import * as RadioGroupPrimitive from "@radix-ui/react-radio-group";
import { Circle } from "lucide-react";
import { cn } from "@/lib/utils";
/** Radix radio group root laid out as a grid with a gap; forwards its ref and remaining props. */
const RadioGroup = React.forwardRef<
  React.ElementRef<typeof RadioGroupPrimitive.Root>,
  React.ComponentPropsWithoutRef<typeof RadioGroupPrimitive.Root>
>(({ className, ...rest }, forwardedRef) => (
  <RadioGroupPrimitive.Root
    className={cn("grid gap-2", className)}
    {...rest}
    ref={forwardedRef}
  />
));
RadioGroup.displayName = RadioGroupPrimitive.Root.displayName;
/** Single radio item: a round bordered control whose indicator renders a filled circle icon. */
const RadioGroupItem = React.forwardRef<
  React.ElementRef<typeof RadioGroupPrimitive.Item>,
  React.ComponentPropsWithoutRef<typeof RadioGroupPrimitive.Item>
>(({ className, ...rest }, forwardedRef) => (
  <RadioGroupPrimitive.Item
    ref={forwardedRef}
    className={cn(
      "aspect-square h-4 w-4 rounded-full border border-input text-primary ring-offset-background focus:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50",
      className,
    )}
    {...rest}
  >
    <RadioGroupPrimitive.Indicator className="flex items-center justify-center">
      <Circle className="h-2.5 w-2.5 fill-current text-current" />
    </RadioGroupPrimitive.Indicator>
  </RadioGroupPrimitive.Item>
));
RadioGroupItem.displayName = RadioGroupPrimitive.Item.displayName;
export { RadioGroup, RadioGroupItem };

View file

@ -1,31 +0,0 @@
"use client";
import * as SeparatorPrimitive from "@radix-ui/react-separator";
import * as React from "react";
import { cn } from "@/lib/utils";
/**
 * Thin divider built on the Radix separator. Defaults to a horizontal,
 * decorative line; the thickness classes follow `orientation`.
 */
const Separator = React.forwardRef<
  React.ElementRef<typeof SeparatorPrimitive.Root>,
  React.ComponentPropsWithoutRef<typeof SeparatorPrimitive.Root>
>(
  (
    { className, orientation = "horizontal", decorative = true, ...rest },
    forwardedRef,
  ) => {
    let sizeClasses = "h-full w-[1px]";
    if (orientation === "horizontal") {
      sizeClasses = "h-[1px] w-full";
    }

    return (
      <SeparatorPrimitive.Root
        ref={forwardedRef}
        decorative={decorative}
        orientation={orientation}
        className={cn("shrink-0 bg-border", sizeClasses, className)}
        {...rest}
      />
    );
  },
);
Separator.displayName = SeparatorPrimitive.Root.displayName;
export { Separator };

View file

@ -1,56 +0,0 @@
import * as React from "react";
/** Fields of the Discord invite response that this hook exposes to callers. */
interface DiscordData {
  approximate_member_count: number;
  approximate_presence_count: number;
  guild: {
    name: string;
    icon: string;
  };
}

/**
 * Fetches invite metadata (with counts) for a Discord invite code and
 * refreshes it every 10 minutes.
 *
 * @param inviteCode - Invite code interpolated into the Discord invites URL.
 * @returns `data` (null until the first successful fetch), `isLoading`, and
 *   `error` (a message string, or null when the last fetch did not fail).
 */
export const useDiscordMembers = (inviteCode: string) => {
  const [data, setData] = React.useState<DiscordData | null>(null);
  const [isLoading, setIsLoading] = React.useState(true);
  const [error, setError] = React.useState<string | null>(null);

  React.useEffect(() => {
    // Aborted on cleanup so a response for a previous invite code (or one
    // arriving after unmount) can never overwrite the current state.
    const controller = new AbortController();

    const fetchDiscordData = async () => {
      try {
        setIsLoading(true);
        setError(null);
        const response = await fetch(
          `https://discord.com/api/v10/invites/${inviteCode}?with_counts=true&with_expiration=true`,
          {
            headers: {
              "Content-Type": "application/json",
            },
            signal: controller.signal,
          },
        );
        if (!response.ok) {
          throw new Error(`Discord API error: ${response.status}`);
        }
        const discordData: DiscordData = await response.json();
        if (controller.signal.aborted) return;
        setData(discordData);
      } catch (err) {
        // An abort is an expected cancellation, not a failure to report.
        if (controller.signal.aborted) return;
        setError(
          err instanceof Error ? err.message : "Failed to fetch Discord data",
        );
        console.error("Discord API Error:", err);
      } finally {
        if (!controller.signal.aborted) {
          setIsLoading(false);
        }
      }
    };

    void fetchDiscordData();
    // Refresh every 10 minutes
    const interval = setInterval(
      () => void fetchDiscordData(),
      10 * 60 * 1000,
    );

    return () => {
      clearInterval(interval);
      controller.abort();
    };
  }, [inviteCode]);

  return { data, isLoading, error };
};

View file

@ -1,52 +0,0 @@
import * as React from "react";
/** Fields of the GitHub repository response that this hook exposes to callers. */
interface GitHubData {
  stargazers_count: number;
  forks_count: number;
  open_issues_count: number;
}

/**
 * Fetches repository metadata from the GitHub REST API and refreshes it
 * every 5 minutes.
 *
 * @param repo - Repository path interpolated after `/repos/` in the API URL.
 * @returns `data` (null until the first successful fetch), `isLoading`, and
 *   `error` (a message string, or null when the last fetch did not fail).
 */
export const useGitHubStars = (repo: string) => {
  const [data, setData] = React.useState<GitHubData | null>(null);
  const [isLoading, setIsLoading] = React.useState(true);
  const [error, setError] = React.useState<string | null>(null);

  React.useEffect(() => {
    // Aborted on cleanup so a response for a previous repo (or one arriving
    // after unmount) can never overwrite the current state.
    const controller = new AbortController();

    const fetchGitHubData = async () => {
      try {
        setIsLoading(true);
        setError(null);
        const response = await fetch(`https://api.github.com/repos/${repo}`, {
          headers: {
            Accept: "application/vnd.github.v3+json",
            // Optional: Add your GitHub token for higher rate limits
            // 'Authorization': `Bearer ${process.env.NEXT_PUBLIC_GITHUB_TOKEN}`,
          },
          signal: controller.signal,
        });
        if (!response.ok) {
          throw new Error(`GitHub API error: ${response.status}`);
        }
        const repoData: GitHubData = await response.json();
        if (controller.signal.aborted) return;
        setData(repoData);
      } catch (err) {
        // An abort is an expected cancellation, not a failure to report.
        if (controller.signal.aborted) return;
        setError(
          err instanceof Error ? err.message : "Failed to fetch GitHub data",
        );
        console.error("GitHub API Error:", err);
      } finally {
        if (!controller.signal.aborted) {
          setIsLoading(false);
        }
      }
    };

    void fetchGitHubData();
    // Refresh every 5 minutes
    const interval = setInterval(() => void fetchGitHubData(), 5 * 60 * 1000);

    return () => {
      clearInterval(interval);
      controller.abort();
    };
  }, [repo]);

  return { data, isLoading, error };
};

View file

@ -162,6 +162,19 @@ export function useChatStreaming({
if (line.trim()) {
try {
const chunk = JSON.parse(line);
// Investigation logging for Granite 3.3 8b tool call detection
const chunkKeys = Object.keys(chunk);
const toolRelatedKeys = chunkKeys.filter(key =>
key.toLowerCase().includes('tool') ||
key.toLowerCase().includes('call') ||
key.toLowerCase().includes('retrieval') ||
key.toLowerCase().includes('function') ||
key.toLowerCase().includes('result')
);
if (toolRelatedKeys.length > 0) {
console.log('[Tool Detection] Found tool-related keys:', toolRelatedKeys, chunk);
}
// Extract response ID if present
if (chunk.id) {
@ -449,6 +462,42 @@ export function useChatStreaming({
}
}
}
// Heuristic detection for implicit tool calls (Granite 3.3 8b workaround)
// Check if chunk contains retrieval results without explicit tool call markers
const hasImplicitToolCall = (
// Check for various result indicators in the chunk
(chunk.results && Array.isArray(chunk.results) && chunk.results.length > 0) ||
(chunk.outputs && Array.isArray(chunk.outputs) && chunk.outputs.length > 0) ||
// Check for retrieval-related fields
chunk.retrieved_documents ||
chunk.retrieval_results ||
// Check for nested data structures that might contain results
(chunk.data && typeof chunk.data === 'object' && (
chunk.data.results ||
chunk.data.retrieved_documents ||
chunk.data.retrieval_results
))
);
if (hasImplicitToolCall && currentFunctionCalls.length === 0) {
console.log('[Heuristic Detection] Detected implicit tool call:', chunk);
// Create a synthetic function call for the UI
const results = chunk.results || chunk.outputs || chunk.retrieved_documents ||
chunk.retrieval_results || chunk.data?.results ||
chunk.data?.retrieved_documents || [];
const syntheticFunctionCall: FunctionCall = {
name: "Retrieval",
arguments: { implicit: true, detected_heuristically: true },
status: "completed",
type: "retrieval_call",
result: results,
};
currentFunctionCalls.push(syntheticFunctionCall);
console.log('[Heuristic Detection] Created synthetic function call');
}
// Update streaming message in real-time
if (
@ -486,6 +535,29 @@ export function useChatStreaming({
"No response received from the server. Please try again.",
);
}
// Post-processing: Heuristic detection based on final content
// If no explicit tool calls detected but content shows RAG indicators
if (currentFunctionCalls.length === 0 && currentContent) {
// Check for citation patterns that indicate RAG usage
const hasCitations = /\(Source:|\[Source:|\bSource:|filename:|document:/i.test(currentContent);
// Check for common RAG response patterns
const hasRAGPattern = /based on.*(?:document|file|information|data)|according to.*(?:document|file)/i.test(currentContent);
if (hasCitations || hasRAGPattern) {
console.log('[Post-Processing] Detected RAG usage from content patterns');
const syntheticFunctionCall: FunctionCall = {
name: "Retrieval",
arguments: {
implicit: true,
detected_from: hasCitations ? "citations" : "content_patterns"
},
status: "completed",
type: "retrieval_call",
};
currentFunctionCalls.push(syntheticFunctionCall);
}
}
// Finalize the message
const finalMessage: Message = {

18
frontend/knip.config.ts Normal file
View file

@ -0,0 +1,18 @@
import type { KnipConfig } from 'knip';
// Knip configuration for the frontend package (run through the `knip` npm script).
const config: KnipConfig = {
// Entry-point globs: every TS/TSX file under app/ plus the Next.js config.
entry: [
'app/**/*.{ts,tsx}',
'next.config.ts',
],
// Project file globs: all TS/TSX sources.
project: ['**/*.{ts,tsx}'],
// Globs excluded from the project: type declarations, dependencies, build output, static assets.
ignore: [
'**/*.d.ts',
'**/node_modules/**',
'.next/**',
'public/**',
],
};
export default config;

View file

@ -1,13 +0,0 @@
/**
 * Formats a count compactly: one decimal with a `k` suffix from 1,000 and an
 * `M` suffix from 1,000,000; smaller values use `toLocaleString()`.
 *
 * Values just under one million that would round up to `1000.0k` are
 * promoted to the `M` form, so 999,999 yields `"1.0M"`.
 *
 * @param count - The number to format.
 * @returns The compact string representation.
 */
export const formatCount = (count: number): string => {
  if (count >= 1_000) {
    if (count < 1_000_000) {
      const thousands = (count / 1_000).toFixed(1);
      // toFixed rounds, so values such as 999,999 come out as "1000.0";
      // those belong in the millions bucket.
      if (Number(thousands) < 1_000) {
        return `${thousands}k`;
      }
    }
    return `${(count / 1_000_000).toFixed(1)}M`;
  }
  return count.toLocaleString();
};
/** Formats a count in full using the runtime's default locale. */
export const formatExactCount = (count: number): string =>
  count.toLocaleString();

File diff suppressed because it is too large Load diff

View file

@ -8,23 +8,18 @@
"start": "next start",
"lint": "next lint",
"check-format": "npx @biomejs/biome check",
"format": "npx @biomejs/biome format --write"
"format": "npx @biomejs/biome format --write",
"knip": "knip"
},
"dependencies": {
"@microsoft/mgt-components": "^4.6.0",
"@microsoft/mgt-msal2-provider": "^4.6.0",
"@radix-ui/react-accordion": "^1.2.12",
"@radix-ui/react-avatar": "^1.1.10",
"@radix-ui/react-checkbox": "^1.3.2",
"@radix-ui/react-collapsible": "^1.1.11",
"@radix-ui/react-dialog": "^1.1.15",
"@radix-ui/react-dropdown-menu": "^2.1.15",
"@radix-ui/react-label": "^2.1.7",
"@radix-ui/react-navigation-menu": "^1.2.13",
"@radix-ui/react-popover": "^1.1.15",
"@radix-ui/react-radio-group": "^1.3.8",
"@radix-ui/react-select": "^2.2.5",
"@radix-ui/react-separator": "^1.1.7",
"@radix-ui/react-slider": "^1.3.6",
"@radix-ui/react-slot": "^1.2.4",
"@radix-ui/react-switch": "^1.2.5",
@ -47,7 +42,6 @@
"react-dom": "^19.0.0",
"react-dropzone": "^14.3.8",
"react-hook-form": "^7.65.0",
"react-icons": "^5.5.0",
"react-markdown": "^10.1.0",
"react-syntax-highlighter": "^15.6.1",
"react-textarea-autosize": "^8.5.9",
@ -68,8 +62,7 @@
"@types/react-dom": "^19",
"@types/react-syntax-highlighter": "^15.5.13",
"autoprefixer": "^10.4.21",
"eslint": "^9",
"eslint-config-next": "15.3.5",
"knip": "^5.73.1",
"postcss": "^8.5.6",
"tailwindcss": "^3.4.17",
"typescript": "^5"

View file

@ -36,7 +36,7 @@ def get_conversation_thread(user_id: str, previous_response_id: str = None):
"messages": [
{
"role": "system",
"content": "You are the OpenRAG Agent. You answer questions using retrieval, reasoning, and tool use.\nYou have access to several tools. Your job is to determine **which tool to use and when**.\n### Available Tools\n- OpenSearch Retrieval Tool:\n Use this to search the indexed knowledge base. Use when the user asks about product details, internal concepts, processes, architecture, documentation, roadmaps, or anything that may be stored in the index.\n- Conversation History:\n Use this to maintain continuity when the user is referring to previous turns. \n Do not treat history as a factual source.\n- Conversation File Context:\n Use this when the user asks about a document they uploaded or refers directly to its contents.\n- URL Ingestion Tool:\n Use this **only** when the user explicitly asks you to read, summarize, or analyze the content of a URL.\n Do not ingest URLs automatically.\n- Calculator / Expression Evaluation Tool:\n Use this when the user asks to compare numbers, compute estimates, calculate totals, analyze pricing, or answer any question requiring mathematics or quantitative reasoning.\n If the answer requires arithmetic, call the calculator tool rather than calculating internally.\n### Retrieval Decision Rules\nUse OpenSearch **whenever**:\n1. The question may be answered from internal or indexed data.\n2. The user references team names, product names, release plans, configurations, requirements, or official information.\n3. 
The user needs a factual, grounded answer.\nDo **not** use retrieval if:\n- The question is purely creative (e.g., storytelling, analogies) or personal preference.\n- The user simply wants text reformatted or rewritten from what is already present in the conversation.\nWhen uncertain → **Retrieve.** Retrieval is low risk and improves grounding.\n### URL Ingestion Rules\nOnly ingest URLs when the user explicitly says:\n- \"Read this link\"\n- \"Summarize this webpage\"\n- \"What does this site say?\"\n- \"Ingest this URL\"\nIf unclear → ask a clarifying question.\n### Calculator Usage Rules\nUse the calculator when:\n- Performing arithmetic\n- Estimating totals\n- Comparing values\n- Modeling cost, time, effort, scale, or projections\nDo not perform math internally. **Call the calculator tool instead.**\n### Answer Construction Rules\n1. When asked: \"What is OpenRAG\", answer the following:\n\"OpenRAG is an open-source package for building agentic RAG systems. It supports integration with a wide range of orchestration tools, vector databases, and LLM providers. OpenRAG connects and amplifies three popular, proven open-source projects into one powerful platform:\n**Langflow** Langflow is a powerful tool to build and deploy AI agents and MCP servers. [Read more](https://www.langflow.org/)\n**OpenSearch** OpenSearch is an open source, search and observability suite that brings order to unstructured data at scale. [Read more](https://opensearch.org/)\n**Docling** Docling simplifies document processing with advanced PDF understanding, OCR support, and seamless AI integrations. Parse PDFs, DOCX, PPTX, images & more. [Read more](https://www.docling.ai/)\"\n2. Synthesize retrieved or ingested content in your own words.\n3. Support factual claims with citations in the format:\n (Source: <document_name_or_id>)\n4. If no supporting evidence is found:\n Say: \"No relevant supporting sources were found for that request.\"\n5. Never invent facts or hallucinate details.\n6. 
Be concise, direct, and confident. \n7. Do not reveal internal chain-of-thought.",
"content": "You are the OpenRAG Agent. You answer questions using retrieval, reasoning, and tool use.\nYou have access to several tools. Your job is to determine **which tool to use and when**.\n### Available Tools\n- OpenSearch Retrieval Tool:\n Use this to search the indexed knowledge base. Use when the user asks about product details, internal concepts, processes, architecture, documentation, roadmaps, or anything that may be stored in the index.\n- Conversation History:\n Use this to maintain continuity when the user is referring to previous turns. \n Do not treat history as a factual source.\n- Conversation File Context:\n Use this when the user asks about a document they uploaded or refers directly to its contents.\n **IMPORTANT**: If you receive confirmation that a file was uploaded (e.g., \"Confirm that you received this file\"), the file content is already available in the conversation context. Do NOT attempt to ingest it as a URL.\n Simply acknowledge the file and answer questions about it directly from the context.\n- URL Ingestion Tool:\n Use this **only** when the user explicitly asks you to read, summarize, or analyze the content of a web URL (http:// or https://).\n **Do NOT use this tool for filenames** (e.g., README.md, document.pdf, data.txt). These are file uploads, not URLs.\n Only use this tool for actual web addresses that the user explicitly provides.\n If unclear → ask a clarifying question.\n- Calculator / Expression Evaluation Tool:\n Use this when the user asks to compare numbers, compute estimates, calculate totals, analyze pricing, or answer any question requiring mathematics or quantitative reasoning.\n If the answer requires arithmetic, call the calculator tool rather than calculating internally.\n### Retrieval Decision Rules\nUse OpenSearch **whenever**:\n1. The question may be answered from internal or indexed data.\n2. 
The user references team names, product names, release plans, configurations, requirements, or official information.\n3. The user needs a factual, grounded answer.\nDo **not** use retrieval if:\n- The question is purely creative (e.g., storytelling, analogies) or personal preference.\n- The user simply wants text reformatted or rewritten from what is already present in the conversation.\nWhen uncertain → **Retrieve.** Retrieval is low risk and improves grounding.\n### File Upload vs URL Distinction\n**File uploads** (already in context):\n- Filenames like: README.md, document.pdf, notes.txt, data.csv\n- When you see file confirmation messages\n- Use conversation context directly - do NOT call URL tool\n**Web URLs** (need ingestion):\n- Start with http:// or https://\n- Examples: https://example.com, http://docs.site.org\n- User explicitly asks to fetch from web\n### Calculator Usage Rules\nUse the calculator when:\n- Performing arithmetic\n- Estimating totals\n- Comparing values\n- Modeling cost, time, effort, scale, or projections\nDo not perform math internally. **Call the calculator tool instead.**\n### Answer Construction Rules\n1. When asked: \"What is OpenRAG\", answer the following:\n\"OpenRAG is an open-source package for building agentic RAG systems. It supports integration with a wide range of orchestration tools, vector databases, and LLM providers. OpenRAG connects and amplifies three popular, proven open-source projects into one powerful platform:\n**Langflow** Langflow is a powerful tool to build and deploy AI agents and MCP servers. [Read more](https://www.langflow.org/)\n**OpenSearch** OpenSearch is an open source, search and observability suite that brings order to unstructured data at scale. [Read more](https://opensearch.org/)\n**Docling** Docling simplifies document processing with advanced PDF understanding, OCR support, and seamless AI integrations. Parse PDFs, DOCX, PPTX, images & more. [Read more](https://www.docling.ai/)\"\n2. 
Synthesize retrieved or ingested content in your own words.\n3. Support factual claims with citations in the format:\n (Source: <document_name_or_id>)\n4. If no supporting evidence is found:\n Say: \"No relevant supporting sources were found for that request.\"\n5. Never invent facts or hallucinate details.\n6. Be concise, direct, and confident. \n7. Do not reveal internal chain-of-thought.",
}
],
"previous_response_id": previous_response_id, # Parent response_id for branching
@ -135,6 +135,7 @@ async def async_response_stream(
full_response = ""
chunk_count = 0
detected_tool_call = False # Track if we've detected a tool call
async for chunk in response:
chunk_count += 1
logger.debug(
@ -158,6 +159,17 @@ async def async_response_stream(
else:
delta_text = str(chunk.delta)
full_response += delta_text
# Enhanced logging for tool call detection (Granite 3.3 8b investigation)
chunk_attrs = dir(chunk) if hasattr(chunk, '__dict__') else []
tool_related_attrs = [attr for attr in chunk_attrs if 'tool' in attr.lower() or 'call' in attr.lower() or 'retrieval' in attr.lower()]
if tool_related_attrs:
logger.info(
"Tool-related attributes found in chunk",
chunk_count=chunk_count,
attributes=tool_related_attrs,
chunk_type=type(chunk).__name__
)
# Send the raw event as JSON followed by newline for easy parsing
try:
@ -169,7 +181,57 @@ async def async_response_stream(
chunk_data = chunk.__dict__
else:
chunk_data = str(chunk)
# Log detailed chunk structure for investigation (especially for Granite 3.3 8b)
if isinstance(chunk_data, dict):
# Check for any fields that might indicate tool usage
potential_tool_fields = {
k: v for k, v in chunk_data.items()
if any(keyword in str(k).lower() for keyword in ['tool', 'call', 'retrieval', 'function', 'result', 'output'])
}
if potential_tool_fields:
logger.info(
"Potential tool-related fields in chunk",
chunk_count=chunk_count,
fields=list(potential_tool_fields.keys()),
sample_data=str(potential_tool_fields)[:500]
)
# Middleware: Detect implicit tool calls and inject standardized events
# This helps Granite 3.3 8b and other models that don't emit standard markers
if isinstance(chunk_data, dict) and not detected_tool_call:
# Check if this chunk contains retrieval results
has_results = any([
'results' in chunk_data and isinstance(chunk_data.get('results'), list),
'outputs' in chunk_data and isinstance(chunk_data.get('outputs'), list),
'retrieved_documents' in chunk_data,
'retrieval_results' in chunk_data,
])
if has_results:
logger.info(
"Detected implicit tool call in backend, injecting synthetic event",
chunk_fields=list(chunk_data.keys())
)
# Inject a synthetic tool call event before this chunk
synthetic_event = {
"type": "response.output_item.done",
"item": {
"type": "retrieval_call",
"id": f"synthetic_{chunk_count}",
"name": "Retrieval",
"tool_name": "Retrieval",
"status": "completed",
"inputs": {"implicit": True, "backend_detected": True},
"results": chunk_data.get('results') or chunk_data.get('outputs') or
chunk_data.get('retrieved_documents') or
chunk_data.get('retrieval_results') or []
}
}
# Send the synthetic event first
yield (json.dumps(synthetic_event, default=str) + "\n").encode("utf-8")
detected_tool_call = True # Mark that we've injected a tool call
yield (json.dumps(chunk_data, default=str) + "\n").encode("utf-8")
except Exception as e:
# Fallback to string representation

View file

@ -59,6 +59,12 @@ DISABLE_INGEST_WITH_LANGFLOW = os.getenv(
"DISABLE_INGEST_WITH_LANGFLOW", "false"
).lower() in ("true", "1", "yes")
# Langflow HTTP timeout configuration (in seconds)
# For large documents (300+ pages), ingestion can take 30+ minutes
# Defaults: 2400 s (40 minutes) total and read timeout; 30 s connect/write/pool timeout
LANGFLOW_TIMEOUT = float(os.getenv("LANGFLOW_TIMEOUT", "2400")) # 40 minutes
LANGFLOW_CONNECT_TIMEOUT = float(os.getenv("LANGFLOW_CONNECT_TIMEOUT", "30")) # 30 seconds
def is_no_auth_mode():
"""Check if we're running in no-auth mode (OAuth credentials missing)"""
@ -317,9 +323,22 @@ class AppClients:
# Initialize document converter
self.converter = create_document_converter(ocr_engine=DOCLING_OCR_ENGINE)
# Initialize Langflow HTTP client
# Initialize Langflow HTTP client with extended timeouts for large documents
# Use explicit timeout configuration to handle large PDF ingestion (300+ pages)
self.langflow_http_client = httpx.AsyncClient(
base_url=LANGFLOW_URL, timeout=1200.0
base_url=LANGFLOW_URL,
timeout=httpx.Timeout(
timeout=LANGFLOW_TIMEOUT, # Total timeout
connect=LANGFLOW_CONNECT_TIMEOUT, # Connection timeout
read=LANGFLOW_TIMEOUT, # Read timeout (most important for large PDFs)
write=LANGFLOW_CONNECT_TIMEOUT, # Write timeout
pool=LANGFLOW_CONNECT_TIMEOUT, # Pool timeout
)
)
logger.info(
"Initialized Langflow HTTP client with extended timeouts",
timeout_seconds=LANGFLOW_TIMEOUT,
connect_timeout_seconds=LANGFLOW_CONNECT_TIMEOUT,
)
return self

View file

@ -15,7 +15,7 @@ class OneDriveConnector(BaseConnector):
# Required BaseConnector class attributes
CLIENT_ID_ENV_VAR = "MICROSOFT_GRAPH_OAUTH_CLIENT_ID"
CLIENT_SECRET_ENV_VAR = "MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET"
CLIENT_SECRET_ENV_VAR = "MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET" # pragma: allowlist secret
# Connector metadata
CONNECTOR_NAME = "OneDrive"

View file

@ -16,7 +16,7 @@ class SharePointConnector(BaseConnector):
# Required BaseConnector class attributes
CLIENT_ID_ENV_VAR = "MICROSOFT_GRAPH_OAUTH_CLIENT_ID"
CLIENT_SECRET_ENV_VAR = "MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET"
CLIENT_SECRET_ENV_VAR = "MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET" # pragma: allowlist secret
# Connector metadata
CONNECTOR_NAME = "SharePoint"

View file

@ -197,10 +197,27 @@ class TaskProcessor:
file_hash=file_hash,
)
# Convert and extract
result = clients.converter.convert(file_path)
full_doc = result.document.export_to_dict()
slim_doc = extract_relevant(full_doc)
# Check if this is a .txt file - use simple processing instead of docling
import os
file_ext = os.path.splitext(file_path)[1].lower()
if file_ext == '.txt':
# Simple text file processing without docling
from utils.document_processing import process_text_file
logger.info(
"Processing as plain text file (bypassing docling)",
file_path=file_path,
file_hash=file_hash,
)
slim_doc = process_text_file(file_path)
# Override filename with original_filename if provided
if original_filename:
slim_doc["filename"] = original_filename
else:
# Convert and extract using docling for other file types
result = clients.converter.convert(file_path)
full_doc = result.document.export_to_dict()
slim_doc = extract_relevant(full_doc)
texts = [c["text"] for c in slim_doc["chunks"]]

View file

@ -181,6 +181,7 @@ class DocumentService:
async def process_upload_context(self, upload_file, filename: str = None):
"""Process uploaded file and return content for context"""
import io
import os
if not filename:
filename = upload_file.filename or "uploaded_document"
@ -194,22 +195,37 @@ class DocumentService:
content.write(chunk)
content.seek(0) # Reset to beginning for reading
# Create DocumentStream and process with docling
doc_stream = DocumentStream(name=filename, stream=content)
result = clients.converter.convert(doc_stream)
full_doc = result.document.export_to_dict()
slim_doc = extract_relevant(full_doc)
# Check if this is a .txt file - use simple processing
file_ext = os.path.splitext(filename)[1].lower()
if file_ext == '.txt':
# Simple text file processing for chat context
text_content = content.read().decode('utf-8', errors='replace')
# For context, we don't need to chunk - just return the full content
return {
"filename": filename,
"content": text_content,
"pages": 1, # Text files don't have pages
"content_length": len(text_content),
}
else:
# Create DocumentStream and process with docling
doc_stream = DocumentStream(name=filename, stream=content)
result = clients.converter.convert(doc_stream)
full_doc = result.document.export_to_dict()
slim_doc = extract_relevant(full_doc)
# Extract all text content
all_text = []
for chunk in slim_doc["chunks"]:
all_text.append(f"Page {chunk['page']}:\n{chunk['text']}")
# Extract all text content
all_text = []
for chunk in slim_doc["chunks"]:
all_text.append(f"Page {chunk['page']}:\n{chunk['text']}")
full_content = "\n\n".join(all_text)
full_content = "\n\n".join(all_text)
return {
"filename": filename,
"content": full_content,
"pages": len(slim_doc["chunks"]),
"content_length": len(full_content),
}
return {
"filename": filename,
"content": full_content,
"pages": len(slim_doc["chunks"]),
"content_length": len(full_content),
}

View file

@ -257,9 +257,13 @@ class TaskService:
import traceback
traceback.print_exc()
file_task.status = TaskStatus.FAILED
file_task.error = str(e)
upload_task.failed_files += 1
# Note: Processors already handle incrementing failed_files and
# setting file_task status/error, so we don't duplicate that here.
# Only mark the task as failed / record the error if the processor didn't already do so
if file_task.status == TaskStatus.RUNNING:
file_task.status = TaskStatus.FAILED
if not file_task.error:
file_task.error = str(e)
finally:
file_task.updated_at = time.time()
upload_task.processed_files += 1

View file

@ -144,28 +144,29 @@ class EnvManager:
import os
# Map env vars to config attributes
attr_map = {
"OPENAI_API_KEY": "openai_api_key",
"ANTHROPIC_API_KEY": "anthropic_api_key",
# These are environment variable names, not actual secrets
attr_map = { # pragma: allowlist secret
"OPENAI_API_KEY": "openai_api_key", # pragma: allowlist secret
"ANTHROPIC_API_KEY": "anthropic_api_key", # pragma: allowlist secret
"OLLAMA_ENDPOINT": "ollama_endpoint",
"WATSONX_API_KEY": "watsonx_api_key",
"WATSONX_API_KEY": "watsonx_api_key", # pragma: allowlist secret
"WATSONX_ENDPOINT": "watsonx_endpoint",
"WATSONX_PROJECT_ID": "watsonx_project_id",
"OPENSEARCH_PASSWORD": "opensearch_password",
"LANGFLOW_SECRET_KEY": "langflow_secret_key",
"OPENSEARCH_PASSWORD": "opensearch_password", # pragma: allowlist secret
"LANGFLOW_SECRET_KEY": "langflow_secret_key", # pragma: allowlist secret
"LANGFLOW_SUPERUSER": "langflow_superuser",
"LANGFLOW_SUPERUSER_PASSWORD": "langflow_superuser_password",
"LANGFLOW_SUPERUSER_PASSWORD": "langflow_superuser_password", # pragma: allowlist secret
"LANGFLOW_CHAT_FLOW_ID": "langflow_chat_flow_id",
"LANGFLOW_INGEST_FLOW_ID": "langflow_ingest_flow_id",
"LANGFLOW_URL_INGEST_FLOW_ID": "langflow_url_ingest_flow_id",
"NUDGES_FLOW_ID": "nudges_flow_id",
"GOOGLE_OAUTH_CLIENT_ID": "google_oauth_client_id",
"GOOGLE_OAUTH_CLIENT_SECRET": "google_oauth_client_secret",
"GOOGLE_OAUTH_CLIENT_SECRET": "google_oauth_client_secret", # pragma: allowlist secret
"MICROSOFT_GRAPH_OAUTH_CLIENT_ID": "microsoft_graph_oauth_client_id",
"MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET": "microsoft_graph_oauth_client_secret",
"MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET": "microsoft_graph_oauth_client_secret", # pragma: allowlist secret
"WEBHOOK_BASE_URL": "webhook_base_url",
"AWS_ACCESS_KEY_ID": "aws_access_key_id",
"AWS_SECRET_ACCESS_KEY": "aws_secret_access_key",
"AWS_SECRET_ACCESS_KEY": "aws_secret_access_key", # pragma: allowlist secret
"LANGFLOW_PUBLIC_URL": "langflow_public_url",
"OPENRAG_DOCUMENTS_PATHS": "openrag_documents_paths",
"OPENSEARCH_DATA_PATH": "opensearch_data_path",

View file

@ -119,6 +119,82 @@ def get_worker_converter():
return _worker_converter
def _split_oversized_paragraph(paragraph: str, limit: int) -> list:
    """Break a paragraph longer than ``limit`` into pieces of at most ``limit`` characters."""
    pieces = []
    remaining = paragraph
    while len(remaining) > limit:
        # Prefer cutting at the last newline/space inside the window so that
        # words and lines stay intact whenever possible.
        cut = max(
            remaining.rfind("\n", 0, limit + 1),
            remaining.rfind(" ", 0, limit + 1),
        )
        if cut <= 0:
            # No usable whitespace in the window: fall back to a hard cut.
            cut = limit
        pieces.append(remaining[:cut].strip())
        remaining = remaining[cut:].strip()
    if remaining:
        pieces.append(remaining)
    return pieces


def process_text_file(file_path: str, chunk_size: int = 1000) -> dict:
    """
    Process a plain text file without using docling.

    Returns the same top-level structure as extract_relevant() for consistency.

    The file is read as UTF-8 (undecodable bytes are replaced), split into
    paragraphs on blank lines, and consecutive paragraphs are packed into
    chunks of at most ``chunk_size`` characters. A single paragraph that is
    longer than ``chunk_size`` (e.g. a file with no blank lines at all) is
    split further, preferably at whitespace, so that no chunk exceeds the
    limit and embeddings stay within reasonable token limits.

    Args:
        file_path: Path to the .txt file
        chunk_size: Maximum number of characters per chunk (default 1000)

    Returns:
        dict with keys: id, filename, mimetype, chunks.
        ``id`` is whatever ``hash_id(file_path)`` returns. ``chunks`` is a
        list of ``{"page", "type", "text"}`` dicts; text files have no real
        pages, so "page" is simply the 1-based chunk number. An empty file
        yields a single chunk with empty text.

    Raises:
        ValueError: If ``chunk_size`` is not a positive integer.
    """
    import os
    from utils.hash_utils import hash_id

    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    # Read the file
    with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
        content = f.read()

    file_hash = hash_id(file_path)
    filename = os.path.basename(file_path)

    chunk_texts = []
    current_chunk = ""

    # Split by paragraphs first (double newline), then pack them greedily
    for para in content.split('\n\n'):
        para = para.strip()
        if not para:
            continue

        # A paragraph that alone exceeds the limit must be broken up first,
        # otherwise it would be emitted as one arbitrarily large chunk.
        if len(para) > chunk_size:
            pieces = _split_oversized_paragraph(para, chunk_size)
        else:
            pieces = [para]

        for piece in pieces:
            # +2 accounts for the "\n\n" separator used when joining
            if current_chunk and len(current_chunk) + len(piece) + 2 > chunk_size:
                chunk_texts.append(current_chunk)
                current_chunk = piece
            elif current_chunk:
                current_chunk += "\n\n" + piece
            else:
                current_chunk = piece

    # Add the last chunk if any
    if current_chunk:
        chunk_texts.append(current_chunk)

    # If no chunks were created (empty file), create a single empty chunk
    if not chunk_texts:
        chunk_texts.append("")

    chunks = [
        {"page": page_number, "type": "text", "text": text}
        for page_number, text in enumerate(chunk_texts, start=1)
    ]

    return {
        "id": file_hash,
        "filename": filename,
        "mimetype": "text/plain",
        "chunks": chunks,
    }
def extract_relevant(doc_dict: dict) -> dict:
"""
Given the full export_to_dict() result: