Merge branch 'main' into feat-add-owner-info-env

This commit is contained in:
Edwin Jose 2025-09-19 13:45:12 -04:00 committed by GitHub
commit 5d43bb1abb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 144 additions and 16 deletions

View file

@@ -7,7 +7,7 @@ ENV RUSTFLAGS="--cfg reqwest_unstable"
# Accept build arguments for git repository and branch
ARG GIT_REPO=https://github.com/langflow-ai/langflow.git
ARG GIT_BRANCH=load_flows_autologin_false
ARG GIT_BRANCH=main
WORKDIR /app

View file

@@ -38,7 +38,7 @@ export const KnowledgeActionsDropdown = ({
return (
<>
<DropdownMenu>
<DropdownMenuTrigger>
<DropdownMenuTrigger asChild>
<Button variant="ghost" className="hover:bg-transparent">
<EllipsisVertical className="h-4 w-4" />
</Button>

View file

@@ -11,6 +11,7 @@ import {
} from "lucide-react";
import { useRouter } from "next/navigation";
import { useEffect, useRef, useState } from "react";
import { toast } from "sonner";
import { Button } from "@/components/ui/button";
import {
Dialog,
@@ -313,6 +314,11 @@ export function KnowledgeDropdown({
window.dispatchEvent(new CustomEvent("knowledgeUpdated"));
} else {
console.error("Folder upload failed:", result.error);
if (response.status === 400) {
toast.error("Upload failed", {
description: result.error || "Bad request",
});
}
}
} catch (error) {
console.error("Folder upload error:", error);
@@ -353,6 +359,11 @@ export function KnowledgeDropdown({
window.dispatchEvent(new CustomEvent("knowledgeUpdated"));
} else {
console.error("S3 upload failed:", result.error);
if (response.status === 400) {
toast.error("Upload failed", {
description: result.error || "Bad request",
});
}
}
} catch (error) {
console.error("S3 upload error:", error);

View file

@@ -128,7 +128,7 @@ export const useGetSearchQuery = (
}
>();
data.results.forEach((chunk: ChunkResult) => {
(data.results || []).forEach((chunk: ChunkResult) => {
const existing = fileMap.get(chunk.filename);
if (existing) {
existing.chunks.push(chunk);

View file

@@ -60,7 +60,9 @@ export function TaskNotificationMenu() {
const processed = task.processed_files || 0
const successful = task.successful_files || 0
const failed = task.failed_files || 0
const running = task.running_files || 0
const pending = task.pending_files || 0
if (total > 0) {
return {
basic: `${processed}/${total} files`,
@@ -69,6 +71,8 @@ export function TaskNotificationMenu() {
processed,
successful,
failed,
running,
pending,
remaining: total - processed
}
}
@@ -196,10 +200,16 @@ export function TaskNotificationMenu() {
{formatTaskProgress(task)?.detailed.failed} failed
</span>
</div>
<div className="flex items-center gap-1">
<div className="w-2 h-2 bg-blue-500 rounded-full"></div>
<span className="text-blue-600">
{formatTaskProgress(task)?.detailed.running} running
</span>
</div>
<div className="flex items-center gap-1">
<div className="w-2 h-2 bg-yellow-500 rounded-full"></div>
<span className="text-yellow-600">
{formatTaskProgress(task)?.detailed.remaining} pending
{formatTaskProgress(task)?.detailed.pending} pending
</span>
</div>
</div>
@@ -288,6 +298,9 @@ export function TaskNotificationMenu() {
<div className="text-xs text-muted-foreground mt-1">
{formatTaskProgress(task)?.detailed.successful} success, {' '}
{formatTaskProgress(task)?.detailed.failed} failed
{(formatTaskProgress(task)?.detailed.running || 0) > 0 && (
<span>, {formatTaskProgress(task)?.detailed.running} running</span>
)}
</div>
)}
{task.status === 'failed' && task.error && (

View file

@@ -25,6 +25,8 @@ export interface Task {
processed_files?: number;
successful_files?: number;
failed_files?: number;
running_files?: number;
pending_files?: number;
created_at: string;
updated_at: string;
duration_seconds?: number;

View file

@@ -1,6 +1,6 @@
[project]
name = "openrag"
version = "0.1.8"
version = "0.1.12"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.13"

View file

@@ -7,6 +7,8 @@ openrag_user_role:
cluster_permissions:
- "indices:data/write/bulk"
- "indices:data/write/index"
- "indices:data/read/scroll"
- "indices:data/read/scroll/clear"
- "cluster:admin/opensearch/notifications/configs/create"
- "cluster:admin/opensearch/notifications/configs/list"
- "cluster:admin/opensearch/notifications/configs/get"

View file

@@ -15,8 +15,8 @@ async def delete_documents_by_filename(request: Request, document_service, sessi
return JSONResponse({"error": "filename is required"}, status_code=400)
user = request.state.user
jwt_token = request.state.jwt_token
jwt_token = session_manager.get_effective_jwt_token(user.user_id, request.state.jwt_token)
try:
# Get user's OpenSearch client
opensearch_client = session_manager.get_user_opensearch_client(

View file

@@ -4,12 +4,12 @@ import time
import httpx
import requests
from agentd.patch import patch_openai_with_mcp
from docling.document_converter import DocumentConverter
from dotenv import load_dotenv
from openai import AsyncOpenAI
from opensearchpy import AsyncOpenSearch
from opensearchpy._async.http_aiohttp import AIOHttpConnection
from utils.document_processing import create_document_converter
from utils.logging_config import get_logger
load_dotenv()
@@ -45,6 +45,7 @@ LANGFLOW_KEY = os.getenv("LANGFLOW_KEY")
SESSION_SECRET = os.getenv("SESSION_SECRET", "your-secret-key-change-in-production")
GOOGLE_OAUTH_CLIENT_ID = os.getenv("GOOGLE_OAUTH_CLIENT_ID")
GOOGLE_OAUTH_CLIENT_SECRET = os.getenv("GOOGLE_OAUTH_CLIENT_SECRET")
DOCLING_OCR_ENGINE = os.getenv("DOCLING_OCR_ENGINE")
# Ingestion configuration
DISABLE_INGEST_WITH_LANGFLOW = os.getenv("DISABLE_INGEST_WITH_LANGFLOW", "false").lower() in ("true", "1", "yes")
@@ -287,7 +288,7 @@ class AppClients:
self.patched_async_client = patch_openai_with_mcp(AsyncOpenAI())
# Initialize document converter
self.converter = DocumentConverter()
self.converter = create_document_converter(ocr_engine=DOCLING_OCR_ENGINE)
# Initialize Langflow HTTP client
self.langflow_http_client = httpx.AsyncClient(

View file

@@ -220,6 +220,9 @@ class TaskService:
return None
file_statuses = {}
running_files_count = 0
pending_files_count = 0
for file_path, file_task in upload_task.file_tasks.items():
file_statuses[file_path] = {
"status": file_task.status.value,
@@ -231,6 +234,12 @@
"duration_seconds": file_task.duration_seconds,
}
# Count running and pending files
if file_task.status.value == "running":
running_files_count += 1
elif file_task.status.value == "pending":
pending_files_count += 1
return {
"task_id": upload_task.task_id,
"status": upload_task.status.value,
@@ -238,6 +247,8 @@
"processed_files": upload_task.processed_files,
"successful_files": upload_task.successful_files,
"failed_files": upload_task.failed_files,
"running_files": running_files_count,
"pending_files": pending_files_count,
"created_at": upload_task.created_at,
"updated_at": upload_task.updated_at,
"duration_seconds": upload_task.duration_seconds,
@@ -259,6 +270,16 @@
for task_id, upload_task in self.task_store[store_user_id].items():
if task_id in tasks_by_id:
continue
# Calculate running and pending counts
running_files_count = 0
pending_files_count = 0
for file_task in upload_task.file_tasks.values():
if file_task.status.value == "running":
running_files_count += 1
elif file_task.status.value == "pending":
pending_files_count += 1
tasks_by_id[task_id] = {
"task_id": upload_task.task_id,
"status": upload_task.status.value,
@@ -266,6 +287,8 @@
"processed_files": upload_task.processed_files,
"successful_files": upload_task.successful_files,
"failed_files": upload_task.failed_files,
"running_files": running_files_count,
"pending_files": pending_files_count,
"created_at": upload_task.created_at,
"updated_at": upload_task.updated_at,
"duration_seconds": upload_task.duration_seconds,

View file

@@ -12,12 +12,80 @@ logger = get_logger(__name__)
_worker_converter = None
# NOTE(review): indentation below was stripped by the diff renderer; the logical
# nesting is reconstructable from the Python syntax but should be confirmed
# against the actual source file before relying on it.
def create_document_converter(ocr_engine: str | None = None):
"""Create a Docling DocumentConverter with OCR disabled unless requested.

Args:
    ocr_engine: Name of the Docling OCR engine to enable. When None, falls
        back to the ``DOCLING_OCR_ENGINE`` environment variable; if that is
        also unset/empty, OCR is left disabled.

Returns:
    A configured ``DocumentConverter``. On any import or initialization
    failure, degrades gracefully to a default ``DocumentConverter()``.
"""
# Resolve the engine from the environment when the caller did not pass one.
if ocr_engine is None:
ocr_engine = os.getenv("DOCLING_OCR_ENGINE")
try:
# Import the pieces needed to customize pipeline options.
# NOTE(review): InputFormat/PdfFormatOption may not be re-exported from
# docling.document_converter in all docling versions — if the import
# fails, the except branch below returns a plain default converter.
from docling.document_converter import (
DocumentConverter,
InputFormat,
PdfFormatOption,
)
from docling.datamodel.pipeline_options import PdfPipelineOptions
except Exception as exc:  # pragma: no cover - fallback path
logger.debug(
"Falling back to default DocumentConverter import",
error=str(exc),
)
# Minimal import + default construction; no OCR customization possible.
from docling.document_converter import DocumentConverter  # type: ignore
return DocumentConverter()
# Default policy: OCR off unless an engine was explicitly requested.
pipeline_options = PdfPipelineOptions()
pipeline_options.do_ocr = False
if ocr_engine:
try:
# Resolve the requested engine through Docling's OCR factory.
from docling.models.factories import get_ocr_factory
factory = get_ocr_factory(allow_external_plugins=False)
pipeline_options.do_ocr = True
pipeline_options.ocr_options = factory.create_options(kind=ocr_engine)
except Exception as exc:  # pragma: no cover - optional path
# Unknown/unavailable engine: keep OCR disabled rather than fail.
pipeline_options.do_ocr = False
logger.warning(
"Unable to enable requested Docling OCR engine, using OCR-off",
ocr_engine=ocr_engine,
error=str(exc),
)
# Apply the pipeline options to every format that supports them.
# hasattr guards keep this working across docling versions that may
# lack a given InputFormat member.
format_options = {}
if hasattr(InputFormat, "PDF"):
format_options[getattr(InputFormat, "PDF")] = PdfFormatOption(
pipeline_options=pipeline_options
)
if hasattr(InputFormat, "IMAGE"):
format_options[getattr(InputFormat, "IMAGE")] = PdfFormatOption(
pipeline_options=pipeline_options
)
try:
# Pass None (not an empty dict) when no formats were configured so
# docling applies its own defaults.
converter = DocumentConverter(
format_options=format_options if format_options else None
)
except Exception as exc:  # pragma: no cover - fallback path
# Last-resort degradation: a stock converter beats a hard failure.
logger.warning(
"Docling converter initialization failed, falling back to defaults",
error=str(exc),
)
converter = DocumentConverter()
logger.info(
"Docling converter initialized",
ocr_engine=ocr_engine if pipeline_options.do_ocr else None,
ocr_enabled=pipeline_options.do_ocr,
)
return converter
def get_worker_converter():
"""Get or create a DocumentConverter instance for this worker process"""
global _worker_converter
if _worker_converter is None:
from docling.document_converter import DocumentConverter
# Configure GPU settings for this worker
has_gpu_devices, _ = detect_gpu_devices()
if not has_gpu_devices:
@@ -45,7 +113,7 @@ def get_worker_converter():
logger.info(
"Initializing DocumentConverter in worker process", worker_pid=os.getpid()
)
_worker_converter = DocumentConverter()
_worker_converter = create_document_converter()
logger.info("DocumentConverter ready in worker process", worker_pid=os.getpid())
return _worker_converter

2
uv.lock generated
View file

@@ -2282,7 +2282,7 @@ wheels = [
[[package]]
name = "openrag"
version = "0.1.8"
version = "0.1.11"
source = { editable = "." }
dependencies = [
{ name = "agentd" },

View file

@@ -1,6 +1,13 @@
import logging
import os
import sys
from docling.document_converter import DocumentConverter
repo_root = os.path.dirname(__file__)
src_path = os.path.join(repo_root, "src")
if src_path not in sys.path:
sys.path.insert(0, src_path)
from utils.document_processing import create_document_converter
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
@@ -11,7 +18,8 @@ try:
# Use the sample document to warm up docling
test_file = "/app/warmup_ocr.pdf"
logger.info(f"Using test file to warm up docling: {test_file}")
DocumentConverter().convert(test_file)
converter = create_document_converter()
converter.convert(test_file)
logger.info("Docling models warmed up successfully")
except Exception as e:
logger.info(f"Docling warm-up completed with exception: {str(e)}")