Merge branch 'main' of github.com:langflow-ai/openrag into ingest-settings
This commit is contained in:
commit
05cd115162
26 changed files with 3724 additions and 222 deletions
|
|
@ -1,2 +1,49 @@
|
||||||
|
# Environment files
|
||||||
.env
|
.env
|
||||||
|
.env.local
|
||||||
|
.env.development
|
||||||
|
.env.production
|
||||||
|
|
||||||
|
# Auth files
|
||||||
.drive.json
|
.drive.json
|
||||||
|
*.json
|
||||||
|
|
||||||
|
# Dependencies
|
||||||
|
node_modules/
|
||||||
|
*/node_modules/
|
||||||
|
**/node_modules/
|
||||||
|
|
||||||
|
# Python cache
|
||||||
|
__pycache__/
|
||||||
|
*/__pycache__/
|
||||||
|
**/__pycache__/
|
||||||
|
*.pyc
|
||||||
|
*.pyo
|
||||||
|
*.pyd
|
||||||
|
.Python
|
||||||
|
|
||||||
|
# Build outputs
|
||||||
|
build/
|
||||||
|
dist/
|
||||||
|
.next/
|
||||||
|
out/
|
||||||
|
|
||||||
|
# Development files
|
||||||
|
.git/
|
||||||
|
.gitignore
|
||||||
|
README.md
|
||||||
|
*.md
|
||||||
|
.vscode/
|
||||||
|
.idea/
|
||||||
|
|
||||||
|
# Logs
|
||||||
|
*.log
|
||||||
|
logs/
|
||||||
|
|
||||||
|
# OS files
|
||||||
|
.DS_Store
|
||||||
|
Thumbs.db
|
||||||
|
|
||||||
|
# Temporary files
|
||||||
|
tmp/
|
||||||
|
temp/
|
||||||
|
|
|
||||||
15
.env.example
15
.env.example
|
|
@ -1,15 +1,24 @@
|
||||||
# flow id from the openrag flow json
|
# make one like so https://docs.langflow.org/api-keys-and-authentication#langflow-secret-key
|
||||||
FLOW_ID=1098eea1-6649-4e1d-aed1-b77249fb8dd0
|
LANGFLOW_SECRET_KEY=
|
||||||
|
|
||||||
|
# flow ids for chat and ingestion flows
|
||||||
|
LANGFLOW_CHAT_FLOW_ID=1098eea1-6649-4e1d-aed1-b77249fb8dd0
|
||||||
|
LANGFLOW_INGEST_FLOW_ID=5488df7c-b93f-4f87-a446-b67028bc0813
|
||||||
NUDGES_FLOW_ID=ebc01d31-1976-46ce-a385-b0240327226c
|
NUDGES_FLOW_ID=ebc01d31-1976-46ce-a385-b0240327226c
|
||||||
|
|
||||||
# Set a strong admin password for OpenSearch; a bcrypt hash is generated at
|
# Set a strong admin password for OpenSearch; a bcrypt hash is generated at
|
||||||
# container startup from this value. Do not commit real secrets.
|
# container startup from this value. Do not commit real secrets.
|
||||||
|
# must match the hashed password in secureconfig, must change for secure deployment!!!
|
||||||
OPENSEARCH_PASSWORD=
|
OPENSEARCH_PASSWORD=
|
||||||
# make here https://console.cloud.google.com/apis/credentials
|
|
||||||
|
# make here https://console.cloud.google.com/apis/credentials
|
||||||
GOOGLE_OAUTH_CLIENT_ID=
|
GOOGLE_OAUTH_CLIENT_ID=
|
||||||
GOOGLE_OAUTH_CLIENT_SECRET=
|
GOOGLE_OAUTH_CLIENT_SECRET=
|
||||||
|
|
||||||
# Azure app registration credentials for SharePoint/OneDrive
|
# Azure app registration credentials for SharePoint/OneDrive
|
||||||
MICROSOFT_GRAPH_OAUTH_CLIENT_ID=
|
MICROSOFT_GRAPH_OAUTH_CLIENT_ID=
|
||||||
MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET=
|
MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET=
|
||||||
|
|
||||||
# OPTIONAL: DNS name routable from Google (etc.) to handle continuous ingest (something like ngrok works). This enables continuous ingestion
|
# OPTIONAL: DNS name routable from Google (etc.) to handle continuous ingest (something like ngrok works). This enables continuous ingestion
|
||||||
WEBHOOK_BASE_URL=
|
WEBHOOK_BASE_URL=
|
||||||
|
|
||||||
|
|
|
||||||
210
Makefile
Normal file
210
Makefile
Normal file
|
|
@ -0,0 +1,210 @@
|
||||||
|
# OpenRAG Development Makefile
|
||||||
|
# Provides easy commands for development workflow
|
||||||
|
|
||||||
|
# Every target in this file is a command, not a file; keep this list in sync with the rules below.
.PHONY: help dev dev-cpu dev-local infra stop clean build logs install test lint backend frontend install-be install-fe build-be build-fe logs-be logs-fe logs-lf logs-os shell-be shell-lf shell-os restart status health db-reset flow-upload quick setup
|
||||||
|
|
||||||
|
# Default target
|
||||||
|
help:
|
||||||
|
@echo "OpenRAG Development Commands"
|
||||||
|
@echo ""
|
||||||
|
@echo "Development:"
|
||||||
|
@echo " dev - Start full stack with GPU support (docker compose)"
|
||||||
|
@echo " dev-cpu - Start full stack with CPU only (docker compose)"
|
||||||
|
@echo " dev-local - Start infrastructure only, run backend/frontend locally"
|
||||||
|
@echo " infra - Start infrastructure services only (alias for dev-local)"
|
||||||
|
@echo " stop - Stop all containers"
|
||||||
|
@echo " restart - Restart all containers"
|
||||||
|
@echo ""
|
||||||
|
@echo "Local Development:"
|
||||||
|
@echo " backend - Run backend locally (requires infrastructure)"
|
||||||
|
@echo " frontend - Run frontend locally"
|
||||||
|
@echo " install - Install all dependencies"
|
||||||
|
@echo " install-be - Install backend dependencies (uv)"
|
||||||
|
@echo " install-fe - Install frontend dependencies (npm)"
|
||||||
|
@echo ""
|
||||||
|
@echo "Utilities:"
|
||||||
|
@echo " build - Build all Docker images"
|
||||||
|
@echo " clean - Stop containers and remove volumes"
|
||||||
|
@echo " logs - Show logs from all containers"
|
||||||
|
@echo " logs-be - Show backend container logs"
|
||||||
|
@echo " logs-lf - Show langflow container logs"
|
||||||
|
@echo " shell-be - Shell into backend container"
|
||||||
|
@echo " shell-lf - Shell into langflow container"
|
||||||
|
@echo ""
|
||||||
|
@echo "Testing:"
|
||||||
|
@echo " test - Run backend tests"
|
||||||
|
@echo " lint - Run linting checks"
|
||||||
|
@echo ""
|
||||||
|
|
||||||
|
# Development environments
|
||||||
|
dev:
|
||||||
|
@echo "🚀 Starting OpenRAG with GPU support..."
|
||||||
|
docker-compose up -d
|
||||||
|
@echo "✅ Services started!"
|
||||||
|
@echo " Backend: http://localhost:8000"
|
||||||
|
@echo " Frontend: http://localhost:3000"
|
||||||
|
@echo " Langflow: http://localhost:7860"
|
||||||
|
@echo " OpenSearch: http://localhost:9200"
|
||||||
|
@echo " Dashboards: http://localhost:5601"
|
||||||
|
|
||||||
|
dev-cpu:
|
||||||
|
@echo "🚀 Starting OpenRAG with CPU only..."
|
||||||
|
docker-compose -f docker-compose-cpu.yml up -d
|
||||||
|
@echo "✅ Services started!"
|
||||||
|
@echo " Backend: http://localhost:8000"
|
||||||
|
@echo " Frontend: http://localhost:3000"
|
||||||
|
@echo " Langflow: http://localhost:7860"
|
||||||
|
@echo " OpenSearch: http://localhost:9200"
|
||||||
|
@echo " Dashboards: http://localhost:5601"
|
||||||
|
|
||||||
|
dev-local:
|
||||||
|
@echo "🔧 Starting infrastructure only (for local development)..."
|
||||||
|
docker-compose up -d opensearch dashboards langflow
|
||||||
|
@echo "✅ Infrastructure started!"
|
||||||
|
@echo " Langflow: http://localhost:7860"
|
||||||
|
@echo " OpenSearch: http://localhost:9200"
|
||||||
|
@echo " Dashboards: http://localhost:5601"
|
||||||
|
@echo ""
|
||||||
|
@echo "Now run 'make backend' and 'make frontend' in separate terminals"
|
||||||
|
|
||||||
|
infra:
|
||||||
|
@echo "🔧 Starting infrastructure services only..."
|
||||||
|
docker-compose up -d opensearch dashboards langflow
|
||||||
|
@echo "✅ Infrastructure services started!"
|
||||||
|
@echo " Langflow: http://localhost:7860"
|
||||||
|
@echo " OpenSearch: http://localhost:9200"
|
||||||
|
@echo " Dashboards: http://localhost:5601"
|
||||||
|
|
||||||
|
# Container management
|
||||||
|
stop:
|
||||||
|
@echo "🛑 Stopping all containers..."
|
||||||
|
docker-compose down
|
||||||
|
docker-compose -f docker-compose-cpu.yml down 2>/dev/null || true
|
||||||
|
|
||||||
|
restart: stop dev
|
||||||
|
|
||||||
|
clean: stop
|
||||||
|
@echo "🧹 Cleaning up containers and volumes..."
|
||||||
|
docker-compose down -v --remove-orphans
|
||||||
|
docker-compose -f docker-compose-cpu.yml down -v --remove-orphans 2>/dev/null || true
|
||||||
|
docker system prune -f
|
||||||
|
|
||||||
|
# Local development
|
||||||
|
backend:
|
||||||
|
@echo "🐍 Starting backend locally..."
|
||||||
|
@if [ ! -f .env ]; then echo "⚠️ .env file not found. Copy .env.example to .env first"; exit 1; fi
|
||||||
|
uv run python src/main.py
|
||||||
|
|
||||||
|
frontend:
|
||||||
|
@echo "⚛️ Starting frontend locally..."
|
||||||
|
@if [ ! -d "frontend/node_modules" ]; then echo "📦 Installing frontend dependencies first..."; cd frontend && npm install; fi
|
||||||
|
cd frontend && npx next dev
|
||||||
|
|
||||||
|
# Installation
|
||||||
|
install: install-be install-fe
|
||||||
|
@echo "✅ All dependencies installed!"
|
||||||
|
|
||||||
|
install-be:
|
||||||
|
@echo "📦 Installing backend dependencies..."
|
||||||
|
uv sync
|
||||||
|
|
||||||
|
install-fe:
|
||||||
|
@echo "📦 Installing frontend dependencies..."
|
||||||
|
cd frontend && npm install
|
||||||
|
|
||||||
|
# Building
|
||||||
|
build:
|
||||||
|
@echo "🔨 Building Docker images..."
|
||||||
|
docker-compose build
|
||||||
|
|
||||||
|
build-be:
|
||||||
|
@echo "🔨 Building backend image..."
|
||||||
|
docker build -t openrag-backend -f Dockerfile.backend .
|
||||||
|
|
||||||
|
build-fe:
|
||||||
|
@echo "🔨 Building frontend image..."
|
||||||
|
docker build -t openrag-frontend -f Dockerfile.frontend .
|
||||||
|
|
||||||
|
# Logging and debugging
|
||||||
|
logs:
|
||||||
|
@echo "📋 Showing all container logs..."
|
||||||
|
docker-compose logs -f
|
||||||
|
|
||||||
|
logs-be:
|
||||||
|
@echo "📋 Showing backend logs..."
|
||||||
|
docker-compose logs -f openrag-backend
|
||||||
|
|
||||||
|
logs-fe:
|
||||||
|
@echo "📋 Showing frontend logs..."
|
||||||
|
docker-compose logs -f openrag-frontend
|
||||||
|
|
||||||
|
logs-lf:
|
||||||
|
@echo "📋 Showing langflow logs..."
|
||||||
|
docker-compose logs -f langflow
|
||||||
|
|
||||||
|
logs-os:
|
||||||
|
@echo "📋 Showing opensearch logs..."
|
||||||
|
docker-compose logs -f opensearch
|
||||||
|
|
||||||
|
# Shell access
|
||||||
|
shell-be:
|
||||||
|
@echo "🐚 Opening shell in backend container..."
|
||||||
|
docker-compose exec openrag-backend /bin/bash
|
||||||
|
|
||||||
|
shell-lf:
|
||||||
|
@echo "🐚 Opening shell in langflow container..."
|
||||||
|
docker-compose exec langflow /bin/bash
|
||||||
|
|
||||||
|
shell-os:
|
||||||
|
@echo "🐚 Opening shell in opensearch container..."
|
||||||
|
docker-compose exec opensearch /bin/bash
|
||||||
|
|
||||||
|
# Testing and quality
|
||||||
|
test:
|
||||||
|
@echo "🧪 Running backend tests..."
|
||||||
|
uv run pytest
|
||||||
|
|
||||||
|
lint:
|
||||||
|
@echo "🔍 Running linting checks..."
|
||||||
|
cd frontend && npm run lint
|
||||||
|
@echo "Frontend linting complete"
|
||||||
|
|
||||||
|
# Service status
|
||||||
|
status:
|
||||||
|
@echo "📊 Container status:"
|
||||||
|
@docker-compose ps 2>/dev/null || echo "No containers running"
|
||||||
|
|
||||||
|
# Probe each service once and print its response, or 'Not responding' when the request fails.
# The OpenSearch password is read from .env by the recipe's shell ($$(...)), so it is never
# expanded by make; 'cut -f2-' keeps any '=' characters that are part of the password.
health:
	@echo "🏥 Health check:"
	@echo "Backend: $$(curl -s http://localhost:8000/health 2>/dev/null || echo 'Not responding')"
	@echo "Langflow: $$(curl -s http://localhost:7860/health 2>/dev/null || echo 'Not responding')"
	@echo "OpenSearch: $$(curl -s -k -u "admin:$$(grep '^OPENSEARCH_PASSWORD=' .env 2>/dev/null | cut -d= -f2-)" https://localhost:9200 2>/dev/null | jq -r .tagline 2>/dev/null || echo 'Not responding')"
|
||||||
|
|
||||||
|
# Database operations
|
||||||
|
# Delete the 'documents' and 'knowledge_filters' indices; failures are ignored (|| true).
# The password is read from .env at run time; 'cut -f2-' keeps '=' characters inside the
# password and the quotes stop the shell from word-splitting it.
# NOTE(review): the health target reaches OpenSearch via https with -k, while these URLs use
# http — confirm which scheme the running cluster actually accepts.
db-reset:
	@echo "🗄️ Resetting OpenSearch indices..."
	curl -X DELETE "http://localhost:9200/documents" -u "admin:$$(grep '^OPENSEARCH_PASSWORD=' .env | cut -d= -f2-)" || true
	curl -X DELETE "http://localhost:9200/knowledge_filters" -u "admin:$$(grep '^OPENSEARCH_PASSWORD=' .env | cut -d= -f2-)" || true
	@echo "Indices reset. Restart backend to recreate."
|
||||||
|
|
||||||
|
# Flow management
|
||||||
|
flow-upload:
|
||||||
|
@echo "📁 Uploading flow to Langflow..."
|
||||||
|
@if [ -z "$(FLOW_FILE)" ]; then echo "Usage: make flow-upload FLOW_FILE=path/to/flow.json"; exit 1; fi
|
||||||
|
curl -X POST "http://localhost:7860/api/v1/flows" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d @$(FLOW_FILE)
|
||||||
|
|
||||||
|
# Quick development shortcuts
|
||||||
|
quick: dev-local
|
||||||
|
@echo "🚀 Quick start: infrastructure running!"
|
||||||
|
@echo "Run these in separate terminals:"
|
||||||
|
@echo " make backend"
|
||||||
|
@echo " make frontend"
|
||||||
|
|
||||||
|
# Environment setup
|
||||||
|
setup:
|
||||||
|
@echo "⚙️ Setting up development environment..."
|
||||||
|
@if [ ! -f .env ]; then cp .env.example .env && echo "📝 Created .env from template"; fi
|
||||||
|
@$(MAKE) install
|
||||||
|
@echo "✅ Setup complete! Run 'make dev' to start."
|
||||||
|
|
@ -15,10 +15,10 @@ services:
|
||||||
bash -c "
|
bash -c "
|
||||||
# Start OpenSearch in background
|
# Start OpenSearch in background
|
||||||
/usr/share/opensearch/opensearch-docker-entrypoint.sh opensearch &
|
/usr/share/opensearch/opensearch-docker-entrypoint.sh opensearch &
|
||||||
|
|
||||||
# Wait a bit for OpenSearch to start, then apply security config
|
# Wait a bit for OpenSearch to start, then apply security config
|
||||||
sleep 10 && /usr/share/opensearch/setup-security.sh &
|
sleep 10 && /usr/share/opensearch/setup-security.sh &
|
||||||
|
|
||||||
# Wait for background processes
|
# Wait for background processes
|
||||||
wait
|
wait
|
||||||
"
|
"
|
||||||
|
|
@ -53,7 +53,8 @@ services:
|
||||||
- LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
|
- LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
|
||||||
- LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
|
- LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
|
||||||
- LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
|
- LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
|
||||||
- FLOW_ID=${FLOW_ID}
|
- LANGFLOW_CHAT_FLOW_ID=${LANGFLOW_CHAT_FLOW_ID}
|
||||||
|
- LANGFLOW_INGEST_FLOW_ID=${LANGFLOW_INGEST_FLOW_ID}
|
||||||
- NUDGES_FLOW_ID=${NUDGES_FLOW_ID}
|
- NUDGES_FLOW_ID=${NUDGES_FLOW_ID}
|
||||||
- OPENSEARCH_PORT=9200
|
- OPENSEARCH_PORT=9200
|
||||||
- OPENSEARCH_USERNAME=admin
|
- OPENSEARCH_USERNAME=admin
|
||||||
|
|
@ -98,7 +99,8 @@ services:
|
||||||
- LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
|
- LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
|
||||||
- JWT="dummy"
|
- JWT="dummy"
|
||||||
- OPENRAG-QUERY-FILTER="{}"
|
- OPENRAG-QUERY-FILTER="{}"
|
||||||
- LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT,OPENRAG-QUERY-FILTER
|
- OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
|
||||||
|
- LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT,OPENRAG-QUERY-FILTER,OPENSEARCH_PASSWORD
|
||||||
- LANGFLOW_LOG_LEVEL=DEBUG
|
- LANGFLOW_LOG_LEVEL=DEBUG
|
||||||
- LANGFLOW_AUTO_LOGIN=${LANGFLOW_AUTO_LOGIN}
|
- LANGFLOW_AUTO_LOGIN=${LANGFLOW_AUTO_LOGIN}
|
||||||
- LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
|
- LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
|
||||||
|
|
|
||||||
|
|
@ -15,10 +15,10 @@ services:
|
||||||
bash -c "
|
bash -c "
|
||||||
# Start OpenSearch in background
|
# Start OpenSearch in background
|
||||||
/usr/share/opensearch/opensearch-docker-entrypoint.sh opensearch &
|
/usr/share/opensearch/opensearch-docker-entrypoint.sh opensearch &
|
||||||
|
|
||||||
# Wait a bit for OpenSearch to start, then apply security config
|
# Wait a bit for OpenSearch to start, then apply security config
|
||||||
sleep 10 && /usr/share/opensearch/setup-security.sh &
|
sleep 10 && /usr/share/opensearch/setup-security.sh &
|
||||||
|
|
||||||
# Wait for background processes
|
# Wait for background processes
|
||||||
wait
|
wait
|
||||||
"
|
"
|
||||||
|
|
@ -52,7 +52,8 @@ services:
|
||||||
- LANGFLOW_PUBLIC_URL=${LANGFLOW_PUBLIC_URL}
|
- LANGFLOW_PUBLIC_URL=${LANGFLOW_PUBLIC_URL}
|
||||||
- LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
|
- LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
|
||||||
- LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
|
- LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
|
||||||
- FLOW_ID=${FLOW_ID}
|
- LANGFLOW_CHAT_FLOW_ID=${LANGFLOW_CHAT_FLOW_ID}
|
||||||
|
- LANGFLOW_INGEST_FLOW_ID=${LANGFLOW_INGEST_FLOW_ID}
|
||||||
- NUDGES_FLOW_ID=${NUDGES_FLOW_ID}
|
- NUDGES_FLOW_ID=${NUDGES_FLOW_ID}
|
||||||
- OPENSEARCH_PORT=9200
|
- OPENSEARCH_PORT=9200
|
||||||
- OPENSEARCH_USERNAME=admin
|
- OPENSEARCH_USERNAME=admin
|
||||||
|
|
@ -98,7 +99,8 @@ services:
|
||||||
- LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
|
- LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
|
||||||
- JWT="dummy"
|
- JWT="dummy"
|
||||||
- OPENRAG-QUERY-FILTER="{}"
|
- OPENRAG-QUERY-FILTER="{}"
|
||||||
- LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT,OPENRAG-QUERY-FILTER
|
- OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
|
||||||
|
- LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT,OPENRAG-QUERY-FILTER,OPENSEARCH_PASSWORD
|
||||||
- LANGFLOW_LOG_LEVEL=DEBUG
|
- LANGFLOW_LOG_LEVEL=DEBUG
|
||||||
- LANGFLOW_AUTO_LOGIN=${LANGFLOW_AUTO_LOGIN}
|
- LANGFLOW_AUTO_LOGIN=${LANGFLOW_AUTO_LOGIN}
|
||||||
- LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
|
- LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
|
||||||
|
|
|
||||||
2032
flows/ingestion_flow.json
Normal file
2032
flows/ingestion_flow.json
Normal file
File diff suppressed because one or more lines are too long
|
|
@ -133,24 +133,50 @@ export function KnowledgeDropdown({ active, variant = 'navigation' }: KnowledgeD
|
||||||
const formData = new FormData()
|
const formData = new FormData()
|
||||||
formData.append('file', files[0])
|
formData.append('file', files[0])
|
||||||
|
|
||||||
const response = await fetch('/api/upload', {
|
// 1) Upload to Langflow
|
||||||
|
const upRes = await fetch('/api/langflow/files/upload', {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
body: formData,
|
body: formData,
|
||||||
})
|
})
|
||||||
|
const upJson = await upRes.json()
|
||||||
const result = await response.json()
|
if (!upRes.ok) {
|
||||||
|
throw new Error(upJson?.error || 'Upload to Langflow failed')
|
||||||
if (response.ok) {
|
|
||||||
window.dispatchEvent(new CustomEvent('fileUploaded', {
|
|
||||||
detail: { file: files[0], result }
|
|
||||||
}))
|
|
||||||
// Trigger search refresh after successful upload
|
|
||||||
window.dispatchEvent(new CustomEvent('knowledgeUpdated'))
|
|
||||||
} else {
|
|
||||||
window.dispatchEvent(new CustomEvent('fileUploadError', {
|
|
||||||
detail: { filename: files[0].name, error: result.error || 'Upload failed' }
|
|
||||||
}))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const fileId = upJson?.id
|
||||||
|
const filePath = upJson?.path
|
||||||
|
if (!fileId || !filePath) {
|
||||||
|
throw new Error('Langflow did not return file id/path')
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2) Run ingestion flow
|
||||||
|
const runRes = await fetch('/api/langflow/ingest', {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify({ file_paths: [filePath] }),
|
||||||
|
})
|
||||||
|
const runJson = await runRes.json()
|
||||||
|
if (!runRes.ok) {
|
||||||
|
throw new Error(runJson?.error || 'Langflow ingestion failed')
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3) Delete file from Langflow
|
||||||
|
const delRes = await fetch('/api/langflow/files', {
|
||||||
|
method: 'DELETE',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify({ file_ids: [fileId] }),
|
||||||
|
})
|
||||||
|
const delJson = await delRes.json().catch(() => ({}))
|
||||||
|
if (!delRes.ok) {
|
||||||
|
throw new Error(delJson?.error || 'Langflow file delete failed')
|
||||||
|
}
|
||||||
|
|
||||||
|
// Notify UI
|
||||||
|
window.dispatchEvent(new CustomEvent('fileUploaded', {
|
||||||
|
detail: { file: files[0], result: { file_id: fileId, file_path: filePath, run: runJson } }
|
||||||
|
}))
|
||||||
|
// Trigger search refresh after successful ingestion
|
||||||
|
window.dispatchEvent(new CustomEvent('knowledgeUpdated'))
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
window.dispatchEvent(new CustomEvent('fileUploadError', {
|
window.dispatchEvent(new CustomEvent('fileUploadError', {
|
||||||
detail: { filename: files[0].name, error: error instanceof Error ? error.message : 'Upload failed' }
|
detail: { filename: files[0].name, error: error instanceof Error ? error.message : 'Upload failed' }
|
||||||
|
|
|
||||||
|
|
@ -57,7 +57,7 @@ function AdminPage() {
|
||||||
})
|
})
|
||||||
|
|
||||||
const result = await response.json()
|
const result = await response.json()
|
||||||
|
|
||||||
if (response.ok) {
|
if (response.ok) {
|
||||||
setUploadStatus(`File uploaded successfully! ID: ${result.id}`)
|
setUploadStatus(`File uploaded successfully! ID: ${result.id}`)
|
||||||
setSelectedFile(null)
|
setSelectedFile(null)
|
||||||
|
|
@ -132,23 +132,23 @@ function AdminPage() {
|
||||||
})
|
})
|
||||||
|
|
||||||
const result = await response.json()
|
const result = await response.json()
|
||||||
|
|
||||||
if (response.status === 201) {
|
if (response.status === 201) {
|
||||||
// New flow: Got task ID, use centralized tracking
|
// New flow: Got task ID, use centralized tracking
|
||||||
const taskId = result.task_id || result.id
|
const taskId = result.task_id || result.id
|
||||||
const totalFiles = result.total_files || 0
|
const totalFiles = result.total_files || 0
|
||||||
|
|
||||||
if (!taskId) {
|
if (!taskId) {
|
||||||
throw new Error("No task ID received from server")
|
throw new Error("No task ID received from server")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add task to centralized tracking
|
// Add task to centralized tracking
|
||||||
addTask(taskId)
|
addTask(taskId)
|
||||||
|
|
||||||
setUploadStatus(`🔄 Processing started for ${totalFiles} files. Check the task notification panel for real-time progress. (Task ID: ${taskId})`)
|
setUploadStatus(`🔄 Processing started for ${totalFiles} files. Check the task notification panel for real-time progress. (Task ID: ${taskId})`)
|
||||||
setFolderPath("")
|
setFolderPath("")
|
||||||
setPathUploadLoading(false)
|
setPathUploadLoading(false)
|
||||||
|
|
||||||
} else if (response.ok) {
|
} else if (response.ok) {
|
||||||
// Original flow: Direct response with results
|
// Original flow: Direct response with results
|
||||||
const successful = result.results?.filter((r: {status: string}) => r.status === "indexed").length || 0
|
const successful = result.results?.filter((r: {status: string}) => r.status === "indexed").length || 0
|
||||||
|
|
|
||||||
|
|
@ -52,11 +52,11 @@ interface Connector {
|
||||||
}
|
}
|
||||||
|
|
||||||
interface SyncResult {
|
interface SyncResult {
|
||||||
processed?: number;
|
processed?: number;
|
||||||
added?: number;
|
added?: number;
|
||||||
errors?: number;
|
errors?: number;
|
||||||
skipped?: number;
|
skipped?: number;
|
||||||
total?: number;
|
total?: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
interface Connection {
|
interface Connection {
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[project]
|
[project]
|
||||||
name = "openrag"
|
name = "openrag"
|
||||||
version = "0.1.1"
|
version = "0.1.2"
|
||||||
description = "Add your description here"
|
description = "Add your description here"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.13"
|
requires-python = ">=3.13"
|
||||||
|
|
|
||||||
|
|
@ -66,6 +66,7 @@ async def connector_sync(request: Request, connector_service, session_manager):
|
||||||
max_files,
|
max_files,
|
||||||
jwt_token=jwt_token,
|
jwt_token=jwt_token,
|
||||||
)
|
)
|
||||||
|
task_ids.append(task_id)
|
||||||
return JSONResponse(
|
return JSONResponse(
|
||||||
{
|
{
|
||||||
"task_ids": task_ids,
|
"task_ids": task_ids,
|
||||||
|
|
|
||||||
159
src/api/langflow_files.py
Normal file
159
src/api/langflow_files.py
Normal file
|
|
@ -0,0 +1,159 @@
|
||||||
|
from starlette.requests import Request
|
||||||
|
from starlette.responses import JSONResponse
|
||||||
|
|
||||||
|
from services.langflow_file_service import LangflowFileService
|
||||||
|
from utils.logging_config import get_logger
|
||||||
|
|
||||||
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
async def upload_user_file(
    request: Request, langflow_file_service: LangflowFileService, session_manager
):
    """Forward a multipart ``file`` upload to the Langflow file service.

    The ``file`` form field is read fully into memory and handed to
    ``langflow_file_service.upload_user_file`` as a
    ``(filename, content, content_type)`` tuple, together with the JWT found
    on ``request.state`` (``None`` when absent).

    Args:
        request: Incoming request carrying multipart form data.
        langflow_file_service: Service used to perform the upload.
        session_manager: Not used by this handler.

    Returns:
        ``JSONResponse`` with status 201 and the service result on success,
        400 when no file part is present, or 500 with ``{"error": ...}`` when
        anything else fails.
    """
    try:
        logger.debug("upload_user_file endpoint called")
        form = await request.form()
        upload_file = form.get("file")
        # Starlette returns a plain ``str`` for non-file form fields; treat
        # that like a missing upload instead of failing later on
        # ``.filename`` and answering with a 500.
        if upload_file is None or isinstance(upload_file, str):
            logger.error("No file provided in upload request")
            return JSONResponse({"error": "Missing file"}, status_code=400)

        logger.debug(
            "Processing file", filename=upload_file.filename, size=upload_file.size
        )

        # starlette UploadFile provides file-like; httpx needs (filename, file, content_type)
        content = await upload_file.read()
        file_tuple = (
            upload_file.filename,
            content,
            upload_file.content_type or "application/octet-stream",
        )

        jwt_token = getattr(request.state, "jwt_token", None)
        logger.debug("JWT token status", jwt_present=jwt_token is not None)

        logger.debug("Calling langflow_file_service.upload_user_file")
        result = await langflow_file_service.upload_user_file(file_tuple, jwt_token)
        logger.debug("Upload successful", result=result)
        return JSONResponse(result, status_code=201)
    except Exception as e:
        # Top-level boundary of the endpoint: log the details, then report
        # the failure to the client instead of propagating.
        logger.error(
            "upload_user_file endpoint failed",
            error_type=type(e).__name__,
            error=str(e),
        )
        import traceback

        logger.error("Full traceback", traceback=traceback.format_exc())
        return JSONResponse({"error": str(e)}, status_code=500)
|
||||||
|
|
||||||
|
|
||||||
|
async def run_ingestion(
    request: Request, langflow_file_service: LangflowFileService, session_manager
):
    """Run the Langflow ingestion flow for already-uploaded files.

    Reads a JSON body with optional ``file_paths``, ``file_ids``,
    ``session_id``, ``tweaks`` and ``settings`` keys. UI ``settings`` are
    translated into tweaks for the ``SplitText-QIKhg`` and
    ``OpenAIEmbeddings-joRJ6`` components and merged over any tweaks supplied
    by the caller, then ``langflow_file_service.run_ingestion_flow`` is
    invoked with the user details found on ``request.state``.

    Args:
        request: Incoming request carrying the JSON payload.
        langflow_file_service: Service used to run the ingestion flow.
        session_manager: Not used by this handler.

    Returns:
        ``JSONResponse`` with the flow result, 400 when neither
        ``file_paths`` nor ``file_ids`` is given, or 500 with
        ``{"error": ...}`` when anything fails.
    """
    try:
        payload = await request.json()
        file_ids = payload.get("file_ids")
        file_paths = payload.get("file_paths") or []
        session_id = payload.get("session_id")
        tweaks = payload.get("tweaks") or {}
        settings = payload.get("settings", {})

        # We assume file_paths is provided. If only file_ids are provided, client would
        # need to resolve to paths via Files API (not implemented here).
        if not file_paths and not file_ids:
            return JSONResponse(
                {"error": "Provide file_paths or file_ids"}, status_code=400
            )

        # Convert UI settings to component tweaks using exact component IDs
        if settings:
            logger.debug("Applying ingestion settings", settings=settings)

            # Split Text component tweaks (SplitText-QIKhg)
            split_tweaks = {}
            if settings.get("chunkSize"):
                split_tweaks["chunk_size"] = settings["chunkSize"]
            # An overlap of 0 is a legitimate choice, so test for presence
            # rather than truthiness.
            if settings.get("chunkOverlap") is not None:
                split_tweaks["chunk_overlap"] = settings["chunkOverlap"]
            if settings.get("separator"):
                split_tweaks["separator"] = settings["separator"]
            if split_tweaks:
                tweaks.setdefault("SplitText-QIKhg", {}).update(split_tweaks)

            # OpenAI Embeddings component tweaks (OpenAIEmbeddings-joRJ6)
            if settings.get("embeddingModel"):
                tweaks.setdefault("OpenAIEmbeddings-joRJ6", {})["model"] = settings[
                    "embeddingModel"
                ]

            # Note: OpenSearch component tweaks not needed for ingestion
            # (search parameters are for retrieval, not document processing)

            logger.debug("Final tweaks with settings applied", tweaks=tweaks)

        # Include user JWT if available
        jwt_token = getattr(request.state, "jwt_token", None)

        # Extract user info from User object
        user = getattr(request.state, "user", None)
        user_id = user.user_id if user else None
        user_name = user.name if user else None
        user_email = user.email if user else None

        if jwt_token:
            # Set auth context for downstream services
            from auth_context import set_auth_context

            set_auth_context(user_id, jwt_token)

        result = await langflow_file_service.run_ingestion_flow(
            file_paths=file_paths or [],
            jwt_token=jwt_token,
            session_id=session_id,
            tweaks=tweaks,
            owner=user_id,
            owner_name=user_name,
            owner_email=user_email,
            connector_type="local",
        )
        return JSONResponse(result)
    except Exception as e:
        # Log before answering so ingestion failures are visible server-side,
        # matching the upload endpoint.
        logger.error(
            "run_ingestion endpoint failed",
            error_type=type(e).__name__,
            error=str(e),
        )
        return JSONResponse({"error": str(e)}, status_code=500)
|
||||||
|
|
||||||
|
|
||||||
|
async def delete_user_files(
    request: Request, langflow_file_service: LangflowFileService, session_manager
):
    """Delete the Langflow files named in the request body.

    Expects a JSON body ``{"file_ids": [...]}``. Each id is passed to
    ``langflow_file_service.delete_user_file``; a failure for one id does not
    stop the remaining deletions.

    Args:
        request: Incoming request carrying the JSON payload.
        langflow_file_service: Service used to delete each file.
        session_manager: Not used by this handler.

    Returns:
        ``JSONResponse`` with ``{"deleted": [...], "errors": [...]}`` and
        status 200 when every delete succeeded or 207 when at least one
        failed; 400 when ``file_ids`` is missing, empty or not a list; 500
        with ``{"error": ...}`` when the request itself cannot be processed.
    """
    try:
        payload = await request.json()
        file_ids = payload.get("file_ids")
        if not file_ids or not isinstance(file_ids, list):
            return JSONResponse(
                {"error": "file_ids must be a non-empty list"}, status_code=400
            )

        # Record each outcome as it happens: this avoids re-scanning the
        # error list for every id and reports an id as deleted whenever its
        # delete call actually succeeded.
        deleted = []
        errors = []
        for fid in file_ids:
            try:
                await langflow_file_service.delete_user_file(fid)
            except Exception as e:
                logger.error(
                    "Failed to delete Langflow file", file_id=fid, error=str(e)
                )
                errors.append({"file_id": fid, "error": str(e)})
            else:
                deleted.append(fid)

        status = 207 if errors else 200
        return JSONResponse(
            {
                "deleted": deleted,
                "errors": errors,
            },
            status_code=status,
        )
    except Exception as e:
        logger.error(
            "delete_user_files endpoint failed",
            error_type=type(e).__name__,
            error=str(e),
        )
        return JSONResponse({"error": str(e)}, status_code=500)
|
||||||
|
|
@ -1,6 +1,10 @@
|
||||||
import os
|
|
||||||
from starlette.responses import JSONResponse
|
from starlette.responses import JSONResponse
|
||||||
from config.settings import LANGFLOW_URL, FLOW_ID, LANGFLOW_PUBLIC_URL
|
from config.settings import (
|
||||||
|
LANGFLOW_URL,
|
||||||
|
LANGFLOW_CHAT_FLOW_ID,
|
||||||
|
LANGFLOW_INGEST_FLOW_ID,
|
||||||
|
LANGFLOW_PUBLIC_URL,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
async def get_settings(request, session_manager):
|
async def get_settings(request, session_manager):
|
||||||
|
|
@ -9,16 +13,92 @@ async def get_settings(request, session_manager):
|
||||||
# Return public settings that are safe to expose to frontend
|
# Return public settings that are safe to expose to frontend
|
||||||
settings = {
|
settings = {
|
||||||
"langflow_url": LANGFLOW_URL,
|
"langflow_url": LANGFLOW_URL,
|
||||||
"flow_id": FLOW_ID,
|
"flow_id": LANGFLOW_CHAT_FLOW_ID,
|
||||||
|
"ingest_flow_id": LANGFLOW_INGEST_FLOW_ID,
|
||||||
"langflow_public_url": LANGFLOW_PUBLIC_URL,
|
"langflow_public_url": LANGFLOW_PUBLIC_URL,
|
||||||
}
|
}
|
||||||
|
|
||||||
# Only expose edit URL when a public URL is configured
|
# Only expose edit URLs when a public URL is configured
|
||||||
if LANGFLOW_PUBLIC_URL and FLOW_ID:
|
if LANGFLOW_PUBLIC_URL and LANGFLOW_CHAT_FLOW_ID:
|
||||||
settings["langflow_edit_url"] = (
|
settings["langflow_edit_url"] = (
|
||||||
f"{LANGFLOW_PUBLIC_URL.rstrip('/')}/flow/{FLOW_ID}"
|
f"{LANGFLOW_PUBLIC_URL.rstrip('/')}/flow/{LANGFLOW_CHAT_FLOW_ID}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if LANGFLOW_PUBLIC_URL and LANGFLOW_INGEST_FLOW_ID:
|
||||||
|
settings["langflow_ingest_edit_url"] = (
|
||||||
|
f"{LANGFLOW_PUBLIC_URL.rstrip('/')}/flow/{LANGFLOW_INGEST_FLOW_ID}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Fetch ingestion flow configuration to get actual component defaults
|
||||||
|
if LANGFLOW_INGEST_FLOW_ID:
|
||||||
|
try:
|
||||||
|
from config.settings import generate_langflow_api_key
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
api_key = await generate_langflow_api_key()
|
||||||
|
if api_key:
|
||||||
|
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||||
|
response = await client.get(
|
||||||
|
f"{LANGFLOW_URL}/api/v1/flows/{LANGFLOW_INGEST_FLOW_ID}",
|
||||||
|
headers={"x-api-key": api_key},
|
||||||
|
)
|
||||||
|
if response.status_code == 200:
|
||||||
|
flow_data = response.json()
|
||||||
|
|
||||||
|
# Extract component defaults (ingestion-specific settings only)
|
||||||
|
ingestion_defaults = {
|
||||||
|
"chunkSize": 1000,
|
||||||
|
"chunkOverlap": 200,
|
||||||
|
"separator": "\\n",
|
||||||
|
"embeddingModel": "text-embedding-3-small",
|
||||||
|
}
|
||||||
|
|
||||||
|
if flow_data.get("data", {}).get("nodes"):
|
||||||
|
for node in flow_data["data"]["nodes"]:
|
||||||
|
node_template = (
|
||||||
|
node.get("data", {})
|
||||||
|
.get("node", {})
|
||||||
|
.get("template", {})
|
||||||
|
)
|
||||||
|
|
||||||
|
# Split Text component (SplitText-QIKhg)
|
||||||
|
if node.get("id") == "SplitText-QIKhg":
|
||||||
|
if node_template.get("chunk_size", {}).get(
|
||||||
|
"value"
|
||||||
|
):
|
||||||
|
ingestion_defaults["chunkSize"] = (
|
||||||
|
node_template["chunk_size"]["value"]
|
||||||
|
)
|
||||||
|
if node_template.get("chunk_overlap", {}).get(
|
||||||
|
"value"
|
||||||
|
):
|
||||||
|
ingestion_defaults["chunkOverlap"] = (
|
||||||
|
node_template["chunk_overlap"]["value"]
|
||||||
|
)
|
||||||
|
if node_template.get("separator", {}).get(
|
||||||
|
"value"
|
||||||
|
):
|
||||||
|
ingestion_defaults["separator"] = (
|
||||||
|
node_template["separator"]["value"]
|
||||||
|
)
|
||||||
|
|
||||||
|
# OpenAI Embeddings component (OpenAIEmbeddings-joRJ6)
|
||||||
|
elif node.get("id") == "OpenAIEmbeddings-joRJ6":
|
||||||
|
if node_template.get("model", {}).get("value"):
|
||||||
|
ingestion_defaults["embeddingModel"] = (
|
||||||
|
node_template["model"]["value"]
|
||||||
|
)
|
||||||
|
|
||||||
|
# Note: OpenSearch component settings are not exposed for ingestion
|
||||||
|
# (search-related parameters like number_of_results, score_threshold
|
||||||
|
# are for retrieval, not ingestion)
|
||||||
|
|
||||||
|
settings["ingestion_defaults"] = ingestion_defaults
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"[WARNING] Failed to fetch ingestion flow defaults: {e}")
|
||||||
|
# Continue without ingestion defaults
|
||||||
|
|
||||||
return JSONResponse(settings)
|
return JSONResponse(settings)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
|
||||||
|
|
@ -1,19 +1,22 @@
|
||||||
import os
|
import os
|
||||||
import requests
|
|
||||||
import time
|
import time
|
||||||
from dotenv import load_dotenv
|
|
||||||
from utils.logging_config import get_logger
|
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
import httpx
|
||||||
|
import requests
|
||||||
|
from agentd.patch import patch_openai_with_mcp
|
||||||
|
from docling.document_converter import DocumentConverter
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from openai import AsyncOpenAI
|
||||||
from opensearchpy import AsyncOpenSearch
|
from opensearchpy import AsyncOpenSearch
|
||||||
from opensearchpy._async.http_aiohttp import AIOHttpConnection
|
from opensearchpy._async.http_aiohttp import AIOHttpConnection
|
||||||
from docling.document_converter import DocumentConverter
|
|
||||||
from agentd.patch import patch_openai_with_mcp
|
from utils.logging_config import get_logger
|
||||||
from openai import AsyncOpenAI
|
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
load_dotenv("../")
|
load_dotenv("../")
|
||||||
|
|
||||||
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
# Environment variables
|
# Environment variables
|
||||||
OPENSEARCH_HOST = os.getenv("OPENSEARCH_HOST", "localhost")
|
OPENSEARCH_HOST = os.getenv("OPENSEARCH_HOST", "localhost")
|
||||||
OPENSEARCH_PORT = int(os.getenv("OPENSEARCH_PORT", "9200"))
|
OPENSEARCH_PORT = int(os.getenv("OPENSEARCH_PORT", "9200"))
|
||||||
|
|
@ -22,8 +25,18 @@ OPENSEARCH_PASSWORD = os.getenv("OPENSEARCH_PASSWORD")
|
||||||
LANGFLOW_URL = os.getenv("LANGFLOW_URL", "http://localhost:7860")
|
LANGFLOW_URL = os.getenv("LANGFLOW_URL", "http://localhost:7860")
|
||||||
# Optional: public URL for browser links (e.g., http://localhost:7860)
|
# Optional: public URL for browser links (e.g., http://localhost:7860)
|
||||||
LANGFLOW_PUBLIC_URL = os.getenv("LANGFLOW_PUBLIC_URL")
|
LANGFLOW_PUBLIC_URL = os.getenv("LANGFLOW_PUBLIC_URL")
|
||||||
FLOW_ID = os.getenv("FLOW_ID")
|
# Backwards compatible flow ID handling with deprecation warnings
|
||||||
|
_legacy_flow_id = os.getenv("FLOW_ID")
|
||||||
|
|
||||||
|
LANGFLOW_CHAT_FLOW_ID = os.getenv("LANGFLOW_CHAT_FLOW_ID") or _legacy_flow_id
|
||||||
|
LANGFLOW_INGEST_FLOW_ID = os.getenv("LANGFLOW_INGEST_FLOW_ID")
|
||||||
NUDGES_FLOW_ID = os.getenv("NUDGES_FLOW_ID")
|
NUDGES_FLOW_ID = os.getenv("NUDGES_FLOW_ID")
|
||||||
|
|
||||||
|
if _legacy_flow_id and not os.getenv("LANGFLOW_CHAT_FLOW_ID"):
|
||||||
|
logger.warning("FLOW_ID is deprecated. Please use LANGFLOW_CHAT_FLOW_ID instead")
|
||||||
|
LANGFLOW_CHAT_FLOW_ID = _legacy_flow_id
|
||||||
|
|
||||||
|
|
||||||
# Langflow superuser credentials for API key generation
|
# Langflow superuser credentials for API key generation
|
||||||
LANGFLOW_SUPERUSER = os.getenv("LANGFLOW_SUPERUSER")
|
LANGFLOW_SUPERUSER = os.getenv("LANGFLOW_SUPERUSER")
|
||||||
LANGFLOW_SUPERUSER_PASSWORD = os.getenv("LANGFLOW_SUPERUSER_PASSWORD")
|
LANGFLOW_SUPERUSER_PASSWORD = os.getenv("LANGFLOW_SUPERUSER_PASSWORD")
|
||||||
|
|
@ -94,15 +107,47 @@ INDEX_BODY = {
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Convenience base URL for Langflow REST API
|
||||||
|
LANGFLOW_BASE_URL = f"{LANGFLOW_URL}/api/v1"
|
||||||
|
|
||||||
|
|
||||||
async def generate_langflow_api_key():
|
async def generate_langflow_api_key():
|
||||||
"""Generate Langflow API key using superuser credentials at startup"""
|
"""Generate Langflow API key using superuser credentials at startup"""
|
||||||
global LANGFLOW_KEY
|
global LANGFLOW_KEY
|
||||||
|
|
||||||
|
logger.debug(
|
||||||
|
"generate_langflow_api_key called", current_key_present=bool(LANGFLOW_KEY)
|
||||||
|
)
|
||||||
|
|
||||||
# If key already provided via env, do not attempt generation
|
# If key already provided via env, do not attempt generation
|
||||||
if LANGFLOW_KEY:
|
if LANGFLOW_KEY:
|
||||||
logger.info("Using LANGFLOW_KEY from environment, skipping generation")
|
if os.getenv("LANGFLOW_KEY"):
|
||||||
return LANGFLOW_KEY
|
logger.info("Using LANGFLOW_KEY from environment; skipping generation")
|
||||||
|
return LANGFLOW_KEY
|
||||||
|
else:
|
||||||
|
# We have a cached key, but let's validate it first
|
||||||
|
logger.debug("Validating cached LANGFLOW_KEY", key_prefix=LANGFLOW_KEY[:8])
|
||||||
|
try:
|
||||||
|
validation_response = requests.get(
|
||||||
|
f"{LANGFLOW_URL}/api/v1/users/whoami",
|
||||||
|
headers={"x-api-key": LANGFLOW_KEY},
|
||||||
|
timeout=5,
|
||||||
|
)
|
||||||
|
if validation_response.status_code == 200:
|
||||||
|
logger.debug("Cached API key is valid", key_prefix=LANGFLOW_KEY[:8])
|
||||||
|
return LANGFLOW_KEY
|
||||||
|
else:
|
||||||
|
logger.warning(
|
||||||
|
"Cached API key is invalid, generating fresh key",
|
||||||
|
status_code=validation_response.status_code,
|
||||||
|
)
|
||||||
|
LANGFLOW_KEY = None # Clear invalid key
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(
|
||||||
|
"Cached API key validation failed, generating fresh key",
|
||||||
|
error=str(e),
|
||||||
|
)
|
||||||
|
LANGFLOW_KEY = None # Clear invalid key
|
||||||
|
|
||||||
if not LANGFLOW_SUPERUSER or not LANGFLOW_SUPERUSER_PASSWORD:
|
if not LANGFLOW_SUPERUSER or not LANGFLOW_SUPERUSER_PASSWORD:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
|
|
@ -115,7 +160,6 @@ async def generate_langflow_api_key():
|
||||||
max_attempts = int(os.getenv("LANGFLOW_KEY_RETRIES", "15"))
|
max_attempts = int(os.getenv("LANGFLOW_KEY_RETRIES", "15"))
|
||||||
delay_seconds = float(os.getenv("LANGFLOW_KEY_RETRY_DELAY", "2.0"))
|
delay_seconds = float(os.getenv("LANGFLOW_KEY_RETRY_DELAY", "2.0"))
|
||||||
|
|
||||||
last_error = None
|
|
||||||
for attempt in range(1, max_attempts + 1):
|
for attempt in range(1, max_attempts + 1):
|
||||||
try:
|
try:
|
||||||
# Login to get access token
|
# Login to get access token
|
||||||
|
|
@ -148,14 +192,28 @@ async def generate_langflow_api_key():
|
||||||
if not api_key:
|
if not api_key:
|
||||||
raise KeyError("api_key")
|
raise KeyError("api_key")
|
||||||
|
|
||||||
LANGFLOW_KEY = api_key
|
# Validate the API key works
|
||||||
logger.info(
|
validation_response = requests.get(
|
||||||
"Successfully generated Langflow API key",
|
f"{LANGFLOW_URL}/api/v1/users/whoami",
|
||||||
api_key_preview=api_key[:8],
|
headers={"x-api-key": api_key},
|
||||||
|
timeout=10,
|
||||||
)
|
)
|
||||||
return api_key
|
if validation_response.status_code == 200:
|
||||||
|
LANGFLOW_KEY = api_key
|
||||||
|
logger.info(
|
||||||
|
"Successfully generated and validated Langflow API key",
|
||||||
|
key_prefix=api_key[:8],
|
||||||
|
)
|
||||||
|
return api_key
|
||||||
|
else:
|
||||||
|
logger.error(
|
||||||
|
"Generated API key validation failed",
|
||||||
|
status_code=validation_response.status_code,
|
||||||
|
)
|
||||||
|
raise ValueError(
|
||||||
|
f"API key validation failed: {validation_response.status_code}"
|
||||||
|
)
|
||||||
except (requests.exceptions.RequestException, KeyError) as e:
|
except (requests.exceptions.RequestException, KeyError) as e:
|
||||||
last_error = e
|
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Attempt to generate Langflow API key failed",
|
"Attempt to generate Langflow API key failed",
|
||||||
attempt=attempt,
|
attempt=attempt,
|
||||||
|
|
@ -182,6 +240,7 @@ class AppClients:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.opensearch = None
|
self.opensearch = None
|
||||||
self.langflow_client = None
|
self.langflow_client = None
|
||||||
|
self.langflow_http_client = None
|
||||||
self.patched_async_client = None
|
self.patched_async_client = None
|
||||||
self.converter = None
|
self.converter = None
|
||||||
|
|
||||||
|
|
@ -204,9 +263,15 @@ class AppClients:
|
||||||
# Initialize Langflow client with generated/provided API key
|
# Initialize Langflow client with generated/provided API key
|
||||||
if LANGFLOW_KEY and self.langflow_client is None:
|
if LANGFLOW_KEY and self.langflow_client is None:
|
||||||
try:
|
try:
|
||||||
self.langflow_client = AsyncOpenAI(
|
if not OPENSEARCH_PASSWORD:
|
||||||
base_url=f"{LANGFLOW_URL}/api/v1", api_key=LANGFLOW_KEY
|
raise ValueError("OPENSEARCH_PASSWORD is not set")
|
||||||
)
|
else:
|
||||||
|
await self.ensure_langflow_client()
|
||||||
|
# Note: OPENSEARCH_PASSWORD global variable should be created automatically
|
||||||
|
# via LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT in docker-compose
|
||||||
|
logger.info(
|
||||||
|
"Langflow client initialized - OPENSEARCH_PASSWORD should be available via environment variables"
|
||||||
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning("Failed to initialize Langflow client", error=str(e))
|
logger.warning("Failed to initialize Langflow client", error=str(e))
|
||||||
self.langflow_client = None
|
self.langflow_client = None
|
||||||
|
|
@ -221,6 +286,11 @@ class AppClients:
|
||||||
# Initialize document converter
|
# Initialize document converter
|
||||||
self.converter = DocumentConverter()
|
self.converter = DocumentConverter()
|
||||||
|
|
||||||
|
# Initialize Langflow HTTP client
|
||||||
|
self.langflow_http_client = httpx.AsyncClient(
|
||||||
|
base_url=LANGFLOW_URL, timeout=60.0
|
||||||
|
)
|
||||||
|
|
||||||
return self
|
return self
|
||||||
|
|
||||||
async def ensure_langflow_client(self):
|
async def ensure_langflow_client(self):
|
||||||
|
|
@ -242,6 +312,71 @@ class AppClients:
|
||||||
self.langflow_client = None
|
self.langflow_client = None
|
||||||
return self.langflow_client
|
return self.langflow_client
|
||||||
|
|
||||||
|
async def langflow_request(self, method: str, endpoint: str, **kwargs):
    """Send an authenticated request to the Langflow API via the shared HTTP client.

    Args:
        method: HTTP method name, e.g. "GET" or "POST".
        endpoint: Path appended verbatim to ``LANGFLOW_URL``
            (e.g. "/api/v1/flows/").
        **kwargs: Forwarded unchanged to the client's ``request()``. A
            ``headers`` mapping, if given, is merged over the defaults; a
            header whose value is ``None`` is omitted from the request.

    Returns:
        Whatever ``self.langflow_http_client.request()`` returns.

    Raises:
        ValueError: If no Langflow API key is available.
    """
    api_key = await generate_langflow_api_key()
    if not api_key:
        raise ValueError("No Langflow API key available")

    # Caller-supplied headers take precedence over the defaults. An explicit
    # ``headers=None`` is treated the same as "no extra headers" instead of
    # failing while unpacking None into the merged dict.
    caller_headers = kwargs.pop("headers", None) or {}
    merged = {
        "x-api-key": api_key,
        "Content-Type": "application/json",
        **caller_headers,
    }

    # A None value means "do not send this header". Callers use
    # {"Content-Type": None} for file uploads so the HTTP client can set the
    # multipart content type itself; None is never forwarded as a value.
    headers = {key: val for key, val in merged.items() if val is not None}

    return await self.langflow_http_client.request(
        method=method,
        url=f"{LANGFLOW_URL}{endpoint}",
        headers=headers,
        **kwargs,
    )
|
||||||
|
|
||||||
|
async def _create_langflow_global_variable(self, name: str, value: str):
    """Create a credential-type global variable in Langflow (best effort).

    Posts to ``{LANGFLOW_URL}/api/v1/variables/`` with a freshly obtained API
    key. Every outcome is logged and nothing is raised: a missing API key, a
    non-success status, or any exception during the request only produces a
    log entry.

    Args:
        name: Name of the variable to create.
        value: Value stored for the variable.
    """
    key = await generate_langflow_api_key()
    if not key:
        logger.warning(
            "Cannot create Langflow global variable: No API key", variable_name=name
        )
        return

    endpoint = f"{LANGFLOW_URL}/api/v1/variables/"
    request_headers = {"x-api-key": key, "Content-Type": "application/json"}
    body = {
        "name": name,
        "value": value,
        "default_fields": [],
        "type": "Credential",
    }

    try:
        async with httpx.AsyncClient() as http:
            resp = await http.post(endpoint, headers=request_headers, json=body)

        status = resp.status_code
        if status in [200, 201]:
            logger.info(
                "Successfully created Langflow global variable",
                variable_name=name,
            )
            return

        # Langflow answers 400 with an "already exists" message for duplicates;
        # that is not treated as a failure.
        if status == 400 and "already exists" in resp.text:
            logger.info(
                "Langflow global variable already exists", variable_name=name
            )
            return

        logger.warning(
            "Failed to create Langflow global variable",
            variable_name=name,
            status_code=status,
        )
    except Exception as e:
        logger.error(
            "Exception creating Langflow global variable",
            variable_name=name,
            error=str(e),
        )
|
||||||
|
|
||||||
def create_user_opensearch_client(self, jwt_token: str):
|
def create_user_opensearch_client(self, jwt_token: str):
|
||||||
"""Create OpenSearch client with user's JWT token for OIDC auth"""
|
"""Create OpenSearch client with user's JWT token for OIDC auth"""
|
||||||
headers = {"Authorization": f"Bearer {jwt_token}"}
|
headers = {"Authorization": f"Bearer {jwt_token}"}
|
||||||
|
|
|
||||||
|
|
@ -400,8 +400,9 @@ class GoogleDriveConnector(BaseConnector):
|
||||||
export_mime = self._pick_export_mime(mime_type)
|
export_mime = self._pick_export_mime(mime_type)
|
||||||
if mime_type.startswith("application/vnd.google-apps."):
|
if mime_type.startswith("application/vnd.google-apps."):
|
||||||
# default fallback if not overridden
|
# default fallback if not overridden
|
||||||
if not export_mime:
|
#if not export_mime:
|
||||||
export_mime = "application/pdf"
|
# export_mime = "application/pdf"
|
||||||
|
export_mime = "application/pdf"
|
||||||
# NOTE: export_media does not accept supportsAllDrives/includeItemsFromAllDrives
|
# NOTE: export_media does not accept supportsAllDrives/includeItemsFromAllDrives
|
||||||
request = self.service.files().export_media(fileId=file_id, mimeType=export_mime)
|
request = self.service.files().export_media(fileId=file_id, mimeType=export_mime)
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
302
src/connectors/langflow_connector_service.py
Normal file
302
src/connectors/langflow_connector_service.py
Normal file
|
|
@ -0,0 +1,302 @@
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
# Create custom processor for connector files using Langflow
|
||||||
|
from models.processors import LangflowConnectorFileProcessor
|
||||||
|
from services.langflow_file_service import LangflowFileService
|
||||||
|
from utils.logging_config import get_logger
|
||||||
|
|
||||||
|
from .base import BaseConnector, ConnectorDocument
|
||||||
|
from .connection_manager import ConnectionManager
|
||||||
|
|
||||||
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class LangflowConnectorService:
|
||||||
|
"""Service to manage connector documents and process them via Langflow"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
task_service=None,
|
||||||
|
session_manager=None,
|
||||||
|
):
|
||||||
|
self.task_service = task_service
|
||||||
|
self.session_manager = session_manager
|
||||||
|
self.connection_manager = ConnectionManager()
|
||||||
|
|
||||||
|
# Initialize LangflowFileService for processing connector documents
|
||||||
|
self.langflow_service = LangflowFileService()
|
||||||
|
|
||||||
|
async def initialize(self):
|
||||||
|
"""Initialize the service by loading existing connections"""
|
||||||
|
await self.connection_manager.load_connections()
|
||||||
|
|
||||||
|
async def get_connector(self, connection_id: str) -> Optional[BaseConnector]:
|
||||||
|
"""Get a connector by connection ID"""
|
||||||
|
return await self.connection_manager.get_connector(connection_id)
|
||||||
|
|
||||||
|
async def process_connector_document(
|
||||||
|
self,
|
||||||
|
document: ConnectorDocument,
|
||||||
|
owner_user_id: str,
|
||||||
|
connector_type: str,
|
||||||
|
jwt_token: str = None,
|
||||||
|
owner_name: str = None,
|
||||||
|
owner_email: str = None,
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Process a document from a connector using LangflowFileService pattern"""
|
||||||
|
|
||||||
|
logger.debug(
|
||||||
|
"Processing connector document via Langflow",
|
||||||
|
document_id=document.id,
|
||||||
|
filename=document.filename,
|
||||||
|
)
|
||||||
|
|
||||||
|
suffix = self._get_file_extension(document.mimetype)
|
||||||
|
|
||||||
|
# Create temporary file from document content
|
||||||
|
with tempfile.NamedTemporaryFile(
|
||||||
|
delete=False, suffix=suffix
|
||||||
|
) as tmp_file:
|
||||||
|
tmp_file.write(document.content)
|
||||||
|
tmp_file.flush()
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Step 1: Upload file to Langflow
|
||||||
|
logger.debug("Uploading file to Langflow", filename=document.filename)
|
||||||
|
content = document.content
|
||||||
|
file_tuple = (
|
||||||
|
document.filename.replace(" ", "_").replace("/", "_")+suffix,
|
||||||
|
content,
|
||||||
|
document.mimetype or "application/octet-stream",
|
||||||
|
)
|
||||||
|
|
||||||
|
upload_result = await self.langflow_service.upload_user_file(
|
||||||
|
file_tuple, jwt_token
|
||||||
|
)
|
||||||
|
langflow_file_id = upload_result["id"]
|
||||||
|
langflow_file_path = upload_result["path"]
|
||||||
|
|
||||||
|
logger.debug(
|
||||||
|
"File uploaded to Langflow",
|
||||||
|
file_id=langflow_file_id,
|
||||||
|
path=langflow_file_path,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Step 2: Run ingestion flow with the uploaded file
|
||||||
|
logger.debug(
|
||||||
|
"Running Langflow ingestion flow", file_path=langflow_file_path
|
||||||
|
)
|
||||||
|
|
||||||
|
# Use the same tweaks pattern as LangflowFileService
|
||||||
|
tweaks = {} # Let Langflow handle the ingestion with default settings
|
||||||
|
|
||||||
|
ingestion_result = await self.langflow_service.run_ingestion_flow(
|
||||||
|
file_paths=[langflow_file_path],
|
||||||
|
jwt_token=jwt_token,
|
||||||
|
tweaks=tweaks,
|
||||||
|
owner=owner_user_id,
|
||||||
|
owner_name=owner_name,
|
||||||
|
owner_email=owner_email,
|
||||||
|
connector_type=connector_type,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.debug("Ingestion flow completed", result=ingestion_result)
|
||||||
|
|
||||||
|
# Step 3: Delete the file from Langflow
|
||||||
|
logger.debug("Deleting file from Langflow", file_id=langflow_file_id)
|
||||||
|
await self.langflow_service.delete_user_file(langflow_file_id)
|
||||||
|
logger.debug("File deleted from Langflow", file_id=langflow_file_id)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"status": "indexed",
|
||||||
|
"filename": document.filename,
|
||||||
|
"source_url": document.source_url,
|
||||||
|
"document_id": document.id,
|
||||||
|
"connector_type": connector_type,
|
||||||
|
"langflow_result": ingestion_result,
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(
|
||||||
|
"Failed to process connector document via Langflow",
|
||||||
|
document_id=document.id,
|
||||||
|
error=str(e),
|
||||||
|
)
|
||||||
|
# Try to clean up Langflow file if upload succeeded but processing failed
|
||||||
|
if "langflow_file_id" in locals():
|
||||||
|
try:
|
||||||
|
await self.langflow_service.delete_user_file(langflow_file_id)
|
||||||
|
logger.debug(
|
||||||
|
"Cleaned up Langflow file after error",
|
||||||
|
file_id=langflow_file_id,
|
||||||
|
)
|
||||||
|
except Exception as cleanup_error:
|
||||||
|
logger.warning(
|
||||||
|
"Failed to cleanup Langflow file",
|
||||||
|
file_id=langflow_file_id,
|
||||||
|
error=str(cleanup_error),
|
||||||
|
)
|
||||||
|
raise
|
||||||
|
|
||||||
|
finally:
|
||||||
|
# Clean up temporary file
|
||||||
|
os.unlink(tmp_file.name)
|
||||||
|
|
||||||
|
def _get_file_extension(self, mimetype: str) -> str:
|
||||||
|
"""Get file extension based on MIME type"""
|
||||||
|
mime_to_ext = {
|
||||||
|
"application/pdf": ".pdf",
|
||||||
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
|
||||||
|
"application/msword": ".doc",
|
||||||
|
"application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
|
||||||
|
"application/vnd.ms-powerpoint": ".ppt",
|
||||||
|
"text/plain": ".txt",
|
||||||
|
"text/html": ".html",
|
||||||
|
"application/rtf": ".rtf",
|
||||||
|
"application/vnd.google-apps.document": ".pdf", # Exported as PDF
|
||||||
|
"application/vnd.google-apps.presentation": ".pdf",
|
||||||
|
"application/vnd.google-apps.spreadsheet": ".pdf",
|
||||||
|
}
|
||||||
|
return mime_to_ext.get(mimetype, ".bin")
|
||||||
|
|
||||||
|
async def sync_connector_files(
|
||||||
|
self,
|
||||||
|
connection_id: str,
|
||||||
|
user_id: str,
|
||||||
|
max_files: int = None,
|
||||||
|
jwt_token: str = None,
|
||||||
|
) -> str:
|
||||||
|
"""Sync files from a connector connection using Langflow processing"""
|
||||||
|
if not self.task_service:
|
||||||
|
raise ValueError(
|
||||||
|
"TaskService not available - connector sync requires task service dependency"
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.debug(
|
||||||
|
"Starting Langflow-based sync for connection",
|
||||||
|
connection_id=connection_id,
|
||||||
|
max_files=max_files,
|
||||||
|
)
|
||||||
|
|
||||||
|
connector = await self.get_connector(connection_id)
|
||||||
|
if not connector:
|
||||||
|
raise ValueError(
|
||||||
|
f"Connection '{connection_id}' not found or not authenticated"
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.debug("Got connector", authenticated=connector.is_authenticated)
|
||||||
|
|
||||||
|
if not connector.is_authenticated:
|
||||||
|
raise ValueError(f"Connection '{connection_id}' not authenticated")
|
||||||
|
|
||||||
|
# Collect files to process (limited by max_files)
|
||||||
|
files_to_process = []
|
||||||
|
page_token = None
|
||||||
|
|
||||||
|
# Calculate page size to minimize API calls
|
||||||
|
page_size = min(max_files or 100, 1000) if max_files else 100
|
||||||
|
|
||||||
|
while True:
|
||||||
|
# List files from connector with limit
|
||||||
|
logger.debug(
|
||||||
|
"Calling list_files", page_size=page_size, page_token=page_token
|
||||||
|
)
|
||||||
|
file_list = await connector.list_files(page_token, limit=page_size)
|
||||||
|
logger.debug(
|
||||||
|
"Got files from connector", file_count=len(file_list.get("files", []))
|
||||||
|
)
|
||||||
|
files = file_list["files"]
|
||||||
|
|
||||||
|
if not files:
|
||||||
|
break
|
||||||
|
|
||||||
|
for file_info in files:
|
||||||
|
if max_files and len(files_to_process) >= max_files:
|
||||||
|
break
|
||||||
|
files_to_process.append(file_info)
|
||||||
|
|
||||||
|
# Stop if we have enough files or no more pages
|
||||||
|
if (max_files and len(files_to_process) >= max_files) or not file_list.get(
|
||||||
|
"nextPageToken"
|
||||||
|
):
|
||||||
|
break
|
||||||
|
|
||||||
|
page_token = file_list.get("nextPageToken")
|
||||||
|
|
||||||
|
# Get user information
|
||||||
|
user = self.session_manager.get_user(user_id) if self.session_manager else None
|
||||||
|
owner_name = user.name if user else None
|
||||||
|
owner_email = user.email if user else None
|
||||||
|
|
||||||
|
processor = LangflowConnectorFileProcessor(
|
||||||
|
self,
|
||||||
|
connection_id,
|
||||||
|
files_to_process,
|
||||||
|
user_id,
|
||||||
|
jwt_token=jwt_token,
|
||||||
|
owner_name=owner_name,
|
||||||
|
owner_email=owner_email,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Use file IDs as items
|
||||||
|
file_ids = [file_info["id"] for file_info in files_to_process]
|
||||||
|
|
||||||
|
# Create custom task using TaskService
|
||||||
|
task_id = await self.task_service.create_custom_task(
|
||||||
|
user_id, file_ids, processor
|
||||||
|
)
|
||||||
|
|
||||||
|
return task_id
|
||||||
|
|
||||||
|
async def sync_specific_files(
|
||||||
|
self,
|
||||||
|
connection_id: str,
|
||||||
|
user_id: str,
|
||||||
|
file_ids: List[str],
|
||||||
|
jwt_token: str = None,
|
||||||
|
) -> str:
|
||||||
|
"""Sync specific files by their IDs using Langflow processing"""
|
||||||
|
if not self.task_service:
|
||||||
|
raise ValueError(
|
||||||
|
"TaskService not available - connector sync requires task service dependency"
|
||||||
|
)
|
||||||
|
|
||||||
|
connector = await self.get_connector(connection_id)
|
||||||
|
if not connector:
|
||||||
|
raise ValueError(
|
||||||
|
f"Connection '{connection_id}' not found or not authenticated"
|
||||||
|
)
|
||||||
|
|
||||||
|
if not connector.is_authenticated:
|
||||||
|
raise ValueError(f"Connection '{connection_id}' not authenticated")
|
||||||
|
|
||||||
|
if not file_ids:
|
||||||
|
raise ValueError("No file IDs provided")
|
||||||
|
|
||||||
|
# Get user information
|
||||||
|
user = self.session_manager.get_user(user_id) if self.session_manager else None
|
||||||
|
owner_name = user.name if user else None
|
||||||
|
owner_email = user.email if user else None
|
||||||
|
|
||||||
|
processor = LangflowConnectorFileProcessor(
|
||||||
|
self,
|
||||||
|
connection_id,
|
||||||
|
file_ids,
|
||||||
|
user_id,
|
||||||
|
jwt_token=jwt_token,
|
||||||
|
owner_name=owner_name,
|
||||||
|
owner_email=owner_email,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create custom task using TaskService
|
||||||
|
task_id = await self.task_service.create_custom_task(
|
||||||
|
user_id, file_ids, processor
|
||||||
|
)
|
||||||
|
|
||||||
|
return task_id
|
||||||
|
|
||||||
|
async def _get_connector(self, connection_id: str) -> Optional[BaseConnector]:
|
||||||
|
"""Get a connector by connection ID (alias for get_connector)"""
|
||||||
|
return await self.get_connector(connection_id)
|
||||||
102
src/main.py
102
src/main.py
|
|
@ -1,6 +1,7 @@
|
||||||
|
|
||||||
# Configure structured logging early
|
# Configure structured logging early
|
||||||
from services.flows_service import FlowsService
|
from services.flows_service import FlowsService
|
||||||
|
from connectors.langflow_connector_service import LangflowConnectorService
|
||||||
from utils.logging_config import configure_from_env, get_logger
|
from utils.logging_config import configure_from_env, get_logger
|
||||||
|
|
||||||
configure_from_env()
|
configure_from_env()
|
||||||
|
|
@ -12,34 +13,58 @@ import multiprocessing
|
||||||
import os
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
|
||||||
from starlette.applications import Starlette
|
from starlette.applications import Starlette
|
||||||
from starlette.routing import Route
|
from starlette.routing import Route
|
||||||
|
|
||||||
# Set multiprocessing start method to 'spawn' for CUDA compatibility
|
# Set multiprocessing start method to 'spawn' for CUDA compatibility
|
||||||
multiprocessing.set_start_method("spawn", force=True)
|
multiprocessing.set_start_method("spawn", force=True)
|
||||||
|
|
||||||
|
# Create process pool FIRST, before any torch/CUDA imports
|
||||||
from utils.process_pool import process_pool
|
from utils.process_pool import process_pool
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
# API endpoints
|
||||||
|
from api import (
|
||||||
|
auth,
|
||||||
|
chat,
|
||||||
|
connectors,
|
||||||
|
knowledge_filter,
|
||||||
|
langflow_files,
|
||||||
|
oidc,
|
||||||
|
search,
|
||||||
|
settings,
|
||||||
|
tasks,
|
||||||
|
upload,
|
||||||
|
)
|
||||||
|
from auth_middleware import optional_auth, require_auth
|
||||||
|
|
||||||
# Configuration and setup
|
# Configuration and setup
|
||||||
from config.settings import clients, INDEX_NAME, INDEX_BODY, SESSION_SECRET
|
from config.settings import (
|
||||||
from config.settings import is_no_auth_mode
|
INDEX_BODY,
|
||||||
from utils.gpu_detection import detect_gpu_devices
|
INDEX_NAME,
|
||||||
|
SESSION_SECRET,
|
||||||
|
clients,
|
||||||
|
is_no_auth_mode,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Existing services
|
||||||
|
from services.auth_service import AuthService
|
||||||
|
from services.chat_service import ChatService
|
||||||
|
|
||||||
# Services
|
# Services
|
||||||
from services.document_service import DocumentService
|
from services.document_service import DocumentService
|
||||||
|
from services.knowledge_filter_service import KnowledgeFilterService
|
||||||
|
|
||||||
|
# Configuration and setup
|
||||||
|
# Services
|
||||||
|
from services.langflow_file_service import LangflowFileService
|
||||||
|
from services.monitor_service import MonitorService
|
||||||
from services.search_service import SearchService
|
from services.search_service import SearchService
|
||||||
from services.task_service import TaskService
|
from services.task_service import TaskService
|
||||||
from services.auth_service import AuthService
|
|
||||||
from services.chat_service import ChatService
|
|
||||||
from services.knowledge_filter_service import KnowledgeFilterService
|
|
||||||
from services.monitor_service import MonitorService
|
|
||||||
|
|
||||||
# Existing services
|
|
||||||
from connectors.service import ConnectorService
|
|
||||||
from session_manager import SessionManager
|
from session_manager import SessionManager
|
||||||
from auth_middleware import require_auth, optional_auth
|
from utils.process_pool import process_pool
|
||||||
|
|
||||||
# API endpoints
|
# API endpoints
|
||||||
from api import (
|
from api import (
|
||||||
|
|
@ -217,7 +242,10 @@ async def ingest_default_documents_when_ready(services):
|
||||||
logger.info("Ingesting default documents when ready")
|
logger.info("Ingesting default documents when ready")
|
||||||
base_dir = os.path.abspath(os.path.join(os.getcwd(), "documents"))
|
base_dir = os.path.abspath(os.path.join(os.getcwd(), "documents"))
|
||||||
if not os.path.isdir(base_dir):
|
if not os.path.isdir(base_dir):
|
||||||
logger.info("Default documents directory not found; skipping ingestion", base_dir=base_dir)
|
logger.info(
|
||||||
|
"Default documents directory not found; skipping ingestion",
|
||||||
|
base_dir=base_dir,
|
||||||
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
# Collect files recursively
|
# Collect files recursively
|
||||||
|
|
@ -228,7 +256,9 @@ async def ingest_default_documents_when_ready(services):
|
||||||
]
|
]
|
||||||
|
|
||||||
if not file_paths:
|
if not file_paths:
|
||||||
logger.info("No default documents found; nothing to ingest", base_dir=base_dir)
|
logger.info(
|
||||||
|
"No default documents found; nothing to ingest", base_dir=base_dir
|
||||||
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
# Build a processor that DOES NOT set 'owner' on documents (owner_user_id=None)
|
# Build a processor that DOES NOT set 'owner' on documents (owner_user_id=None)
|
||||||
|
|
@ -253,12 +283,14 @@ async def ingest_default_documents_when_ready(services):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("Default documents ingestion failed", error=str(e))
|
logger.error("Default documents ingestion failed", error=str(e))
|
||||||
|
|
||||||
|
|
||||||
async def startup_tasks(services):
    """Run the boot-time jobs in sequence.

    ``init_index()`` is awaited first, then the default documents are
    ingested via ``ingest_default_documents_when_ready(services)``.
    """
    logger.info("Starting startup tasks")

    # Index initialisation is awaited to completion before ingestion starts.
    await init_index()
    await ingest_default_documents_when_ready(services)
|
||||||
|
|
||||||
|
|
||||||
async def initialize_services():
|
async def initialize_services():
|
||||||
"""Initialize all services and their dependencies"""
|
"""Initialize all services and their dependencies"""
|
||||||
# Generate JWT keys if they don't exist
|
# Generate JWT keys if they don't exist
|
||||||
|
|
@ -283,11 +315,7 @@ async def initialize_services():
|
||||||
document_service.process_pool = process_pool
|
document_service.process_pool = process_pool
|
||||||
|
|
||||||
# Initialize connector service
|
# Initialize connector service
|
||||||
connector_service = ConnectorService(
|
connector_service = LangflowConnectorService(
|
||||||
patched_async_client=clients.patched_async_client,
|
|
||||||
process_pool=process_pool,
|
|
||||||
embed_model="text-embedding-3-small",
|
|
||||||
index_name=INDEX_NAME,
|
|
||||||
task_service=task_service,
|
task_service=task_service,
|
||||||
session_manager=session_manager,
|
session_manager=session_manager,
|
||||||
)
|
)
|
||||||
|
|
@ -298,7 +326,6 @@ async def initialize_services():
|
||||||
# Load persisted connector connections at startup so webhooks and syncs
|
# Load persisted connector connections at startup so webhooks and syncs
|
||||||
# can resolve existing subscriptions immediately after server boot
|
# can resolve existing subscriptions immediately after server boot
|
||||||
# Skip in no-auth mode since connectors require OAuth
|
# Skip in no-auth mode since connectors require OAuth
|
||||||
from config.settings import is_no_auth_mode
|
|
||||||
|
|
||||||
if not is_no_auth_mode():
|
if not is_no_auth_mode():
|
||||||
try:
|
try:
|
||||||
|
|
@ -315,12 +342,15 @@ async def initialize_services():
|
||||||
else:
|
else:
|
||||||
logger.info("[CONNECTORS] Skipping connection loading in no-auth mode")
|
logger.info("[CONNECTORS] Skipping connection loading in no-auth mode")
|
||||||
|
|
||||||
|
langflow_file_service = LangflowFileService()
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"document_service": document_service,
|
"document_service": document_service,
|
||||||
"search_service": search_service,
|
"search_service": search_service,
|
||||||
"task_service": task_service,
|
"task_service": task_service,
|
||||||
"chat_service": chat_service,
|
"chat_service": chat_service,
|
||||||
"flows_service": flows_service,
|
"flows_service": flows_service,
|
||||||
|
"langflow_file_service": langflow_file_service,
|
||||||
"auth_service": auth_service,
|
"auth_service": auth_service,
|
||||||
"connector_service": connector_service,
|
"connector_service": connector_service,
|
||||||
"knowledge_filter_service": knowledge_filter_service,
|
"knowledge_filter_service": knowledge_filter_service,
|
||||||
|
|
@ -347,6 +377,40 @@ async def create_app():
|
||||||
),
|
),
|
||||||
methods=["POST"],
|
methods=["POST"],
|
||||||
),
|
),
|
||||||
|
# Langflow Files endpoints
|
||||||
|
Route(
|
||||||
|
"/langflow/files/upload",
|
||||||
|
optional_auth(services["session_manager"])(
|
||||||
|
partial(
|
||||||
|
langflow_files.upload_user_file,
|
||||||
|
langflow_file_service=services["langflow_file_service"],
|
||||||
|
session_manager=services["session_manager"],
|
||||||
|
)
|
||||||
|
),
|
||||||
|
methods=["POST"],
|
||||||
|
),
|
||||||
|
Route(
|
||||||
|
"/langflow/ingest",
|
||||||
|
require_auth(services["session_manager"])(
|
||||||
|
partial(
|
||||||
|
langflow_files.run_ingestion,
|
||||||
|
langflow_file_service=services["langflow_file_service"],
|
||||||
|
session_manager=services["session_manager"],
|
||||||
|
)
|
||||||
|
),
|
||||||
|
methods=["POST"],
|
||||||
|
),
|
||||||
|
Route(
|
||||||
|
"/langflow/files",
|
||||||
|
require_auth(services["session_manager"])(
|
||||||
|
partial(
|
||||||
|
langflow_files.delete_user_files,
|
||||||
|
langflow_file_service=services["langflow_file_service"],
|
||||||
|
session_manager=services["session_manager"],
|
||||||
|
)
|
||||||
|
),
|
||||||
|
methods=["DELETE"],
|
||||||
|
),
|
||||||
Route(
|
Route(
|
||||||
"/upload_context",
|
"/upload_context",
|
||||||
require_auth(services["session_manager"])(
|
require_auth(services["session_manager"])(
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import Any, Dict
|
from typing import Any
|
||||||
from .tasks import UploadTask, FileTask
|
from .tasks import UploadTask, FileTask
|
||||||
from utils.logging_config import get_logger
|
from utils.logging_config import get_logger
|
||||||
|
|
||||||
|
|
@ -91,10 +91,9 @@ class ConnectorFileProcessor(TaskProcessor):
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Process a connector file using ConnectorService"""
|
"""Process a connector file using ConnectorService"""
|
||||||
from models.tasks import TaskStatus
|
from models.tasks import TaskStatus
|
||||||
import time
|
|
||||||
|
|
||||||
file_id = item # item is the connector file ID
|
file_id = item # item is the connector file ID
|
||||||
file_info = self.file_info_map.get(file_id)
|
self.file_info_map.get(file_id)
|
||||||
|
|
||||||
# Get the connector and connection info
|
# Get the connector and connection info
|
||||||
connector = await self.connector_service.get_connector(self.connection_id)
|
connector = await self.connector_service.get_connector(self.connection_id)
|
||||||
|
|
@ -126,6 +125,79 @@ class ConnectorFileProcessor(TaskProcessor):
|
||||||
upload_task.successful_files += 1
|
upload_task.successful_files += 1
|
||||||
|
|
||||||
|
|
||||||
|
class LangflowConnectorFileProcessor(TaskProcessor):
    """Processor for connector file uploads using Langflow.

    Every item handed to :meth:`process_item` is a connector file ID. The
    file is fetched through the connector bound to ``connection_id`` and then
    passed to ``langflow_connector_service.process_connector_document``.
    """

    def __init__(
        self,
        langflow_connector_service,
        connection_id: str,
        files_to_process: list,
        user_id: str = None,
        jwt_token: str = None,
        owner_name: str = None,
        owner_email: str = None,
    ):
        """Store the processing context and index the requested files.

        Args:
            langflow_connector_service: Service used to resolve the connector
                and connection and to run the document through Langflow.
            connection_id: ID of the connection the files belong to.
            files_to_process: File-info dicts (each with an ``"id"`` key)
                and/or bare file IDs.
            user_id: User on whose behalf files are processed. Required by
                :meth:`process_item`, which raises if it is missing.
            jwt_token: Token forwarded to ``process_connector_document``.
            owner_name: Owner display name forwarded to the service.
            owner_email: Owner e-mail forwarded to the service.
        """
        self.langflow_connector_service = langflow_connector_service
        self.connection_id = connection_id
        self.files_to_process = files_to_process
        self.user_id = user_id
        self.jwt_token = jwt_token
        self.owner_name = owner_name
        self.owner_email = owner_email

        # Lookup map for file info: full file objects are stored under their
        # ID, bare IDs map to None (no metadata known up front).
        self.file_info_map = {}
        for f in files_to_process:
            if isinstance(f, dict):
                self.file_info_map[f["id"]] = f
            else:
                self.file_info_map[f] = None

    async def process_item(
        self, upload_task: UploadTask, item: str, file_task: FileTask
    ) -> None:
        """Process a connector file using LangflowConnectorService.

        Args:
            upload_task: Parent task; ``successful_files`` is incremented on
                success.
            item: The connector file ID to process.
            file_task: Per-file task; receives the result and the
                ``COMPLETED`` status on success.

        Raises:
            ValueError: If the connector or connection cannot be resolved, or
                if no ``user_id`` was supplied at construction time.
        """
        from models.tasks import TaskStatus

        file_id = item  # item is the connector file ID
        service = self.langflow_connector_service

        # Get the connector and connection info
        connector = await service.get_connector(self.connection_id)
        connection = await service.connection_manager.get_connection(
            self.connection_id
        )
        if not connector or not connection:
            raise ValueError(f"Connection '{self.connection_id}' not found")

        # Check the precondition before fetching the file so a misconfigured
        # processor does not trigger a pointless content download.
        if not self.user_id:
            raise ValueError("user_id not provided to LangflowConnectorFileProcessor")

        # Get file content from connector (the connector will fetch metadata if needed)
        document = await connector.get_file_content(file_id)

        # Process using Langflow pipeline
        result = await service.process_connector_document(
            document,
            self.user_id,
            connection.connector_type,
            jwt_token=self.jwt_token,
            owner_name=self.owner_name,
            owner_email=self.owner_email,
        )

        file_task.status = TaskStatus.COMPLETED
        file_task.result = result
        upload_task.successful_files += 1
|
||||||
|
|
||||||
|
|
||||||
class S3FileProcessor(TaskProcessor):
|
class S3FileProcessor(TaskProcessor):
|
||||||
"""Processor for files stored in S3 buckets"""
|
"""Processor for files stored in S3 buckets"""
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
from config.settings import NUDGES_FLOW_ID, clients, LANGFLOW_URL, FLOW_ID
|
from config.settings import NUDGES_FLOW_ID, clients, LANGFLOW_URL
|
||||||
from agent import (
|
from agent import (
|
||||||
async_chat,
|
async_chat,
|
||||||
async_langflow,
|
async_langflow,
|
||||||
|
|
@ -6,10 +6,15 @@ from agent import (
|
||||||
)
|
)
|
||||||
from auth_context import set_auth_context
|
from auth_context import set_auth_context
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from utils.logging_config import get_logger
|
from utils.logging_config import get_logger
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
from agent import async_chat, async_chat_stream, async_langflow
|
||||||
|
from auth_context import set_auth_context
|
||||||
|
from config.settings import LANGFLOW_CHAT_FLOW_ID, LANGFLOW_URL, clients
|
||||||
|
|
||||||
|
|
||||||
class ChatService:
|
class ChatService:
|
||||||
async def chat(
|
async def chat(
|
||||||
|
|
@ -59,9 +64,9 @@ class ChatService:
|
||||||
if not prompt:
|
if not prompt:
|
||||||
raise ValueError("Prompt is required")
|
raise ValueError("Prompt is required")
|
||||||
|
|
||||||
if not LANGFLOW_URL or not FLOW_ID:
|
if not LANGFLOW_URL or not LANGFLOW_CHAT_FLOW_ID:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"LANGFLOW_URL and FLOW_ID environment variables are required"
|
"LANGFLOW_URL and LANGFLOW_CHAT_FLOW_ID environment variables are required"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Prepare extra headers for JWT authentication
|
# Prepare extra headers for JWT authentication
|
||||||
|
|
@ -71,9 +76,9 @@ class ChatService:
|
||||||
|
|
||||||
# Get context variables for filters, limit, and threshold
|
# Get context variables for filters, limit, and threshold
|
||||||
from auth_context import (
|
from auth_context import (
|
||||||
|
get_score_threshold,
|
||||||
get_search_filters,
|
get_search_filters,
|
||||||
get_search_limit,
|
get_search_limit,
|
||||||
get_score_threshold,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
filters = get_search_filters()
|
filters = get_search_filters()
|
||||||
|
|
@ -135,7 +140,7 @@ class ChatService:
|
||||||
|
|
||||||
return async_langflow_chat_stream(
|
return async_langflow_chat_stream(
|
||||||
langflow_client,
|
langflow_client,
|
||||||
FLOW_ID,
|
LANGFLOW_CHAT_FLOW_ID,
|
||||||
prompt,
|
prompt,
|
||||||
user_id,
|
user_id,
|
||||||
extra_headers=extra_headers,
|
extra_headers=extra_headers,
|
||||||
|
|
@ -146,7 +151,7 @@ class ChatService:
|
||||||
|
|
||||||
response_text, response_id = await async_langflow_chat(
|
response_text, response_id = await async_langflow_chat(
|
||||||
langflow_client,
|
langflow_client,
|
||||||
FLOW_ID,
|
LANGFLOW_CHAT_FLOW_ID,
|
||||||
prompt,
|
prompt,
|
||||||
user_id,
|
user_id,
|
||||||
extra_headers=extra_headers,
|
extra_headers=extra_headers,
|
||||||
|
|
@ -237,9 +242,9 @@ class ChatService:
|
||||||
"Langflow client not initialized. Ensure LANGFLOW is reachable or set LANGFLOW_KEY."
|
"Langflow client not initialized. Ensure LANGFLOW is reachable or set LANGFLOW_KEY."
|
||||||
)
|
)
|
||||||
response_text, response_id = await async_langflow(
|
response_text, response_id = await async_langflow(
|
||||||
langflow_client,
|
langflow_client=langflow_client,
|
||||||
FLOW_ID,
|
flow_id=LANGFLOW_CHAT_FLOW_ID,
|
||||||
document_prompt,
|
prompt=document_prompt,
|
||||||
extra_headers=extra_headers,
|
extra_headers=extra_headers,
|
||||||
previous_response_id=previous_response_id,
|
previous_response_id=previous_response_id,
|
||||||
)
|
)
|
||||||
|
|
@ -258,17 +263,17 @@ class ChatService:
|
||||||
|
|
||||||
async def get_chat_history(self, user_id: str):
|
async def get_chat_history(self, user_id: str):
|
||||||
"""Get chat conversation history for a user"""
|
"""Get chat conversation history for a user"""
|
||||||
from agent import get_user_conversations, active_conversations
|
from agent import active_conversations, get_user_conversations
|
||||||
|
|
||||||
if not user_id:
|
if not user_id:
|
||||||
return {"error": "User ID is required", "conversations": []}
|
return {"error": "User ID is required", "conversations": []}
|
||||||
|
|
||||||
# Get metadata from persistent storage
|
# Get metadata from persistent storage
|
||||||
conversations_dict = get_user_conversations(user_id)
|
conversations_dict = get_user_conversations(user_id)
|
||||||
|
|
||||||
# Get in-memory conversations (with function calls)
|
# Get in-memory conversations (with function calls)
|
||||||
in_memory_conversations = active_conversations.get(user_id, {})
|
in_memory_conversations = active_conversations.get(user_id, {})
|
||||||
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"Getting chat history for user",
|
"Getting chat history for user",
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
|
|
@ -278,7 +283,7 @@ class ChatService:
|
||||||
|
|
||||||
# Convert conversations dict to list format with metadata
|
# Convert conversations dict to list format with metadata
|
||||||
conversations = []
|
conversations = []
|
||||||
|
|
||||||
# First, process in-memory conversations (they have function calls)
|
# First, process in-memory conversations (they have function calls)
|
||||||
for response_id, conversation_state in in_memory_conversations.items():
|
for response_id, conversation_state in in_memory_conversations.items():
|
||||||
# Filter out system messages
|
# Filter out system messages
|
||||||
|
|
@ -294,13 +299,13 @@ class ChatService:
|
||||||
}
|
}
|
||||||
if msg.get("response_id"):
|
if msg.get("response_id"):
|
||||||
message_data["response_id"] = msg["response_id"]
|
message_data["response_id"] = msg["response_id"]
|
||||||
|
|
||||||
# Include function call data if present
|
# Include function call data if present
|
||||||
if msg.get("chunks"):
|
if msg.get("chunks"):
|
||||||
message_data["chunks"] = msg["chunks"]
|
message_data["chunks"] = msg["chunks"]
|
||||||
if msg.get("response_data"):
|
if msg.get("response_data"):
|
||||||
message_data["response_data"] = msg["response_data"]
|
message_data["response_data"] = msg["response_data"]
|
||||||
|
|
||||||
messages.append(message_data)
|
messages.append(message_data)
|
||||||
|
|
||||||
if messages: # Only include conversations with actual messages
|
if messages: # Only include conversations with actual messages
|
||||||
|
|
@ -334,25 +339,27 @@ class ChatService:
|
||||||
"previous_response_id"
|
"previous_response_id"
|
||||||
),
|
),
|
||||||
"total_messages": len(messages),
|
"total_messages": len(messages),
|
||||||
"source": "in_memory"
|
"source": "in_memory",
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
# Then, add any persistent metadata that doesn't have in-memory data
|
# Then, add any persistent metadata that doesn't have in-memory data
|
||||||
for response_id, metadata in conversations_dict.items():
|
for response_id, metadata in conversations_dict.items():
|
||||||
if response_id not in in_memory_conversations:
|
if response_id not in in_memory_conversations:
|
||||||
# This is metadata-only conversation (no function calls)
|
# This is metadata-only conversation (no function calls)
|
||||||
conversations.append({
|
conversations.append(
|
||||||
"response_id": response_id,
|
{
|
||||||
"title": metadata.get("title", "New Chat"),
|
"response_id": response_id,
|
||||||
"endpoint": "chat",
|
"title": metadata.get("title", "New Chat"),
|
||||||
"messages": [], # No messages in metadata-only
|
"endpoint": "chat",
|
||||||
"created_at": metadata.get("created_at"),
|
"messages": [], # No messages in metadata-only
|
||||||
"last_activity": metadata.get("last_activity"),
|
"created_at": metadata.get("created_at"),
|
||||||
"previous_response_id": metadata.get("previous_response_id"),
|
"last_activity": metadata.get("last_activity"),
|
||||||
"total_messages": metadata.get("total_messages", 0),
|
"previous_response_id": metadata.get("previous_response_id"),
|
||||||
"source": "metadata_only"
|
"total_messages": metadata.get("total_messages", 0),
|
||||||
})
|
"source": "metadata_only",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
# Sort by last activity (most recent first)
|
# Sort by last activity (most recent first)
|
||||||
conversations.sort(key=lambda c: c.get("last_activity", ""), reverse=True)
|
conversations.sort(key=lambda c: c.get("last_activity", ""), reverse=True)
|
||||||
|
|
@ -368,7 +375,7 @@ class ChatService:
|
||||||
"""Get langflow conversation history for a user - now fetches from both OpenRAG memory and Langflow database"""
|
"""Get langflow conversation history for a user - now fetches from both OpenRAG memory and Langflow database"""
|
||||||
from agent import get_user_conversations
|
from agent import get_user_conversations
|
||||||
from services.langflow_history_service import langflow_history_service
|
from services.langflow_history_service import langflow_history_service
|
||||||
|
|
||||||
if not user_id:
|
if not user_id:
|
||||||
return {"error": "User ID is required", "conversations": []}
|
return {"error": "User ID is required", "conversations": []}
|
||||||
|
|
||||||
|
|
@ -378,27 +385,27 @@ class ChatService:
|
||||||
# 1. Get local conversation metadata (no actual messages stored here)
|
# 1. Get local conversation metadata (no actual messages stored here)
|
||||||
conversations_dict = get_user_conversations(user_id)
|
conversations_dict = get_user_conversations(user_id)
|
||||||
local_metadata = {}
|
local_metadata = {}
|
||||||
|
|
||||||
for response_id, conversation_metadata in conversations_dict.items():
|
for response_id, conversation_metadata in conversations_dict.items():
|
||||||
# Store metadata for later use with Langflow data
|
# Store metadata for later use with Langflow data
|
||||||
local_metadata[response_id] = conversation_metadata
|
local_metadata[response_id] = conversation_metadata
|
||||||
|
|
||||||
# 2. Get actual conversations from Langflow database (source of truth for messages)
|
# 2. Get actual conversations from Langflow database (source of truth for messages)
|
||||||
print(f"[DEBUG] Attempting to fetch Langflow history for user: {user_id}")
|
print(f"[DEBUG] Attempting to fetch Langflow history for user: {user_id}")
|
||||||
langflow_history = (
|
langflow_history = (
|
||||||
await langflow_history_service.get_user_conversation_history(
|
await langflow_history_service.get_user_conversation_history(
|
||||||
user_id, flow_id=FLOW_ID
|
user_id, flow_id=LANGFLOW_CHAT_FLOW_ID
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
if langflow_history.get("conversations"):
|
if langflow_history.get("conversations"):
|
||||||
for conversation in langflow_history["conversations"]:
|
for conversation in langflow_history["conversations"]:
|
||||||
session_id = conversation["session_id"]
|
session_id = conversation["session_id"]
|
||||||
|
|
||||||
# Only process sessions that belong to this user (exist in local metadata)
|
# Only process sessions that belong to this user (exist in local metadata)
|
||||||
if session_id not in local_metadata:
|
if session_id not in local_metadata:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Use Langflow messages (with function calls) as source of truth
|
# Use Langflow messages (with function calls) as source of truth
|
||||||
messages = []
|
messages = []
|
||||||
for msg in conversation.get("messages", []):
|
for msg in conversation.get("messages", []):
|
||||||
|
|
@ -407,62 +414,73 @@ class ChatService:
|
||||||
"content": msg["content"],
|
"content": msg["content"],
|
||||||
"timestamp": msg.get("timestamp"),
|
"timestamp": msg.get("timestamp"),
|
||||||
"langflow_message_id": msg.get("langflow_message_id"),
|
"langflow_message_id": msg.get("langflow_message_id"),
|
||||||
"source": "langflow"
|
"source": "langflow",
|
||||||
}
|
}
|
||||||
|
|
||||||
# Include function call data if present
|
# Include function call data if present
|
||||||
if msg.get("chunks"):
|
if msg.get("chunks"):
|
||||||
message_data["chunks"] = msg["chunks"]
|
message_data["chunks"] = msg["chunks"]
|
||||||
if msg.get("response_data"):
|
if msg.get("response_data"):
|
||||||
message_data["response_data"] = msg["response_data"]
|
message_data["response_data"] = msg["response_data"]
|
||||||
|
|
||||||
messages.append(message_data)
|
messages.append(message_data)
|
||||||
|
|
||||||
if messages:
|
if messages:
|
||||||
# Use local metadata if available, otherwise generate from Langflow data
|
# Use local metadata if available, otherwise generate from Langflow data
|
||||||
metadata = local_metadata.get(session_id, {})
|
metadata = local_metadata.get(session_id, {})
|
||||||
|
|
||||||
if not metadata.get("title"):
|
if not metadata.get("title"):
|
||||||
first_user_msg = next((msg for msg in messages if msg["role"] == "user"), None)
|
first_user_msg = next(
|
||||||
|
(msg for msg in messages if msg["role"] == "user"), None
|
||||||
|
)
|
||||||
title = (
|
title = (
|
||||||
first_user_msg["content"][:50] + "..."
|
first_user_msg["content"][:50] + "..."
|
||||||
if first_user_msg and len(first_user_msg["content"]) > 50
|
if first_user_msg
|
||||||
|
and len(first_user_msg["content"]) > 50
|
||||||
else first_user_msg["content"]
|
else first_user_msg["content"]
|
||||||
if first_user_msg
|
if first_user_msg
|
||||||
else "Langflow chat"
|
else "Langflow chat"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
title = metadata["title"]
|
title = metadata["title"]
|
||||||
|
|
||||||
all_conversations.append({
|
all_conversations.append(
|
||||||
"response_id": session_id,
|
{
|
||||||
"title": title,
|
"response_id": session_id,
|
||||||
"endpoint": "langflow",
|
"title": title,
|
||||||
"messages": messages, # Function calls preserved from Langflow
|
"endpoint": "langflow",
|
||||||
"created_at": metadata.get("created_at") or conversation.get("created_at"),
|
"messages": messages, # Function calls preserved from Langflow
|
||||||
"last_activity": metadata.get("last_activity") or conversation.get("last_activity"),
|
"created_at": metadata.get("created_at")
|
||||||
"total_messages": len(messages),
|
or conversation.get("created_at"),
|
||||||
"source": "langflow_enhanced",
|
"last_activity": metadata.get("last_activity")
|
||||||
"langflow_session_id": session_id,
|
or conversation.get("last_activity"),
|
||||||
"langflow_flow_id": conversation.get("flow_id")
|
"total_messages": len(messages),
|
||||||
})
|
"source": "langflow_enhanced",
|
||||||
|
"langflow_session_id": session_id,
|
||||||
|
"langflow_flow_id": conversation.get("flow_id"),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
# 3. Add any local metadata that doesn't have Langflow data yet (recent conversations)
|
# 3. Add any local metadata that doesn't have Langflow data yet (recent conversations)
|
||||||
for response_id, metadata in local_metadata.items():
|
for response_id, metadata in local_metadata.items():
|
||||||
if not any(c["response_id"] == response_id for c in all_conversations):
|
if not any(c["response_id"] == response_id for c in all_conversations):
|
||||||
all_conversations.append({
|
all_conversations.append(
|
||||||
"response_id": response_id,
|
{
|
||||||
"title": metadata.get("title", "New Chat"),
|
"response_id": response_id,
|
||||||
"endpoint": "langflow",
|
"title": metadata.get("title", "New Chat"),
|
||||||
"messages": [], # Will be filled when Langflow sync catches up
|
"endpoint": "langflow",
|
||||||
"created_at": metadata.get("created_at"),
|
"messages": [], # Will be filled when Langflow sync catches up
|
||||||
"last_activity": metadata.get("last_activity"),
|
"created_at": metadata.get("created_at"),
|
||||||
"total_messages": metadata.get("total_messages", 0),
|
"last_activity": metadata.get("last_activity"),
|
||||||
"source": "metadata_only"
|
"total_messages": metadata.get("total_messages", 0),
|
||||||
})
|
"source": "metadata_only",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
if langflow_history.get("conversations"):
|
if langflow_history.get("conversations"):
|
||||||
print(f"[DEBUG] Added {len(langflow_history['conversations'])} historical conversations from Langflow")
|
print(
|
||||||
|
f"[DEBUG] Added {len(langflow_history['conversations'])} historical conversations from Langflow"
|
||||||
|
)
|
||||||
elif langflow_history.get("error"):
|
elif langflow_history.get("error"):
|
||||||
print(
|
print(
|
||||||
f"[DEBUG] Could not fetch Langflow history for user {user_id}: {langflow_history['error']}"
|
f"[DEBUG] Could not fetch Langflow history for user {user_id}: {langflow_history['error']}"
|
||||||
|
|
@ -473,12 +491,14 @@ class ChatService:
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[ERROR] Failed to fetch Langflow history: {e}")
|
print(f"[ERROR] Failed to fetch Langflow history: {e}")
|
||||||
# Continue with just in-memory conversations
|
# Continue with just in-memory conversations
|
||||||
|
|
||||||
# Sort by last activity (most recent first)
|
# Sort by last activity (most recent first)
|
||||||
all_conversations.sort(key=lambda c: c.get("last_activity", ""), reverse=True)
|
all_conversations.sort(key=lambda c: c.get("last_activity", ""), reverse=True)
|
||||||
|
|
||||||
print(f"[DEBUG] Returning {len(all_conversations)} conversations ({len(local_metadata)} from local metadata)")
|
print(
|
||||||
|
f"[DEBUG] Returning {len(all_conversations)} conversations ({len(local_metadata)} from local metadata)"
|
||||||
|
)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"user_id": user_id,
|
"user_id": user_id,
|
||||||
"endpoint": "langflow",
|
"endpoint": "langflow",
|
||||||
|
|
|
||||||
157
src/services/langflow_file_service.py
Normal file
157
src/services/langflow_file_service.py
Normal file
|
|
@ -0,0 +1,157 @@
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
from config.settings import LANGFLOW_INGEST_FLOW_ID, clients
|
||||||
|
from utils.logging_config import get_logger
|
||||||
|
|
||||||
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class LangflowFileService:
    """Client for the Langflow file endpoints and the ingestion flow run.

    All HTTP traffic goes through ``clients.langflow_request``; the ingestion
    flow that is run is the one configured in ``LANGFLOW_INGEST_FLOW_ID``.
    """

    # IDs of the components inside the ingestion flow that receive tweaks.
    _FILE_COMPONENT_ID = "File-PSU37"
    _OPENSEARCH_COMPONENT_ID = "OpenSearchHybrid-Ve6bS"

    def __init__(self):
        self.flow_id_ingest = LANGFLOW_INGEST_FLOW_ID

    async def upload_user_file(
        self, file_tuple, jwt_token: Optional[str] = None
    ) -> Dict[str, Any]:
        """Upload a file using Langflow Files API v2: POST /api/v2/files.

        Args:
            file_tuple: Value sent as the ``file`` multipart field.
            jwt_token: Accepted for interface compatibility; not used by
                this method.

        Returns:
            JSON with keys: id, name, path, size, provider.

        Raises:
            Whatever ``resp.raise_for_status()`` raises for a 4xx/5xx reply.
        """
        logger.debug("[LF] Upload (v2) -> /api/v2/files")
        resp = await clients.langflow_request(
            "POST",
            "/api/v2/files",
            files={"file": file_tuple},
            # NOTE(review): presumably clears a default Content-Type so the
            # client can emit the multipart boundary header - confirm.
            headers={"Content-Type": None},
        )
        logger.debug(
            "[LF] Upload response",
            status_code=resp.status_code,
            reason=resp.reason_phrase,
        )
        if resp.status_code >= 400:
            logger.error(
                "[LF] Upload failed",
                status_code=resp.status_code,
                reason=resp.reason_phrase,
                # Truncated like the other error logs in this class.
                body=resp.text[:1000],
            )
            resp.raise_for_status()
        return resp.json()

    async def delete_user_file(self, file_id: str) -> None:
        """Delete a file by id using v2: DELETE /api/v2/files/{id}.

        Raises:
            Whatever ``resp.raise_for_status()`` raises for a 4xx/5xx reply.
        """
        # NOTE: use v2 root, not /api/v1
        logger.debug("[LF] Delete (v2) -> /api/v2/files/{id}", file_id=file_id)
        resp = await clients.langflow_request("DELETE", f"/api/v2/files/{file_id}")
        logger.debug(
            "[LF] Delete response",
            status_code=resp.status_code,
            reason=resp.reason_phrase,
        )
        if resp.status_code >= 400:
            logger.error(
                "[LF] Delete failed",
                status_code=resp.status_code,
                reason=resp.reason_phrase,
                body=resp.text[:500],
            )
            resp.raise_for_status()

    async def run_ingestion_flow(
        self,
        file_paths: List[str],
        jwt_token: str,
        session_id: Optional[str] = None,
        tweaks: Optional[Dict[str, Any]] = None,
        owner: Optional[str] = None,
        owner_name: Optional[str] = None,
        owner_email: Optional[str] = None,
        connector_type: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Trigger the ingestion flow with provided file paths.

        The flow must expose a File component path in input schema or accept
        files parameter.

        Args:
            file_paths: Paths handed to the flow's File component.
            jwt_token: Token passed to the OpenSearch component; a warning is
                logged when it is empty.
            session_id: Optional session ID included in the payload.
            tweaks: Optional caller tweaks. The dict is copied, never
                modified; entries for the OpenSearch component are merged
                with the values added here rather than replaced.
            owner: Optional ``owner`` metadata value.
            owner_name: Optional ``owner_name`` metadata value.
            owner_email: Optional ``owner_email`` metadata value.
            connector_type: Optional ``connector_type`` metadata value.

        Returns:
            The parsed JSON body of the run response.

        Raises:
            ValueError: If no ingestion flow ID is configured.
            Exception: Whatever ``resp.raise_for_status()`` raises for a
                4xx/5xx reply, or the JSON decoding error if the body is not
                valid JSON.
        """
        if not self.flow_id_ingest:
            logger.error("[LF] LANGFLOW_INGEST_FLOW_ID is not configured")
            raise ValueError("LANGFLOW_INGEST_FLOW_ID is not configured")

        payload: Dict[str, Any] = {
            "input_value": "Ingest files",
            "input_type": "chat",
            "output_type": "text",  # Changed from "json" to "text"
        }

        # Shallow-copy so the caller's dict is left untouched.
        tweaks = dict(tweaks) if tweaks else {}

        # Pass files via tweaks to File component
        if file_paths:
            tweaks[self._FILE_COMPONENT_ID] = {"path": file_paths}

        # Everything destined for the OpenSearch component is collected first
        # and merged once, so the JWT and the metadata are handled the same way.
        opensearch_updates: Dict[str, Any] = {}

        if jwt_token:
            opensearch_updates["jwt_token"] = jwt_token
            logger.debug("[LF] Added JWT token to tweaks for OpenSearch components")
        else:
            logger.warning("[LF] No JWT token provided")

        # Pass metadata via tweaks to OpenSearch component; empty values are skipped.
        metadata_tweaks = [
            {"key": key, "value": value}
            for key, value in (
                ("owner", owner),
                ("owner_name", owner_name),
                ("owner_email", owner_email),
                ("connector_type", connector_type),
            )
            if value
        ]
        if metadata_tweaks:
            opensearch_updates["docs_metadata"] = metadata_tweaks
            logger.debug(
                "[LF] Added metadata to tweaks", metadata_count=len(metadata_tweaks)
            )

        if opensearch_updates:
            # Build a new inner dict: caller-supplied settings for the
            # component are kept, and the caller's nested dict is not mutated.
            existing = tweaks.get(self._OPENSEARCH_COMPONENT_ID) or {}
            tweaks[self._OPENSEARCH_COMPONENT_ID] = {**existing, **opensearch_updates}

        if tweaks:
            payload["tweaks"] = tweaks
        if session_id:
            payload["session_id"] = session_id

        logger.debug(
            "[LF] Run ingestion -> /run/%s | files=%s session_id=%s tweaks_keys=%s jwt_present=%s",
            self.flow_id_ingest,
            len(file_paths) if file_paths else 0,
            session_id,
            list(tweaks.keys()),
            bool(jwt_token),
        )

        # Avoid logging full payload to prevent leaking sensitive data (e.g., JWT)

        resp = await clients.langflow_request(
            "POST", f"/api/v1/run/{self.flow_id_ingest}", json=payload
        )
        logger.debug(
            "[LF] Run response", status_code=resp.status_code, reason=resp.reason_phrase
        )
        if resp.status_code >= 400:
            logger.error(
                "[LF] Run failed",
                status_code=resp.status_code,
                reason=resp.reason_phrase,
                body=resp.text[:1000],
            )
            resp.raise_for_status()

        try:
            resp_json = resp.json()
        except Exception as e:
            logger.error(
                "[LF] Failed to parse run response as JSON",
                body=resp.text[:1000],
                error=str(e),
            )
            raise
        return resp_json
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
from typing import Any, Dict, Optional
|
from typing import Any, Dict
|
||||||
from agentd.tool_decorator import tool
|
from agentd.tool_decorator import tool
|
||||||
from config.settings import clients, INDEX_NAME, EMBED_MODEL
|
from config.settings import clients, INDEX_NAME, EMBED_MODEL
|
||||||
from auth_context import get_auth_context
|
from auth_context import get_auth_context
|
||||||
|
|
@ -166,11 +166,11 @@ class SearchService:
|
||||||
for hit in results["hits"]["hits"]:
|
for hit in results["hits"]["hits"]:
|
||||||
chunks.append(
|
chunks.append(
|
||||||
{
|
{
|
||||||
"filename": hit["_source"]["filename"],
|
"filename": hit["_source"].get("filename"),
|
||||||
"mimetype": hit["_source"]["mimetype"],
|
"mimetype": hit["_source"].get("mimetype"),
|
||||||
"page": hit["_source"]["page"],
|
"page": hit["_source"].get("page"),
|
||||||
"text": hit["_source"]["text"],
|
"text": hit["_source"].get("text"),
|
||||||
"score": hit["_score"],
|
"score": hit.get("_score"),
|
||||||
"source_url": hit["_source"].get("source_url"),
|
"source_url": hit["_source"].get("source_url"),
|
||||||
"owner": hit["_source"].get("owner"),
|
"owner": hit["_source"].get("owner"),
|
||||||
"owner_name": hit["_source"].get("owner_name"),
|
"owner_name": hit["_source"].get("owner_name"),
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,11 @@
|
||||||
import asyncio
|
import asyncio
|
||||||
import uuid
|
|
||||||
import time
|
|
||||||
import random
|
import random
|
||||||
from typing import Dict, Optional
|
import time
|
||||||
|
import uuid
|
||||||
|
|
||||||
from models.tasks import TaskStatus, UploadTask, FileTask
|
from models.tasks import FileTask, TaskStatus, UploadTask
|
||||||
from utils.gpu_detection import get_worker_count
|
|
||||||
from session_manager import AnonymousUser
|
from session_manager import AnonymousUser
|
||||||
|
from utils.gpu_detection import get_worker_count
|
||||||
from utils.logging_config import get_logger
|
from utils.logging_config import get_logger
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
|
|
@ -16,9 +15,7 @@ class TaskService:
|
||||||
def __init__(self, document_service=None, process_pool=None):
|
def __init__(self, document_service=None, process_pool=None):
|
||||||
self.document_service = document_service
|
self.document_service = document_service
|
||||||
self.process_pool = process_pool
|
self.process_pool = process_pool
|
||||||
self.task_store: Dict[
|
self.task_store: dict[str, dict[str, UploadTask]] = {} # user_id -> {task_id -> UploadTask}
|
||||||
str, Dict[str, UploadTask]
|
|
||||||
] = {} # user_id -> {task_id -> UploadTask}
|
|
||||||
self.background_tasks = set()
|
self.background_tasks = set()
|
||||||
|
|
||||||
if self.process_pool is None:
|
if self.process_pool is None:
|
||||||
|
|
@ -69,9 +66,7 @@ class TaskService:
|
||||||
self.task_store[user_id][task_id] = upload_task
|
self.task_store[user_id][task_id] = upload_task
|
||||||
|
|
||||||
# Start background processing
|
# Start background processing
|
||||||
background_task = asyncio.create_task(
|
background_task = asyncio.create_task(self.background_custom_processor(user_id, task_id, items))
|
||||||
self.background_custom_processor(user_id, task_id, items)
|
|
||||||
)
|
|
||||||
self.background_tasks.add(background_task)
|
self.background_tasks.add(background_task)
|
||||||
background_task.add_done_callback(self.background_tasks.discard)
|
background_task.add_done_callback(self.background_tasks.discard)
|
||||||
|
|
||||||
|
|
@ -89,27 +84,18 @@ class TaskService:
|
||||||
|
|
||||||
# Process files with limited concurrency to avoid overwhelming the system
|
# Process files with limited concurrency to avoid overwhelming the system
|
||||||
max_workers = get_worker_count()
|
max_workers = get_worker_count()
|
||||||
semaphore = asyncio.Semaphore(
|
semaphore = asyncio.Semaphore(max_workers * 2) # Allow 2x process pool size for async I/O
|
||||||
max_workers * 2
|
|
||||||
) # Allow 2x process pool size for async I/O
|
|
||||||
|
|
||||||
async def process_with_semaphore(file_path: str):
|
async def process_with_semaphore(file_path: str):
|
||||||
async with semaphore:
|
async with semaphore:
|
||||||
await self.document_service.process_single_file_task(
|
await self.document_service.process_single_file_task(upload_task, file_path)
|
||||||
upload_task, file_path
|
|
||||||
)
|
|
||||||
|
|
||||||
tasks = [
|
tasks = [process_with_semaphore(file_path) for file_path in upload_task.file_tasks.keys()]
|
||||||
process_with_semaphore(file_path)
|
|
||||||
for file_path in upload_task.file_tasks.keys()
|
|
||||||
]
|
|
||||||
|
|
||||||
await asyncio.gather(*tasks, return_exceptions=True)
|
await asyncio.gather(*tasks, return_exceptions=True)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(
|
logger.error("Background upload processor failed", task_id=task_id, error=str(e))
|
||||||
"Background upload processor failed", task_id=task_id, error=str(e)
|
|
||||||
)
|
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
|
|
@ -117,9 +103,7 @@ class TaskService:
|
||||||
self.task_store[user_id][task_id].status = TaskStatus.FAILED
|
self.task_store[user_id][task_id].status = TaskStatus.FAILED
|
||||||
self.task_store[user_id][task_id].updated_at = time.time()
|
self.task_store[user_id][task_id].updated_at = time.time()
|
||||||
|
|
||||||
async def background_custom_processor(
|
async def background_custom_processor(self, user_id: str, task_id: str, items: list) -> None:
|
||||||
self, user_id: str, task_id: str, items: list
|
|
||||||
) -> None:
|
|
||||||
"""Background task to process items using custom processor"""
|
"""Background task to process items using custom processor"""
|
||||||
try:
|
try:
|
||||||
upload_task = self.task_store[user_id][task_id]
|
upload_task = self.task_store[user_id][task_id]
|
||||||
|
|
@ -141,9 +125,7 @@ class TaskService:
|
||||||
try:
|
try:
|
||||||
await processor.process_item(upload_task, item, file_task)
|
await processor.process_item(upload_task, item, file_task)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(
|
logger.error("Failed to process item", item=str(item), error=str(e))
|
||||||
"Failed to process item", item=str(item), error=str(e)
|
|
||||||
)
|
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
|
|
@ -170,9 +152,7 @@ class TaskService:
|
||||||
pass
|
pass
|
||||||
raise # Re-raise to properly handle cancellation
|
raise # Re-raise to properly handle cancellation
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(
|
logger.error("Background custom processor failed", task_id=task_id, error=str(e))
|
||||||
"Background custom processor failed", task_id=task_id, error=str(e)
|
|
||||||
)
|
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
|
|
@ -180,7 +160,7 @@ class TaskService:
|
||||||
self.task_store[user_id][task_id].status = TaskStatus.FAILED
|
self.task_store[user_id][task_id].status = TaskStatus.FAILED
|
||||||
self.task_store[user_id][task_id].updated_at = time.time()
|
self.task_store[user_id][task_id].updated_at = time.time()
|
||||||
|
|
||||||
def get_task_status(self, user_id: str, task_id: str) -> Optional[dict]:
|
def get_task_status(self, user_id: str, task_id: str) -> dict | None:
|
||||||
"""Get the status of a specific upload task
|
"""Get the status of a specific upload task
|
||||||
|
|
||||||
Includes fallback to shared tasks stored under the "anonymous" user key
|
Includes fallback to shared tasks stored under the "anonymous" user key
|
||||||
|
|
@ -194,10 +174,7 @@ class TaskService:
|
||||||
|
|
||||||
upload_task = None
|
upload_task = None
|
||||||
for candidate_user_id in candidate_user_ids:
|
for candidate_user_id in candidate_user_ids:
|
||||||
if (
|
if candidate_user_id in self.task_store and task_id in self.task_store[candidate_user_id]:
|
||||||
candidate_user_id in self.task_store
|
|
||||||
and task_id in self.task_store[candidate_user_id]
|
|
||||||
):
|
|
||||||
upload_task = self.task_store[candidate_user_id][task_id]
|
upload_task = self.task_store[candidate_user_id][task_id]
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
@ -271,10 +248,7 @@ class TaskService:
|
||||||
|
|
||||||
store_user_id = None
|
store_user_id = None
|
||||||
for candidate_user_id in candidate_user_ids:
|
for candidate_user_id in candidate_user_ids:
|
||||||
if (
|
if candidate_user_id in self.task_store and task_id in self.task_store[candidate_user_id]:
|
||||||
candidate_user_id in self.task_store
|
|
||||||
and task_id in self.task_store[candidate_user_id]
|
|
||||||
):
|
|
||||||
store_user_id = candidate_user_id
|
store_user_id = candidate_user_id
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
@ -288,10 +262,7 @@ class TaskService:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Cancel the background task to stop scheduling new work
|
# Cancel the background task to stop scheduling new work
|
||||||
if (
|
if hasattr(upload_task, "background_task") and not upload_task.background_task.done():
|
||||||
hasattr(upload_task, "background_task")
|
|
||||||
and not upload_task.background_task.done()
|
|
||||||
):
|
|
||||||
upload_task.background_task.cancel()
|
upload_task.background_task.cancel()
|
||||||
|
|
||||||
# Mark task as failed (cancelled)
|
# Mark task as failed (cancelled)
|
||||||
|
|
|
||||||
|
|
@ -1,23 +1,23 @@
|
||||||
"""Environment configuration manager for OpenRAG TUI."""
|
"""Environment configuration manager for OpenRAG TUI."""
|
||||||
|
|
||||||
import os
|
|
||||||
import secrets
|
import secrets
|
||||||
import string
|
import string
|
||||||
|
from dataclasses import dataclass, field
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Dict, Optional, List
|
from typing import Dict, List, Optional
|
||||||
from dataclasses import dataclass, field
|
|
||||||
from utils.logging_config import get_logger
|
from utils.logging_config import get_logger
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
from ..utils.validation import (
|
from ..utils.validation import (
|
||||||
validate_openai_api_key,
|
sanitize_env_value,
|
||||||
|
validate_documents_paths,
|
||||||
validate_google_oauth_client_id,
|
validate_google_oauth_client_id,
|
||||||
validate_non_empty,
|
validate_non_empty,
|
||||||
|
validate_openai_api_key,
|
||||||
validate_url,
|
validate_url,
|
||||||
validate_documents_paths,
|
|
||||||
sanitize_env_value,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -31,7 +31,8 @@ class EnvConfig:
|
||||||
langflow_secret_key: str = ""
|
langflow_secret_key: str = ""
|
||||||
langflow_superuser: str = "admin"
|
langflow_superuser: str = "admin"
|
||||||
langflow_superuser_password: str = ""
|
langflow_superuser_password: str = ""
|
||||||
flow_id: str = "1098eea1-6649-4e1d-aed1-b77249fb8dd0"
|
langflow_chat_flow_id: str = "1098eea1-6649-4e1d-aed1-b77249fb8dd0"
|
||||||
|
langflow_ingest_flow_id: str = "5488df7c-b93f-4f87-a446-b67028bc0813"
|
||||||
|
|
||||||
# OAuth settings
|
# OAuth settings
|
||||||
google_oauth_client_id: str = ""
|
google_oauth_client_id: str = ""
|
||||||
|
|
@ -98,7 +99,8 @@ class EnvManager:
|
||||||
"LANGFLOW_SECRET_KEY": "langflow_secret_key",
|
"LANGFLOW_SECRET_KEY": "langflow_secret_key",
|
||||||
"LANGFLOW_SUPERUSER": "langflow_superuser",
|
"LANGFLOW_SUPERUSER": "langflow_superuser",
|
||||||
"LANGFLOW_SUPERUSER_PASSWORD": "langflow_superuser_password",
|
"LANGFLOW_SUPERUSER_PASSWORD": "langflow_superuser_password",
|
||||||
"FLOW_ID": "flow_id",
|
"LANGFLOW_CHAT_FLOW_ID": "langflow_chat_flow_id",
|
||||||
|
"LANGFLOW_INGEST_FLOW_ID": "langflow_ingest_flow_id",
|
||||||
"NUDGES_FLOW_ID": "nudges_flow_id",
|
"NUDGES_FLOW_ID": "nudges_flow_id",
|
||||||
"GOOGLE_OAUTH_CLIENT_ID": "google_oauth_client_id",
|
"GOOGLE_OAUTH_CLIENT_ID": "google_oauth_client_id",
|
||||||
"GOOGLE_OAUTH_CLIENT_SECRET": "google_oauth_client_secret",
|
"GOOGLE_OAUTH_CLIENT_SECRET": "google_oauth_client_secret",
|
||||||
|
|
@ -235,7 +237,10 @@ class EnvManager:
|
||||||
f.write(
|
f.write(
|
||||||
f"LANGFLOW_SUPERUSER_PASSWORD={self.config.langflow_superuser_password}\n"
|
f"LANGFLOW_SUPERUSER_PASSWORD={self.config.langflow_superuser_password}\n"
|
||||||
)
|
)
|
||||||
f.write(f"FLOW_ID={self.config.flow_id}\n")
|
f.write(f"LANGFLOW_CHAT_FLOW_ID={self.config.langflow_chat_flow_id}\n")
|
||||||
|
f.write(
|
||||||
|
f"LANGFLOW_INGEST_FLOW_ID={self.config.langflow_ingest_flow_id}\n"
|
||||||
|
)
|
||||||
f.write(f"NUDGES_FLOW_ID={self.config.nudges_flow_id}\n")
|
f.write(f"NUDGES_FLOW_ID={self.config.nudges_flow_id}\n")
|
||||||
f.write(f"OPENSEARCH_PASSWORD={self.config.opensearch_password}\n")
|
f.write(f"OPENSEARCH_PASSWORD={self.config.opensearch_password}\n")
|
||||||
f.write(f"OPENAI_API_KEY={self.config.openai_api_key}\n")
|
f.write(f"OPENAI_API_KEY={self.config.openai_api_key}\n")
|
||||||
|
|
|
||||||
|
|
@ -63,6 +63,7 @@ class DiagnosticsScreen(Screen):
|
||||||
yield Button("Refresh", variant="primary", id="refresh-btn")
|
yield Button("Refresh", variant="primary", id="refresh-btn")
|
||||||
yield Button("Check Podman", variant="default", id="check-podman-btn")
|
yield Button("Check Podman", variant="default", id="check-podman-btn")
|
||||||
yield Button("Check Docker", variant="default", id="check-docker-btn")
|
yield Button("Check Docker", variant="default", id="check-docker-btn")
|
||||||
|
yield Button("Check OpenSearch Security", variant="default", id="check-opensearch-security-btn")
|
||||||
yield Button("Copy to Clipboard", variant="default", id="copy-btn")
|
yield Button("Copy to Clipboard", variant="default", id="copy-btn")
|
||||||
yield Button("Save to File", variant="default", id="save-btn")
|
yield Button("Save to File", variant="default", id="save-btn")
|
||||||
yield Button("Back", variant="default", id="back-btn")
|
yield Button("Back", variant="default", id="back-btn")
|
||||||
|
|
@ -92,6 +93,8 @@ class DiagnosticsScreen(Screen):
|
||||||
asyncio.create_task(self.check_podman())
|
asyncio.create_task(self.check_podman())
|
||||||
elif event.button.id == "check-docker-btn":
|
elif event.button.id == "check-docker-btn":
|
||||||
asyncio.create_task(self.check_docker())
|
asyncio.create_task(self.check_docker())
|
||||||
|
elif event.button.id == "check-opensearch-security-btn":
|
||||||
|
asyncio.create_task(self.check_opensearch_security())
|
||||||
elif event.button.id == "copy-btn":
|
elif event.button.id == "copy-btn":
|
||||||
self.copy_to_clipboard()
|
self.copy_to_clipboard()
|
||||||
elif event.button.id == "save-btn":
|
elif event.button.id == "save-btn":
|
||||||
|
|
@ -415,5 +418,208 @@ class DiagnosticsScreen(Screen):
|
||||||
|
|
||||||
log.write("")
|
log.write("")
|
||||||
|
|
||||||
|
async def check_opensearch_security(self) -> None:
|
||||||
|
"""Run OpenSearch security configuration diagnostics."""
|
||||||
|
log = self.query_one("#diagnostics-log", Log)
|
||||||
|
log.write("[bold green]OpenSearch Security Diagnostics[/bold green]")
|
||||||
|
|
||||||
|
# Get OpenSearch password from environment or prompt user that it's needed
|
||||||
|
opensearch_password = os.getenv("OPENSEARCH_PASSWORD")
|
||||||
|
if not opensearch_password:
|
||||||
|
log.write("[red]OPENSEARCH_PASSWORD environment variable not set[/red]")
|
||||||
|
log.write("[yellow]Set OPENSEARCH_PASSWORD to test security configuration[/yellow]")
|
||||||
|
log.write("")
|
||||||
|
return
|
||||||
|
|
||||||
|
# Test basic authentication
|
||||||
|
log.write("Testing basic authentication...")
|
||||||
|
cmd = [
|
||||||
|
"curl", "-s", "-k", "-w", "%{http_code}",
|
||||||
|
"-u", f"admin:{opensearch_password}",
|
||||||
|
"https://localhost:9200"
|
||||||
|
]
|
||||||
|
process = await asyncio.create_subprocess_exec(
|
||||||
|
*cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
|
||||||
|
)
|
||||||
|
stdout, stderr = await process.communicate()
|
||||||
|
|
||||||
|
if process.returncode == 0:
|
||||||
|
response = stdout.decode().strip()
|
||||||
|
# Extract HTTP status code (last 3 characters)
|
||||||
|
if len(response) >= 3:
|
||||||
|
status_code = response[-3:]
|
||||||
|
response_body = response[:-3]
|
||||||
|
if status_code == "200":
|
||||||
|
log.write("[green]✓ Basic authentication successful[/green]")
|
||||||
|
try:
|
||||||
|
import json
|
||||||
|
info = json.loads(response_body)
|
||||||
|
if "version" in info and "distribution" in info["version"]:
|
||||||
|
log.write(f" OpenSearch version: {info['version']['number']}")
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
log.write(f"[red]✗ Basic authentication failed with status {status_code}[/red]")
|
||||||
|
else:
|
||||||
|
log.write("[red]✗ Unexpected response from OpenSearch[/red]")
|
||||||
|
else:
|
||||||
|
log.write(f"[red]✗ Failed to connect to OpenSearch: {stderr.decode().strip()}[/red]")
|
||||||
|
|
||||||
|
# Test security plugin account info
|
||||||
|
log.write("Testing security plugin account info...")
|
||||||
|
cmd = [
|
||||||
|
"curl", "-s", "-k", "-w", "%{http_code}",
|
||||||
|
"-u", f"admin:{opensearch_password}",
|
||||||
|
"https://localhost:9200/_plugins/_security/api/account"
|
||||||
|
]
|
||||||
|
process = await asyncio.create_subprocess_exec(
|
||||||
|
*cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
|
||||||
|
)
|
||||||
|
stdout, stderr = await process.communicate()
|
||||||
|
|
||||||
|
if process.returncode == 0:
|
||||||
|
response = stdout.decode().strip()
|
||||||
|
if len(response) >= 3:
|
||||||
|
status_code = response[-3:]
|
||||||
|
response_body = response[:-3]
|
||||||
|
if status_code == "200":
|
||||||
|
log.write("[green]✓ Security plugin accessible[/green]")
|
||||||
|
try:
|
||||||
|
import json
|
||||||
|
user_info = json.loads(response_body)
|
||||||
|
if "user_name" in user_info:
|
||||||
|
log.write(f" Current user: {user_info['user_name']}")
|
||||||
|
if "roles" in user_info:
|
||||||
|
log.write(f" Roles: {', '.join(user_info['roles'])}")
|
||||||
|
if "tenants" in user_info:
|
||||||
|
tenants = list(user_info['tenants'].keys())
|
||||||
|
log.write(f" Tenants: {', '.join(tenants)}")
|
||||||
|
except:
|
||||||
|
log.write(" Account info retrieved but couldn't parse JSON")
|
||||||
|
else:
|
||||||
|
log.write(f"[red]✗ Security plugin returned status {status_code}[/red]")
|
||||||
|
else:
|
||||||
|
log.write(f"[red]✗ Failed to access security plugin: {stderr.decode().strip()}[/red]")
|
||||||
|
|
||||||
|
# Test internal users
|
||||||
|
log.write("Testing internal users configuration...")
|
||||||
|
cmd = [
|
||||||
|
"curl", "-s", "-k", "-w", "%{http_code}",
|
||||||
|
"-u", f"admin:{opensearch_password}",
|
||||||
|
"https://localhost:9200/_plugins/_security/api/internalusers"
|
||||||
|
]
|
||||||
|
process = await asyncio.create_subprocess_exec(
|
||||||
|
*cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
|
||||||
|
)
|
||||||
|
stdout, stderr = await process.communicate()
|
||||||
|
|
||||||
|
if process.returncode == 0:
|
||||||
|
response = stdout.decode().strip()
|
||||||
|
if len(response) >= 3:
|
||||||
|
status_code = response[-3:]
|
||||||
|
response_body = response[:-3]
|
||||||
|
if status_code == "200":
|
||||||
|
try:
|
||||||
|
import json
|
||||||
|
users = json.loads(response_body)
|
||||||
|
if "admin" in users:
|
||||||
|
log.write("[green]✓ Admin user configured[/green]")
|
||||||
|
admin_user = users["admin"]
|
||||||
|
if admin_user.get("reserved"):
|
||||||
|
log.write(" Admin user is reserved (protected)")
|
||||||
|
log.write(f" Total internal users: {len(users)}")
|
||||||
|
except:
|
||||||
|
log.write("[green]✓ Internal users endpoint accessible[/green]")
|
||||||
|
else:
|
||||||
|
log.write(f"[red]✗ Internal users returned status {status_code}[/red]")
|
||||||
|
else:
|
||||||
|
log.write(f"[red]✗ Failed to access internal users: {stderr.decode().strip()}[/red]")
|
||||||
|
|
||||||
|
# Test authentication domains configuration
|
||||||
|
log.write("Testing authentication configuration...")
|
||||||
|
cmd = [
|
||||||
|
"curl", "-s", "-k", "-w", "%{http_code}",
|
||||||
|
"-u", f"admin:{opensearch_password}",
|
||||||
|
"https://localhost:9200/_plugins/_security/api/securityconfig"
|
||||||
|
]
|
||||||
|
process = await asyncio.create_subprocess_exec(
|
||||||
|
*cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
|
||||||
|
)
|
||||||
|
stdout, stderr = await process.communicate()
|
||||||
|
|
||||||
|
if process.returncode == 0:
|
||||||
|
response = stdout.decode().strip()
|
||||||
|
if len(response) >= 3:
|
||||||
|
status_code = response[-3:]
|
||||||
|
response_body = response[:-3]
|
||||||
|
if status_code == "200":
|
||||||
|
try:
|
||||||
|
import json
|
||||||
|
config = json.loads(response_body)
|
||||||
|
if "config" in config and "dynamic" in config["config"] and "authc" in config["config"]["dynamic"]:
|
||||||
|
authc = config["config"]["dynamic"]["authc"]
|
||||||
|
if "openid_auth_domain" in authc:
|
||||||
|
log.write("[green]✓ OpenID Connect authentication domain configured[/green]")
|
||||||
|
oidc_config = authc["openid_auth_domain"].get("http_authenticator", {}).get("config", {})
|
||||||
|
if "openid_connect_url" in oidc_config:
|
||||||
|
log.write(f" OIDC URL: {oidc_config['openid_connect_url']}")
|
||||||
|
if "subject_key" in oidc_config:
|
||||||
|
log.write(f" Subject key: {oidc_config['subject_key']}")
|
||||||
|
if "basic_internal_auth_domain" in authc:
|
||||||
|
log.write("[green]✓ Basic internal authentication domain configured[/green]")
|
||||||
|
|
||||||
|
# Check for multi-tenancy
|
||||||
|
if "kibana" in config["config"]["dynamic"]:
|
||||||
|
kibana_config = config["config"]["dynamic"]["kibana"]
|
||||||
|
if kibana_config.get("multitenancy_enabled"):
|
||||||
|
log.write("[green]✓ Multi-tenancy enabled[/green]")
|
||||||
|
else:
|
||||||
|
log.write("[yellow]⚠ Authentication configuration not found in expected format[/yellow]")
|
||||||
|
except Exception as e:
|
||||||
|
log.write("[green]✓ Security config endpoint accessible[/green]")
|
||||||
|
log.write(f" (Could not parse JSON: {str(e)[:50]}...)")
|
||||||
|
else:
|
||||||
|
log.write(f"[red]✗ Security config returned status {status_code}[/red]")
|
||||||
|
else:
|
||||||
|
log.write(f"[red]✗ Failed to access security config: {stderr.decode().strip()}[/red]")
|
||||||
|
|
||||||
|
# Test indices with potential security filtering
|
||||||
|
log.write("Testing index access...")
|
||||||
|
cmd = [
|
||||||
|
"curl", "-s", "-k", "-w", "%{http_code}",
|
||||||
|
"-u", f"admin:{opensearch_password}",
|
||||||
|
"https://localhost:9200/_cat/indices?v"
|
||||||
|
]
|
||||||
|
process = await asyncio.create_subprocess_exec(
|
||||||
|
*cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
|
||||||
|
)
|
||||||
|
stdout, stderr = await process.communicate()
|
||||||
|
|
||||||
|
if process.returncode == 0:
|
||||||
|
response = stdout.decode().strip()
|
||||||
|
if len(response) >= 3:
|
||||||
|
status_code = response[-3:]
|
||||||
|
response_body = response[:-3]
|
||||||
|
if status_code == "200":
|
||||||
|
log.write("[green]✓ Index listing accessible[/green]")
|
||||||
|
lines = response_body.strip().split('\n')
|
||||||
|
if len(lines) > 1: # Skip header
|
||||||
|
indices_found = []
|
||||||
|
for line in lines[1:]:
|
||||||
|
if 'documents' in line:
|
||||||
|
indices_found.append('documents')
|
||||||
|
elif 'knowledge_filters' in line:
|
||||||
|
indices_found.append('knowledge_filters')
|
||||||
|
elif '.opendistro_security' in line:
|
||||||
|
indices_found.append('.opendistro_security')
|
||||||
|
if indices_found:
|
||||||
|
log.write(f" Key indices found: {', '.join(indices_found)}")
|
||||||
|
else:
|
||||||
|
log.write(f"[red]✗ Index listing returned status {status_code}[/red]")
|
||||||
|
else:
|
||||||
|
log.write(f"[red]✗ Failed to list indices: {stderr.decode().strip()}[/red]")
|
||||||
|
|
||||||
|
log.write("")
|
||||||
|
|
||||||
|
|
||||||
# Made with Bob
|
# Made with Bob
|
||||||
|
|
|
||||||
4
uv.lock
generated
4
uv.lock
generated
|
|
@ -1,5 +1,5 @@
|
||||||
version = 1
|
version = 1
|
||||||
revision = 2
|
revision = 3
|
||||||
requires-python = ">=3.13"
|
requires-python = ">=3.13"
|
||||||
resolution-markers = [
|
resolution-markers = [
|
||||||
"sys_platform == 'darwin'",
|
"sys_platform == 'darwin'",
|
||||||
|
|
@ -1405,7 +1405,7 @@ wheels = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "openrag"
|
name = "openrag"
|
||||||
version = "0.1.0"
|
version = "0.1.1"
|
||||||
source = { editable = "." }
|
source = { editable = "." }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "agentd" },
|
{ name = "agentd" },
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
from docling.document_converter import DocumentConverter
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
from docling.document_converter import DocumentConverter
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue