Merge branch 'main' into fix-main

This commit is contained in:
Sebastián Estévez 2025-10-08 23:15:32 -04:00 committed by GitHub
commit 101c83ad36
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
77 changed files with 2225 additions and 793 deletions

BIN
.DS_Store vendored

Binary file not shown.

View file

@ -37,6 +37,9 @@ AWS_SECRET_ACCESS_KEY=
# OPTIONAL url for openrag link to langflow in the UI
LANGFLOW_PUBLIC_URL=
# OPTIONAL: Override host for docling service (for special networking setups)
# HOST_DOCKER_INTERNAL=host.containers.internal
# Langflow auth
LANGFLOW_AUTO_LOGIN=False
LANGFLOW_SUPERUSER=

View file

@ -1,59 +0,0 @@
name: Build Langflow Responses Multi-Arch
on:
workflow_dispatch:
jobs:
build:
strategy:
fail-fast: false
matrix:
include:
- platform: linux/amd64
arch: amd64
runs-on: ubuntu-latest
- platform: linux/arm64
arch: arm64
runs-on: [self-hosted, linux, ARM64, langflow-ai-arm64-2]
runs-on: ${{ matrix.runs-on }}
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Build and push langflow (${{ matrix.arch }})
uses: docker/build-push-action@v5
with:
context: .
file: ./Dockerfile.langflow
platforms: ${{ matrix.platform }}
push: true
tags: phact/langflow:responses-${{ matrix.arch }}
cache-from: type=gha,scope=langflow-responses-${{ matrix.arch }}
cache-to: type=gha,mode=max,scope=langflow-responses-${{ matrix.arch }}
manifest:
needs: build
runs-on: ubuntu-latest
steps:
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Create and push multi-arch manifest
run: |
docker buildx imagetools create -t phact/langflow:responses \
phact/langflow:responses-amd64 \
phact/langflow:responses-arm64

View file

@ -1,16 +1,95 @@
name: Build Multi-Architecture Docker Images
name: Release + Docker Images (multi-arch)
on:
push:
branches:
- main
paths:
- 'pyproject.toml'
workflow_dispatch:
inputs:
update_latest:
description: 'Update latest tags (production release)'
required: false
default: false
type: boolean
jobs:
build-python-packages:
runs-on: ubuntu-latest
outputs:
skip_release: ${{ steps.version.outputs.skip_release }}
version: ${{ steps.version.outputs.version }}
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.13'
- name: Install uv
uses: astral-sh/setup-uv@v3
- name: Extract version from pyproject.toml
id: version
run: |
VERSION=$(grep '^version = ' pyproject.toml | cut -d '"' -f 2)
echo "version=$VERSION" >> $GITHUB_OUTPUT
echo "Version: $VERSION"
# Check if tag already exists
if git rev-parse "v$VERSION" >/dev/null 2>&1; then
echo "Tag v$VERSION already exists, skipping release"
echo "skip_release=true" >> $GITHUB_OUTPUT
exit 0
fi
echo "skip_release=false" >> $GITHUB_OUTPUT
# Check if version is numeric (e.g., 0.1.16) vs prerelease (e.g., 0.1.16-rc1)
if [[ "$VERSION" =~ ^[0-9.-]+$ ]]; then
echo "is_prerelease=false" >> $GITHUB_OUTPUT
echo "Release type: Production"
else
echo "is_prerelease=true" >> $GITHUB_OUTPUT
echo "Release type: Prerelease"
fi
- name: Build wheel and source distribution
if: steps.version.outputs.skip_release != 'true'
run: |
uv build
- name: List built artifacts
if: steps.version.outputs.skip_release != 'true'
run: |
ls -la dist/
echo "Built artifacts:"
for file in dist/*; do
echo " - $(basename $file) ($(stat -c%s $file | numfmt --to=iec-i)B)"
done
- name: Upload build artifacts
if: steps.version.outputs.skip_release != 'true'
uses: actions/upload-artifact@v4
with:
name: python-packages
path: dist/
retention-days: 30
- name: Create Release
if: steps.version.outputs.skip_release != 'true'
uses: softprops/action-gh-release@v2
with:
tag_name: v${{ steps.version.outputs.version }}
name: Release ${{ steps.version.outputs.version }}
draft: false
prerelease: ${{ steps.version.outputs.is_prerelease }}
generate_release_notes: true
files: |
dist/*.whl
dist/*.tar.gz
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
build:
needs: build-python-packages
if: needs.build-python-packages.outputs.skip_release != 'true'
strategy:
fail-fast: false
matrix:
@ -106,9 +185,9 @@ jobs:
cache-to: type=gha,mode=max,scope=${{ matrix.image }}-${{ matrix.arch }}
manifest:
needs: build
needs: [build, build-python-packages]
runs-on: ubuntu-latest
if: github.event_name != 'pull_request'
if: github.event_name != 'pull_request' && needs.build-python-packages.outputs.skip_release != 'true'
steps:
- name: Checkout
uses: actions/checkout@v4
@ -146,8 +225,8 @@ jobs:
phact/openrag-opensearch:$VERSION-amd64 \
phact/openrag-opensearch:$VERSION-arm64
# Only update latest tags if version is numeric AND checkbox is checked
if [[ "$VERSION" =~ ^[0-9.-]+$ ]] && [[ "${{ github.event.inputs.update_latest }}" == "true" ]]; then
# Only update latest tags if version is numeric
if [[ "$VERSION" =~ ^[0-9.-]+$ ]]; then
echo "Updating latest tags for production release: $VERSION"
docker buildx imagetools create -t phact/openrag-backend:latest \
phact/openrag-backend:$VERSION-amd64 \
@ -165,5 +244,5 @@ jobs:
phact/openrag-opensearch:$VERSION-amd64 \
phact/openrag-opensearch:$VERSION-arm64
else
echo "Skipping latest tags - version: $VERSION, update_latest: ${{ github.event.inputs.update_latest }}"
echo "Skipping latest tags - version: $VERSION (not numeric)"
fi

54
.github/workflows/test-integration.yml vendored Normal file
View file

@ -0,0 +1,54 @@
name: Integration Tests
on:
pull_request:
push:
branches:
- main
jobs:
tests:
runs-on: [self-hosted, linux, ARM64, langflow-ai-arm64-40gb]
env:
# Prefer repository/environment variable first, then secret, then a sane fallback
OPENSEARCH_PASSWORD: ${{ vars.OPENSEARCH_PASSWORD || secrets.OPENSEARCH_PASSWORD || 'OpenRag#2025!' }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
steps:
- run: df -h
#- name: "node-cleanup"
#run: |
# sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache/CodeQL
# sudo docker image prune --all --force
# sudo docker builder prune -a
- run: df -h
- name: Checkout
uses: actions/checkout@v4
- name: Set up UV
uses: astral-sh/setup-uv@v3
with:
version: latest
- name: Python version
run: uv python install 3.13
- name: Install dependencies
run: uv sync
- name: Run integration tests
env:
OPENSEARCH_HOST: localhost
OPENSEARCH_PORT: 9200
OPENSEARCH_USERNAME: admin
OPENSEARCH_PASSWORD: ${{ env.OPENSEARCH_PASSWORD }}
LOG_LEVEL: DEBUG
# Force no-auth mode so tests bypass OAuth
GOOGLE_OAUTH_CLIENT_ID: ""
GOOGLE_OAUTH_CLIENT_SECRET: ""
# Disable startup ingest noise unless a test enables it
DISABLE_STARTUP_INGEST: "true"
run: |
make test-ci
echo "Keys directory after tests:"
ls -la keys/ || echo "No keys directory"

2
.gitignore vendored
View file

@ -18,6 +18,8 @@ wheels/
1001*.pdf
*.json
!flows/*.json
!src/tui/_assets/flows/*.json
!src/tui/_assets/flows/components/*.json
.DS_Store
config/

View file

@ -11,20 +11,48 @@ Thank you for your interest in contributing to OpenRAG! This guide will help you
- Python 3.13+ with uv package manager
- Node.js 18+ and npm
### Environment Setup
### Set up OpenRAG for development
1. Set up your development environment.
```bash
# Clone the repository
git clone <repository-url>
# Clone and setup environment
git clone https://github.com/langflow-ai/openrag.git
cd openrag
# Setup development environment
make setup # Creates .env and installs dependencies
```
### Configuration
2. Configure the `.env` file with your API keys and credentials.
Edit `.env` with your API keys and credentials. See the main README for required environment variables.
```bash
# Required
OPENAI_API_KEY=your_openai_api_key
OPENSEARCH_PASSWORD=your_secure_password
LANGFLOW_SUPERUSER=admin
LANGFLOW_SUPERUSER_PASSWORD=your_secure_password
LANGFLOW_CHAT_FLOW_ID=your_chat_flow_id
LANGFLOW_INGEST_FLOW_ID=your_ingest_flow_id
NUDGES_FLOW_ID=your_nudges_flow_id
```
For extended configuration, including ingestion and optional variables, see [docs/reference/configuration.mdx](docs/docs/reference/configuration.mdx).
3. Start OpenRAG.
```bash
# Full stack with GPU support
make dev
# Or CPU only
make dev-cpu
```
Access the services:
- **Frontend**: http://localhost:3000
- **Backend API**: http://localhost:8000
- **Langflow**: http://localhost:7860
- **OpenSearch**: http://localhost:9200
- **OpenSearch Dashboards**: http://localhost:5601
## 🔧 Development Commands

View file

@ -1,4 +1,4 @@
FROM langflowai/langflow-nightly:1.6.3.dev0
FROM langflowai/langflow-nightly:1.6.3.dev1
EXPOSE 7860

1
MANIFEST.in Normal file
View file

@ -0,0 +1 @@
recursive-include src/tui/_assets *

149
Makefile
View file

@ -1,7 +1,17 @@
# OpenRAG Development Makefile
# Provides easy commands for development workflow
.PHONY: help dev dev-cpu dev-local infra stop clean build logs shell-backend shell-frontend install test backend frontend install-be install-fe build-be build-fe logs-be logs-fe logs-lf logs-os shell-be shell-lf shell-os restart status health db-reset flow-upload quick setup
# Load variables from .env if present so `make` commands pick them up
ifneq (,$(wildcard .env))
include .env
# Export all simple KEY=VALUE pairs to the environment for child processes
export $(shell sed -n 's/^\([A-Za-z_][A-Za-z0-9_]*\)=.*/\1/p' .env)
endif
.PHONY: help dev dev-cpu dev-local infra stop clean build logs shell-backend shell-frontend install \
test test-integration test-ci \
backend frontend install-be install-fe build-be build-fe logs-be logs-fe logs-lf logs-os \
shell-be shell-lf shell-os restart status health db-reset flow-upload quick setup
# Default target
help:
@ -32,14 +42,16 @@ help:
@echo " shell-lf - Shell into langflow container"
@echo ""
@echo "Testing:"
@echo " test - Run backend tests"
@echo " test - Run all backend tests"
@echo " test-integration - Run integration tests (requires infra)"
@echo " test-ci - Start infra, run integration tests, tear down"
@echo " lint - Run linting checks"
@echo ""
# Development environments
dev:
@echo "🚀 Starting OpenRAG with GPU support..."
docker-compose up -d
docker compose up -d
@echo "✅ Services started!"
@echo " Backend: http://localhost:8000"
@echo " Frontend: http://localhost:3000"
@ -49,7 +61,7 @@ dev:
dev-cpu:
@echo "🚀 Starting OpenRAG with CPU only..."
docker-compose -f docker-compose-cpu.yml up -d
docker compose -f docker-compose-cpu.yml up -d
@echo "✅ Services started!"
@echo " Backend: http://localhost:8000"
@echo " Frontend: http://localhost:3000"
@ -59,7 +71,7 @@ dev-cpu:
dev-local:
@echo "🔧 Starting infrastructure only (for local development)..."
docker-compose up -d opensearch dashboards langflow
docker compose up -d opensearch dashboards langflow
@echo "✅ Infrastructure started!"
@echo " Langflow: http://localhost:7860"
@echo " OpenSearch: http://localhost:9200"
@ -69,7 +81,7 @@ dev-local:
infra:
@echo "🔧 Starting infrastructure services only..."
docker-compose up -d opensearch dashboards langflow
docker compose up -d opensearch dashboards langflow
@echo "✅ Infrastructure services started!"
@echo " Langflow: http://localhost:7860"
@echo " OpenSearch: http://localhost:9200"
@ -86,15 +98,15 @@ infra-cpu:
# Container management
stop:
@echo "🛑 Stopping all containers..."
docker-compose down
docker-compose -f docker-compose-cpu.yml down 2>/dev/null || true
docker compose down
docker compose -f docker-compose-cpu.yml down 2>/dev/null || true
restart: stop dev
clean: stop
@echo "🧹 Cleaning up containers and volumes..."
docker-compose down -v --remove-orphans
docker-compose -f docker-compose-cpu.yml down -v --remove-orphans 2>/dev/null || true
docker compose down -v --remove-orphans
docker compose -f docker-compose-cpu.yml down -v --remove-orphans 2>/dev/null || true
docker system prune -f
# Local development
@ -114,7 +126,7 @@ install: install-be install-fe
install-be:
@echo "📦 Installing backend dependencies..."
uv sync
uv sync --extra torch-cu128
install-fe:
@echo "📦 Installing frontend dependencies..."
@ -123,7 +135,7 @@ install-fe:
# Building
build:
@echo "🔨 Building Docker images..."
docker-compose build
docker compose build
build-be:
@echo "🔨 Building backend image..."
@ -136,41 +148,124 @@ build-fe:
# Logging and debugging
logs:
@echo "📋 Showing all container logs..."
docker-compose logs -f
docker compose logs -f
logs-be:
@echo "📋 Showing backend logs..."
docker-compose logs -f openrag-backend
docker compose logs -f openrag-backend
logs-fe:
@echo "📋 Showing frontend logs..."
docker-compose logs -f openrag-frontend
docker compose logs -f openrag-frontend
logs-lf:
@echo "📋 Showing langflow logs..."
docker-compose logs -f langflow
docker compose logs -f langflow
logs-os:
@echo "📋 Showing opensearch logs..."
docker-compose logs -f opensearch
docker compose logs -f opensearch
# Shell access
shell-be:
@echo "🐚 Opening shell in backend container..."
docker-compose exec openrag-backend /bin/bash
docker compose exec openrag-backend /bin/bash
shell-lf:
@echo "🐚 Opening shell in langflow container..."
docker-compose exec langflow /bin/bash
docker compose exec langflow /bin/bash
shell-os:
@echo "🐚 Opening shell in opensearch container..."
docker-compose exec opensearch /bin/bash
docker compose exec opensearch /bin/bash
# Testing and quality
test:
@echo "🧪 Running backend tests..."
uv run pytest
@echo "🧪 Running all backend tests..."
uv run pytest tests/ -v
test-integration:
@echo "🧪 Running integration tests (requires infrastructure)..."
@echo "💡 Make sure to run 'make infra' first!"
uv run pytest tests/integration/ -v
# CI-friendly integration test target: brings up infra, waits, runs tests, tears down
test-ci:
@set -e; \
echo "Installing test dependencies..."; \
uv sync --group dev; \
if [ ! -f keys/private_key.pem ]; then \
echo "Generating RSA keys for JWT signing..."; \
uv run python -c "from src.main import generate_jwt_keys; generate_jwt_keys()"; \
else \
echo "RSA keys already exist, ensuring correct permissions..."; \
chmod 600 keys/private_key.pem 2>/dev/null || true; \
chmod 644 keys/public_key.pem 2>/dev/null || true; \
fi; \
echo "Cleaning up old containers and volumes..."; \
docker compose -f docker-compose-cpu.yml down -v 2>/dev/null || true; \
echo "Pulling latest images..."; \
docker compose -f docker-compose-cpu.yml pull; \
echo "Starting infra (OpenSearch + Dashboards + Langflow) with CPU containers"; \
docker compose -f docker-compose-cpu.yml up -d opensearch dashboards langflow; \
echo "Starting docling-serve..."; \
DOCLING_ENDPOINT=$$(uv run python scripts/docling_ctl.py start --port 5001 | grep "Endpoint:" | awk '{print $$2}'); \
echo "Docling-serve started at $$DOCLING_ENDPOINT"; \
echo "Waiting for backend OIDC endpoint..."; \
for i in $$(seq 1 60); do \
docker exec openrag-backend curl -s http://localhost:8000/.well-known/openid-configuration >/dev/null 2>&1 && break || sleep 2; \
done; \
echo "Waiting for OpenSearch security config to be fully applied..."; \
for i in $$(seq 1 60); do \
if docker logs os 2>&1 | grep -q "Security configuration applied successfully"; then \
echo "✓ Security configuration applied"; \
break; \
fi; \
sleep 2; \
done; \
echo "Verifying OIDC authenticator is active in OpenSearch..."; \
AUTHC_CONFIG=$$(curl -k -s -u admin:$${OPENSEARCH_PASSWORD} https://localhost:9200/_opendistro/_security/api/securityconfig 2>/dev/null); \
if echo "$$AUTHC_CONFIG" | grep -q "openid_auth_domain"; then \
echo "✓ OIDC authenticator configured"; \
echo "$$AUTHC_CONFIG" | grep -A 5 "openid_auth_domain"; \
else \
echo "✗ OIDC authenticator NOT found in security config!"; \
echo "Security config:"; \
echo "$$AUTHC_CONFIG" | head -50; \
exit 1; \
fi; \
echo "Waiting for Langflow..."; \
for i in $$(seq 1 60); do \
curl -s http://localhost:7860/ >/dev/null 2>&1 && break || sleep 2; \
done; \
echo "Waiting for docling-serve at $$DOCLING_ENDPOINT..."; \
for i in $$(seq 1 60); do \
curl -s $${DOCLING_ENDPOINT}/health >/dev/null 2>&1 && break || sleep 2; \
done; \
echo "Running integration tests"; \
LOG_LEVEL=$${LOG_LEVEL:-DEBUG} \
GOOGLE_OAUTH_CLIENT_ID="" \
GOOGLE_OAUTH_CLIENT_SECRET="" \
OPENSEARCH_HOST=localhost OPENSEARCH_PORT=9200 \
OPENSEARCH_USERNAME=admin OPENSEARCH_PASSWORD=$${OPENSEARCH_PASSWORD} \
DISABLE_STARTUP_INGEST=$${DISABLE_STARTUP_INGEST:-true} \
uv run pytest tests/integration -vv -s -o log_cli=true --log-cli-level=DEBUG; \
TEST_RESULT=$$?; \
echo ""; \
echo "=== Post-test JWT diagnostics ==="; \
echo "Generating test JWT token..."; \
TEST_TOKEN=$$(uv run python -c "from src.session_manager import SessionManager, AnonymousUser; sm = SessionManager('test'); print(sm.create_jwt_token(AnonymousUser()))" 2>/dev/null || echo ""); \
if [ -n "$$TEST_TOKEN" ]; then \
echo "Testing JWT against OpenSearch..."; \
HTTP_CODE=$$(curl -k -s -w "%{http_code}" -o /tmp/os_diag.txt -H "Authorization: Bearer $$TEST_TOKEN" -H "Content-Type: application/json" https://localhost:9200/documents/_search -d '{"query":{"match_all":{}}}' 2>&1); \
echo "HTTP $$HTTP_CODE: $$(cat /tmp/os_diag.txt | head -c 150)"; \
fi; \
echo "================================="; \
echo ""; \
echo "Tearing down infra"; \
uv run python scripts/docling_ctl.py stop || true; \
docker compose down -v || true; \
exit $$TEST_RESULT
lint:
@echo "🔍 Running linting checks..."
@ -180,19 +275,19 @@ lint:
# Service status
status:
@echo "📊 Container status:"
@docker-compose ps 2>/dev/null || echo "No containers running"
@docker compose ps 2>/dev/null || echo "No containers running"
health:
@echo "🏥 Health check:"
@echo "Backend: $$(curl -s http://localhost:8000/health 2>/dev/null || echo 'Not responding')"
@echo "Langflow: $$(curl -s http://localhost:7860/health 2>/dev/null || echo 'Not responding')"
@echo "OpenSearch: $$(curl -s -k -u admin:$(shell grep OPENSEARCH_PASSWORD .env | cut -d= -f2) https://localhost:9200 2>/dev/null | jq -r .tagline 2>/dev/null || echo 'Not responding')"
@echo "OpenSearch: $$(curl -s -k -u admin:$${OPENSEARCH_PASSWORD} https://localhost:9200 2>/dev/null | jq -r .tagline 2>/dev/null || echo 'Not responding')"
# Database operations
db-reset:
@echo "🗄️ Resetting OpenSearch indices..."
curl -X DELETE "http://localhost:9200/documents" -u admin:$$(grep OPENSEARCH_PASSWORD .env | cut -d= -f2) || true
curl -X DELETE "http://localhost:9200/knowledge_filters" -u admin:$$(grep OPENSEARCH_PASSWORD .env | cut -d= -f2) || true
curl -X DELETE "http://localhost:9200/documents" -u admin:$${OPENSEARCH_PASSWORD} || true
curl -X DELETE "http://localhost:9200/knowledge_filters" -u admin:$${OPENSEARCH_PASSWORD} || true
@echo "Indices reset. Restart backend to recreate."
# Flow management
@ -215,4 +310,4 @@ setup:
@echo "⚙️ Setting up development environment..."
@if [ ! -f .env ]; then cp .env.example .env && echo "📝 Created .env from template"; fi
@$(MAKE) install
@echo "✅ Setup complete! Run 'make dev' to start."
@echo "✅ Setup complete! Run 'make dev' to start."

214
README.md
View file

@ -2,20 +2,6 @@
# OpenRAG
</div>
<div align="center">
<a href="#quick-start" style="color: #0366d6;">🚀 Quick Start</a> &nbsp;&nbsp;|&nbsp;&nbsp;
<a href="#tui-interface" style="color: #0366d6;">💻 TUI Interface</a> &nbsp;&nbsp;|&nbsp;&nbsp;
<a href="#docker-deployment" style="color: #0366d6;">🐳 Docker Deployment</a> &nbsp;&nbsp;|&nbsp;&nbsp;
<a href="#development" style="color: #0366d6;">⚙️ Development</a> &nbsp;&nbsp;|&nbsp;&nbsp;
<a href="#troubleshooting" style="color: #0366d6;">🔧 Troubleshooting</a>
</div>
OpenRAG is a comprehensive Retrieval-Augmented Generation platform that enables intelligent document search and AI-powered conversations. Users can upload, process, and query documents through a chat interface backed by large language models and semantic search capabilities. The system utilizes Langflow for document ingestion, retrieval workflows, and intelligent nudges, providing a seamless RAG experience. Built with Starlette, Next.js, OpenSearch, and Langflow integration. [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/phact/openrag)
<div align="center">
<a href="https://github.com/langflow-ai/langflow"><img src="https://img.shields.io/badge/Langflow-1C1C1E?style=flat&logo=langflow" alt="Langflow"></a>
&nbsp;&nbsp;
@ -24,144 +10,124 @@ OpenRAG is a comprehensive Retrieval-Augmented Generation platform that enables
<a href="https://github.com/encode/starlette"><img src="https://img.shields.io/badge/Starlette-009639?style=flat&logo=fastapi&logoColor=white" alt="Starlette"></a>
&nbsp;&nbsp;
<a href="https://github.com/vercel/next.js"><img src="https://img.shields.io/badge/Next.js-000000?style=flat&logo=next.js&logoColor=white" alt="Next.js"></a>
&nbsp;&nbsp;
<a href="https://deepwiki.com/phact/openrag"><img src="https://deepwiki.com/badge.svg" alt="Ask DeepWiki"></a>
</div>
OpenRAG is a comprehensive Retrieval-Augmented Generation platform that enables intelligent document search and AI-powered conversations. Users can upload, process, and query documents through a chat interface backed by large language models and semantic search capabilities. The system utilizes Langflow for document ingestion, retrieval workflows, and intelligent nudges, providing a seamless RAG experience. Built with Starlette, Next.js, OpenSearch, and Langflow integration.
</div>
<div align="center">
<a href="#quickstart" style="color: #0366d6;">Quickstart</a> &nbsp;&nbsp;|&nbsp;&nbsp;
<a href="#tui-interface" style="color: #0366d6;">TUI Interface</a> &nbsp;&nbsp;|&nbsp;&nbsp;
<a href="#docker-deployment" style="color: #0366d6;">Docker Deployment</a> &nbsp;&nbsp;|&nbsp;&nbsp;
<a href="#development" style="color: #0366d6;">Development</a> &nbsp;&nbsp;|&nbsp;&nbsp;
<a href="#troubleshooting" style="color: #0366d6;">Troubleshooting</a>
</div>
## Quickstart
Use the OpenRAG Terminal User Interface (TUI) to manage your OpenRAG installation without complex command-line operations.
To launch OpenRAG with the TUI, do the following:
## 🚀 Quick Start
1. Clone the OpenRAG repository.
```bash
git clone https://github.com/langflow-ai/openrag.git
cd openrag
```
### Prerequisites
2. To start the TUI, from the repository root, run:
```bash
# Install dependencies first
uv sync
# Launch the TUI
uv run openrag
```
- Docker or Podman with Compose installed
- Make (for development commands)
The TUI opens and guides you through OpenRAG setup.
### 1. Environment Setup
For the full TUI guide, see [TUI](docs/docs/get-started/tui.mdx).
```bash
# Clone and setup environment
git clone https://github.com/langflow-ai/openrag.git
cd openrag
make setup # Creates .env and installs dependencies
```
## Docker Deployment
### 2. Configure Environment
If you prefer to use Docker to run OpenRAG, the repository includes two Docker Compose `.yml` files.
They deploy the same applications and containers, but to different environments.
Edit `.env` with your API keys and credentials:
- [`docker-compose.yml`](https://github.com/langflow-ai/openrag/blob/main/docker-compose.yml) is an OpenRAG deployment for environments with GPU support. GPU support requires an NVIDIA GPU with CUDA support and compatible NVIDIA drivers installed on the OpenRAG host machine.
```bash
# Required
OPENAI_API_KEY=your_openai_api_key
OPENSEARCH_PASSWORD=your_secure_password
LANGFLOW_SUPERUSER=admin
LANGFLOW_SUPERUSER_PASSWORD=your_secure_password
LANGFLOW_CHAT_FLOW_ID=your_chat_flow_id
LANGFLOW_INGEST_FLOW_ID=your_ingest_flow_id
NUDGES_FLOW_ID=your_nudges_flow_id
```
See extended configuration, including ingestion and optional variables: [docs/reference/configuration.mdx](docs/docs/reference/configuration.mdx)
### 3. Start OpenRAG
- [`docker-compose-cpu.yml`](https://github.com/langflow-ai/openrag/blob/main/docker-compose-cpu.yml) is a CPU-only version of OpenRAG for systems without GPU support. Use this Docker compose file for environments where GPU drivers aren't available.
```bash
# Full stack with GPU support
make dev
Both Docker deployments depend on `docling serve` to be running on port `5001` on the host machine. This enables [Mac MLX](https://opensource.apple.com/projects/mlx/) support for document processing. Installing OpenRAG with the TUI starts `docling serve` automatically, but for a Docker deployment you must manually start the `docling serve` process.
# Or CPU only
make dev-cpu
```
To deploy OpenRAG with Docker:
Access the services:
- **Frontend**: http://localhost:3000
- **Backend API**: http://localhost:8000
- **Langflow**: http://localhost:7860
- **OpenSearch**: http://localhost:9200
- **OpenSearch Dashboards**: http://localhost:5601
1. Clone the OpenRAG repository.
```bash
git clone https://github.com/langflow-ai/openrag.git
cd openrag
```
## 🖥️ TUI Interface
2. Install dependencies.
```bash
uv sync
```
OpenRAG includes a powerful Terminal User Interface (TUI) for easy setup, configuration, and monitoring. The TUI provides a user-friendly way to manage your OpenRAG installation without complex command-line operations.
3. Start `docling serve` on the host machine.
```bash
uv run python scripts/docling_ctl.py start --port 5001
```
4. Confirm `docling serve` is running.
```
uv run python scripts/docling_ctl.py status
```
![OpenRAG TUI Interface](assets/OpenRAG_TUI_2025-09-10T13_04_11_757637.svg)
Successful result:
```bash
Status: running
Endpoint: http://127.0.0.1:5001
Docs: http://127.0.0.1:5001/docs
PID: 27746
```
### Launching the TUI
5. Build and start all services.
```bash
# Install dependencies first
uv sync
For the GPU-accelerated deployment, run:
```bash
docker compose build
docker compose up -d
```
# Launch the TUI
uv run openrag
```
For environments without GPU support, run:
```bash
docker compose -f docker-compose-cpu.yml up -d
```
### TUI Features
The OpenRAG Docker Compose file starts five containers:
| Container Name | Default Address | Purpose |
|---|---|---|
| OpenRAG Backend | http://localhost:8000 | FastAPI server and core functionality. |
| OpenRAG Frontend | http://localhost:3000 | React web interface for users. |
| Langflow | http://localhost:7860 | AI workflow engine and flow management. |
| OpenSearch | http://localhost:9200 | Vector database for document storage. |
| OpenSearch Dashboards | http://localhost:5601 | Database administration interface. |
See the full TUI guide for features, navigation, and benefits: [docs/get-started/tui.mdx](docs/docs/get-started/tui.mdx)
6. Access the OpenRAG application at `http://localhost:3000` and continue with the [Quickstart](docs/docs/get-started/quickstart.mdx).
To stop `docling serve`, run:
```bash
uv run python scripts/docling_ctl.py stop
```
For more information, see [Deploy with Docker](docs/docs/get-started/docker.mdx).
## Troubleshooting
## 🐳 Docker Deployment
For common issues and fixes, see [Troubleshoot](docs/docs/support/troubleshoot.mdx).
### Standard Deployment
## Development
```bash
# Build and start all services
docker compose build
docker compose up -d
```
### CPU-Only Deployment
For environments without GPU support:
```bash
docker compose -f docker-compose-cpu.yml up -d
```
More deployment commands and tips: [docs/get-started/docker.mdx](docs/docs/get-started/docker.mdx)
## 🔧 Troubleshooting
### Podman on macOS
If using Podman on macOS, you may need to increase VM memory:
```bash
podman machine stop
podman machine rm
podman machine init --memory 8192 # 8 GB example
podman machine start
```
### Common Issues
See common issues and fixes: [docs/support/troubleshoot.mdx](docs/docs/support/troubleshoot.mdx)
## 🛠️ Development
For developers wanting to contribute to OpenRAG or set up a development environment, please see our comprehensive development guide:
**[📚 See CONTRIBUTING.md for detailed development instructions](CONTRIBUTING.md)**
The contributing guide includes:
- Complete development environment setup
- Local development workflows
- Testing and debugging procedures
- Code style guidelines
- Architecture overview
- Pull request guidelines
### Quick Development Commands
```bash
make help # See all available commands
make setup # Initial development setup
make infra # Start infrastructure services
make backend # Run backend locally
make frontend # Run frontend locally
```
For developers wanting to contribute to OpenRAG or set up a development environment, see [CONTRIBUTING.md](CONTRIBUTING.md).

View file

@ -74,7 +74,7 @@ services:
volumes:
- ./documents:/app/documents:Z
- ./keys:/app/keys:Z
- ./flows:/app/flows:Z
- ./flows:/app/flows:U,z
openrag-frontend:
image: phact/openrag-frontend:${OPENRAG_VERSION:-latest}
@ -91,7 +91,7 @@ services:
langflow:
volumes:
- ./flows:/app/flows:Z
- ./flows:/app/flows:U,z
image: phact/openrag-langflow:${LANGFLOW_VERSION:-latest}
# build:
# context: .
@ -108,6 +108,7 @@ services:
- OWNER_NAME=None
- OWNER_EMAIL=None
- CONNECTOR_TYPE=system
- CONNECTOR_TYPE_URL=url
- OPENRAG-QUERY-FILTER="{}"
- OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
- FILENAME=None

View file

@ -73,7 +73,7 @@ services:
volumes:
- ./documents:/app/documents:Z
- ./keys:/app/keys:Z
- ./flows:/app/flows:z
- ./flows:/app/flows:U,z
gpus: all
openrag-frontend:
@ -81,7 +81,6 @@ services:
# build:
# context: .
# dockerfile: Dockerfile.frontend
#dockerfile: Dockerfile.frontend
container_name: openrag-frontend
depends_on:
- openrag-backend
@ -92,7 +91,7 @@ services:
langflow:
volumes:
- ./flows:/app/flows:z
- ./flows:/app/flows:U,z
image: phact/openrag-langflow:${LANGFLOW_VERSION:-latest}
# build:
# context: .
@ -109,6 +108,7 @@ services:
- OWNER_NAME=None
- OWNER_EMAIL=None
- CONNECTOR_TYPE=system
- CONNECTOR_TYPE_URL=url
- OPENRAG-QUERY-FILTER="{}"
- FILENAME=None
- MIMETYPE=None

View file

@ -1,4 +0,0 @@
:::info
OpenRAG is currently in public preview.
Development is ongoing, and the features and functionality are subject to change.
:::

View file

@ -7,9 +7,6 @@ import Icon from "@site/src/components/icon/icon";
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import PartialModifyFlows from '@site/docs/_partial-modify-flows.mdx';
import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
<PartialExternalPreview />
OpenRAG leverages Langflow's Agent component to power the OpenRAG OpenSearch Agent flow.
@ -34,11 +31,11 @@ In an agentic context, tools are functions that the agent can run to perform tas
</details>
## Use the OpenRAG OpenSearch Agent flow
## Use the OpenRAG OpenSearch Agent flow {#flow}
If you've chatted with your knowledge in OpenRAG, you've already experienced the OpenRAG OpenSearch Agent chat flow.
To switch OpenRAG over to the [Langflow visual editor](https://docs.langflow.org/concepts-overview) and view the OpenRAG OpenSearch Agentflow, click <Icon name="Settings2" aria-hidden="true"/> **Settings**, and then click **Edit in Langflow**.
This flow contains seven components connected together to chat with your data:
This flow contains eight components connected together to chat with your data:
* The [**Agent** component](https://docs.langflow.org/agents) orchestrates the entire flow by deciding when to search the knowledge base, how to formulate search queries, and how to combine retrieved information with the user's question to generate a comprehensive response.
The **Agent** behaves according to the prompt in the **Agent Instructions** field.
@ -49,6 +46,7 @@ The **Agent** behaves according to the prompt in the **Agent Instructions** fiel
* The [**Text Input** component](https://docs.langflow.org/components-io) is populated with the global variable `OPENRAG-QUERY-FILTER`.
This filter is the [Knowledge filter](/knowledge#create-knowledge-filters), and filters which knowledge sources to search through.
* The **Agent** component's Output port is connected to the [**Chat Output** component](https://docs.langflow.org/components-io), which returns the final response to the user or application.
* An [**MCP Tools** component](https://docs.langflow.org/mcp-client) is connected to the Agent's **Tools** port. This component calls the [OpenSearch URL Ingestion flow](/ingestion#url-flow), which Langflow uses as an MCP server to fetch content from URLs and store in OpenSearch.
<PartialModifyFlows />

View file

@ -7,9 +7,6 @@ import Icon from "@site/src/components/icon/icon";
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import PartialModifyFlows from '@site/docs/_partial-modify-flows.mdx';
import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
<PartialExternalPreview />
OpenRAG uses [Docling](https://docling-project.github.io/docling/) for its document ingestion pipeline.
More specifically, OpenRAG uses [Docling Serve](https://github.com/docling-project/docling-serve), which starts a `docling-serve` process on your local machine and runs Docling ingestion through an API service.
@ -50,4 +47,31 @@ If you want to use OpenRAG's built-in pipeline instead of Docling serve, set `DI
The built-in pipeline still uses the Docling processor, but uses it directly without the Docling Serve API.
For more information, see [`processors.py` in the OpenRAG repository](https://github.com/langflow-ai/openrag/blob/main/src/models/processors.py#L58).
For more information, see [`processors.py` in the OpenRAG repository](https://github.com/langflow-ai/openrag/blob/main/src/models/processors.py#L58).
## Knowledge ingestion flows
[Flows](https://docs.langflow.org/concepts-overview) in Langflow are functional representations of application workflows, with multiple [component](https://docs.langflow.org/concepts-components) nodes connected as single steps in a workflow.
The **OpenSearch Ingestion** flow is the default knowledge ingestion flow in OpenRAG: when you **Add Knowledge** in OpenRAG, you run the OpenSearch Ingestion flow in the background. The flow uses **Docling Serve** to import and process documents.
This flow contains ten components connected together to process and store documents in your knowledge base.
* The [**Docling Serve** component](https://docs.langflow.org/bundles-docling) processes input documents by connecting to your instance of Docling Serve.
* The [**Export DoclingDocument** component](https://docs.langflow.org/components-docling) exports the processed DoclingDocument to markdown format with image export mode set to placeholder. This conversion makes the structured document data into a standardized format for further processing.
* Three [**DataFrame Operations** components](https://docs.langflow.org/components-processing#dataframe-operations) sequentially add `filename`, `file_size`, and `mimetype` metadata columns to the document data.
* The [**Split Text** component](https://docs.langflow.org/components-processing#split-text) splits the processed text into chunks with a chunk size of 1000 characters and an overlap of 200 characters.
* Four **Secret Input** components provide secure access to configuration variables: `CONNECTOR_TYPE`, `OWNER`, `OWNER_EMAIL`, and `OWNER_NAME`. These are runtime variables populated from OAuth login.
* The **Create Data** component combines the secret inputs into a structured data object that will be associated with the document embeddings.
* The [**Embedding Model** component](https://docs.langflow.org/components-embedding-models) generates vector embeddings using OpenAI's `text-embedding-3-small` model. The embedding model is selected during application onboarding and cannot be changed.
* The [**OpenSearch** component](https://docs.langflow.org/bundles-elastic#opensearch) stores the processed documents and their embeddings in the `documents` index at `https://opensearch:9200`. By default, the component is authenticated with a JWT token, but you can also select `basic` auth mode, and enter your OpenSearch admin username and password.
<PartialModifyFlows />
### OpenSearch URL Ingestion flow {#url-flow}
An additional knowledge ingestion flow is included in OpenRAG, where it is used as an MCP tool by the [**OpenSearch Agent flow**](/agents#flow).
The agent calls this component to fetch web content, and the results are ingested into OpenSearch.
For more on using MCP clients in Langflow, see [MCP clients](https://docs.langflow.org/mcp-client).\
To connect additional MCP servers to the MCP client, see [Connect to MCP servers from your application](https://docs.langflow.org/mcp-tutorial).

View file

@ -7,17 +7,23 @@ import Icon from "@site/src/components/icon/icon";
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import PartialModifyFlows from '@site/docs/_partial-modify-flows.mdx';
import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
<PartialExternalPreview />
OpenRAG uses [OpenSearch](https://docs.opensearch.org/latest/) for its vector-backed knowledge store.
This is a specialized database for storing and retrieving embeddings, which helps your Agent efficiently find relevant information.
OpenSearch provides powerful hybrid search capabilities with enterprise-grade security and multi-tenancy support.
## Authentication and document access {#auth}
OpenRAG supports two authentication modes based on how you [install OpenRAG](/install), and which mode you choose affects document access.
**No-auth mode (Basic Setup)**: This mode uses a single anonymous JWT token for OpenSearch authentication, so documents uploaded to the `documents` index by one user are visible to all other users on the OpenRAG server.
**OAuth mode (Advanced Setup)**: Each OpenRAG user is granted a JWT token, and each document is tagged with user ownership. Documents are filtered by user ownership, ensuring users only see documents they uploaded or have access to.
## Ingest knowledge
OpenRAG supports knowledge ingestion through direct file uploads and OAuth connectors.
To configure the knowledge ingestion pipeline parameters, see [Docling Ingestion](/ingestion).
### Direct file ingestion
@ -78,18 +84,6 @@ You can select multiples.
The ingestion process may take some time, depending on the size of your documents.
4. When ingestion is complete, your documents are available in the Knowledge screen.
### Sync cloud connectors
Your connected data sources are found in the <Icon name="Settings2" aria-hidden="true"/> **Settings** page.
When you click **Sync Now** for a connected cloud service like Google Drive, OpenRAG scans your connected Google Drive account to find files that match your sync criteria. Sync criteria are controlled in **Sync Settings** on the same page. You can sync all files, or select a maximum number of files to sync.
For each file found, OpenRAG downloads, converts, and embeds the processed content into OpenSearch.
You can monitor the sync progress in the <Icon name="Bell" aria-hidden="true"/> **Tasks** sidebar.
Once processing is complete, the synced documents become available in your knowledge base and can be searched through the chat interface or Knowledge page.
## Explore knowledge
The **Knowledge** page lists the documents OpenRAG has ingested into the OpenSearch vector database's `documents` index.
@ -101,10 +95,6 @@ Documents are processed with the default **Knowledge Ingest** flow, so if you wa
<PartialModifyFlows />
### Knowledge ingestion settings
To configure the knowledge ingestion pipeline parameters, see [Docling Ingestion](/ingestion).
## Create knowledge filters
OpenRAG includes a knowledge filter system for organizing and managing document collections.

View file

@ -4,9 +4,6 @@ slug: /get-started/docker
---
import PartialOnboarding from '@site/docs/_partial-onboarding.mdx';
import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
<PartialExternalPreview />
There are two different Docker Compose files.
They deploy the same applications and containers, but to different environments.

View file

@ -6,9 +6,6 @@ slug: /install
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import PartialOnboarding from '@site/docs/_partial-onboarding.mdx';
import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
<PartialExternalPreview />
[Install the OpenRAG Python wheel](#install-python-wheel), and then run the [OpenRAG Terminal User Interface (TUI)](#setup) to start your OpenRAG deployment with a guided setup process.
@ -65,13 +62,15 @@ The OpenRAG wheel installs the Terminal User Interface (TUI) for configuring and
## Set up OpenRAG with the TUI {#setup}
The TUI creates a `.env` file in your OpenRAG directory root and starts OpenRAG.
If the TUI detects a `.env` file in the OpenRAG root directory, it sources any variables from the `.env` file.
If the TUI detects OAuth credentials, it enforces the **Advanced Setup** path.
**Basic Setup** generates all of the required values except the OpenAI API key.
**Basic Setup** does not set up OAuth connections for ingestion from Google Drive, OneDrive, or AWS.
**Basic Setup** generates all of the required values for OpenRAG except the OpenAI API key.
**Basic Setup** does not set up OAuth connections for ingestion from cloud providers.
For OAuth setup, use **Advanced Setup**.
If the TUI detects OAuth credentials, it enforces the **Advanced Setup** path.
If the TUI detects a `.env` file in the OpenRAG root directory, it will source any variables from the `.env` file.
**Basic Setup** and **Advanced Setup** enforce the same authentication settings for the Langflow server, but manage document access differently. For more information, see [Authentication and document access](/knowledge#auth).
<Tabs groupId="Setup method">
<TabItem value="Basic setup" label="Basic setup" default>
@ -90,6 +89,7 @@ If the TUI detects a `.env` file in the OpenRAG root directory, it will source a
7. Continue with [Application Onboarding](#application-onboarding).
</TabItem>
<TabItem value="Advanced setup" label="Advanced setup">
1. To install OpenRAG with **Advanced Setup**, click **Advanced Setup** or press <kbd>2</kbd>.
2. Click **Generate Passwords** to generate passwords for OpenSearch and Langflow.
3. Paste your OpenAI API key in the OpenAI API key field.

View file

@ -6,9 +6,6 @@ slug: /quickstart
import Icon from "@site/src/components/icon/icon";
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
<PartialExternalPreview />
Get started with OpenRAG by loading your knowledge, swapping out your language model, and then chatting with the OpenRAG API.

View file

@ -3,10 +3,6 @@ title: Terminal User Interface (TUI) commands
slug: /get-started/tui
---
import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
<PartialExternalPreview />
The OpenRAG Terminal User Interface (TUI) allows you to set up, configure, and monitor your OpenRAG deployment directly from the terminal, on any operating system.
![OpenRAG TUI Interface](@site/static/img/OpenRAG_TUI_2025-09-10T13_04_11_757637.svg)

View file

@ -3,10 +3,6 @@ title: What is OpenRAG?
slug: /
---
import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
<PartialExternalPreview />
OpenRAG is an open-source package for building agentic RAG systems.
It supports integration with a wide range of orchestration tools, vector databases, and LLM providers.

View file

@ -5,9 +5,6 @@ slug: /support/troubleshoot
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
<PartialExternalPreview />
This page provides troubleshooting advice for issues you might encounter when using OpenRAG or contributing to OpenRAG.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 951 KiB

After

Width:  |  Height:  |  Size: 1,004 KiB

File diff suppressed because one or more lines are too long

View file

@ -232,6 +232,7 @@
},
{
"animated": false,
"className": "",
"data": {
"sourceHandle": {
"dataType": "EmbeddingModel",
@ -733,6 +734,10 @@
{
"key": "owner_email",
"value": "OWNER_EMAIL"
},
{
"key": "connector_type",
"value": "CONNECTOR_TYPE_URL"
}
]
},
@ -1808,7 +1813,7 @@
],
"frozen": false,
"icon": "table",
"last_updated": "2025-10-03T20:31:36.023Z",
"last_updated": "2025-10-06T17:46:55.068Z",
"legacy": false,
"lf_version": "1.6.0",
"metadata": {
@ -2224,7 +2229,7 @@
],
"frozen": false,
"icon": "table",
"last_updated": "2025-10-03T20:31:36.025Z",
"last_updated": "2025-10-06T17:46:55.069Z",
"legacy": false,
"lf_version": "1.6.0",
"metadata": {
@ -2897,7 +2902,7 @@
],
"frozen": false,
"icon": "table",
"last_updated": "2025-10-03T20:31:36.026Z",
"last_updated": "2025-10-06T17:46:55.069Z",
"legacy": false,
"metadata": {
"code_hash": "b4d6b19b6eef",
@ -3310,7 +3315,7 @@
],
"frozen": false,
"icon": "binary",
"last_updated": "2025-10-03T20:31:47.177Z",
"last_updated": "2025-10-06T17:46:54.996Z",
"legacy": false,
"metadata": {
"code_hash": "8607e963fdef",
@ -3595,17 +3600,17 @@
}
],
"viewport": {
"x": -407.1633937626607,
"y": -577.5291936220412,
"zoom": 0.5347553210574026
"x": -538.2311610019549,
"y": -337.3313239657308,
"zoom": 0.45546556043892106
}
},
"description": "This flow is to ingest the URL to open search.",
"endpoint_name": null,
"mcp_enabled": true,
"id": "72c3d17c-2dac-4a73-b48a-6518473d7830",
"mcp_enabled": true,
"is_component": false,
"last_tested_version": "1.6.0",
"last_tested_version": "1.6.3.dev1",
"name": "OpenSearch URL Ingestion Flow",
"tags": [
"openai",

View file

@ -74,7 +74,7 @@ export const KnowledgeSearchInput = () => {
{queryOverride && (
<Button
variant="ghost"
className="h-full !px-1.5 !py-0"
className="h-full rounded-sm !px-1.5 !py-0"
type="button"
onClick={() => {
setSearchQueryInput("");
@ -87,7 +87,7 @@ export const KnowledgeSearchInput = () => {
<Button
variant="ghost"
className={cn(
"h-full !px-1.5 !py-0 hidden group-focus-within/input:block",
"h-full rounded-sm !px-1.5 !py-0 hidden group-focus-within/input:block",
searchQueryInput && "block"
)}
type="submit"

View file

@ -326,4 +326,4 @@ export default function ProtectedAdminPage() {
<AdminPage />
</ProtectedRoute>
)
}
}

View file

@ -92,6 +92,7 @@ export default function ConnectorsPage() {
selectedFiles={selectedFiles}
isAuthenticated={false} // This would come from auth context in real usage
accessToken={undefined} // This would come from connected account
isIngesting={isSyncing}
/>
</div>

View file

@ -5,14 +5,9 @@ import { useRouter, useSearchParams } from "next/navigation";
import { Suspense, useCallback, useEffect, useMemo, useState } from "react";
// import { Label } from "@/components/ui/label";
// import { Checkbox } from "@/components/ui/checkbox";
import { filterAccentClasses } from "@/components/knowledge-filter-panel";
import { ProtectedRoute } from "@/components/protected-route";
import { Button } from "@/components/ui/button";
import { Checkbox } from "@/components/ui/checkbox";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context";
import { useTask } from "@/contexts/task-context";
import {
type ChunkResult,
type File,
@ -35,9 +30,9 @@ function ChunksPageContent() {
const { parsedFilterData, queryOverride } = useKnowledgeFilter();
const filename = searchParams.get("filename");
const [chunks, setChunks] = useState<ChunkResult[]>([]);
const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState<
ChunkResult[]
>([]);
// const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState<
// ChunkResult[]
// >([]);
// const [selectedChunks, setSelectedChunks] = useState<Set<number>>(new Set());
const [activeCopiedChunkIndex, setActiveCopiedChunkIndex] = useState<
number | null
@ -83,13 +78,13 @@ function ChunksPageContent() {
}, [data, filename]);
// Set selected state for all checkboxes when selectAll changes
useEffect(() => {
if (selectAll) {
setSelectedChunks(new Set(chunks.map((_, index) => index)));
} else {
setSelectedChunks(new Set());
}
}, [selectAll, setSelectedChunks, chunks]);
// useEffect(() => {
// if (selectAll) {
// setSelectedChunks(new Set(chunks.map((_, index) => index)));
// } else {
// setSelectedChunks(new Set());
// }
// }, [selectAll, setSelectedChunks, chunks]);
const handleBack = useCallback(() => {
router.push("/knowledge");
@ -126,26 +121,25 @@ function ChunksPageContent() {
return (
<div className="flex flex-col h-full">
<div className="flex flex-col h-full">
{/* Header */}
<div className="flex flex-col mb-6">
<div className="flex items-center gap-3 mb-6">
<Button
variant="ghost"
onClick={handleBack}
size="sm"
className="max-w-8 max-h-8 -m-2"
>
<ArrowLeft size={24} />
</Button>
<h1 className="text-lg font-semibold">
{/* Removes file extension from filename */}
{filename.replace(/\.[^/.]+$/, "")}
</h1>
</div>
<div className="flex flex-1">
<KnowledgeSearchInput />
{/* <div className="flex items-center pl-4 gap-2">
{/* Header */}
<div className="flex flex-col mb-6">
<div className="flex items-center gap-3 mb-6">
<Button
variant="ghost"
onClick={handleBack}
size="sm"
className="max-w-8 max-h-8 -m-2"
>
<ArrowLeft size={24} />
</Button>
<h1 className="text-lg font-semibold">
{/* Removes file extension from filename */}
{filename.replace(/\.[^/.]+$/, "")}
</h1>
</div>
<div className="flex flex-1">
<KnowledgeSearchInput />
{/* <div className="flex items-center pl-4 gap-2">
<Checkbox
id="selectAllChunks"
checked={selectAll}
@ -160,11 +154,12 @@ function ChunksPageContent() {
Select all
</Label>
</div> */}
</div>
</div>
</div>
{/* Content Area - matches knowledge page structure */}
<div className="flex-1 overflow-auto pr-6">
<div className="grid gap-6 grid-cols-1 lg:grid-cols-[3fr_1fr]">
{/* Content Area */}
<div className="row-start-2 lg:row-start-1">
{isFetching ? (
<div className="flex items-center justify-center h-64">
<div className="text-center">
@ -185,7 +180,7 @@ function ChunksPageContent() {
</div>
) : (
<div className="space-y-4 pb-6">
{chunksFilteredByQuery.map((chunk, index) => (
{chunks.map((chunk, index) => (
<div
key={chunk.filename + index}
className="bg-muted rounded-lg p-4 border border-border/50"
@ -242,31 +237,30 @@ function ChunksPageContent() {
</div>
)}
</div>
</div>
{/* Right panel - Summary (TODO), Technical details, */}
{chunks.length > 0 && (
<div className="w-[320px] py-20 px-2">
<div className="mb-8">
<h2 className="text-xl font-semibold mt-3 mb-4">
Technical details
</h2>
<dl>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">
Total chunks
</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{chunks.length}
</dd>
</div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Avg length</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{averageChunkLength.toFixed(0)} chars
</dd>
</div>
{/* TODO: Uncomment after data is available */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
{/* Right panel - Summary (TODO), Technical details, */}
{chunks.length > 0 && (
<div className="min-w-[200px]">
<div className="mb-8">
<h2 className="text-xl font-semibold mb-4">Technical details</h2>
<dl>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">
Total chunks
</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{chunks.length}
</dd>
</div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">
Avg length
</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{averageChunkLength.toFixed(0)} chars
</dd>
</div>
{/* TODO: Uncomment after data is available */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Process time</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
</dd>
@ -276,54 +270,55 @@ function ChunksPageContent() {
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
</dd>
</div> */}
</dl>
</div>
<div className="mb-8">
<h2 className="text-xl font-semibold mt-2 mb-3">
Original document
</h2>
<dl>
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
</dl>
</div>
<div className="mb-4">
<h2 className="text-xl font-semibold mt-2 mb-3">
Original document
</h2>
<dl>
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Name</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData?.filename}
</dd>
</div> */}
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Type</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"}
</dd>
</div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Size</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData?.size
? `${Math.round(fileData.size / 1024)} KB`
: "Unknown"}
</dd>
</div>
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Type</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"}
</dd>
</div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Size</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData?.size
? `${Math.round(fileData.size / 1024)} KB`
: "Unknown"}
</dd>
</div>
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Uploaded</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
N/A
</dd>
</div> */}
{/* TODO: Uncomment after data is available */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
{/* TODO: Uncomment after data is available */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Source</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"></dd>
</div> */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Updated</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
N/A
</dd>
</div> */}
</dl>
</dl>
</div>
</div>
</div>
)}
)}
</div>
</div>
);
}

View file

@ -85,6 +85,7 @@ interface Connector {
connectionId?: string;
access_token?: string;
selectedFiles?: GoogleDriveFile[] | OneDriveFile[];
available?: boolean;
}
interface SyncResult {

View file

@ -165,7 +165,7 @@ export default function UploadProviderPage() {
const handleFileSelected = (files: CloudFile[]) => {
setSelectedFiles(files);
console.log(`Selected ${files.length} files from ${provider}:`, files);
console.log(`Selected ${files.length} item(s) from ${provider}:`, files);
// You can add additional handling here like triggering sync, etc.
};
@ -376,19 +376,19 @@ export default function UploadProviderPage() {
loading={isIngesting}
disabled={!hasSelectedFiles || isIngesting}
>
{!hasSelectedFiles ? (
<>Ingest files</>
) : (
{hasSelectedFiles ? (
<>
Ingest {selectedFiles.length} file
Ingest {selectedFiles.length} item
{selectedFiles.length > 1 ? "s" : ""}
</>
) : (
<>Ingest selected items</>
)}
</Button>
</TooltipTrigger>
{!hasSelectedFiles ? (
<TooltipContent side="left">
Select at least one file before ingesting
Select at least one item before ingesting
</TooltipContent>
) : null}
</Tooltip>

View file

@ -201,7 +201,7 @@ export function CloudConnectorsDialog({
<DialogHeader>
<DialogTitle>Cloud File Connectors</DialogTitle>
<DialogDescription>
Select files from your connected cloud storage providers
Select files or folders from your connected cloud storage providers
</DialogDescription>
</DialogHeader>
@ -232,7 +232,7 @@ export function CloudConnectorsDialog({
!connector.hasAccessToken
? connector.accessTokenError ||
"Access token required - try reconnecting your account"
: `Select files from ${connector.name}`
: `Select files or folders from ${connector.name}`
}
onClick={e => {
e.preventDefault();
@ -283,6 +283,7 @@ export function CloudConnectorsDialog({
accessToken={connectorAccessTokens[connector.type]}
onPickerStateChange={() => {}}
clientId={connector.clientId}
isIngesting={false}
/>
</div>
);

View file

@ -26,7 +26,7 @@ export const FileList = ({
return (
<div className="space-y-2 relative">
<div className="flex items-center justify-between">
<p className="text-sm font-medium">Added files ({files.length})</p>
<p className="text-sm font-medium">Selected items ({files.length})</p>
<Button
ignoreTitleCase={true}
onClick={onClearAll}

View file

@ -39,7 +39,7 @@ export const PickerHeader = ({
return (
<div className="text-sm text-muted-foreground p-4 bg-muted/20 rounded-md">
Please connect to {getProviderName(provider)} first to select specific
files.
files or folders.
</div>
);
}
@ -48,7 +48,7 @@ export const PickerHeader = ({
<Card>
<CardContent className="flex flex-col items-center text-center py-8">
<p className="text-sm text-primary mb-4">
Select files from {getProviderName(provider)} to ingest.
Select files or folders from {getProviderName(provider)} to ingest.
</p>
<Button
onClick={onAddFiles}
@ -56,7 +56,7 @@ export const PickerHeader = ({
className="bg-foreground text-background hover:bg-foreground/90 font-semibold"
>
<Plus className="h-4 w-4" />
{isPickerOpen ? "Opening picker..." : "Add files"}
{isPickerOpen ? "Opening picker..." : "Add files or folders"}
</Button>
</CardContent>
</Card>

View file

@ -52,12 +52,16 @@ export class GoogleDriveHandler {
try {
this.onPickerStateChange?.(true);
// Create a view for regular documents
const docsView = new window.google.picker.DocsView()
.setIncludeFolders(true)
.setSelectFolderEnabled(true);
const picker = new window.google.picker.PickerBuilder()
.addView(window.google.picker.ViewId.DOCS)
.addView(window.google.picker.ViewId.FOLDERS)
.addView(docsView)
.setOAuthToken(this.accessToken)
.enableFeature(window.google.picker.Feature.MULTISELECT_ENABLED)
.setTitle("Select files from Google Drive")
.setTitle("Select files or folders from Google Drive")
.setCallback(data => this.pickerCallback(data, onFileSelected))
.build();

View file

@ -53,6 +53,7 @@ declare global {
load: (callback: () => void) => void;
};
PickerBuilder: new () => GooglePickerBuilder;
DocsView: new () => GoogleDocsView;
ViewId: {
DOCS: string;
FOLDERS: string;
@ -83,8 +84,13 @@ declare global {
}
}
export interface GoogleDocsView {
setIncludeFolders: (include: boolean) => GoogleDocsView;
setSelectFolderEnabled: (enabled: boolean) => GoogleDocsView;
}
export interface GooglePickerBuilder {
addView: (view: string) => GooglePickerBuilder;
addView: (view: GoogleDocsView | string) => GooglePickerBuilder;
setOAuthToken: (token: string) => GooglePickerBuilder;
setCallback: (
callback: (data: GooglePickerData) => void

View file

@ -19,6 +19,7 @@ import {
import { useAuth } from "@/contexts/auth-context";
// Task interface is now imported from useGetTasksQuery
export type { Task };
export interface TaskFile {
filename: string;

View file

@ -1,6 +1,10 @@
[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "openrag"
version = "0.1.14.dev3"
version = "0.1.19"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.13"
@ -31,6 +35,9 @@ dependencies = [
"docling-serve>=1.4.1",
]
[dependency-groups]
dev = ["pytest>=8", "pytest-asyncio>=0.21.0", "pytest-mock>=3.12.0", "pytest-cov>=4.0.0"]
[project.scripts]
openrag = "tui.main:run_tui"

91
scripts/docling_ctl.py Normal file
View file

@ -0,0 +1,91 @@
#!/usr/bin/env python3
"""Helper script to control docling-serve using DoclingManager for CI/testing."""
import sys
import asyncio
import argparse
from pathlib import Path
# Add src to path so we can import DoclingManager
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
from tui.managers.docling_manager import DoclingManager
async def start_docling(port: int = 5001, host: str | None = None, enable_ui: bool = False) -> int:
    """Start docling-serve if it is not already running.

    Args:
        port: Port for docling-serve to listen on.
        host: Host to bind to; None lets DoclingManager auto-detect one.
        enable_ui: Whether to enable the docling-serve UI.

    Returns:
        0 on success (or if already running), 1 on failure.
    """
    manager = DoclingManager()
    if manager.is_running():
        # Nothing to do; report the existing endpoint for convenience.
        print("Docling-serve is already running")
        status = manager.get_status()
        print(f"Endpoint: {status['endpoint']}")
        return 0
    host_msg = f"{host}:{port}" if host else f"auto-detected host:{port}"
    print(f"Starting docling-serve on {host_msg}...")
    success, message = await manager.start(port=port, host=host, enable_ui=enable_ui)
    if success:
        print(message)
        status = manager.get_status()
        print(f"Endpoint: {status['endpoint']}")
        print(f"PID: {status['pid']}")
        return 0
    # Startup failed; surface the manager's message on stderr.
    print(message, file=sys.stderr)
    return 1
async def stop_docling() -> int:
    """Stop docling-serve if it is running.

    Returns:
        0 on success (or if not running), 1 on failure.
    """
    manager = DoclingManager()
    if not manager.is_running():
        # Treat "already stopped" as success for idempotent CI usage.
        print("Docling-serve is not running")
        return 0
    print("Stopping docling-serve...")
    success, message = await manager.stop()
    if success:
        print(message)
        return 0
    # Shutdown failed; surface the manager's message on stderr.
    print(message, file=sys.stderr)
    return 1
async def status_docling():
    """Print the current docling-serve status; exit code reflects whether it runs."""
    manager = DoclingManager()
    status = manager.get_status()
    running = status["status"] == "running"
    print(f"Status: {status['status']}")
    if running:
        # Extra detail is only meaningful while the service is up.
        print(f"Endpoint: {status['endpoint']}")
        print(f"Docs: {status['docs_url']}")
        print(f"PID: {status['pid']}")
    return 0 if running else 1
async def main() -> int:
    """Parse CLI arguments and dispatch to the requested docling-serve command.

    Returns:
        The exit code from the dispatched command (0 on success, 1 on failure).
    """
    parser = argparse.ArgumentParser(description="Control docling-serve for CI/testing")
    parser.add_argument("command", choices=["start", "stop", "status"], help="Command to run")
    parser.add_argument("--port", type=int, default=5001, help="Port to run on (default: 5001)")
    parser.add_argument("--host", default=None, help="Host to bind to (default: auto-detect for containers)")
    parser.add_argument("--enable-ui", action="store_true", help="Enable UI")
    args = parser.parse_args()
    if args.command == "start":
        # args.host already defaults to None, so it can be passed through directly.
        return await start_docling(port=args.port, host=args.host, enable_ui=args.enable_ui)
    if args.command == "stop":
        return await stop_docling()
    # argparse's choices guarantee the only remaining command is "status".
    return await status_docling()


if __name__ == "__main__":
    sys.exit(asyncio.run(main()))

120
src/api/docling.py Normal file
View file

@ -0,0 +1,120 @@
"""Docling service proxy endpoints."""
import socket
import struct
from pathlib import Path
import httpx
from starlette.requests import Request
from starlette.responses import JSONResponse
from utils.container_utils import (
detect_container_environment,
get_container_host,
guess_host_ip_for_containers,
)
from utils.logging_config import get_logger
logger = get_logger(__name__)
def _get_gateway_ip_from_route() -> str | None:
    """Return the default gateway IP visible from the current network namespace.

    Parses /proc/net/route (Linux only), looking for the default route
    (destination ``00000000``) and decoding its hex-encoded gateway field.

    Returns:
        The gateway as a dotted-quad string, or None if the routing table
        cannot be read or no default route is present.
    """
    min_fields = 3  # interface, destination, gateway
    try:
        with Path("/proc/net/route").open() as route_table:
            next(route_table)  # Skip header
            for line in route_table:
                fields = line.strip().split()
                if len(fields) >= min_fields and fields[1] == "00000000":
                    # The gateway column is a little-endian hex IPv4 address.
                    gw_int = int(fields[2], 16)
                    return socket.inet_ntoa(struct.pack("<L", gw_int))
    except (FileNotFoundError, PermissionError, IndexError, ValueError) as err:
        logger.warning("Could not read routing table: %s", err)
    return None
def determine_docling_host() -> str:
    """Determine the host address used for docling health checks.

    Inside a container the resolution order is: the HOST_DOCKER_INTERNAL
    override, then the special host.docker.internal /
    host.containers.internal names, then the default-route gateway IP,
    then a guessed bridge IP. Outside a container, localhost is used.
    """
    container_type = detect_container_environment()
    if container_type:
        # Try HOST_DOCKER_INTERNAL env var first
        container_host = get_container_host()
        if container_host:
            logger.info("Using container-aware host '%s'", container_host)
            return container_host

        # Try special hostnames (Docker Desktop and rootless podman).
        # Uses the module-level socket import; no local re-import needed.
        for hostname in ["host.docker.internal", "host.containers.internal"]:
            try:
                socket.getaddrinfo(hostname, None)
                logger.info("Using %s for container-to-host communication", hostname)
                return hostname
            except socket.gaierror:
                logger.debug("%s not available", hostname)

        # Try gateway IP detection (Docker on Linux)
        gateway_ip = _get_gateway_ip_from_route()
        if gateway_ip:
            logger.info("Detected host gateway IP: %s", gateway_ip)
            return gateway_ip

        # Fallback to bridge IP
        fallback_ip = guess_host_ip_for_containers(logger=logger)
        logger.info("Falling back to container bridge host %s", fallback_ip)
        return fallback_ip

    # Running outside a container
    logger.info("Running outside a container; using localhost")
    return "localhost"
# Detect the host IP once at startup
# NOTE: evaluated at import time, so the chosen host is fixed for the
# lifetime of the process; restart the service if networking changes.
HOST_IP = determine_docling_host()
# docling-serve is assumed to listen on port 5001 on the resolved host.
DOCLING_SERVICE_URL = f"http://{HOST_IP}:5001"
async def health(request: Request) -> JSONResponse:
    """Proxy a health probe to the docling-serve backend.

    Lets the frontend query docling availability through a same-origin
    endpoint instead of contacting the service directly.
    """
    health_url = f"{DOCLING_SERVICE_URL}/health"

    def _unhealthy(message: str) -> JSONResponse:
        # Every failure mode shares the same 503 response envelope.
        return JSONResponse(
            {
                "status": "unhealthy",
                "message": message,
                "host": HOST_IP,
            },
            status_code=503,
        )

    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(health_url, timeout=2.0)
            if response.status_code == 200:
                return JSONResponse({"status": "healthy", "host": HOST_IP})
            logger.warning(
                "Docling health check failed",
                url=health_url,
                status_code=response.status_code,
            )
            return _unhealthy(
                f"Health check failed with status: {response.status_code}"
            )
    except httpx.TimeoutException:
        logger.warning("Docling health check timeout", url=health_url)
        return _unhealthy("Connection timeout")
    except Exception as e:
        logger.error("Docling health check failed", url=health_url, error=str(e))
        return _unhealthy(str(e))

View file

@ -28,7 +28,6 @@ def require_auth(session_manager):
async def wrapper(request: Request):
# In no-auth mode, bypass authentication entirely
if is_no_auth_mode():
logger.debug("No-auth mode: Creating anonymous user")
# Create an anonymous user object so endpoints don't break
from session_manager import User
from datetime import datetime
@ -36,7 +35,6 @@ def require_auth(session_manager):
from session_manager import AnonymousUser
request.state.user = AnonymousUser()
request.state.jwt_token = None # No JWT in no-auth mode
logger.debug("Set user_id=anonymous, jwt_token=None")
return await handler(request)
user = get_current_user(request, session_manager)

View file

@ -13,8 +13,8 @@ from utils.container_utils import get_container_host
from utils.document_processing import create_document_converter
from utils.logging_config import get_logger
load_dotenv()
load_dotenv("../")
load_dotenv(override=False)
load_dotenv("../", override=False)
logger = get_logger(__name__)
@ -61,12 +61,6 @@ DISABLE_INGEST_WITH_LANGFLOW = os.getenv(
def is_no_auth_mode():
"""Check if we're running in no-auth mode (OAuth credentials missing)"""
result = not (GOOGLE_OAUTH_CLIENT_ID and GOOGLE_OAUTH_CLIENT_SECRET)
logger.debug(
"Checking auth mode",
no_auth_mode=result,
has_client_id=GOOGLE_OAUTH_CLIENT_ID is not None,
has_client_secret=GOOGLE_OAUTH_CLIENT_SECRET is not None,
)
return result

View file

@ -1,21 +1,20 @@
import io
import os
from pathlib import Path
import time
from collections import deque
from dataclasses import dataclass
from typing import Dict, List, Any, Optional, Iterable, Set
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Set
from googleapiclient.errors import HttpError
from googleapiclient.http import MediaIoBaseDownload
from utils.logging_config import get_logger
logger = get_logger(__name__)
# Project-specific base types (adjust imports to your project)
from ..base import BaseConnector, ConnectorDocument, DocumentACL
from .oauth import GoogleDriveOAuth
logger = get_logger(__name__)
# -------------------------
# Config model
@ -32,8 +31,8 @@ class GoogleDriveConfig:
recursive: bool = True
# Shared Drives control
drive_id: Optional[str] = None # when set, we use corpora='drive'
corpora: Optional[str] = None # 'user' | 'drive' | 'domain'; auto-picked if None
drive_id: Optional[str] = None # when set, we use corpora='drive'
corpora: Optional[str] = None # 'user' | 'drive' | 'domain'; auto-picked if None
# Optional filtering
include_mime_types: Optional[List[str]] = None
@ -80,7 +79,6 @@ class GoogleDriveConnector(BaseConnector):
_FILE_ID_ALIASES = ("file_ids", "selected_file_ids", "selected_files")
_FOLDER_ID_ALIASES = ("folder_ids", "selected_folder_ids", "selected_folders")
def emit(self, doc: ConnectorDocument) -> None:
"""
Emit a ConnectorDocument instance.
@ -100,7 +98,9 @@ class GoogleDriveConnector(BaseConnector):
# Token file default (so callback & workers dont need to pass it)
project_root = Path(__file__).resolve().parent.parent.parent.parent
token_file = config.get("token_file") or str(project_root / "google_drive_token.json")
token_file = config.get("token_file") or str(
project_root / "google_drive_token.json"
)
Path(token_file).parent.mkdir(parents=True, exist_ok=True)
if not isinstance(client_id, str) or not client_id.strip():
@ -115,7 +115,9 @@ class GoogleDriveConnector(BaseConnector):
)
# Normalize incoming IDs from any of the supported alias keys
def _first_present_list(cfg: Dict[str, Any], keys: Iterable[str]) -> Optional[List[str]]:
def _first_present_list(
cfg: Dict[str, Any], keys: Iterable[str]
) -> Optional[List[str]]:
for k in keys:
v = cfg.get(k)
if v: # accept non-empty list
@ -151,6 +153,7 @@ class GoogleDriveConnector(BaseConnector):
# Drive client is built in authenticate()
from google.oauth2.credentials import Credentials
self.creds: Optional[Credentials] = None
self.service: Any = None
@ -214,7 +217,7 @@ class GoogleDriveConnector(BaseConnector):
"id, name, mimeType, modifiedTime, createdTime, size, "
"webViewLink, parents, owners, driveId"
),
**self._drives_flags,
**self._drives_get_flags,
)
.execute()
)
@ -285,7 +288,9 @@ class GoogleDriveConnector(BaseConnector):
Fetch metadata for a file by ID (resolving shortcuts).
"""
if self.service is None:
raise RuntimeError("Google Drive service is not initialized. Please authenticate first.")
raise RuntimeError(
"Google Drive service is not initialized. Please authenticate first."
)
try:
meta = (
self.service.files()
@ -323,24 +328,40 @@ class GoogleDriveConnector(BaseConnector):
def _iter_selected_items(self) -> List[Dict[str, Any]]:
"""
Return a de-duplicated list of file metadata for the selected scope:
- explicit file_ids
- explicit file_ids (automatically expands folders to their contents)
- items inside folder_ids (with optional recursion)
Shortcuts are resolved to their targets automatically.
"""
seen: Set[str] = set()
items: List[Dict[str, Any]] = []
folders_to_expand: List[str] = []
# Explicit files
# Process file_ids: separate actual files from folders
if self.cfg.file_ids:
for fid in self.cfg.file_ids:
meta = self._get_file_meta_by_id(fid)
if meta and meta["id"] not in seen:
if not meta:
continue
# If it's a folder, add to folders_to_expand instead
if meta.get("mimeType") == "application/vnd.google-apps.folder":
logger.debug(
f"Item {fid} ({meta.get('name')}) is a folder, "
f"will expand to contents"
)
folders_to_expand.append(fid)
elif meta["id"] not in seen:
# It's a regular file, add it directly
seen.add(meta["id"])
items.append(meta)
# Folders
# Collect all folders to expand (from both file_ids and folder_ids)
if self.cfg.folder_ids:
folder_children = self._bfs_expand_folders(self.cfg.folder_ids)
folders_to_expand.extend(self.cfg.folder_ids)
# Expand all folders to their contents
if folders_to_expand:
folder_children = self._bfs_expand_folders(folders_to_expand)
for meta in folder_children:
meta = self._resolve_shortcut(meta)
if meta.get("id") in seen:
@ -357,7 +378,11 @@ class GoogleDriveConnector(BaseConnector):
items = self._filter_by_mime(items)
# Exclude folders from final emits:
items = [m for m in items if m.get("mimeType") != "application/vnd.google-apps.folder"]
items = [
m
for m in items
if m.get("mimeType") != "application/vnd.google-apps.folder"
]
return items
# -------------------------
@ -389,29 +414,85 @@ class GoogleDriveConnector(BaseConnector):
def _download_file_bytes(self, file_meta: Dict[str, Any]) -> bytes:
"""
Download bytes for a given file (exporting if Google-native).
Raises ValueError if the item is a folder (folders cannot be downloaded).
"""
file_id = file_meta["id"]
file_name = file_meta.get("name", "unknown")
mime_type = file_meta.get("mimeType") or ""
# Google-native: export
export_mime = self._pick_export_mime(mime_type)
if mime_type.startswith("application/vnd.google-apps."):
# default fallback if not overridden
#if not export_mime:
# export_mime = "application/pdf"
export_mime = "application/pdf"
logger.debug(
f"Downloading file {file_id} ({file_name}) with mimetype: {mime_type}"
)
# Folders cannot be downloaded or exported - this should never be reached
# as folders are automatically expanded in _iter_selected_items()
if mime_type == "application/vnd.google-apps.folder":
raise ValueError(
f"Cannot download folder {file_id} ({file_name}). "
f"This is a bug - folders should be automatically expanded before download."
)
# According to https://stackoverflow.com/questions/65053558/google-drive-api-v3-files-export-method-throws-a-403-error-export-only-support
# export_media ONLY works for Google Docs Editors files (Docs, Sheets, Slides, Drawings)
# All other files (including other Google Apps types like Forms, Sites, Maps) must use get_media
# Define which Google Workspace files are exportable
exportable_types = {
"application/vnd.google-apps.document", # Google Docs
"application/vnd.google-apps.spreadsheet", # Google Sheets
"application/vnd.google-apps.presentation", # Google Slides
"application/vnd.google-apps.drawing", # Google Drawings
}
if mime_type in exportable_types:
# This is an exportable Google Workspace file - must use export_media
export_mime = self._pick_export_mime(mime_type)
if not export_mime:
# Default fallback for unsupported Google native types
export_mime = "application/pdf"
logger.debug(
f"Using export_media for {file_id} ({mime_type} -> {export_mime})"
)
# NOTE: export_media does not accept supportsAllDrives/includeItemsFromAllDrives
request = self.service.files().export_media(fileId=file_id, mimeType=export_mime)
request = self.service.files().export_media(
fileId=file_id, mimeType=export_mime
)
else:
# This is a regular uploaded file (PDF, image, video, etc.) - use get_media
# Also handles non-exportable Google Apps files (Forms, Sites, Maps, etc.)
logger.debug(f"Using get_media for {file_id} ({mime_type})")
# Binary download (get_media also doesn't accept the Drive flags)
request = self.service.files().get_media(fileId=file_id)
# Download the file with error handling for misclassified Google Docs
fh = io.BytesIO()
downloader = MediaIoBaseDownload(fh, request, chunksize=1024 * 1024)
done = False
while not done:
status, done = downloader.next_chunk()
# Optional: you can log progress via status.progress()
try:
while not done:
status, done = downloader.next_chunk()
# Optional: you can log progress via status.progress()
except HttpError as e:
# If download fails with "fileNotDownloadable", it's a Docs Editor file
# that wasn't properly detected. Retry with export_media.
if "fileNotDownloadable" in str(e) and mime_type not in exportable_types:
logger.warning(
f"Download failed for {file_id} ({mime_type}) with fileNotDownloadable error. "
f"Retrying with export_media (file might be a Google Doc)"
)
export_mime = "application/pdf"
request = self.service.files().export_media(
fileId=file_id, mimeType=export_mime
)
fh = io.BytesIO()
downloader = MediaIoBaseDownload(fh, request, chunksize=1024 * 1024)
done = False
while not done:
status, done = downloader.next_chunk()
else:
raise
return fh.getvalue()
@ -430,7 +511,9 @@ class GoogleDriveConnector(BaseConnector):
# If still not authenticated, bail (caller should kick off OAuth init)
if not await self.oauth.is_authenticated():
logger.debug("authenticate: no valid credentials; run OAuth init/callback first.")
logger.debug(
"authenticate: no valid credentials; run OAuth init/callback first."
)
return False
# Build Drive service from OAuth helper
@ -450,7 +533,7 @@ class GoogleDriveConnector(BaseConnector):
self,
page_token: Optional[str] = None,
max_files: Optional[int] = None,
**kwargs
**kwargs,
) -> Dict[str, Any]:
"""
List files in the currently selected scope (file_ids/folder_ids/recursive).
@ -483,15 +566,24 @@ class GoogleDriveConnector(BaseConnector):
except Exception:
pass
return {"files": [], "next_page_token": None}
async def get_file_content(self, file_id: str) -> ConnectorDocument:
"""
Fetch a file's metadata and content from Google Drive and wrap it in a ConnectorDocument.
Raises FileNotFoundError if the ID is a folder (folders cannot be downloaded).
"""
meta = self._get_file_meta_by_id(file_id)
if not meta:
raise FileNotFoundError(f"Google Drive file not found: {file_id}")
# Check if this is a folder - folders cannot be downloaded
if meta.get("mimeType") == "application/vnd.google-apps.folder":
raise FileNotFoundError(
f"Cannot download folder {file_id} ({meta.get('name')}). "
f"Folders must be expanded to list their contents. "
f"This ID should not have been passed to get_file_content()."
)
try:
blob = self._download_file_bytes(meta)
except Exception as e:
@ -527,11 +619,13 @@ class GoogleDriveConnector(BaseConnector):
metadata={
"parents": meta.get("parents"),
"driveId": meta.get("driveId"),
"size": int(meta.get("size", 0)) if str(meta.get("size", "")).isdigit() else None,
"size": int(meta.get("size", 0))
if str(meta.get("size", "")).isdigit()
else None,
},
)
return doc
async def setup_subscription(self) -> str:
"""
Start a Google Drive Changes API watch (webhook).
@ -546,10 +640,14 @@ class GoogleDriveConnector(BaseConnector):
# 1) Ensure we are authenticated and have a live Drive service
ok = await self.authenticate()
if not ok:
raise RuntimeError("GoogleDriveConnector.setup_subscription: not authenticated")
raise RuntimeError(
"GoogleDriveConnector.setup_subscription: not authenticated"
)
# 2) Resolve webhook address (no param in ABC, so pull from config/env)
webhook_address = getattr(self.cfg, "webhook_address", None) or os.getenv("GOOGLE_DRIVE_WEBHOOK_URL")
webhook_address = getattr(self.cfg, "webhook_address", None) or os.getenv(
"GOOGLE_DRIVE_WEBHOOK_URL"
)
if not webhook_address:
raise RuntimeError(
"GoogleDriveConnector.setup_subscription: webhook URL not configured. "
@ -600,7 +698,9 @@ class GoogleDriveConnector(BaseConnector):
}
if not isinstance(channel_id, str) or not channel_id:
raise RuntimeError(f"Drive watch returned invalid channel id: {channel_id!r}")
raise RuntimeError(
f"Drive watch returned invalid channel id: {channel_id!r}"
)
return channel_id
@ -665,13 +765,20 @@ class GoogleDriveConnector(BaseConnector):
return False
try:
self.service.channels().stop(body={"id": subscription_id, "resourceId": resource_id}).execute()
self.service.channels().stop(
body={"id": subscription_id, "resourceId": resource_id}
).execute()
# 4) Clear local bookkeeping
if getattr(self, "_active_channel", None) and self._active_channel.get("channel_id") == subscription_id:
if (
getattr(self, "_active_channel", None)
and self._active_channel.get("channel_id") == subscription_id
):
self._active_channel = {}
if hasattr(self, "_subscriptions") and isinstance(self._subscriptions, dict):
if hasattr(self, "_subscriptions") and isinstance(
self._subscriptions, dict
):
self._subscriptions.pop(subscription_id, None)
return True
@ -682,7 +789,7 @@ class GoogleDriveConnector(BaseConnector):
except Exception:
pass
return False
async def handle_webhook(self, payload: Dict[str, Any]) -> List[str]:
"""
Process a Google Drive Changes webhook.
@ -722,7 +829,9 @@ class GoogleDriveConnector(BaseConnector):
except Exception as e:
selected_ids = set()
try:
logger.error(f"handle_webhook: scope build failed, proceeding unfiltered: {e}")
logger.error(
f"handle_webhook: scope build failed, proceeding unfiltered: {e}"
)
except Exception:
pass
@ -759,7 +868,11 @@ class GoogleDriveConnector(BaseConnector):
# Filter to our selected scope if we have one; otherwise accept all
if selected_ids and (rid not in selected_ids):
# Shortcut target might be in scope even if the shortcut isn't
tgt = fobj.get("shortcutDetails", {}).get("targetId") if fobj else None
tgt = (
fobj.get("shortcutDetails", {}).get("targetId")
if fobj
else None
)
if not (tgt and tgt in selected_ids):
continue
@ -808,7 +921,9 @@ class GoogleDriveConnector(BaseConnector):
blob = self._download_file_bytes(meta)
except HttpError as e:
# Skip/record failures
logger.error(f"Failed to download {meta.get('name')} ({meta.get('id')}): {e}")
logger.error(
f"Failed to download {meta.get('name')} ({meta.get('id')}): {e}"
)
continue
from datetime import datetime
@ -838,7 +953,9 @@ class GoogleDriveConnector(BaseConnector):
"webViewLink": meta.get("webViewLink"),
"parents": meta.get("parents"),
"driveId": meta.get("driveId"),
"size": int(meta.get("size", 0)) if str(meta.get("size", "")).isdigit() else None,
"size": int(meta.get("size", 0))
if str(meta.get("size", "")).isdigit()
else None,
},
content=blob,
)
@ -849,7 +966,9 @@ class GoogleDriveConnector(BaseConnector):
# -------------------------
def get_start_page_token(self) -> str:
# getStartPageToken accepts supportsAllDrives (not includeItemsFromAllDrives)
resp = self.service.changes().getStartPageToken(**self._drives_get_flags).execute()
resp = (
self.service.changes().getStartPageToken(**self._drives_get_flags).execute()
)
return resp["startPageToken"]
def poll_changes_and_sync(self) -> Optional[str]:
@ -888,7 +1007,10 @@ class GoogleDriveConnector(BaseConnector):
# Match scope
if fid not in selected_ids:
# also consider shortcut target
if file_obj.get("mimeType") == "application/vnd.google-apps.shortcut":
if (
file_obj.get("mimeType")
== "application/vnd.google-apps.shortcut"
):
tgt = file_obj.get("shortcutDetails", {}).get("targetId")
if tgt and tgt in selected_ids:
pass
@ -923,7 +1045,10 @@ class GoogleDriveConnector(BaseConnector):
modified_time=parse_datetime(resolved.get("modifiedTime")),
mimetype=str(resolved.get("mimeType", "")),
acl=DocumentACL(), # Set appropriate ACL if needed
metadata={"parents": resolved.get("parents"), "driveId": resolved.get("driveId")},
metadata={
"parents": resolved.get("parents"),
"driveId": resolved.get("driveId"),
},
content=blob,
)
self.emit(doc)
@ -945,7 +1070,9 @@ class GoogleDriveConnector(BaseConnector):
# -------------------------
# Optional: webhook stubs
# -------------------------
def build_watch_body(self, webhook_address: str, channel_id: Optional[str] = None) -> Dict[str, Any]:
def build_watch_body(
self, webhook_address: str, channel_id: Optional[str] = None
) -> Dict[str, Any]:
"""
Prepare the request body for changes.watch if you use webhooks.
"""
@ -964,7 +1091,7 @@ class GoogleDriveConnector(BaseConnector):
body = self.build_watch_body(webhook_address)
result = (
self.service.changes()
.watch(pageToken=page_token, body=body, **self._drives_flags)
.watch(pageToken=page_token, body=body, **self._drives_get_flags)
.execute()
)
return result
@ -974,7 +1101,9 @@ class GoogleDriveConnector(BaseConnector):
Stop a previously started webhook watch.
"""
try:
self.service.channels().stop(body={"id": channel_id, "resourceId": resource_id}).execute()
self.service.channels().stop(
body={"id": channel_id, "resourceId": resource_id}
).execute()
return True
except HttpError as e:

View file

@ -1,5 +1,3 @@
import os
import tempfile
from typing import Any, Dict, List, Optional
# Create custom processor for connector files using Langflow
@ -60,14 +58,14 @@ class LangflowConnectorService:
# Create temporary file from document content
with auto_cleanup_tempfile(suffix=suffix) as tmp_path:
# Write document content to temp file
with open(tmp_path, 'wb') as f:
with open(tmp_path, "wb") as f:
f.write(document.content)
# Step 1: Upload file to Langflow
logger.debug("Uploading file to Langflow", filename=document.filename)
content = document.content
file_tuple = (
document.filename.replace(" ", "_").replace("/", "_")+suffix,
document.filename.replace(" ", "_").replace("/", "_") + suffix,
content,
document.mimetype or "application/octet-stream",
)
@ -256,7 +254,10 @@ class LangflowConnectorService:
file_ids: List[str],
jwt_token: str = None,
) -> str:
"""Sync specific files by their IDs using Langflow processing"""
"""
Sync specific files by their IDs using Langflow processing.
Automatically expands folders to their contents.
"""
if not self.task_service:
raise ValueError(
"TaskService not available - connector sync requires task service dependency"
@ -279,10 +280,50 @@ class LangflowConnectorService:
owner_name = user.name if user else None
owner_email = user.email if user else None
# Temporarily set file_ids in the connector's config so list_files() can use them
# Store the original values to restore later
cfg = getattr(connector, "cfg", None)
original_file_ids = None
original_folder_ids = None
if cfg is not None:
original_file_ids = getattr(cfg, "file_ids", None)
original_folder_ids = getattr(cfg, "folder_ids", None)
try:
# Set the file_ids we want to sync in the connector's config
if cfg is not None:
cfg.file_ids = file_ids # type: ignore
cfg.folder_ids = None # type: ignore
# Get the expanded list of file IDs (folders will be expanded to their contents)
# This uses the connector's list_files() which calls _iter_selected_items()
result = await connector.list_files()
expanded_file_ids = [f["id"] for f in result.get("files", [])]
if not expanded_file_ids:
logger.warning(
f"No files found after expanding file_ids. "
f"Original IDs: {file_ids}. This may indicate all IDs were folders "
f"with no contents, or files that were filtered out."
)
# Return empty task rather than failing
raise ValueError("No files to sync after expanding folders")
except Exception as e:
logger.error(f"Failed to expand file_ids via list_files(): {e}")
# Fallback to original file_ids if expansion fails
expanded_file_ids = file_ids
finally:
# Restore original config values
if cfg is not None:
cfg.file_ids = original_file_ids # type: ignore
cfg.folder_ids = original_folder_ids # type: ignore
processor = LangflowConnectorFileProcessor(
self,
connection_id,
file_ids,
expanded_file_ids,
user_id,
jwt_token=jwt_token,
owner_name=owner_name,
@ -291,7 +332,7 @@ class LangflowConnectorService:
# Create custom task using TaskService
task_id = await self.task_service.create_custom_task(
user_id, file_ids, processor
user_id, expanded_file_ids, processor
)
return task_id

View file

@ -1,16 +1,11 @@
import tempfile
import os
from typing import Dict, Any, List, Optional
from typing import Any, Dict, List, Optional
from .base import BaseConnector, ConnectorDocument
from utils.logging_config import get_logger
logger = get_logger(__name__)
from .google_drive import GoogleDriveConnector
from .sharepoint import SharePointConnector
from .onedrive import OneDriveConnector
from .base import BaseConnector, ConnectorDocument
from .connection_manager import ConnectionManager
logger = get_logger(__name__)
@ -56,9 +51,11 @@ class ConnectorService:
# Create temporary file from document content
from utils.file_utils import auto_cleanup_tempfile
with auto_cleanup_tempfile(suffix=self._get_file_extension(document.mimetype)) as tmp_path:
with auto_cleanup_tempfile(
suffix=self._get_file_extension(document.mimetype)
) as tmp_path:
# Write document content to temp file
with open(tmp_path, 'wb') as f:
with open(tmp_path, "wb") as f:
f.write(document.content)
# Use existing process_file_common function with connector document metadata
@ -71,6 +68,7 @@ class ConnectorService:
# Process using consolidated processing pipeline
from models.processors import TaskProcessor
processor = TaskProcessor(document_service=doc_service)
result = await processor.process_document_standard(
file_path=tmp_path,
@ -301,7 +299,10 @@ class ConnectorService:
file_ids: List[str],
jwt_token: str = None,
) -> str:
"""Sync specific files by their IDs (used for webhook-triggered syncs)"""
"""
Sync specific files by their IDs (used for webhook-triggered syncs or manual selection).
Automatically expands folders to their contents.
"""
if not self.task_service:
raise ValueError(
"TaskService not available - connector sync requires task service dependency"
@ -324,14 +325,53 @@ class ConnectorService:
owner_name = user.name if user else None
owner_email = user.email if user else None
# Temporarily set file_ids in the connector's config so list_files() can use them
# Store the original values to restore later
original_file_ids = None
original_folder_ids = None
if hasattr(connector, "cfg"):
original_file_ids = getattr(connector.cfg, "file_ids", None)
original_folder_ids = getattr(connector.cfg, "folder_ids", None)
try:
# Set the file_ids we want to sync in the connector's config
if hasattr(connector, "cfg"):
connector.cfg.file_ids = file_ids # type: ignore
connector.cfg.folder_ids = None # type: ignore
# Get the expanded list of file IDs (folders will be expanded to their contents)
# This uses the connector's list_files() which calls _iter_selected_items()
result = await connector.list_files()
expanded_file_ids = [f["id"] for f in result.get("files", [])]
if not expanded_file_ids:
logger.warning(
f"No files found after expanding file_ids. "
f"Original IDs: {file_ids}. This may indicate all IDs were folders "
f"with no contents, or files that were filtered out."
)
# Return empty task rather than failing
raise ValueError("No files to sync after expanding folders")
except Exception as e:
logger.error(f"Failed to expand file_ids via list_files(): {e}")
# Fallback to original file_ids if expansion fails
expanded_file_ids = file_ids
finally:
# Restore original config values
if hasattr(connector, "cfg"):
connector.cfg.file_ids = original_file_ids # type: ignore
connector.cfg.folder_ids = original_folder_ids # type: ignore
# Create custom processor for specific connector files
from models.processors import ConnectorFileProcessor
# We'll pass file_ids as the files_info, the processor will handle ID-only files
# Use expanded_file_ids which has folders already expanded
processor = ConnectorFileProcessor(
self,
connection_id,
file_ids,
expanded_file_ids,
user_id,
jwt_token=jwt_token,
owner_name=owner_name,
@ -340,7 +380,7 @@ class ConnectorService:
# Create custom task using TaskService
task_id = await self.task_service.create_custom_task(
user_id, file_ids, processor
user_id, expanded_file_ids, processor
)
return task_id

View file

@ -131,7 +131,7 @@ async def configure_alerting_security():
# Don't fail startup if alerting config fails
async def _ensure_opensearch_index(self):
async def _ensure_opensearch_index():
"""Ensure OpenSearch index exists when using traditional connector service."""
try:
# Check if index already exists
@ -242,6 +242,9 @@ def generate_jwt_keys():
capture_output=True,
)
# Set restrictive permissions on private key (readable by owner only)
os.chmod(private_key_path, 0o600)
# Generate public key
subprocess.run(
[
@ -257,12 +260,21 @@ def generate_jwt_keys():
capture_output=True,
)
# Set permissions on public key (readable by all)
os.chmod(public_key_path, 0o644)
logger.info("Generated RSA keys for JWT signing")
except subprocess.CalledProcessError as e:
logger.error("Failed to generate RSA keys", error=str(e))
raise
else:
logger.info("RSA keys already exist, skipping generation")
# Ensure correct permissions on existing keys
try:
os.chmod(private_key_path, 0o600)
os.chmod(public_key_path, 0o644)
logger.info("RSA keys already exist, ensured correct permissions")
except OSError as e:
logger.warning("Failed to set permissions on existing keys", error=str(e))
async def init_index_when_ready():

View file

@ -296,11 +296,16 @@ class AuthService:
try:
if self.langflow_mcp_service and isinstance(jwt_token, str) and jwt_token.strip():
global_vars = {"JWT": jwt_token}
global_vars["CONNECTOR_TYPE_URL"] = "url"
if user_info:
if user_info.get("id"):
global_vars["OWNER"] = user_info.get("id")
if user_info.get("name"):
global_vars["OWNER_NAME"] = user_info.get("name")
# OWNER_NAME may contain spaces, which can cause issues in headers,
# so it is wrapped in double quotes below. URL-encoding the name is an
# alternative if quoting proves insufficient for special characters.
owner_name = user_info.get("name")
if owner_name:
global_vars["OWNER_NAME"] = str(f"\"{owner_name}\"")
if user_info.get("email"):
global_vars["OWNER_EMAIL"] = user_info.get("email")

View file

@ -126,7 +126,11 @@ class DocumentService:
from utils.file_utils import auto_cleanup_tempfile
import os
with auto_cleanup_tempfile() as tmp_path:
# Preserve file extension for docling format detection
filename = upload_file.filename or "uploaded"
suffix = os.path.splitext(filename)[1] or ""
with auto_cleanup_tempfile(suffix=suffix) as tmp_path:
# Stream upload file to temporary file
file_size = 0
with open(tmp_path, 'wb') as tmp_file:

View file

@ -242,6 +242,35 @@ class ModelsService:
headers["Authorization"] = f"Bearer {api_key}"
if project_id:
headers["Project-ID"] = project_id
# Validate credentials with a minimal completion request
async with httpx.AsyncClient() as client:
validation_url = f"{watson_endpoint}/ml/v1/text/generation"
validation_params = {"version": "2024-09-16"}
validation_payload = {
"input": "test",
"model_id": "ibm/granite-3-2b-instruct",
"project_id": project_id,
"parameters": {
"max_new_tokens": 1,
},
}
validation_response = await client.post(
validation_url,
headers=headers,
params=validation_params,
json=validation_payload,
timeout=10.0,
)
if validation_response.status_code != 200:
raise Exception(
f"Invalid credentials or endpoint: {validation_response.status_code} - {validation_response.text}"
)
logger.info("IBM Watson credentials validated successfully")
# Fetch foundation models using the correct endpoint
models_url = f"{watson_endpoint}/ml/v1/foundation_model_specs"

View file

@ -1 +1,8 @@
"""OpenRAG Terminal User Interface package."""
from importlib.metadata import version
try:
__version__ = version("openrag")
except Exception:
__version__ = "unknown"

View file

@ -1,121 +0,0 @@
services:
opensearch:
image: phact/openrag-opensearch:${OPENRAG_VERSION:-latest}
#build:
# context: .
# dockerfile: Dockerfile
container_name: os
depends_on:
- openrag-backend
environment:
- discovery.type=single-node
- OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_PASSWORD}
# Run security setup in background after OpenSearch starts
command: >
bash -c "
# Start OpenSearch in background
/usr/share/opensearch/opensearch-docker-entrypoint.sh opensearch &
# Wait a bit for OpenSearch to start, then apply security config
sleep 10 && /usr/share/opensearch/setup-security.sh &
# Wait for background processes
wait
"
ports:
- "9200:9200"
- "9600:9600"
dashboards:
image: opensearchproject/opensearch-dashboards:3.0.0
container_name: osdash
depends_on:
- opensearch
environment:
OPENSEARCH_HOSTS: '["https://opensearch:9200"]'
OPENSEARCH_USERNAME: "admin"
OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD}
ports:
- "5601:5601"
openrag-backend:
image: phact/openrag-backend:${OPENRAG_VERSION:-latest}
#build:
#context: .
#dockerfile: Dockerfile.backend
container_name: openrag-backend
depends_on:
- langflow
environment:
- OPENSEARCH_HOST=opensearch
- LANGFLOW_URL=http://langflow:7860
- LANGFLOW_PUBLIC_URL=${LANGFLOW_PUBLIC_URL}
- LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
- LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
- LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
- LANGFLOW_CHAT_FLOW_ID=${LANGFLOW_CHAT_FLOW_ID}
- LANGFLOW_INGEST_FLOW_ID=${LANGFLOW_INGEST_FLOW_ID}
- LANGFLOW_URL_INGEST_FLOW_ID=${LANGFLOW_URL_INGEST_FLOW_ID}
- DISABLE_INGEST_WITH_LANGFLOW=${DISABLE_INGEST_WITH_LANGFLOW:-false}
- NUDGES_FLOW_ID=${NUDGES_FLOW_ID}
- OPENSEARCH_PORT=9200
- OPENSEARCH_USERNAME=admin
- OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
- OPENAI_API_KEY=${OPENAI_API_KEY}
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
- NVIDIA_VISIBLE_DEVICES=all
- GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID}
- GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET}
- MICROSOFT_GRAPH_OAUTH_CLIENT_ID=${MICROSOFT_GRAPH_OAUTH_CLIENT_ID}
- MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET=${MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET}
- WEBHOOK_BASE_URL=${WEBHOOK_BASE_URL}
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
volumes:
- ./documents:/app/documents:Z
- ./keys:/app/keys:Z
- ./flows:/app/flows:Z
openrag-frontend:
image: phact/openrag-frontend:${OPENRAG_VERSION:-latest}
#build:
#context: .
#dockerfile: Dockerfile.frontend
container_name: openrag-frontend
depends_on:
- openrag-backend
environment:
- OPENRAG_BACKEND_HOST=openrag-backend
ports:
- "3000:3000"
langflow:
volumes:
- ./flows:/app/flows:Z
image: phact/openrag-langflow:${LANGFLOW_VERSION:-latest}
container_name: langflow
ports:
- "7860:7860"
environment:
- OPENAI_API_KEY=${OPENAI_API_KEY}
- LANGFLOW_LOAD_FLOWS_PATH=/app/flows
- LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
- JWT=None
- OWNER=None
- OWNER_NAME=None
- OWNER_EMAIL=None
- CONNECTOR_TYPE=system
- OPENRAG-QUERY-FILTER="{}"
- OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
- FILENAME=None
- MIMETYPE=None
- FILESIZE=0
- LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT,OPENRAG-QUERY-FILTER,OPENSEARCH_PASSWORD,OWNER,OWNER_NAME,OWNER_EMAIL,CONNECTOR_TYPE,FILENAME,MIMETYPE,FILESIZE
- LANGFLOW_LOG_LEVEL=DEBUG
- LANGFLOW_AUTO_LOGIN=${LANGFLOW_AUTO_LOGIN}
- LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
- LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
- LANGFLOW_NEW_USER_IS_ACTIVE=${LANGFLOW_NEW_USER_IS_ACTIVE}
- LANGFLOW_ENABLE_SUPERUSER_CLI=${LANGFLOW_ENABLE_SUPERUSER_CLI}
# - DEFAULT_FOLDER_NAME=OpenRAG
- HIDE_GETTING_STARTED_PROGRESS=true

View file

@ -0,0 +1 @@
../../../docker-compose-cpu.yml

View file

@ -1,121 +0,0 @@
services:
opensearch:
image: phact/openrag-opensearch:${OPENRAG_VERSION:-latest}
#build:
#context: .
#dockerfile: Dockerfile
container_name: os
depends_on:
- openrag-backend
environment:
- discovery.type=single-node
- OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_PASSWORD}
# Run security setup in background after OpenSearch starts
command: >
bash -c "
# Start OpenSearch in background
/usr/share/opensearch/opensearch-docker-entrypoint.sh opensearch &
# Wait a bit for OpenSearch to start, then apply security config
sleep 10 && /usr/share/opensearch/setup-security.sh &
# Wait for background processes
wait
"
ports:
- "9200:9200"
- "9600:9600"
dashboards:
image: opensearchproject/opensearch-dashboards:3.0.0
container_name: osdash
depends_on:
- opensearch
environment:
OPENSEARCH_HOSTS: '["https://opensearch:9200"]'
OPENSEARCH_USERNAME: "admin"
OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD}
ports:
- "5601:5601"
openrag-backend:
image: phact/openrag-backend:${OPENRAG_VERSION:-latest}
#build:
#context: .
#dockerfile: Dockerfile.backend
container_name: openrag-backend
depends_on:
- langflow
environment:
- OPENSEARCH_HOST=opensearch
- LANGFLOW_URL=http://langflow:7860
- LANGFLOW_PUBLIC_URL=${LANGFLOW_PUBLIC_URL}
- LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
- LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
- LANGFLOW_CHAT_FLOW_ID=${LANGFLOW_CHAT_FLOW_ID}
- LANGFLOW_INGEST_FLOW_ID=${LANGFLOW_INGEST_FLOW_ID}
- LANGFLOW_URL_INGEST_FLOW_ID=${LANGFLOW_URL_INGEST_FLOW_ID}
- DISABLE_INGEST_WITH_LANGFLOW=${DISABLE_INGEST_WITH_LANGFLOW:-false}
- NUDGES_FLOW_ID=${NUDGES_FLOW_ID}
- OPENSEARCH_PORT=9200
- OPENSEARCH_USERNAME=admin
- OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
- OPENAI_API_KEY=${OPENAI_API_KEY}
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
- NVIDIA_VISIBLE_DEVICES=all
- GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID}
- GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET}
- MICROSOFT_GRAPH_OAUTH_CLIENT_ID=${MICROSOFT_GRAPH_OAUTH_CLIENT_ID}
- MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET=${MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET}
- WEBHOOK_BASE_URL=${WEBHOOK_BASE_URL}
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
volumes:
- ./documents:/app/documents:Z
- ./keys:/app/keys:Z
- ./flows:/app/flows:Z
gpus: all
openrag-frontend:
image: phact/openrag-frontend:${OPENRAG_VERSION:-latest}
#build:
#context: .
#dockerfile: Dockerfile.frontend
container_name: openrag-frontend
depends_on:
- openrag-backend
environment:
- OPENRAG_BACKEND_HOST=openrag-backend
ports:
- "3000:3000"
  # Langflow flow engine: loads the bundled flows from ./flows and serves the
  # UI/API on port 7860 (the backend reaches it at http://langflow:7860).
  langflow:
    volumes:
      - ./flows:/app/flows:Z
    image: phact/openrag-langflow:${LANGFLOW_VERSION:-latest}
    container_name: langflow
    ports:
      - "7860:7860"
    environment:
      - OPENAI_API_KEY=${OPENAI_API_KEY}
      - LANGFLOW_LOAD_FLOWS_PATH=/app/flows
      - LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
      # Placeholder defaults; presumably overridden per request/session at
      # runtime -- TODO confirm against the backend's flow invocation code.
      - JWT=None
      - OWNER=None
      - OWNER_NAME=None
      - OWNER_EMAIL=None
      - CONNECTOR_TYPE=system
      # NOTE(review): in list-style `environment`, the double quotes below are
      # part of the value -- Langflow receives `"{}"`, quotes included.
      # Confirm this is intended before changing it.
      - OPENRAG-QUERY-FILTER="{}"
      - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
      - FILENAME=None
      - MIMETYPE=None
      - FILESIZE=0
      # Env vars listed here are exposed to flows as Langflow global variables.
      - LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT,OPENRAG-QUERY-FILTER,OPENSEARCH_PASSWORD,OWNER,OWNER_NAME,OWNER_EMAIL,CONNECTOR_TYPE,FILENAME,MIMETYPE,FILESIZE
      - LANGFLOW_LOG_LEVEL=DEBUG
      - LANGFLOW_AUTO_LOGIN=${LANGFLOW_AUTO_LOGIN}
      - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
      - LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
      - LANGFLOW_NEW_USER_IS_ACTIVE=${LANGFLOW_NEW_USER_IS_ACTIVE}
      - LANGFLOW_ENABLE_SUPERUSER_CLI=${LANGFLOW_ENABLE_SUPERUSER_CLI}
      # - DEFAULT_FOLDER_NAME="OpenRAG"
      - HIDE_GETTING_STARTED_PROGRESS=true

View file

@ -0,0 +1 @@
../../../docker-compose.yml

View file

@ -0,0 +1 @@
../../../../documents/2506.08231v1.pdf

View file

@ -0,0 +1 @@
../../../../documents/ai-human-resources.pdf

View file

@ -0,0 +1 @@
../../../../documents/warmup_ocr.pdf

View file

@ -0,0 +1 @@
../../../../../flows/components/ollama_embedding.json

View file

@ -0,0 +1 @@
../../../../../flows/components/ollama_llm.json

View file

@ -0,0 +1 @@
../../../../../flows/components/ollama_llm_text.json

View file

@ -0,0 +1 @@
../../../../../flows/components/watsonx_embedding.json

View file

@ -0,0 +1 @@
../../../../../flows/components/watsonx_llm.json

View file

@ -0,0 +1 @@
../../../../../flows/components/watsonx_llm_text.json

View file

@ -0,0 +1 @@
../../../../flows/ingestion_flow.json

View file

@ -0,0 +1 @@
../../../../flows/openrag_agent.json

View file

@ -0,0 +1 @@
../../../../flows/openrag_ingest_docling.json

View file

@ -0,0 +1 @@
../../../../flows/openrag_nudges.json

View file

@ -0,0 +1 @@
../../../../flows/openrag_url_mcp.json

View file

@ -2,6 +2,7 @@
import sys
from pathlib import Path
from typing import Iterable, Optional
from textual.app import App, ComposeResult
from utils.logging_config import get_logger
try:
@ -305,41 +306,103 @@ class OpenRAGTUI(App):
return True, "Runtime requirements satisfied"
def copy_sample_documents():
def _copy_assets(resource_tree, destination: Path, allowed_suffixes: Optional[Iterable[str]] = None, *, force: bool = False) -> None:
    """Copy packaged assets into destination and optionally overwrite existing files.

    When ``force`` is True, files are refreshed if the packaged bytes differ.

    Args:
        resource_tree: importlib.resources-style traversable (supports
            ``iterdir``/``is_dir``/``read_bytes``) -- TODO confirm exact type
            at the call sites.
        destination: directory the assets are copied into; created if missing.
        allowed_suffixes: when given, only files whose names end with one of
            these suffixes are copied (directories are always recursed into).
        force: when False, existing files are never touched; when True, an
            existing file is rewritten only if its bytes differ.
    """
    destination.mkdir(parents=True, exist_ok=True)
    for resource in resource_tree.iterdir():
        target_path = destination / resource.name
        if resource.is_dir():
            # Recurse into subdirectories, mirroring the tree under destination.
            _copy_assets(resource, target_path, allowed_suffixes, force=force)
            continue
        if allowed_suffixes and not any(resource.name.endswith(suffix) for suffix in allowed_suffixes):
            continue
        resource_bytes = resource.read_bytes()
        if target_path.exists():
            if not force:
                continue
            try:
                # Skip the write when the on-disk copy already matches.
                if target_path.read_bytes() == resource_bytes:
                    continue
            except Exception as read_error:
                # Unreadable existing file: log and fall through to overwrite it.
                logger.debug(f"Failed to read existing asset {target_path}: {read_error}")
        target_path.write_bytes(resource_bytes)
        logger.info(f"Copied bundled asset: {target_path}")
def copy_sample_documents(*, force: bool = False) -> None:
"""Copy sample documents from package to current directory if they don't exist."""
documents_dir = Path("documents")
# Check if documents directory already exists and has files
if documents_dir.exists() and any(documents_dir.glob("*.pdf")):
return # Documents already exist, don't overwrite
try:
# Get sample documents from package assets
assets_files = files("tui._assets.documents")
# Create documents directory if it doesn't exist
documents_dir.mkdir(exist_ok=True)
# Copy each sample document
for resource in assets_files.iterdir():
if resource.is_file() and resource.name.endswith('.pdf'):
dest_path = documents_dir / resource.name
if not dest_path.exists():
content = resource.read_bytes()
dest_path.write_bytes(content)
logger.info(f"Copied sample document: {resource.name}")
_copy_assets(assets_files, documents_dir, allowed_suffixes=(".pdf",), force=force)
except Exception as e:
logger.debug(f"Could not copy sample documents: {e}")
# This is not a critical error - the app can work without sample documents
def copy_sample_flows(*, force: bool = False) -> None:
    """Copy the bundled flow JSON files from the package into ./flows.

    Existing files are left alone unless ``force`` is True, in which case they
    are refreshed when the packaged bytes differ (see ``_copy_assets``).
    Any failure is logged at debug level and swallowed -- the app can proceed
    without bundled flows.
    """
    destination = Path("flows")
    try:
        flow_assets = files("tui._assets.flows")
        _copy_assets(flow_assets, destination, allowed_suffixes=(".json",), force=force)
    except Exception as copy_error:
        logger.debug(f"Could not copy sample flows: {copy_error}")
def copy_compose_files(*, force: bool = False) -> None:
    """Copy docker-compose templates into the workspace if they are missing.

    When ``force`` is True an existing file is refreshed whenever the packaged
    bytes differ; otherwise existing files are left untouched. All failures
    are logged at debug level and never raised -- the app can run without the
    bundled templates.
    """
    try:
        assets_root = files("tui._assets")
    except Exception as e:
        logger.debug(f"Could not access compose assets: {e}")
        return
    for filename in ("docker-compose.yml", "docker-compose-cpu.yml"):
        destination = Path(filename)
        if destination.exists() and not force:
            continue
        try:
            resource = assets_root.joinpath(filename)
            if not resource.is_file():
                # Bug fix: these three messages previously logged the literal
                # placeholder "(unknown)" instead of the actual filename.
                logger.debug(f"Compose template not found in assets: {filename}")
                continue
            resource_bytes = resource.read_bytes()
            if destination.exists():
                try:
                    # Skip the write when the on-disk copy already matches.
                    if destination.read_bytes() == resource_bytes:
                        continue
                except Exception as read_error:
                    logger.debug(f"Failed to read existing compose file {destination}: {read_error}")
            destination.write_bytes(resource_bytes)
            logger.info(f"Copied docker-compose template: {filename}")
        except Exception as error:
            logger.debug(f"Could not copy compose file {filename}: {error}")
def run_tui():
"""Run the OpenRAG TUI application."""
app = None
try:
# Copy sample documents on first run
copy_sample_documents()
# Keep bundled assets aligned with the packaged versions
copy_sample_documents(force=True)
copy_sample_flows(force=True)
copy_compose_files(force=True)
app = OpenRAGTUI()
app.run()

View file

@ -8,7 +8,6 @@ import threading
import time
from typing import Optional, Tuple, Dict, Any, List, AsyncIterator
from utils.logging_config import get_logger
from utils.container_utils import guess_host_ip_for_containers
logger = get_logger(__name__)
@ -32,7 +31,8 @@ class DoclingManager:
self._process: Optional[subprocess.Popen] = None
self._port = 5001
self._host = guess_host_ip_for_containers(logger=logger) # Get appropriate host IP based on runtime
# Bind to all interfaces by default (can be overridden with DOCLING_BIND_HOST env var)
self._host = os.getenv('DOCLING_BIND_HOST', '0.0.0.0')
self._running = False
self._external_process = False
@ -150,16 +150,20 @@ class DoclingManager:
else:
pid = self._load_pid()
# Use localhost for display URLs when bound to 0.0.0.0
display_host = "localhost" if self._host == "0.0.0.0" else self._host
return {
"status": "running",
"port": self._port,
"host": self._host,
"endpoint": f"http://{self._host}:{self._port}",
"docs_url": f"http://{self._host}:{self._port}/docs",
"ui_url": f"http://{self._host}:{self._port}/ui",
"endpoint": f"http://{display_host}:{self._port}",
"docs_url": f"http://{display_host}:{self._port}/docs",
"ui_url": f"http://{display_host}:{self._port}/ui",
"pid": pid
}
else:
display_host = "localhost" if self._host == "0.0.0.0" else self._host
return {
"status": "stopped",
"port": self._port,
@ -176,10 +180,9 @@ class DoclingManager:
return False, "Docling serve is already running"
self._port = port
# Use provided host or the bridge IP we detected in __init__
# Use provided host or keep default from __init__
if host is not None:
self._host = host
# else: keep self._host as already set in __init__
# Check if port is already in use before trying to start
import socket
@ -293,7 +296,8 @@ class DoclingManager:
self._running = False
return False, f"Docling serve process exited immediately (code: {return_code})"
return True, f"Docling serve starting on http://{host}:{port}"
display_host = "localhost" if self._host == "0.0.0.0" else self._host
return True, f"Docling serve starting on http://{display_host}:{port}"
except FileNotFoundError:
return False, "docling-serve not available. Please install: uv add docling-serve"
@ -454,7 +458,8 @@ class DoclingManager:
async def follow_logs(self) -> AsyncIterator[str]:
"""Follow logs from the docling-serve process in real-time."""
# First yield status message and any existing logs
status_msg = f"Docling serve is running on http://{self._host}:{self._port}"
display_host = "localhost" if self._host == "0.0.0.0" else self._host
status_msg = f"Docling serve is running on http://{display_host}:{self._port}"
with self._log_lock:
if self._log_buffer:

View file

@ -10,6 +10,7 @@ from rich.text import Text
from rich.align import Align
from dotenv import load_dotenv
from .. import __version__
from ..managers.container_manager import ContainerManager, ServiceStatus
from ..managers.env_manager import EnvManager
from ..managers.docling_manager import DoclingManager
@ -116,7 +117,8 @@ class WelcomeScreen(Screen):
"""
welcome_text.append(ascii_art, style="bold white")
welcome_text.append("Terminal User Interface for OpenRAG\n\n", style="dim")
welcome_text.append("Terminal User Interface for OpenRAG\n", style="dim")
welcome_text.append(f"v{__version__}\n\n", style="dim cyan")
# Check if all services are running
all_services_running = self.services_running and self.docling_running

View file

@ -157,10 +157,22 @@ def guess_host_ip_for_containers(logger=None) -> str:
import logging
import re
import shutil
import socket
import subprocess
log = logger or logging.getLogger(__name__)
def can_bind_to_address(ip_addr: str) -> bool:
"""Test if we can bind to the given IP address."""
try:
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
sock.bind((ip_addr, 0)) # Port 0 = let OS choose a free port
return True
except (OSError, socket.error) as e:
log.debug("Cannot bind to %s: %s", ip_addr, e)
return False
def run(cmd, timeout=2, text=True):
return subprocess.run(cmd, capture_output=True, text=text, timeout=timeout)
@ -261,10 +273,23 @@ def guess_host_ip_for_containers(logger=None) -> str:
"Container-reachable host IP candidates: %s",
", ".join(ordered_candidates),
)
else:
log.info("Container-reachable host IP: %s", ordered_candidates[0])
return ordered_candidates[0]
# Try each candidate and return the first one we can bind to
for ip_addr in ordered_candidates:
if can_bind_to_address(ip_addr):
if len(ordered_candidates) > 1:
log.info("Selected bindable host IP: %s", ip_addr)
else:
log.info("Container-reachable host IP: %s", ip_addr)
return ip_addr
log.debug("Skipping %s (cannot bind)", ip_addr)
# None of the candidates were bindable, fall back to 127.0.0.1
log.warning(
"None of the discovered IPs (%s) can be bound; falling back to 127.0.0.1",
", ".join(ordered_candidates),
)
return "127.0.0.1"
log.warning(
"No container bridge IP found. For rootless Podman (slirp4netns) there may be no host bridge; publish ports or use 10.0.2.2 from the container."

1
tests/__init__.py Normal file
View file

@ -0,0 +1 @@
# Test package

85
tests/conftest.py Normal file
View file

@ -0,0 +1,85 @@
import asyncio
import os
import tempfile
from pathlib import Path
import pytest
import pytest_asyncio
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
# Force no-auth mode for testing by setting OAuth credentials to empty strings
# This ensures anonymous JWT tokens are created automatically
os.environ['GOOGLE_OAUTH_CLIENT_ID'] = ''
os.environ['GOOGLE_OAUTH_CLIENT_SECRET'] = ''
from src.config.settings import clients
from src.session_manager import SessionManager
from src.main import generate_jwt_keys
@pytest.fixture(scope="session")
def event_loop():
"""Create an instance of the default event loop for the test session."""
loop = asyncio.get_event_loop_policy().new_event_loop()
yield loop
loop.close()
@pytest_asyncio.fixture
async def opensearch_client():
"""OpenSearch client for testing - requires running OpenSearch."""
await clients.initialize()
yield clients.opensearch
# Cleanup test indices after tests
try:
await clients.opensearch.indices.delete(index="test_documents")
except Exception:
pass
@pytest.fixture
def session_manager():
"""Session manager for testing."""
# Generate RSA keys before creating SessionManager
generate_jwt_keys()
sm = SessionManager("test-secret-key")
print(f"[DEBUG] SessionManager created with keys: private={sm.private_key_path}, public={sm.public_key_path}")
return sm
@pytest.fixture
def test_documents_dir():
"""Create a temporary directory with test documents."""
with tempfile.TemporaryDirectory() as temp_dir:
test_dir = Path(temp_dir)
# Create some test files in supported formats
(test_dir / "test1.md").write_text("# Machine Learning Document\n\nThis is a test document about machine learning.")
(test_dir / "test2.md").write_text("# AI Document\n\nAnother document discussing artificial intelligence.")
(test_dir / "test3.md").write_text("# Data Science Document\n\nThis is a markdown file about data science.")
# Create subdirectory with files
sub_dir = test_dir / "subdir"
sub_dir.mkdir()
(sub_dir / "nested.md").write_text("# Neural Networks\n\nNested document about neural networks.")
yield test_dir
@pytest.fixture
def test_single_file():
"""Create a single test file."""
with tempfile.NamedTemporaryFile(mode='w', suffix='_test_document.md', delete=False) as f:
f.write("# Single Test Document\n\nThis is a test document about OpenRAG testing framework. This document contains multiple sentences to ensure proper chunking. The content should be indexed and searchable in OpenSearch after processing.")
temp_path = f.name
yield temp_path
# Cleanup
try:
os.unlink(temp_path)
except FileNotFoundError:
pass

View file

@ -0,0 +1 @@
# Integration tests package

View file

@ -0,0 +1,296 @@
import asyncio
import os
from pathlib import Path
import httpx
import pytest
async def wait_for_service_ready(client: httpx.AsyncClient, timeout_s: float = 30.0):
"""Poll existing endpoints until the app and OpenSearch are ready.
Strategy:
- GET /auth/me should return 200 immediately (confirms app is up).
- POST /search with query "*" avoids embeddings and checks OpenSearch/index readiness.
"""
# First test OpenSearch JWT directly
from src.session_manager import SessionManager, AnonymousUser
import os
import hashlib
import jwt as jwt_lib
sm = SessionManager("test")
test_token = sm.create_jwt_token(AnonymousUser())
token_hash = hashlib.sha256(test_token.encode()).hexdigest()[:16]
print(f"[DEBUG] Generated test JWT token hash: {token_hash}")
print(f"[DEBUG] Using key paths: private={sm.private_key_path}, public={sm.public_key_path}")
with open(sm.public_key_path, 'rb') as f:
pub_key_hash = hashlib.sha256(f.read()).hexdigest()[:16]
print(f"[DEBUG] Public key hash: {pub_key_hash}")
# Decode token to see claims
decoded = jwt_lib.decode(test_token, options={"verify_signature": False})
print(f"[DEBUG] JWT claims: iss={decoded.get('iss')}, sub={decoded.get('sub')}, aud={decoded.get('aud')}, roles={decoded.get('roles')}")
# Test OpenSearch JWT auth directly
opensearch_url = f"https://{os.getenv('OPENSEARCH_HOST', 'localhost')}:{os.getenv('OPENSEARCH_PORT', '9200')}"
print(f"[DEBUG] Testing JWT auth directly against: {opensearch_url}/documents/_search")
async with httpx.AsyncClient(verify=False) as os_client:
r_os = await os_client.post(
f"{opensearch_url}/documents/_search",
headers={"Authorization": f"Bearer {test_token}"},
json={"query": {"match_all": {}}, "size": 0}
)
print(f"[DEBUG] Direct OpenSearch JWT test: status={r_os.status_code}, body={r_os.text[:500]}")
if r_os.status_code == 401:
print(f"[DEBUG] ❌ OpenSearch rejected JWT! OIDC config not working.")
else:
print(f"[DEBUG] ✓ OpenSearch accepted JWT!")
deadline = asyncio.get_event_loop().time() + timeout_s
last_err = None
while asyncio.get_event_loop().time() < deadline:
try:
r1 = await client.get("/auth/me")
print(f"[DEBUG] /auth/me status={r1.status_code}, body={r1.text[:200]}")
if r1.status_code in (401, 403):
raise AssertionError(f"/auth/me returned {r1.status_code}: {r1.text}")
if r1.status_code != 200:
await asyncio.sleep(0.5)
continue
# match_all readiness probe; no embeddings
r2 = await client.post("/search", json={"query": "*", "limit": 0})
print(f"[DEBUG] /search status={r2.status_code}, body={r2.text[:200]}")
if r2.status_code in (401, 403):
print(f"[DEBUG] Search failed with auth error. Response: {r2.text}")
raise AssertionError(f"/search returned {r2.status_code}: {r2.text}")
if r2.status_code == 200:
print("[DEBUG] Service ready!")
return
last_err = r2.text
except AssertionError:
raise
except Exception as e:
last_err = str(e)
print(f"[DEBUG] Exception during readiness check: {e}")
await asyncio.sleep(0.5)
raise AssertionError(f"Service not ready in time: {last_err}")
@pytest.mark.parametrize("disable_langflow_ingest", [True, False])
@pytest.mark.asyncio
async def test_upload_and_search_endpoint(tmp_path: Path, disable_langflow_ingest: bool):
"""Boot the ASGI app and exercise /upload and /search endpoints."""
# Ensure we route uploads to traditional processor and disable startup ingest
os.environ["DISABLE_INGEST_WITH_LANGFLOW"] = "true" if disable_langflow_ingest else "false"
os.environ["DISABLE_STARTUP_INGEST"] = "true"
# Force no-auth mode so endpoints bypass authentication
os.environ["GOOGLE_OAUTH_CLIENT_ID"] = ""
os.environ["GOOGLE_OAUTH_CLIENT_SECRET"] = ""
# Import after env vars to ensure settings pick them up. Clear cached modules
import sys
# Clear cached modules so settings pick up env and router sees new flag
for mod in [
"src.api.router",
"api.router", # Also clear the non-src path
"src.api.connector_router",
"api.connector_router",
"src.config.settings",
"config.settings",
"src.auth_middleware",
"auth_middleware",
"src.main",
"api", # Clear the api package itself
"src.api",
"services", # Clear services that import clients
"src.services",
"services.search_service",
"src.services.search_service",
]:
sys.modules.pop(mod, None)
from src.main import create_app, startup_tasks
import src.api.router as upload_router
from src.config.settings import clients, INDEX_NAME, DISABLE_INGEST_WITH_LANGFLOW
# Ensure a clean index before startup
await clients.initialize()
try:
await clients.opensearch.indices.delete(index=INDEX_NAME)
# Wait for deletion to complete
await asyncio.sleep(1)
except Exception:
pass
app = await create_app()
# Manually run startup tasks since httpx ASGI transport here doesn't manage lifespan
await startup_tasks(app.state.services)
# Ensure index exists for tests (startup_tasks only creates it if DISABLE_INGEST_WITH_LANGFLOW=True)
from src.main import _ensure_opensearch_index
await _ensure_opensearch_index()
# Verify index is truly empty after startup
try:
count_response = await clients.opensearch.count(index=INDEX_NAME)
doc_count = count_response.get('count', 0)
assert doc_count == 0, f"Index should be empty after startup but contains {doc_count} documents"
except Exception as e:
# If count fails, the index might not exist yet, which is fine
pass
transport = httpx.ASGITransport(app=app)
try:
async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client:
# Wait for app + OpenSearch readiness using existing endpoints
await wait_for_service_ready(client)
# Create a temporary markdown file to upload
file_path = tmp_path / "endpoint_test_doc.md"
file_text = (
"# Single Test Document\n\n"
"This is a test document about OpenRAG testing framework. "
"The content should be indexed and searchable in OpenSearch after processing."
)
file_path.write_text(file_text)
# POST via router (multipart)
files = {
"file": (
file_path.name,
file_path.read_bytes(),
"text/markdown",
)
}
upload_resp = await client.post("/upload", files=files)
body = upload_resp.json()
assert upload_resp.status_code == 201, upload_resp.text
assert body.get("status") in {"indexed", "unchanged"}
assert isinstance(body.get("id"), str)
# Poll search for the specific content until it's indexed
async def _wait_for_indexed(timeout_s: float = 30.0):
deadline = asyncio.get_event_loop().time() + timeout_s
while asyncio.get_event_loop().time() < deadline:
resp = await client.post(
"/search",
json={"query": "OpenRAG testing framework", "limit": 5},
)
if resp.status_code == 200 and resp.json().get("results"):
return resp
await asyncio.sleep(0.5)
return resp
search_resp = await _wait_for_indexed()
# POST /search
assert search_resp.status_code == 200, search_resp.text
search_body = search_resp.json()
# Basic shape and at least one hit
assert isinstance(search_body.get("results"), list)
assert len(search_body["results"]) >= 0
# When hits exist, confirm our phrase is present in top result content
if search_body["results"]:
top = search_body["results"][0]
assert "text" in top or "content" in top
text = top.get("text") or top.get("content")
assert isinstance(text, str)
assert "testing" in text.lower()
finally:
# Explicitly close global clients to avoid aiohttp warnings
from src.config.settings import clients
try:
await clients.close()
except Exception:
pass
@pytest.mark.parametrize("disable_langflow_ingest", [True, False])
@pytest.mark.asyncio
async def test_router_upload_ingest_traditional(tmp_path: Path, disable_langflow_ingest: bool):
"""Exercise the router endpoint to ensure it routes to traditional upload when Langflow ingest is disabled."""
os.environ["DISABLE_INGEST_WITH_LANGFLOW"] = "true" if disable_langflow_ingest else "false"
os.environ["DISABLE_STARTUP_INGEST"] = "true"
os.environ["GOOGLE_OAUTH_CLIENT_ID"] = ""
os.environ["GOOGLE_OAUTH_CLIENT_SECRET"] = ""
import sys
for mod in [
"src.api.router",
"api.router", # Also clear the non-src path
"src.api.connector_router",
"api.connector_router",
"src.config.settings",
"config.settings",
"src.auth_middleware",
"auth_middleware",
"src.main",
"api", # Clear the api package itself
"src.api",
"services", # Clear services that import clients
"src.services",
"services.search_service",
"src.services.search_service",
]:
sys.modules.pop(mod, None)
from src.main import create_app, startup_tasks
import src.api.router as upload_router
from src.config.settings import clients, INDEX_NAME, DISABLE_INGEST_WITH_LANGFLOW
# Ensure a clean index before startup
await clients.initialize()
try:
await clients.opensearch.indices.delete(index=INDEX_NAME)
# Wait for deletion to complete
await asyncio.sleep(1)
except Exception:
pass
app = await create_app()
await startup_tasks(app.state.services)
# Ensure index exists for tests (startup_tasks only creates it if DISABLE_INGEST_WITH_LANGFLOW=True)
from src.main import _ensure_opensearch_index
await _ensure_opensearch_index()
# Verify index is truly empty after startup
try:
count_response = await clients.opensearch.count(index=INDEX_NAME)
doc_count = count_response.get('count', 0)
assert doc_count == 0, f"Index should be empty after startup but contains {doc_count} documents"
except Exception as e:
# If count fails, the index might not exist yet, which is fine
pass
transport = httpx.ASGITransport(app=app)
try:
async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client:
await wait_for_service_ready(client)
file_path = tmp_path / "router_test_doc.md"
file_path.write_text("# Router Test\n\nThis file validates the upload router.")
files = {
"file": (
file_path.name,
file_path.read_bytes(),
"text/markdown",
)
}
resp = await client.post("/router/upload_ingest", files=files)
data = resp.json()
print(f"data: {data}")
if disable_langflow_ingest:
assert resp.status_code == 201 or resp.status_code == 202, resp.text
assert data.get("status") in {"indexed", "unchanged"}
assert isinstance(data.get("id"), str)
else:
assert resp.status_code == 201 or resp.status_code == 202, resp.text
assert isinstance(data.get("task_id"), str)
assert data.get("file_count") == 1
finally:
from src.config.settings import clients
try:
await clients.close()
except Exception:
pass

View file

@ -0,0 +1,118 @@
import asyncio
import os
from pathlib import Path
import httpx
import pytest
async def wait_for_ready(client: httpx.AsyncClient, timeout_s: float = 30.0):
    """Poll the app until it is ready, or raise AssertionError on timeout.

    Readiness is two-staged: GET /auth/me must return 200 (app is up), then
    POST /search with the wildcard query "*" and limit 0 must return 200
    (search backend/index reachable). Polls every 0.5s until ``timeout_s``
    elapses (measured on the running event loop's clock).

    Raises:
        AssertionError: if the service is not ready before the deadline; the
            message includes the last error text or exception seen.
    """
    deadline = asyncio.get_event_loop().time() + timeout_s
    last_err = None
    while asyncio.get_event_loop().time() < deadline:
        try:
            r1 = await client.get("/auth/me")
            if r1.status_code != 200:
                # App not up yet; back off briefly and retry.
                await asyncio.sleep(0.5)
                continue
            # Wildcard probe with limit 0: checks search readiness cheaply.
            r2 = await client.post("/search", json={"query": "*", "limit": 0})
            if r2.status_code == 200:
                return
            last_err = r2.text
        except Exception as e:
            # Connection errors etc. are expected while the app boots.
            last_err = str(e)
        await asyncio.sleep(0.5)
    raise AssertionError(f"Service not ready in time: {last_err}")
def count_files_in_documents() -> int:
    """Return the number of regular files under ./documents (recursive).

    Resolves ``documents`` relative to the current working directory and
    returns 0 when that directory does not exist.
    """
    docs_root = Path(os.getcwd()) / "documents"
    if not docs_root.is_dir():
        return 0
    total = 0
    for entry in docs_root.rglob("*"):
        if entry.is_file():
            total += 1
    return total
@pytest.mark.parametrize("disable_langflow_ingest", [True, False])
@pytest.mark.asyncio
async def test_startup_ingest_creates_task(disable_langflow_ingest: bool):
# Ensure startup ingest runs and choose pipeline per param
os.environ["DISABLE_STARTUP_INGEST"] = "false"
os.environ["DISABLE_INGEST_WITH_LANGFLOW"] = (
"true" if disable_langflow_ingest else "false"
)
# Force no-auth mode for simpler endpoint access
os.environ["GOOGLE_OAUTH_CLIENT_ID"] = ""
os.environ["GOOGLE_OAUTH_CLIENT_SECRET"] = ""
# Reload settings to pick up env for this test run
import sys
for mod in [
"src.api.router",
"src.api.connector_router",
"src.config.settings",
"src.auth_middleware",
"src.main",
]:
sys.modules.pop(mod, None)
from src.main import create_app, startup_tasks
from src.config.settings import clients, INDEX_NAME
# Ensure a clean index before startup
await clients.initialize()
try:
await clients.opensearch.indices.delete(index=INDEX_NAME)
except Exception:
pass
app = await create_app()
# Trigger startup tasks explicitly
await startup_tasks(app.state.services)
# Ensure index exists for tests (startup_tasks only creates it if DISABLE_INGEST_WITH_LANGFLOW=True)
from src.main import _ensure_opensearch_index
await _ensure_opensearch_index()
transport = httpx.ASGITransport(app=app)
try:
async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client:
await wait_for_ready(client)
expected_files = count_files_in_documents()
# Poll /tasks until we see at least one startup ingest task
async def _wait_for_task(timeout_s: float = 60.0):
deadline = asyncio.get_event_loop().time() + timeout_s
last = None
while asyncio.get_event_loop().time() < deadline:
resp = await client.get("/tasks")
if resp.status_code == 200:
data = resp.json()
last = data
tasks = data.get("tasks") if isinstance(data, dict) else None
if isinstance(tasks, list) and len(tasks) > 0:
return tasks
await asyncio.sleep(0.5)
return last.get("tasks") if isinstance(last, dict) else last
tasks = await _wait_for_task()
if expected_files == 0:
return # Nothing to do
if not (isinstance(tasks, list) and len(tasks) > 0):
# Fallback: verify that documents were indexed as a sign of startup ingest
sr = await client.post("/search", json={"query": "*", "limit": 1})
assert sr.status_code == 200, sr.text
total = sr.json().get("total")
assert isinstance(total, int) and total >= 0, "Startup ingest did not index documents"
return
newest = tasks[0]
assert "task_id" in newest
assert newest.get("total_files") == expected_files
finally:
# Explicitly close global clients to avoid aiohttp warnings
from src.config.settings import clients
try:
await clients.close()
except Exception:
pass

162
uv.lock generated
View file

@ -2,10 +2,10 @@ version = 1
revision = 2
requires-python = ">=3.13"
resolution-markers = [
"sys_platform == 'darwin'",
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"platform_machine == 'x86_64' and sys_platform == 'linux'",
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
"sys_platform == 'darwin'",
]
[[package]]
@ -291,8 +291,8 @@ name = "click"
version = "8.2.1"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"platform_machine == 'x86_64' and sys_platform == 'linux'",
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
]
dependencies = [
@ -312,6 +312,67 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
]
[[package]]
name = "coverage"
version = "7.10.7"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/51/26/d22c300112504f5f9a9fd2297ce33c35f3d353e4aeb987c8419453b2a7c2/coverage-7.10.7.tar.gz", hash = "sha256:f4ab143ab113be368a3e9b795f9cd7906c5ef407d6173fe9675a902e1fffc239", size = 827704, upload-time = "2025-09-21T20:03:56.815Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/9a/94/b765c1abcb613d103b64fcf10395f54d69b0ef8be6a0dd9c524384892cc7/coverage-7.10.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:981a651f543f2854abd3b5fcb3263aac581b18209be49863ba575de6edf4c14d", size = 218320, upload-time = "2025-09-21T20:01:56.629Z" },
{ url = "https://files.pythonhosted.org/packages/72/4f/732fff31c119bb73b35236dd333030f32c4bfe909f445b423e6c7594f9a2/coverage-7.10.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:73ab1601f84dc804f7812dc297e93cd99381162da39c47040a827d4e8dafe63b", size = 218575, upload-time = "2025-09-21T20:01:58.203Z" },
{ url = "https://files.pythonhosted.org/packages/87/02/ae7e0af4b674be47566707777db1aa375474f02a1d64b9323e5813a6cdd5/coverage-7.10.7-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:a8b6f03672aa6734e700bbcd65ff050fd19cddfec4b031cc8cf1c6967de5a68e", size = 249568, upload-time = "2025-09-21T20:01:59.748Z" },
{ url = "https://files.pythonhosted.org/packages/a2/77/8c6d22bf61921a59bce5471c2f1f7ac30cd4ac50aadde72b8c48d5727902/coverage-7.10.7-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:10b6ba00ab1132a0ce4428ff68cf50a25efd6840a42cdf4239c9b99aad83be8b", size = 252174, upload-time = "2025-09-21T20:02:01.192Z" },
{ url = "https://files.pythonhosted.org/packages/b1/20/b6ea4f69bbb52dac0aebd62157ba6a9dddbfe664f5af8122dac296c3ee15/coverage-7.10.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c79124f70465a150e89340de5963f936ee97097d2ef76c869708c4248c63ca49", size = 253447, upload-time = "2025-09-21T20:02:02.701Z" },
{ url = "https://files.pythonhosted.org/packages/f9/28/4831523ba483a7f90f7b259d2018fef02cb4d5b90bc7c1505d6e5a84883c/coverage-7.10.7-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:69212fbccdbd5b0e39eac4067e20a4a5256609e209547d86f740d68ad4f04911", size = 249779, upload-time = "2025-09-21T20:02:04.185Z" },
{ url = "https://files.pythonhosted.org/packages/a7/9f/4331142bc98c10ca6436d2d620c3e165f31e6c58d43479985afce6f3191c/coverage-7.10.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7ea7c6c9d0d286d04ed3541747e6597cbe4971f22648b68248f7ddcd329207f0", size = 251604, upload-time = "2025-09-21T20:02:06.034Z" },
{ url = "https://files.pythonhosted.org/packages/ce/60/bda83b96602036b77ecf34e6393a3836365481b69f7ed7079ab85048202b/coverage-7.10.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b9be91986841a75042b3e3243d0b3cb0b2434252b977baaf0cd56e960fe1e46f", size = 249497, upload-time = "2025-09-21T20:02:07.619Z" },
{ url = "https://files.pythonhosted.org/packages/5f/af/152633ff35b2af63977edd835d8e6430f0caef27d171edf2fc76c270ef31/coverage-7.10.7-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:b281d5eca50189325cfe1f365fafade89b14b4a78d9b40b05ddd1fc7d2a10a9c", size = 249350, upload-time = "2025-09-21T20:02:10.34Z" },
{ url = "https://files.pythonhosted.org/packages/9d/71/d92105d122bd21cebba877228990e1646d862e34a98bb3374d3fece5a794/coverage-7.10.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:99e4aa63097ab1118e75a848a28e40d68b08a5e19ce587891ab7fd04475e780f", size = 251111, upload-time = "2025-09-21T20:02:12.122Z" },
{ url = "https://files.pythonhosted.org/packages/a2/9e/9fdb08f4bf476c912f0c3ca292e019aab6712c93c9344a1653986c3fd305/coverage-7.10.7-cp313-cp313-win32.whl", hash = "sha256:dc7c389dce432500273eaf48f410b37886be9208b2dd5710aaf7c57fd442c698", size = 220746, upload-time = "2025-09-21T20:02:13.919Z" },
{ url = "https://files.pythonhosted.org/packages/b1/b1/a75fd25df44eab52d1931e89980d1ada46824c7a3210be0d3c88a44aaa99/coverage-7.10.7-cp313-cp313-win_amd64.whl", hash = "sha256:cac0fdca17b036af3881a9d2729a850b76553f3f716ccb0360ad4dbc06b3b843", size = 221541, upload-time = "2025-09-21T20:02:15.57Z" },
{ url = "https://files.pythonhosted.org/packages/14/3a/d720d7c989562a6e9a14b2c9f5f2876bdb38e9367126d118495b89c99c37/coverage-7.10.7-cp313-cp313-win_arm64.whl", hash = "sha256:4b6f236edf6e2f9ae8fcd1332da4e791c1b6ba0dc16a2dc94590ceccb482e546", size = 220170, upload-time = "2025-09-21T20:02:17.395Z" },
{ url = "https://files.pythonhosted.org/packages/bb/22/e04514bf2a735d8b0add31d2b4ab636fc02370730787c576bb995390d2d5/coverage-7.10.7-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a0ec07fd264d0745ee396b666d47cef20875f4ff2375d7c4f58235886cc1ef0c", size = 219029, upload-time = "2025-09-21T20:02:18.936Z" },
{ url = "https://files.pythonhosted.org/packages/11/0b/91128e099035ece15da3445d9015e4b4153a6059403452d324cbb0a575fa/coverage-7.10.7-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:dd5e856ebb7bfb7672b0086846db5afb4567a7b9714b8a0ebafd211ec7ce6a15", size = 219259, upload-time = "2025-09-21T20:02:20.44Z" },
{ url = "https://files.pythonhosted.org/packages/8b/51/66420081e72801536a091a0c8f8c1f88a5c4bf7b9b1bdc6222c7afe6dc9b/coverage-7.10.7-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:f57b2a3c8353d3e04acf75b3fed57ba41f5c0646bbf1d10c7c282291c97936b4", size = 260592, upload-time = "2025-09-21T20:02:22.313Z" },
{ url = "https://files.pythonhosted.org/packages/5d/22/9b8d458c2881b22df3db5bb3e7369e63d527d986decb6c11a591ba2364f7/coverage-7.10.7-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:1ef2319dd15a0b009667301a3f84452a4dc6fddfd06b0c5c53ea472d3989fbf0", size = 262768, upload-time = "2025-09-21T20:02:24.287Z" },
{ url = "https://files.pythonhosted.org/packages/f7/08/16bee2c433e60913c610ea200b276e8eeef084b0d200bdcff69920bd5828/coverage-7.10.7-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:83082a57783239717ceb0ad584de3c69cf581b2a95ed6bf81ea66034f00401c0", size = 264995, upload-time = "2025-09-21T20:02:26.133Z" },
{ url = "https://files.pythonhosted.org/packages/20/9d/e53eb9771d154859b084b90201e5221bca7674ba449a17c101a5031d4054/coverage-7.10.7-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:50aa94fb1fb9a397eaa19c0d5ec15a5edd03a47bf1a3a6111a16b36e190cff65", size = 259546, upload-time = "2025-09-21T20:02:27.716Z" },
{ url = "https://files.pythonhosted.org/packages/ad/b0/69bc7050f8d4e56a89fb550a1577d5d0d1db2278106f6f626464067b3817/coverage-7.10.7-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2120043f147bebb41c85b97ac45dd173595ff14f2a584f2963891cbcc3091541", size = 262544, upload-time = "2025-09-21T20:02:29.216Z" },
{ url = "https://files.pythonhosted.org/packages/ef/4b/2514b060dbd1bc0aaf23b852c14bb5818f244c664cb16517feff6bb3a5ab/coverage-7.10.7-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:2fafd773231dd0378fdba66d339f84904a8e57a262f583530f4f156ab83863e6", size = 260308, upload-time = "2025-09-21T20:02:31.226Z" },
{ url = "https://files.pythonhosted.org/packages/54/78/7ba2175007c246d75e496f64c06e94122bdb914790a1285d627a918bd271/coverage-7.10.7-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:0b944ee8459f515f28b851728ad224fa2d068f1513ef6b7ff1efafeb2185f999", size = 258920, upload-time = "2025-09-21T20:02:32.823Z" },
{ url = "https://files.pythonhosted.org/packages/c0/b3/fac9f7abbc841409b9a410309d73bfa6cfb2e51c3fada738cb607ce174f8/coverage-7.10.7-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4b583b97ab2e3efe1b3e75248a9b333bd3f8b0b1b8e5b45578e05e5850dfb2c2", size = 261434, upload-time = "2025-09-21T20:02:34.86Z" },
{ url = "https://files.pythonhosted.org/packages/ee/51/a03bec00d37faaa891b3ff7387192cef20f01604e5283a5fabc95346befa/coverage-7.10.7-cp313-cp313t-win32.whl", hash = "sha256:2a78cd46550081a7909b3329e2266204d584866e8d97b898cd7fb5ac8d888b1a", size = 221403, upload-time = "2025-09-21T20:02:37.034Z" },
{ url = "https://files.pythonhosted.org/packages/53/22/3cf25d614e64bf6d8e59c7c669b20d6d940bb337bdee5900b9ca41c820bb/coverage-7.10.7-cp313-cp313t-win_amd64.whl", hash = "sha256:33a5e6396ab684cb43dc7befa386258acb2d7fae7f67330ebb85ba4ea27938eb", size = 222469, upload-time = "2025-09-21T20:02:39.011Z" },
{ url = "https://files.pythonhosted.org/packages/49/a1/00164f6d30d8a01c3c9c48418a7a5be394de5349b421b9ee019f380df2a0/coverage-7.10.7-cp313-cp313t-win_arm64.whl", hash = "sha256:86b0e7308289ddde73d863b7683f596d8d21c7d8664ce1dee061d0bcf3fbb4bb", size = 220731, upload-time = "2025-09-21T20:02:40.939Z" },
{ url = "https://files.pythonhosted.org/packages/23/9c/5844ab4ca6a4dd97a1850e030a15ec7d292b5c5cb93082979225126e35dd/coverage-7.10.7-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:b06f260b16ead11643a5a9f955bd4b5fd76c1a4c6796aeade8520095b75de520", size = 218302, upload-time = "2025-09-21T20:02:42.527Z" },
{ url = "https://files.pythonhosted.org/packages/f0/89/673f6514b0961d1f0e20ddc242e9342f6da21eaba3489901b565c0689f34/coverage-7.10.7-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:212f8f2e0612778f09c55dd4872cb1f64a1f2b074393d139278ce902064d5b32", size = 218578, upload-time = "2025-09-21T20:02:44.468Z" },
{ url = "https://files.pythonhosted.org/packages/05/e8/261cae479e85232828fb17ad536765c88dd818c8470aca690b0ac6feeaa3/coverage-7.10.7-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3445258bcded7d4aa630ab8296dea4d3f15a255588dd535f980c193ab6b95f3f", size = 249629, upload-time = "2025-09-21T20:02:46.503Z" },
{ url = "https://files.pythonhosted.org/packages/82/62/14ed6546d0207e6eda876434e3e8475a3e9adbe32110ce896c9e0c06bb9a/coverage-7.10.7-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:bb45474711ba385c46a0bfe696c695a929ae69ac636cda8f532be9e8c93d720a", size = 252162, upload-time = "2025-09-21T20:02:48.689Z" },
{ url = "https://files.pythonhosted.org/packages/ff/49/07f00db9ac6478e4358165a08fb41b469a1b053212e8a00cb02f0d27a05f/coverage-7.10.7-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:813922f35bd800dca9994c5971883cbc0d291128a5de6b167c7aa697fcf59360", size = 253517, upload-time = "2025-09-21T20:02:50.31Z" },
{ url = "https://files.pythonhosted.org/packages/a2/59/c5201c62dbf165dfbc91460f6dbbaa85a8b82cfa6131ac45d6c1bfb52deb/coverage-7.10.7-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:93c1b03552081b2a4423091d6fb3787265b8f86af404cff98d1b5342713bdd69", size = 249632, upload-time = "2025-09-21T20:02:51.971Z" },
{ url = "https://files.pythonhosted.org/packages/07/ae/5920097195291a51fb00b3a70b9bbd2edbfe3c84876a1762bd1ef1565ebc/coverage-7.10.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:cc87dd1b6eaf0b848eebb1c86469b9f72a1891cb42ac7adcfbce75eadb13dd14", size = 251520, upload-time = "2025-09-21T20:02:53.858Z" },
{ url = "https://files.pythonhosted.org/packages/b9/3c/a815dde77a2981f5743a60b63df31cb322c944843e57dbd579326625a413/coverage-7.10.7-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:39508ffda4f343c35f3236fe8d1a6634a51f4581226a1262769d7f970e73bffe", size = 249455, upload-time = "2025-09-21T20:02:55.807Z" },
{ url = "https://files.pythonhosted.org/packages/aa/99/f5cdd8421ea656abefb6c0ce92556709db2265c41e8f9fc6c8ae0f7824c9/coverage-7.10.7-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:925a1edf3d810537c5a3abe78ec5530160c5f9a26b1f4270b40e62cc79304a1e", size = 249287, upload-time = "2025-09-21T20:02:57.784Z" },
{ url = "https://files.pythonhosted.org/packages/c3/7a/e9a2da6a1fc5d007dd51fca083a663ab930a8c4d149c087732a5dbaa0029/coverage-7.10.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2c8b9a0636f94c43cd3576811e05b89aa9bc2d0a85137affc544ae5cb0e4bfbd", size = 250946, upload-time = "2025-09-21T20:02:59.431Z" },
{ url = "https://files.pythonhosted.org/packages/ef/5b/0b5799aa30380a949005a353715095d6d1da81927d6dbed5def2200a4e25/coverage-7.10.7-cp314-cp314-win32.whl", hash = "sha256:b7b8288eb7cdd268b0304632da8cb0bb93fadcfec2fe5712f7b9cc8f4d487be2", size = 221009, upload-time = "2025-09-21T20:03:01.324Z" },
{ url = "https://files.pythonhosted.org/packages/da/b0/e802fbb6eb746de006490abc9bb554b708918b6774b722bb3a0e6aa1b7de/coverage-7.10.7-cp314-cp314-win_amd64.whl", hash = "sha256:1ca6db7c8807fb9e755d0379ccc39017ce0a84dcd26d14b5a03b78563776f681", size = 221804, upload-time = "2025-09-21T20:03:03.4Z" },
{ url = "https://files.pythonhosted.org/packages/9e/e8/71d0c8e374e31f39e3389bb0bd19e527d46f00ea8571ec7ec8fd261d8b44/coverage-7.10.7-cp314-cp314-win_arm64.whl", hash = "sha256:097c1591f5af4496226d5783d036bf6fd6cd0cbc132e071b33861de756efb880", size = 220384, upload-time = "2025-09-21T20:03:05.111Z" },
{ url = "https://files.pythonhosted.org/packages/62/09/9a5608d319fa3eba7a2019addeacb8c746fb50872b57a724c9f79f146969/coverage-7.10.7-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:a62c6ef0d50e6de320c270ff91d9dd0a05e7250cac2a800b7784bae474506e63", size = 219047, upload-time = "2025-09-21T20:03:06.795Z" },
{ url = "https://files.pythonhosted.org/packages/f5/6f/f58d46f33db9f2e3647b2d0764704548c184e6f5e014bef528b7f979ef84/coverage-7.10.7-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:9fa6e4dd51fe15d8738708a973470f67a855ca50002294852e9571cdbd9433f2", size = 219266, upload-time = "2025-09-21T20:03:08.495Z" },
{ url = "https://files.pythonhosted.org/packages/74/5c/183ffc817ba68e0b443b8c934c8795553eb0c14573813415bd59941ee165/coverage-7.10.7-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:8fb190658865565c549b6b4706856d6a7b09302c797eb2cf8e7fe9dabb043f0d", size = 260767, upload-time = "2025-09-21T20:03:10.172Z" },
{ url = "https://files.pythonhosted.org/packages/0f/48/71a8abe9c1ad7e97548835e3cc1adbf361e743e9d60310c5f75c9e7bf847/coverage-7.10.7-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:affef7c76a9ef259187ef31599a9260330e0335a3011732c4b9effa01e1cd6e0", size = 262931, upload-time = "2025-09-21T20:03:11.861Z" },
{ url = "https://files.pythonhosted.org/packages/84/fd/193a8fb132acfc0a901f72020e54be5e48021e1575bb327d8ee1097a28fd/coverage-7.10.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e16e07d85ca0cf8bafe5f5d23a0b850064e8e945d5677492b06bbe6f09cc699", size = 265186, upload-time = "2025-09-21T20:03:13.539Z" },
{ url = "https://files.pythonhosted.org/packages/b1/8f/74ecc30607dd95ad50e3034221113ccb1c6d4e8085cc761134782995daae/coverage-7.10.7-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:03ffc58aacdf65d2a82bbeb1ffe4d01ead4017a21bfd0454983b88ca73af94b9", size = 259470, upload-time = "2025-09-21T20:03:15.584Z" },
{ url = "https://files.pythonhosted.org/packages/0f/55/79ff53a769f20d71b07023ea115c9167c0bb56f281320520cf64c5298a96/coverage-7.10.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1b4fd784344d4e52647fd7857b2af5b3fbe6c239b0b5fa63e94eb67320770e0f", size = 262626, upload-time = "2025-09-21T20:03:17.673Z" },
{ url = "https://files.pythonhosted.org/packages/88/e2/dac66c140009b61ac3fc13af673a574b00c16efdf04f9b5c740703e953c0/coverage-7.10.7-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:0ebbaddb2c19b71912c6f2518e791aa8b9f054985a0769bdb3a53ebbc765c6a1", size = 260386, upload-time = "2025-09-21T20:03:19.36Z" },
{ url = "https://files.pythonhosted.org/packages/a2/f1/f48f645e3f33bb9ca8a496bc4a9671b52f2f353146233ebd7c1df6160440/coverage-7.10.7-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:a2d9a3b260cc1d1dbdb1c582e63ddcf5363426a1a68faa0f5da28d8ee3c722a0", size = 258852, upload-time = "2025-09-21T20:03:21.007Z" },
{ url = "https://files.pythonhosted.org/packages/bb/3b/8442618972c51a7affeead957995cfa8323c0c9bcf8fa5a027421f720ff4/coverage-7.10.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a3cc8638b2480865eaa3926d192e64ce6c51e3d29c849e09d5b4ad95efae5399", size = 261534, upload-time = "2025-09-21T20:03:23.12Z" },
{ url = "https://files.pythonhosted.org/packages/b2/dc/101f3fa3a45146db0cb03f5b4376e24c0aac818309da23e2de0c75295a91/coverage-7.10.7-cp314-cp314t-win32.whl", hash = "sha256:67f8c5cbcd3deb7a60b3345dffc89a961a484ed0af1f6f73de91705cc6e31235", size = 221784, upload-time = "2025-09-21T20:03:24.769Z" },
{ url = "https://files.pythonhosted.org/packages/4c/a1/74c51803fc70a8a40d7346660379e144be772bab4ac7bb6e6b905152345c/coverage-7.10.7-cp314-cp314t-win_amd64.whl", hash = "sha256:e1ed71194ef6dea7ed2d5cb5f7243d4bcd334bfb63e59878519be558078f848d", size = 222905, upload-time = "2025-09-21T20:03:26.93Z" },
{ url = "https://files.pythonhosted.org/packages/12/65/f116a6d2127df30bcafbceef0302d8a64ba87488bf6f73a6d8eebf060873/coverage-7.10.7-cp314-cp314t-win_arm64.whl", hash = "sha256:7fe650342addd8524ca63d77b2362b02345e5f1a093266787d210c70a50b471a", size = 220922, upload-time = "2025-09-21T20:03:28.672Z" },
{ url = "https://files.pythonhosted.org/packages/ec/16/114df1c291c22cac3b0c127a73e0af5c12ed7bbb6558d310429a0ae24023/coverage-7.10.7-py3-none-any.whl", hash = "sha256:f7941f6f2fe6dd6807a1208737b8a0cbcf1cc6d7b07d24998ad2d63590868260", size = 209952, upload-time = "2025-09-21T20:03:53.918Z" },
]
[[package]]
name = "cramjam"
version = "2.11.0"
@ -454,8 +515,8 @@ name = "dill"
version = "0.4.0"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"platform_machine == 'x86_64' and sys_platform == 'linux'",
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
]
sdist = { url = "https://files.pythonhosted.org/packages/12/80/630b4b88364e9a8c8c5797f4602d0f76ef820909ee32f0bacb9f90654042/dill-0.4.0.tar.gz", hash = "sha256:0633f1d2df477324f53a895b02c901fb961bdbf65a17122586ea7019292cbcf0", size = 186976, upload-time = "2025-04-16T00:41:48.867Z" }
@ -619,8 +680,8 @@ name = "docling-mcp"
version = "1.1.0"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"platform_machine == 'x86_64' and sys_platform == 'linux'",
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
]
dependencies = [
@ -943,8 +1004,8 @@ name = "fsspec"
version = "2025.5.1"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"platform_machine == 'x86_64' and sys_platform == 'linux'",
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
]
sdist = { url = "https://files.pythonhosted.org/packages/00/f7/27f15d41f0ed38e8fcc488584b57e902b331da7f7c6dcda53721b15838fc/fsspec-2025.5.1.tar.gz", hash = "sha256:2e55e47a540b91843b755e83ded97c6e897fa0942b11490113f09e9c443c2475", size = 303033, upload-time = "2025-05-24T12:03:23.792Z" }
@ -1264,8 +1325,8 @@ name = "huggingface-hub"
version = "0.33.2"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"platform_machine == 'x86_64' and sys_platform == 'linux'",
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
]
dependencies = [
@ -1339,6 +1400,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd", size = 27656, upload-time = "2025-04-27T15:29:00.214Z" },
]
[[package]]
name = "iniconfig"
version = "2.1.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793, upload-time = "2025-03-19T20:09:59.721Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" },
]
[[package]]
name = "jinja2"
version = "3.1.6"
@ -1960,8 +2030,8 @@ name = "multiprocess"
version = "0.70.18"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"platform_machine == 'x86_64' and sys_platform == 'linux'",
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
]
dependencies = [
@ -2282,7 +2352,7 @@ wheels = [
[[package]]
name = "openrag"
version = "0.1.14.dev3"
version = "0.1.19"
source = { editable = "." }
dependencies = [
{ name = "agentd" },
@ -2312,6 +2382,14 @@ dependencies = [
{ name = "uvicorn" },
]
[package.dev-dependencies]
dev = [
{ name = "pytest" },
{ name = "pytest-asyncio" },
{ name = "pytest-cov" },
{ name = "pytest-mock" },
]
[package.metadata]
requires-dist = [
{ name = "agentd", specifier = ">=0.2.2" },
@ -2341,6 +2419,14 @@ requires-dist = [
{ name = "uvicorn", specifier = ">=0.35.0" },
]
[package.metadata.requires-dev]
dev = [
{ name = "pytest", specifier = ">=8" },
{ name = "pytest-asyncio", specifier = ">=0.21.0" },
{ name = "pytest-cov", specifier = ">=4.0.0" },
{ name = "pytest-mock", specifier = ">=3.12.0" },
]
[[package]]
name = "opensearch-py"
version = "3.0.0"
@ -2836,6 +2922,60 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/e1/6b/2706497c86e8d69fb76afe5ea857fe1794621aa0f3b1d863feb953fe0f22/pypdfium2-4.30.1-py3-none-win_arm64.whl", hash = "sha256:c2b6d63f6d425d9416c08d2511822b54b8e3ac38e639fc41164b1d75584b3a8c", size = 2814810, upload-time = "2024-12-19T19:28:09.857Z" },
]
[[package]]
name = "pytest"
version = "8.4.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "colorama", marker = "sys_platform == 'win32'" },
{ name = "iniconfig" },
{ name = "packaging" },
{ name = "pluggy" },
{ name = "pygments" },
]
sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618, upload-time = "2025-09-04T14:34:22.711Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750, upload-time = "2025-09-04T14:34:20.226Z" },
]
[[package]]
name = "pytest-asyncio"
version = "1.2.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pytest" },
]
sdist = { url = "https://files.pythonhosted.org/packages/42/86/9e3c5f48f7b7b638b216e4b9e645f54d199d7abbbab7a64a13b4e12ba10f/pytest_asyncio-1.2.0.tar.gz", hash = "sha256:c609a64a2a8768462d0c99811ddb8bd2583c33fd33cf7f21af1c142e824ffb57", size = 50119, upload-time = "2025-09-12T07:33:53.816Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/04/93/2fa34714b7a4ae72f2f8dad66ba17dd9a2c793220719e736dda28b7aec27/pytest_asyncio-1.2.0-py3-none-any.whl", hash = "sha256:8e17ae5e46d8e7efe51ab6494dd2010f4ca8dae51652aa3c8d55acf50bfb2e99", size = 15095, upload-time = "2025-09-12T07:33:52.639Z" },
]
[[package]]
name = "pytest-cov"
version = "7.0.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "coverage" },
{ name = "pluggy" },
{ name = "pytest" },
]
sdist = { url = "https://files.pythonhosted.org/packages/5e/f7/c933acc76f5208b3b00089573cf6a2bc26dc80a8aece8f52bb7d6b1855ca/pytest_cov-7.0.0.tar.gz", hash = "sha256:33c97eda2e049a0c5298e91f519302a1334c26ac65c1a483d6206fd458361af1", size = 54328, upload-time = "2025-09-09T10:57:02.113Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" },
]
[[package]]
name = "pytest-mock"
version = "3.15.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pytest" },
]
sdist = { url = "https://files.pythonhosted.org/packages/68/14/eb014d26be205d38ad5ad20d9a80f7d201472e08167f0bb4361e251084a9/pytest_mock-3.15.1.tar.gz", hash = "sha256:1849a238f6f396da19762269de72cb1814ab44416fa73a8686deac10b0d87a0f", size = 34036, upload-time = "2025-09-16T16:37:27.081Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/5a/cc/06253936f4a7fa2e0f48dfe6d851d9c56df896a9ab09ac019d70b760619c/pytest_mock-3.15.1-py3-none-any.whl", hash = "sha256:0a25e2eb88fe5168d535041d09a4529a188176ae608a6d249ee65abc0949630d", size = 10095, upload-time = "2025-09-16T16:37:25.734Z" },
]
[[package]]
name = "python-bidi"
version = "0.6.6"
@ -3622,9 +3762,9 @@ name = "torch"
version = "2.8.0"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"sys_platform == 'darwin'",
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
"sys_platform == 'darwin'",
]
dependencies = [
{ name = "filelock", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" },
@ -3669,9 +3809,9 @@ name = "torchvision"
version = "0.23.0"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"sys_platform == 'darwin'",
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
"sys_platform == 'darwin'",
]
dependencies = [
{ name = "numpy", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" },