Merge branch 'main' into fix-main

Commit 101c83ad36: 77 changed files with 2225 additions and 793 deletions
.DS_Store (BIN, vendored)
Binary file not shown.
@@ -37,6 +37,9 @@ AWS_SECRET_ACCESS_KEY=
 # OPTIONAL url for openrag link to langflow in the UI
 LANGFLOW_PUBLIC_URL=
 
+# OPTIONAL: Override host for docling service (for special networking setups)
+# HOST_DOCKER_INTERNAL=host.containers.internal
+
 # Langflow auth
 LANGFLOW_AUTO_LOGIN=False
 LANGFLOW_SUPERUSER=
59  .github/workflows/build-langflow-responses.yml (vendored, deleted)
@@ -1,59 +0,0 @@
-name: Build Langflow Responses Multi-Arch
-
-on:
-  workflow_dispatch:
-
-jobs:
-  build:
-    strategy:
-      fail-fast: false
-      matrix:
-        include:
-          - platform: linux/amd64
-            arch: amd64
-            runs-on: ubuntu-latest
-          - platform: linux/arm64
-            arch: arm64
-            runs-on: [self-hosted, linux, ARM64, langflow-ai-arm64-2]
-
-    runs-on: ${{ matrix.runs-on }}
-
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKER_USERNAME }}
-          password: ${{ secrets.DOCKER_PASSWORD }}
-
-      - name: Build and push langflow (${{ matrix.arch }})
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          file: ./Dockerfile.langflow
-          platforms: ${{ matrix.platform }}
-          push: true
-          tags: phact/langflow:responses-${{ matrix.arch }}
-          cache-from: type=gha,scope=langflow-responses-${{ matrix.arch }}
-          cache-to: type=gha,mode=max,scope=langflow-responses-${{ matrix.arch }}
-
-  manifest:
-    needs: build
-    runs-on: ubuntu-latest
-    steps:
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKER_USERNAME }}
-          password: ${{ secrets.DOCKER_PASSWORD }}
-
-      - name: Create and push multi-arch manifest
-        run: |
-          docker buildx imagetools create -t phact/langflow:responses \
-            phact/langflow:responses-amd64 \
-            phact/langflow:responses-arm64
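The manifest step this deleted workflow performed can be reproduced by hand; a minimal sketch, assuming the per-arch tags have already been pushed:

```bash
# Combine the per-architecture images into one multi-arch tag,
# then verify which platforms the resulting manifest lists.
docker buildx imagetools create -t phact/langflow:responses \
  phact/langflow:responses-amd64 \
  phact/langflow:responses-arm64
docker buildx imagetools inspect phact/langflow:responses
```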
103  .github/workflows/build-multiarch.yml (vendored)
@@ -1,16 +1,95 @@
-name: Build Multi-Architecture Docker Images
+name: Release + Docker Images (multi-arch)
 
 on:
+  push:
+    branches:
+      - main
+    paths:
+      - 'pyproject.toml'
   workflow_dispatch:
-    inputs:
-      update_latest:
-        description: 'Update latest tags (production release)'
-        required: false
-        default: false
-        type: boolean
 
 jobs:
+  build-python-packages:
+    runs-on: ubuntu-latest
+    outputs:
+      skip_release: ${{ steps.version.outputs.skip_release }}
+      version: ${{ steps.version.outputs.version }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.13'
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v3
+
+      - name: Extract version from pyproject.toml
+        id: version
+        run: |
+          VERSION=$(grep '^version = ' pyproject.toml | cut -d '"' -f 2)
+          echo "version=$VERSION" >> $GITHUB_OUTPUT
+          echo "Version: $VERSION"
+
+          # Check if tag already exists
+          if git rev-parse "v$VERSION" >/dev/null 2>&1; then
+            echo "Tag v$VERSION already exists, skipping release"
+            echo "skip_release=true" >> $GITHUB_OUTPUT
+            exit 0
+          fi
+          echo "skip_release=false" >> $GITHUB_OUTPUT
+
+          # Check if version is numeric (e.g., 0.1.16) vs prerelease (e.g., 0.1.16-rc1)
+          if [[ "$VERSION" =~ ^[0-9.-]+$ ]]; then
+            echo "is_prerelease=false" >> $GITHUB_OUTPUT
+            echo "Release type: Production"
+          else
+            echo "is_prerelease=true" >> $GITHUB_OUTPUT
+            echo "Release type: Prerelease"
+          fi
+
+      - name: Build wheel and source distribution
+        if: steps.version.outputs.skip_release != 'true'
+        run: |
+          uv build
+
+      - name: List built artifacts
+        if: steps.version.outputs.skip_release != 'true'
+        run: |
+          ls -la dist/
+          echo "Built artifacts:"
+          for file in dist/*; do
+            echo "  - $(basename $file) ($(stat -c%s $file | numfmt --to=iec-i)B)"
+          done
+
+      - name: Upload build artifacts
+        if: steps.version.outputs.skip_release != 'true'
+        uses: actions/upload-artifact@v4
+        with:
+          name: python-packages
+          path: dist/
+          retention-days: 30
+
+      - name: Create Release
+        if: steps.version.outputs.skip_release != 'true'
+        uses: softprops/action-gh-release@v2
+        with:
+          tag_name: v${{ steps.version.outputs.version }}
+          name: Release ${{ steps.version.outputs.version }}
+          draft: false
+          prerelease: ${{ steps.version.outputs.is_prerelease }}
+          generate_release_notes: true
+          files: |
+            dist/*.whl
+            dist/*.tar.gz
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
   build:
+    needs: build-python-packages
+    if: needs.build-python-packages.outputs.skip_release != 'true'
     strategy:
       fail-fast: false
       matrix:
@@ -106,9 +185,9 @@ jobs:
           cache-to: type=gha,mode=max,scope=${{ matrix.image }}-${{ matrix.arch }}
 
   manifest:
-    needs: build
+    needs: [build, build-python-packages]
     runs-on: ubuntu-latest
-    if: github.event_name != 'pull_request'
+    if: github.event_name != 'pull_request' && needs.build-python-packages.outputs.skip_release != 'true'
     steps:
       - name: Checkout
         uses: actions/checkout@v4
@@ -146,8 +225,8 @@ jobs:
            phact/openrag-opensearch:$VERSION-amd64 \
            phact/openrag-opensearch:$VERSION-arm64
 
-          # Only update latest tags if version is numeric AND checkbox is checked
-          if [[ "$VERSION" =~ ^[0-9.-]+$ ]] && [[ "${{ github.event.inputs.update_latest }}" == "true" ]]; then
+          # Only update latest tags if version is numeric
+          if [[ "$VERSION" =~ ^[0-9.-]+$ ]]; then
            echo "Updating latest tags for production release: $VERSION"
            docker buildx imagetools create -t phact/openrag-backend:latest \
              phact/openrag-backend:$VERSION-amd64 \
@@ -165,5 +244,5 @@ jobs:
            phact/openrag-opensearch:$VERSION-amd64 \
            phact/openrag-opensearch:$VERSION-arm64
          else
-            echo "Skipping latest tags - version: $VERSION, update_latest: ${{ github.event.inputs.update_latest }}"
+            echo "Skipping latest tags - version: $VERSION (not numeric)"
          fi
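The release gate above can be dry-run locally; a hedged sketch that reuses the workflow's own extraction and tag check:

```bash
# Same grep/cut extraction as the workflow's version step.
VERSION=$(grep '^version = ' pyproject.toml | cut -d '"' -f 2)
# git rev-parse succeeds only if the tag already exists in the clone.
if git rev-parse "v$VERSION" >/dev/null 2>&1; then
  echo "Tag v$VERSION exists; the workflow would skip the release"
else
  echo "Tag v$VERSION is new; the workflow would publish a release"
fi
```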
54  .github/workflows/test-integration.yml (vendored, new file)
@@ -0,0 +1,54 @@
+name: Integration Tests
+
+on:
+  pull_request:
+  push:
+    branches:
+      - main
+
+jobs:
+  tests:
+    runs-on: [self-hosted, linux, ARM64, langflow-ai-arm64-40gb]
+    env:
+      # Prefer repository/environment variable first, then secret, then a sane fallback
+      OPENSEARCH_PASSWORD: ${{ vars.OPENSEARCH_PASSWORD || secrets.OPENSEARCH_PASSWORD || 'OpenRag#2025!' }}
+      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+
+    steps:
+      - run: df -h
+      #- name: "node-cleanup"
+      #  run: |
+      #    sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache/CodeQL
+      #    sudo docker image prune --all --force
+      #    sudo docker builder prune -a
+      - run: df -h
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up UV
+        uses: astral-sh/setup-uv@v3
+        with:
+          version: latest
+
+      - name: Python version
+        run: uv python install 3.13
+
+      - name: Install dependencies
+        run: uv sync
+
+      - name: Run integration tests
+        env:
+          OPENSEARCH_HOST: localhost
+          OPENSEARCH_PORT: 9200
+          OPENSEARCH_USERNAME: admin
+          OPENSEARCH_PASSWORD: ${{ env.OPENSEARCH_PASSWORD }}
+          LOG_LEVEL: DEBUG
+          # Force no-auth mode so tests bypass OAuth
+          GOOGLE_OAUTH_CLIENT_ID: ""
+          GOOGLE_OAUTH_CLIENT_SECRET: ""
+          # Disable startup ingest noise unless a test enables it
+          DISABLE_STARTUP_INGEST: "true"
+        run: |
+          make test-ci
+          echo "Keys directory after tests:"
+          ls -la keys/ || echo "No keys directory"
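To approximate this job on a workstation, the same environment can be passed straight to the `make test-ci` target the workflow invokes; the values below are placeholders, not real credentials:

```bash
# Empty OAuth client vars force no-auth mode, as in the workflow.
OPENSEARCH_PASSWORD='OpenRag#2025!' \
OPENAI_API_KEY=sk-your-key-here \
GOOGLE_OAUTH_CLIENT_ID="" \
GOOGLE_OAUTH_CLIENT_SECRET="" \
DISABLE_STARTUP_INGEST=true \
make test-ci
```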
2  .gitignore (vendored)
@@ -18,6 +18,8 @@ wheels/
 1001*.pdf
 *.json
 !flows/*.json
+!src/tui/_assets/flows/*.json
+!src/tui/_assets/flows/components/*.json
 .DS_Store
 
 config/
@@ -11,20 +11,48 @@ Thank you for your interest in contributing to OpenRAG! This guide will help you
 - Python 3.13+ with uv package manager
 - Node.js 18+ and npm
 
-### Environment Setup
+### Set up OpenRAG for development
+
+1. Set up your development environment.
 
 ```bash
-# Clone the repository
-git clone <repository-url>
+# Clone and setup environment
+git clone https://github.com/langflow-ai/openrag.git
 cd openrag
-
-# Setup development environment
 make setup  # Creates .env and installs dependencies
 ```
 
-### Configuration
+2. Configure the `.env` file with your API keys and credentials.
 
-Edit `.env` with your API keys and credentials. See the main README for required environment variables.
+```bash
+# Required
+OPENAI_API_KEY=your_openai_api_key
+OPENSEARCH_PASSWORD=your_secure_password
+LANGFLOW_SUPERUSER=admin
+LANGFLOW_SUPERUSER_PASSWORD=your_secure_password
+LANGFLOW_CHAT_FLOW_ID=your_chat_flow_id
+LANGFLOW_INGEST_FLOW_ID=your_ingest_flow_id
+NUDGES_FLOW_ID=your_nudges_flow_id
+```
+
+For extended configuration, including ingestion and optional variables, see [docs/reference/configuration.mdx](docs/docs/reference/configuration.mdx).
+
+3. Start OpenRAG.
+
+```bash
+# Full stack with GPU support
+make dev
+
+# Or CPU only
+make dev-cpu
+```
+
+Access the services:
+- **Frontend**: http://localhost:3000
+- **Backend API**: http://localhost:8000
+- **Langflow**: http://localhost:7860
+- **OpenSearch**: http://localhost:9200
+- **OpenSearch Dashboards**: http://localhost:5601
 
 ## 🔧 Development Commands
@@ -1,4 +1,4 @@
-FROM langflowai/langflow-nightly:1.6.3.dev0
+FROM langflowai/langflow-nightly:1.6.3.dev1
 
 EXPOSE 7860
1  MANIFEST.in (new file)
@@ -0,0 +1 @@
+recursive-include src/tui/_assets *
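One way to confirm the new rule takes effect, assuming a setuptools-style backend where `MANIFEST.in` governs sdist contents:

```bash
# Build, then check the source distribution for the bundled TUI assets.
uv build
tar -tzf dist/*.tar.gz | grep 'src/tui/_assets' || echo "assets not bundled"
```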
147  Makefile
@@ -1,7 +1,17 @@
 # OpenRAG Development Makefile
 # Provides easy commands for development workflow
 
-.PHONY: help dev dev-cpu dev-local infra stop clean build logs shell-backend shell-frontend install test backend frontend install-be install-fe build-be build-fe logs-be logs-fe logs-lf logs-os shell-be shell-lf shell-os restart status health db-reset flow-upload quick setup
+# Load variables from .env if present so `make` commands pick them up
+ifneq (,$(wildcard .env))
+include .env
+# Export all simple KEY=VALUE pairs to the environment for child processes
+export $(shell sed -n 's/^\([A-Za-z_][A-Za-z0-9_]*\)=.*/\1/p' .env)
+endif
+
+.PHONY: help dev dev-cpu dev-local infra stop clean build logs shell-backend shell-frontend install \
+	test test-integration test-ci \
+	backend frontend install-be install-fe build-be build-fe logs-be logs-fe logs-lf logs-os \
+	shell-be shell-lf shell-os restart status health db-reset flow-upload quick setup
 
 # Default target
 help:
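For illustration, what that `sed` pattern feeds to `export`; a sketch against a sample file (names here are examples only). Note that `include .env` supplies the values, while the `export` line only marks those names for export to child processes:

```bash
# Given a .env like this...
printf 'OPENSEARCH_PASSWORD=secret\nOPENAI_API_KEY=sk-123\n# a comment\n' > /tmp/example.env
# ...the pattern prints only well-formed KEY names, skipping comments:
sed -n 's/^\([A-Za-z_][A-Za-z0-9_]*\)=.*/\1/p' /tmp/example.env
# Output:
# OPENSEARCH_PASSWORD
# OPENAI_API_KEY
```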
@@ -32,14 +42,16 @@ help:
 	@echo "  shell-lf    - Shell into langflow container"
 	@echo ""
 	@echo "Testing:"
-	@echo "  test        - Run backend tests"
+	@echo "  test             - Run all backend tests"
+	@echo "  test-integration - Run integration tests (requires infra)"
+	@echo "  test-ci          - Start infra, run integration tests, tear down"
 	@echo "  lint        - Run linting checks"
 	@echo ""
 
 # Development environments
 dev:
 	@echo "🚀 Starting OpenRAG with GPU support..."
-	docker-compose up -d
+	docker compose up -d
 	@echo "✅ Services started!"
 	@echo "  Backend:  http://localhost:8000"
 	@echo "  Frontend: http://localhost:3000"
@@ -49,7 +61,7 @@ dev:
 
 dev-cpu:
 	@echo "🚀 Starting OpenRAG with CPU only..."
-	docker-compose -f docker-compose-cpu.yml up -d
+	docker compose -f docker-compose-cpu.yml up -d
 	@echo "✅ Services started!"
 	@echo "  Backend:  http://localhost:8000"
 	@echo "  Frontend: http://localhost:3000"
@@ -59,7 +71,7 @@ dev-cpu:
 
 dev-local:
 	@echo "🔧 Starting infrastructure only (for local development)..."
-	docker-compose up -d opensearch dashboards langflow
+	docker compose up -d opensearch dashboards langflow
 	@echo "✅ Infrastructure started!"
 	@echo "  Langflow:   http://localhost:7860"
 	@echo "  OpenSearch: http://localhost:9200"
@@ -69,7 +81,7 @@ dev-local:
 
 infra:
 	@echo "🔧 Starting infrastructure services only..."
-	docker-compose up -d opensearch dashboards langflow
+	docker compose up -d opensearch dashboards langflow
 	@echo "✅ Infrastructure services started!"
 	@echo "  Langflow:   http://localhost:7860"
 	@echo "  OpenSearch: http://localhost:9200"
@@ -86,15 +98,15 @@ infra-cpu:
 # Container management
 stop:
 	@echo "🛑 Stopping all containers..."
-	docker-compose down
-	docker-compose -f docker-compose-cpu.yml down 2>/dev/null || true
+	docker compose down
+	docker compose -f docker-compose-cpu.yml down 2>/dev/null || true
 
 restart: stop dev
 
 clean: stop
 	@echo "🧹 Cleaning up containers and volumes..."
-	docker-compose down -v --remove-orphans
-	docker-compose -f docker-compose-cpu.yml down -v --remove-orphans 2>/dev/null || true
+	docker compose down -v --remove-orphans
+	docker compose -f docker-compose-cpu.yml down -v --remove-orphans 2>/dev/null || true
 	docker system prune -f
 
 # Local development
@@ -114,7 +126,7 @@ install: install-be install-fe
 
 install-be:
 	@echo "📦 Installing backend dependencies..."
-	uv sync
+	uv sync --extra torch-cu128
 
 install-fe:
 	@echo "📦 Installing frontend dependencies..."
@@ -123,7 +135,7 @@ install-fe:
 # Building
 build:
 	@echo "🔨 Building Docker images..."
-	docker-compose build
+	docker compose build
 
 build-be:
 	@echo "🔨 Building backend image..."
@@ -136,41 +148,124 @@ build-fe:
 # Logging and debugging
 logs:
 	@echo "📋 Showing all container logs..."
-	docker-compose logs -f
+	docker compose logs -f
 
 logs-be:
 	@echo "📋 Showing backend logs..."
-	docker-compose logs -f openrag-backend
+	docker compose logs -f openrag-backend
 
 logs-fe:
 	@echo "📋 Showing frontend logs..."
-	docker-compose logs -f openrag-frontend
+	docker compose logs -f openrag-frontend
 
 logs-lf:
 	@echo "📋 Showing langflow logs..."
-	docker-compose logs -f langflow
+	docker compose logs -f langflow
 
 logs-os:
 	@echo "📋 Showing opensearch logs..."
-	docker-compose logs -f opensearch
+	docker compose logs -f opensearch
 
 # Shell access
 shell-be:
 	@echo "🐚 Opening shell in backend container..."
-	docker-compose exec openrag-backend /bin/bash
+	docker compose exec openrag-backend /bin/bash
 
 shell-lf:
 	@echo "🐚 Opening shell in langflow container..."
-	docker-compose exec langflow /bin/bash
+	docker compose exec langflow /bin/bash
 
 shell-os:
 	@echo "🐚 Opening shell in opensearch container..."
-	docker-compose exec opensearch /bin/bash
+	docker compose exec opensearch /bin/bash
 
 # Testing and quality
 test:
-	@echo "🧪 Running backend tests..."
-	uv run pytest
+	@echo "🧪 Running all backend tests..."
+	uv run pytest tests/ -v
+
+test-integration:
+	@echo "🧪 Running integration tests (requires infrastructure)..."
+	@echo "💡 Make sure to run 'make infra' first!"
+	uv run pytest tests/integration/ -v
+
+# CI-friendly integration test target: brings up infra, waits, runs tests, tears down
+test-ci:
+	@set -e; \
+	echo "Installing test dependencies..."; \
+	uv sync --group dev; \
+	if [ ! -f keys/private_key.pem ]; then \
+		echo "Generating RSA keys for JWT signing..."; \
+		uv run python -c "from src.main import generate_jwt_keys; generate_jwt_keys()"; \
+	else \
+		echo "RSA keys already exist, ensuring correct permissions..."; \
+		chmod 600 keys/private_key.pem 2>/dev/null || true; \
+		chmod 644 keys/public_key.pem 2>/dev/null || true; \
+	fi; \
+	echo "Cleaning up old containers and volumes..."; \
+	docker compose -f docker-compose-cpu.yml down -v 2>/dev/null || true; \
+	echo "Pulling latest images..."; \
+	docker compose -f docker-compose-cpu.yml pull; \
+	echo "Starting infra (OpenSearch + Dashboards + Langflow) with CPU containers"; \
+	docker compose -f docker-compose-cpu.yml up -d opensearch dashboards langflow; \
+	echo "Starting docling-serve..."; \
+	DOCLING_ENDPOINT=$$(uv run python scripts/docling_ctl.py start --port 5001 | grep "Endpoint:" | awk '{print $$2}'); \
+	echo "Docling-serve started at $$DOCLING_ENDPOINT"; \
+	echo "Waiting for backend OIDC endpoint..."; \
+	for i in $$(seq 1 60); do \
+		docker exec openrag-backend curl -s http://localhost:8000/.well-known/openid-configuration >/dev/null 2>&1 && break || sleep 2; \
+	done; \
+	echo "Waiting for OpenSearch security config to be fully applied..."; \
+	for i in $$(seq 1 60); do \
+		if docker logs os 2>&1 | grep -q "Security configuration applied successfully"; then \
+			echo "✓ Security configuration applied"; \
+			break; \
+		fi; \
+		sleep 2; \
+	done; \
+	echo "Verifying OIDC authenticator is active in OpenSearch..."; \
+	AUTHC_CONFIG=$$(curl -k -s -u admin:$${OPENSEARCH_PASSWORD} https://localhost:9200/_opendistro/_security/api/securityconfig 2>/dev/null); \
+	if echo "$$AUTHC_CONFIG" | grep -q "openid_auth_domain"; then \
+		echo "✓ OIDC authenticator configured"; \
+		echo "$$AUTHC_CONFIG" | grep -A 5 "openid_auth_domain"; \
+	else \
+		echo "✗ OIDC authenticator NOT found in security config!"; \
+		echo "Security config:"; \
+		echo "$$AUTHC_CONFIG" | head -50; \
+		exit 1; \
+	fi; \
+	echo "Waiting for Langflow..."; \
+	for i in $$(seq 1 60); do \
+		curl -s http://localhost:7860/ >/dev/null 2>&1 && break || sleep 2; \
+	done; \
+	echo "Waiting for docling-serve at $$DOCLING_ENDPOINT..."; \
+	for i in $$(seq 1 60); do \
+		curl -s $${DOCLING_ENDPOINT}/health >/dev/null 2>&1 && break || sleep 2; \
+	done; \
+	echo "Running integration tests"; \
+	LOG_LEVEL=$${LOG_LEVEL:-DEBUG} \
+	GOOGLE_OAUTH_CLIENT_ID="" \
+	GOOGLE_OAUTH_CLIENT_SECRET="" \
+	OPENSEARCH_HOST=localhost OPENSEARCH_PORT=9200 \
+	OPENSEARCH_USERNAME=admin OPENSEARCH_PASSWORD=$${OPENSEARCH_PASSWORD} \
+	DISABLE_STARTUP_INGEST=$${DISABLE_STARTUP_INGEST:-true} \
+	uv run pytest tests/integration -vv -s -o log_cli=true --log-cli-level=DEBUG; \
+	TEST_RESULT=$$?; \
+	echo ""; \
+	echo "=== Post-test JWT diagnostics ==="; \
+	echo "Generating test JWT token..."; \
+	TEST_TOKEN=$$(uv run python -c "from src.session_manager import SessionManager, AnonymousUser; sm = SessionManager('test'); print(sm.create_jwt_token(AnonymousUser()))" 2>/dev/null || echo ""); \
+	if [ -n "$$TEST_TOKEN" ]; then \
+		echo "Testing JWT against OpenSearch..."; \
+		HTTP_CODE=$$(curl -k -s -w "%{http_code}" -o /tmp/os_diag.txt -H "Authorization: Bearer $$TEST_TOKEN" -H "Content-Type: application/json" https://localhost:9200/documents/_search -d '{"query":{"match_all":{}}}' 2>&1); \
+		echo "HTTP $$HTTP_CODE: $$(cat /tmp/os_diag.txt | head -c 150)"; \
+	fi; \
+	echo "================================="; \
+	echo ""; \
+	echo "Tearing down infra"; \
+	uv run python scripts/docling_ctl.py stop || true; \
+	docker compose down -v || true; \
+	exit $$TEST_RESULT
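Typical use of the new targets, per the help text added above; `test-ci` is self-contained, while `test-integration` assumes infrastructure is already up:

```bash
make test-ci            # start infra, run integration tests, tear down
make infra              # or bring up OpenSearch, Dashboards, and Langflow first,
make test-integration   # then run the integration suite against them
```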
 
 lint:
 	@echo "🔍 Running linting checks..."
@@ -180,19 +275,19 @@ lint:
 # Service status
 status:
 	@echo "📊 Container status:"
-	@docker-compose ps 2>/dev/null || echo "No containers running"
+	@docker compose ps 2>/dev/null || echo "No containers running"
 
 health:
 	@echo "🏥 Health check:"
 	@echo "Backend: $$(curl -s http://localhost:8000/health 2>/dev/null || echo 'Not responding')"
 	@echo "Langflow: $$(curl -s http://localhost:7860/health 2>/dev/null || echo 'Not responding')"
-	@echo "OpenSearch: $$(curl -s -k -u admin:$(shell grep OPENSEARCH_PASSWORD .env | cut -d= -f2) https://localhost:9200 2>/dev/null | jq -r .tagline 2>/dev/null || echo 'Not responding')"
+	@echo "OpenSearch: $$(curl -s -k -u admin:$${OPENSEARCH_PASSWORD} https://localhost:9200 2>/dev/null | jq -r .tagline 2>/dev/null || echo 'Not responding')"
 
 # Database operations
 db-reset:
 	@echo "🗄️ Resetting OpenSearch indices..."
-	curl -X DELETE "http://localhost:9200/documents" -u admin:$$(grep OPENSEARCH_PASSWORD .env | cut -d= -f2) || true
-	curl -X DELETE "http://localhost:9200/knowledge_filters" -u admin:$$(grep OPENSEARCH_PASSWORD .env | cut -d= -f2) || true
+	curl -X DELETE "http://localhost:9200/documents" -u admin:$${OPENSEARCH_PASSWORD} || true
+	curl -X DELETE "http://localhost:9200/knowledge_filters" -u admin:$${OPENSEARCH_PASSWORD} || true
 	@echo "Indices reset. Restart backend to recreate."
 
 # Flow management
208  README.md
@@ -2,20 +2,6 @@
 # OpenRAG
 
-</div>
-<div align="center">
-  <a href="#quick-start" style="color: #0366d6;">🚀 Quick Start</a> |
-  <a href="#tui-interface" style="color: #0366d6;">💻 TUI Interface</a> |
-  <a href="#docker-deployment" style="color: #0366d6;">🐳 Docker Deployment</a> |
-  <a href="#development" style="color: #0366d6;">⚙️ Development</a> |
-  <a href="#troubleshooting" style="color: #0366d6;">🔧 Troubleshooting</a>
-</div>
-
-
-OpenRAG is a comprehensive Retrieval-Augmented Generation platform that enables intelligent document search and AI-powered conversations. Users can upload, process, and query documents through a chat interface backed by large language models and semantic search capabilities. The system utilizes Langflow for document ingestion, retrieval workflows, and intelligent nudges, providing a seamless RAG experience. Built with Starlette, Next.js, OpenSearch, and Langflow integration. [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/phact/openrag)
-
-
 <div align="center">
   <a href="https://github.com/langflow-ai/langflow"><img src="https://img.shields.io/badge/Langflow-1C1C1E?style=flat&logo=langflow" alt="Langflow"></a>
@@ -24,144 +10,124 @@ OpenRAG is a comprehensive Retrieval-Augmented Generation platform that enables
   <a href="https://github.com/encode/starlette"><img src="https://img.shields.io/badge/Starlette-009639?style=flat&logo=fastapi&logoColor=white" alt="Starlette"></a>
   <a href="https://github.com/vercel/next.js"><img src="https://img.shields.io/badge/Next.js-000000?style=flat&logo=next.js&logoColor=white" alt="Next.js"></a>
+  <a href="https://deepwiki.com/phact/openrag"><img src="https://deepwiki.com/badge.svg" alt="Ask DeepWiki"></a>
 </div>
 
+OpenRAG is a comprehensive Retrieval-Augmented Generation platform that enables intelligent document search and AI-powered conversations. Users can upload, process, and query documents through a chat interface backed by large language models and semantic search capabilities. The system utilizes Langflow for document ingestion, retrieval workflows, and intelligent nudges, providing a seamless RAG experience. Built with Starlette, Next.js, OpenSearch, and Langflow integration.
+
+</div>
+<div align="center">
+  <a href="#quickstart" style="color: #0366d6;">Quickstart</a> |
+  <a href="#tui-interface" style="color: #0366d6;">TUI Interface</a> |
+  <a href="#docker-deployment" style="color: #0366d6;">Docker Deployment</a> |
+  <a href="#development" style="color: #0366d6;">Development</a> |
+  <a href="#troubleshooting" style="color: #0366d6;">Troubleshooting</a>
+</div>
+
+## Quickstart
+
+Use the OpenRAG Terminal User Interface (TUI) to manage your OpenRAG installation without complex command-line operations.
+
+To launch OpenRAG with the TUI, do the following:
+
-## 🚀 Quick Start
+1. Clone the OpenRAG repository.
+
+```bash
+git clone https://github.com/langflow-ai/openrag.git
+cd openrag
+```
+
-### Prerequisites
+2. To start the TUI, from the repository root, run:
+
+```bash
+# Install dependencies first
+uv sync
+
-- Docker or Podman with Compose installed
+# Launch the TUI
-- Make (for development commands)
+uv run openrag
+```
+
-### 1. Environment Setup
+The TUI opens and guides you through OpenRAG setup.
 
-```bash
+For the full TUI guide, see [TUI](docs/docs/get-started/tui.mdx).
-# Clone and setup environment
-git clone https://github.com/langflow-ai/openrag.git
-cd openrag
-make setup  # Creates .env and installs dependencies
-```
 
-### 2. Configure Environment
+## Docker Deployment
 
-Edit `.env` with your API keys and credentials:
+If you prefer to use Docker to run OpenRAG, the repository includes two Docker Compose `.yml` files.
+They deploy the same applications and containers, but to different environments.
 
-```bash
+- [`docker-compose.yml`](https://github.com/langflow-ai/openrag/blob/main/docker-compose.yml) is an OpenRAG deployment for environments with GPU support. GPU support requires an NVIDIA GPU with CUDA support and compatible NVIDIA drivers installed on the OpenRAG host machine.
-# Required
-OPENAI_API_KEY=your_openai_api_key
-OPENSEARCH_PASSWORD=your_secure_password
-LANGFLOW_SUPERUSER=admin
-LANGFLOW_SUPERUSER_PASSWORD=your_secure_password
-LANGFLOW_CHAT_FLOW_ID=your_chat_flow_id
-LANGFLOW_INGEST_FLOW_ID=your_ingest_flow_id
-NUDGES_FLOW_ID=your_nudges_flow_id
-```
-See extended configuration, including ingestion and optional variables: [docs/reference/configuration.md](docs/docs/reference/configuration.md)
-### 3. Start OpenRAG
-
-```bash
+- [`docker-compose-cpu.yml`](https://github.com/langflow-ai/openrag/blob/main/docker-compose-cpu.yml) is a CPU-only version of OpenRAG for systems without GPU support. Use this Docker Compose file for environments where GPU drivers aren't available.
-# Full stack with GPU support
-make dev
-
-# Or CPU only
+Both Docker deployments depend on `docling serve` to be running on port `5001` on the host machine. This enables [Mac MLX](https://opensource.apple.com/projects/mlx/) support for document processing. Installing OpenRAG with the TUI starts `docling serve` automatically, but for a Docker deployment you must manually start the `docling serve` process.
-make dev-cpu
-```
 
-Access the services:
+To deploy OpenRAG with Docker:
-- **Frontend**: http://localhost:3000
-- **Backend API**: http://localhost:8000
-- **Langflow**: http://localhost:7860
-- **OpenSearch**: http://localhost:9200
-- **OpenSearch Dashboards**: http://localhost:5601
 
-## 🖥️ TUI Interface
+1. Clone the OpenRAG repository.
+
+```bash
+git clone https://github.com/langflow-ai/openrag.git
+cd openrag
+```
+
-OpenRAG includes a powerful Terminal User Interface (TUI) for easy setup, configuration, and monitoring. The TUI provides a user-friendly way to manage your OpenRAG installation without complex command-line operations.
+2. Install dependencies.
+
+```bash
+uv sync
+```
+
-![…](…)
+3. Start `docling serve` on the host machine.
+
+```bash
+uv run python scripts/docling_ctl.py start --port 5001
+```
+
-### Launching the TUI
+4. Confirm `docling serve` is running.
+
+```
+uv run python scripts/docling_ctl.py status
+```
+
-```bash
+Successful result:
-# Install dependencies first
+```bash
-uv sync
+Status: running
+Endpoint: http://127.0.0.1:5001
+Docs: http://127.0.0.1:5001/docs
+PID: 27746
+```
+
-# Launch the TUI
+5. Build and start all services.
-uv run openrag
-```
 
-### TUI Features
+For the GPU-accelerated deployment, run:
+
+```bash
+docker compose build
+docker compose up -d
+```
+
-See the full TUI guide for features, navigation, and benefits: [docs/get-started/tui.mdx](docs/docs/get-started/tui.mdx)
+For environments without GPU support, run:
+
+```bash
+docker compose -f docker-compose-cpu.yml up -d
+```
+
+The OpenRAG Docker Compose file starts five containers:
+
+| Container Name | Default Address | Purpose |
+|---|---|---|
+| OpenRAG Backend | http://localhost:8000 | FastAPI server and core functionality. |
+| OpenRAG Frontend | http://localhost:3000 | React web interface for users. |
+| Langflow | http://localhost:7860 | AI workflow engine and flow management. |
+| OpenSearch | http://localhost:9200 | Vector database for document storage. |
+| OpenSearch Dashboards | http://localhost:5601 | Database administration interface. |
+
+6. Access the OpenRAG application at `http://localhost:3000` and continue with the [Quickstart](docs/docs/get-started/quickstart.mdx).
+
+To stop `docling serve`, run:
+
-## 🐳 Docker Deployment
+```bash
+uv run python scripts/docling_ctl.py stop
+```
+
-### Standard Deployment
+For more information, see [Deploy with Docker](docs/docs/get-started/docker.mdx).
 
-```bash
+## Troubleshooting
-# Build and start all services
-docker compose build
-docker compose up -d
-```
 
-### CPU-Only Deployment
+For common issues and fixes, see [Troubleshoot](docs/docs/support/troubleshoot.mdx).
 
-For environments without GPU support:
+## Development
 
-```bash
+For developers wanting to contribute to OpenRAG or set up a development environment, see [CONTRIBUTING.md](CONTRIBUTING.md).
-docker compose -f docker-compose-cpu.yml up -d
-```
-
-More deployment commands and tips: [docs/get-started/docker.mdx](docs/docs/get-started/docker.mdx)
-
-## 🔧 Troubleshooting
-
-### Podman on macOS
-
-If using Podman on macOS, you may need to increase VM memory:
-
-```bash
-podman machine stop
-podman machine rm
-podman machine init --memory 8192  # 8 GB example
-podman machine start
-```
-
-### Common Issues
-
-See common issues and fixes: [docs/support/troubleshoot.mdx](docs/docs/reference/troubleshoot.mdx)
-
-## 🛠️ Development
-
-For developers wanting to contribute to OpenRAG or set up a development environment, please see our comprehensive development guide:
-
-**[📚 See CONTRIBUTING.md for detailed development instructions](CONTRIBUTING.md)**
-
-The contributing guide includes:
-- Complete development environment setup
-- Local development workflows
-- Testing and debugging procedures
-- Code style guidelines
-- Architecture overview
-- Pull request guidelines
-
-### Quick Development Commands
-
-```bash
-make help      # See all available commands
-make setup     # Initial development setup
-make infra     # Start infrastructure services
-make backend   # Run backend locally
-make frontend  # Run frontend locally
-```
@ -74,7 +74,7 @@ services:
|
||||||
volumes:
|
volumes:
|
||||||
- ./documents:/app/documents:Z
|
- ./documents:/app/documents:Z
|
||||||
- ./keys:/app/keys:Z
|
- ./keys:/app/keys:Z
|
||||||
- ./flows:/app/flows:Z
|
- ./flows:/app/flows:U,z
|
||||||
|
|
||||||
openrag-frontend:
|
openrag-frontend:
|
||||||
image: phact/openrag-frontend:${OPENRAG_VERSION:-latest}
|
image: phact/openrag-frontend:${OPENRAG_VERSION:-latest}
|
||||||
|
|
@ -91,7 +91,7 @@ services:
|
||||||
|
|
||||||
langflow:
|
langflow:
|
||||||
volumes:
|
volumes:
|
||||||
- ./flows:/app/flows:Z
|
- ./flows:/app/flows:U,z
|
||||||
image: phact/openrag-langflow:${LANGFLOW_VERSION:-latest}
|
image: phact/openrag-langflow:${LANGFLOW_VERSION:-latest}
|
||||||
# build:
|
# build:
|
||||||
# context: .
|
# context: .
|
||||||
|
|
@ -108,6 +108,7 @@ services:
|
||||||
- OWNER_NAME=None
|
- OWNER_NAME=None
|
||||||
- OWNER_EMAIL=None
|
- OWNER_EMAIL=None
|
||||||
- CONNECTOR_TYPE=system
|
- CONNECTOR_TYPE=system
|
||||||
|
- CONNECTOR_TYPE_URL=url
|
||||||
- OPENRAG-QUERY-FILTER="{}"
|
- OPENRAG-QUERY-FILTER="{}"
|
||||||
- OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
|
- OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
|
||||||
- FILENAME=None
|
- FILENAME=None
|
||||||
|
|
|
||||||
|
|
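Context on the mount-flag change: `z` applies a shared SELinux relabel (the old `Z` is a private relabel), and `U`, a Podman option, additionally chowns the mounted tree to the container user, which matters for rootless setups. A standalone sketch of the same flags outside Compose:

```bash
# Podman: shared SELinux label plus ownership fix-up on ./flows.
podman run --rm -v ./flows:/app/flows:U,z alpine ls -ln /app/flows
```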
@@ -73,7 +73,7 @@ services:
     volumes:
       - ./documents:/app/documents:Z
       - ./keys:/app/keys:Z
-      - ./flows:/app/flows:z
+      - ./flows:/app/flows:U,z
     gpus: all
 
   openrag-frontend:
@@ -81,7 +81,6 @@ services:
     # build:
     #   context: .
     #   dockerfile: Dockerfile.frontend
-    #dockerfile: Dockerfile.frontend
     container_name: openrag-frontend
     depends_on:
       - openrag-backend
@@ -92,7 +91,7 @@ services:
   langflow:
     volumes:
-      - ./flows:/app/flows:z
+      - ./flows:/app/flows:U,z
     image: phact/openrag-langflow:${LANGFLOW_VERSION:-latest}
     # build:
     #   context: .
@@ -109,6 +108,7 @@ services:
      - OWNER_NAME=None
      - OWNER_EMAIL=None
      - CONNECTOR_TYPE=system
+      - CONNECTOR_TYPE_URL=url
      - OPENRAG-QUERY-FILTER="{}"
      - FILENAME=None
      - MIMETYPE=None
@@ -1,4 +0,0 @@
-:::info
-OpenRAG is currently in public preview.
-Development is ongoing, and the features and functionality are subject to change.
-:::
@@ -7,9 +7,6 @@ import Icon from "@site/src/components/icon/icon";
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 import PartialModifyFlows from '@site/docs/_partial-modify-flows.mdx';
-import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
-
-<PartialExternalPreview />
 
 OpenRAG leverages Langflow's Agent component to power the OpenRAG OpenSearch Agent flow.
 
@@ -34,11 +31,11 @@ In an agentic context, tools are functions that the agent can run to perform tas
 
 </details>
 
-## Use the OpenRAG OpenSearch Agent flow
+## Use the OpenRAG OpenSearch Agent flow {#flow}
 
 If you've chatted with your knowledge in OpenRAG, you've already experienced the OpenRAG OpenSearch Agent chat flow.
 To switch OpenRAG over to the [Langflow visual editor](https://docs.langflow.org/concepts-overview) and view the OpenRAG OpenSearch Agentflow, click <Icon name="Settings2" aria-hidden="true"/> **Settings**, and then click **Edit in Langflow**.
-This flow contains seven components connected together to chat with your data:
+This flow contains eight components connected together to chat with your data:
 
 * The [**Agent** component](https://docs.langflow.org/agents) orchestrates the entire flow by deciding when to search the knowledge base, how to formulate search queries, and how to combine retrieved information with the user's question to generate a comprehensive response.
 The **Agent** behaves according to the prompt in the **Agent Instructions** field.
@@ -49,6 +46,7 @@ The **Agent** behaves according to the prompt in the **Agent Instructions** fiel
 * The [**Text Input** component](https://docs.langflow.org/components-io) is populated with the global variable `OPENRAG-QUERY-FILTER`.
 This filter is the [Knowledge filter](/knowledge#create-knowledge-filters), and filters which knowledge sources to search through.
 * The **Agent** component's Output port is connected to the [**Chat Output** component](https://docs.langflow.org/components-io), which returns the final response to the user or application.
+* An [**MCP Tools** component](https://docs.langflow.org/mcp-client) is connected to the Agent's **Tools** port. This component calls the [OpenSearch URL Ingestion flow](/ingestion#url-flow), which Langflow uses as an MCP server to fetch content from URLs and store it in OpenSearch.
 
 <PartialModifyFlows />
@@ -7,9 +7,6 @@ import Icon from "@site/src/components/icon/icon";
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 import PartialModifyFlows from '@site/docs/_partial-modify-flows.mdx';
-import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
-
-<PartialExternalPreview />
 
 OpenRAG uses [Docling](https://docling-project.github.io/docling/) for its document ingestion pipeline.
 More specifically, OpenRAG uses [Docling Serve](https://github.com/docling-project/docling-serve), which starts a `docling-serve` process on your local machine and runs Docling ingestion through an API service.
@@ -51,3 +48,30 @@ If you want to use OpenRAG's built-in pipeline instead of Docling serve, set `DI
 The built-in pipeline still uses the Docling processor, but uses it directly without the Docling Serve API.
 
 For more information, see [`processors.py` in the OpenRAG repository](https://github.com/langflow-ai/openrag/blob/main/src/models/processors.py#L58).
+
+## Knowledge ingestion flows
+
+[Flows](https://docs.langflow.org/concepts-overview) in Langflow are functional representations of application workflows, with multiple [component](https://docs.langflow.org/concepts-components) nodes connected as single steps in a workflow.
+
+The **OpenSearch Ingestion** flow is the default knowledge ingestion flow in OpenRAG: when you **Add Knowledge** in OpenRAG, you run the OpenSearch Ingestion flow in the background. The flow ingests documents using **Docling Serve** to import and process documents.
+
+This flow contains ten components connected together to process and store documents in your knowledge base.
+
+* The [**Docling Serve** component](https://docs.langflow.org/bundles-docling) processes input documents by connecting to your instance of Docling Serve.
+* The [**Export DoclingDocument** component](https://docs.langflow.org/components-docling) exports the processed DoclingDocument to Markdown format with image export mode set to placeholder. This conversion turns the structured document data into a standardized format for further processing.
+* Three [**DataFrame Operations** components](https://docs.langflow.org/components-processing#dataframe-operations) sequentially add the metadata columns `filename`, `file_size`, and `mimetype` to the document data.
+* The [**Split Text** component](https://docs.langflow.org/components-processing#split-text) splits the processed text into chunks with a chunk size of 1000 characters and an overlap of 200 characters.
+* Four **Secret Input** components provide secure access to configuration variables: `CONNECTOR_TYPE`, `OWNER`, `OWNER_EMAIL`, and `OWNER_NAME`. These are runtime variables populated from OAuth login.
+* The **Create Data** component combines the secret inputs into a structured data object that is associated with the document embeddings.
+* The [**Embedding Model** component](https://docs.langflow.org/components-embedding-models) generates vector embeddings using OpenAI's `text-embedding-3-small` model. The embedding model is selected at application onboarding and cannot be changed.
+* The [**OpenSearch** component](https://docs.langflow.org/bundles-elastic#opensearch) stores the processed documents and their embeddings in the `documents` index at `https://opensearch:9200`. By default, the component authenticates with a JWT token, but you can also select `basic` auth mode and enter your OpenSearch admin username and password.
+
+<PartialModifyFlows />
+
+### OpenSearch URL Ingestion flow {#url-flow}
+
+An additional knowledge ingestion flow is included in OpenRAG, where it is used as an MCP tool by the [**OpenSearch Agent flow**](/agents#flow).
+The agent calls this component to fetch web content, and the results are ingested into OpenSearch.
+
+For more on using MCP clients in Langflow, see [MCP clients](https://docs.langflow.org/mcp-client).\
+To connect additional MCP servers to the MCP client, see [Connect to MCP servers from your application](https://docs.langflow.org/mcp-tutorial).
@ -7,17 +7,23 @@ import Icon from "@site/src/components/icon/icon";
|
||||||
import Tabs from '@theme/Tabs';
|
import Tabs from '@theme/Tabs';
|
||||||
import TabItem from '@theme/TabItem';
|
import TabItem from '@theme/TabItem';
|
||||||
import PartialModifyFlows from '@site/docs/_partial-modify-flows.mdx';
|
import PartialModifyFlows from '@site/docs/_partial-modify-flows.mdx';
|
||||||
import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
|
|
||||||
|
|
||||||
<PartialExternalPreview />
|
|
||||||
|
|
||||||
OpenRAG uses [OpenSearch](https://docs.opensearch.org/latest/) for its vector-backed knowledge store.
|
OpenRAG uses [OpenSearch](https://docs.opensearch.org/latest/) for its vector-backed knowledge store.
|
||||||
This is a specialized database for storing and retrieving embeddings, which helps your Agent efficiently find relevant information.
|
This is a specialized database for storing and retrieving embeddings, which helps your Agent efficiently find relevant information.
OpenSearch provides powerful hybrid search capabilities with enterprise-grade security and multi-tenancy support.

+## Authentication and document access {#auth}
+
+OpenRAG supports two authentication modes based on how you [install OpenRAG](/install), and which mode you choose affects document access.
+
+**No-auth mode (Basic Setup)**: This mode uses a single anonymous JWT token for OpenSearch authentication, so documents uploaded to the `documents` index by one user are visible to all other users on the OpenRAG server.
+
+**OAuth mode (Advanced Setup)**: Each OpenRAG user is granted a JWT token, and each document is tagged with user ownership. Documents are filtered by user ownership, ensuring users only see documents they uploaded or have access to.
+
## Ingest knowledge

OpenRAG supports knowledge ingestion through direct file uploads and OAuth connectors.

+To configure the knowledge ingestion pipeline parameters, see [Docling Ingestion](/ingestion).
+
### Direct file ingestion

@@ -78,18 +84,6 @@ You can select multiples.
The ingestion process may take some time, depending on the size of your documents.
4. When ingestion is complete, your documents are available in the Knowledge screen.
-
-### Sync cloud connectors
-
-Your connected data sources are found in the <Icon name="Settings2" aria-hidden="true"/> **Settings** page.
-
-When you click **Sync Now** for a connected cloud service like Google Drive, OpenRAG scans your connected Google Drive account to find files that match your sync criteria. Sync criteria are controlled in **Sync Settings** on the same page. You can sync all files, or select a maximum number of files to sync.
-
-For each file found, OpenRAG downloads, converts, and embeds the processed content into OpenSearch.
-
-You can monitor the sync progress in the <Icon name="Bell" aria-hidden="true"/> **Tasks** sidebar.
-
-Once processing is complete, the synced documents become available in your knowledge base and can be searched through the chat interface or Knowledge page.
-
## Explore knowledge

The **Knowledge** page lists the documents OpenRAG has ingested into the OpenSearch vector database's `documents` index.

@@ -101,10 +95,6 @@ Documents are processed with the default **Knowledge Ingest** flow, so if you wa
<PartialModifyFlows />
-
-### Knowledge ingestion settings
-
-To configure the knowledge ingestion pipeline parameters, see [Docling Ingestion](/ingestion).
-
## Create knowledge filters

OpenRAG includes a knowledge filter system for organizing and managing document collections.
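The ownership filtering added above can be pictured as a plain OpenSearch boolean query. The sketch below is illustrative rather than OpenRAG's actual implementation: the `documents` index and the `owner_email` metadata key come from this change set, while the connection settings, the text field name, and the example user are assumptions.

import os

from opensearchpy import OpenSearch  # assumes the opensearch-py client

client = OpenSearch(
    hosts=[{"host": "localhost", "port": 9200}],
    http_auth=("admin", os.environ["OPENSEARCH_PASSWORD"]),  # placeholder credentials
    use_ssl=True,
    verify_certs=False,
)

# OAuth mode: restrict hits to documents owned by the requesting user.
query = {
    "query": {
        "bool": {
            "must": [{"match": {"text": "quarterly report"}}],  # "text" field is assumed
            "filter": [{"term": {"owner_email": "user@example.com"}}],  # hypothetical user
        }
    }
}
hits = client.search(index="documents", body=query)
print(hits["hits"]["total"])

In no-auth mode the same query would simply omit the `filter` clause, which is why every user on the server sees every document.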
@@ -4,9 +4,6 @@ slug: /get-started/docker
---

import PartialOnboarding from '@site/docs/_partial-onboarding.mdx';
-import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
-
-<PartialExternalPreview />

There are two different Docker Compose files.
They deploy the same applications and containers, but to different environments.
@@ -6,9 +6,6 @@ slug: /install
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import PartialOnboarding from '@site/docs/_partial-onboarding.mdx';
-import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
-
-<PartialExternalPreview />

[Install the OpenRAG Python wheel](#install-python-wheel), and then run the [OpenRAG Terminal User Interface (TUI)](#setup) to start your OpenRAG deployment with a guided setup process.

@@ -65,13 +62,15 @@ The OpenRAG wheel installs the Terminal User Interface (TUI) for configuring and
## Set up OpenRAG with the TUI {#setup}

The TUI creates a `.env` file in your OpenRAG directory root and starts OpenRAG.
+If the TUI detects a `.env` file in the OpenRAG root directory, it sources any variables from the `.env` file.
+If the TUI detects OAuth credentials, it enforces the **Advanced Setup** path.

-**Basic Setup** generates all of the required values except the OpenAI API key.
+**Basic Setup** generates all of the required values for OpenRAG except the OpenAI API key.
-**Basic Setup** does not set up OAuth connections for ingestion from Google Drive, OneDrive, or AWS.
+**Basic Setup** does not set up OAuth connections for ingestion from cloud providers.
For OAuth setup, use **Advanced Setup**.

-If the TUI detects OAuth credentials, it enforces the **Advanced Setup** path.
-If the TUI detects a `.env` file in the OpenRAG root directory, it will source any variables from the `.env` file.
+**Basic Setup** and **Advanced Setup** enforce the same authentication settings for the Langflow server, but manage document access differently. For more information, see [Authentication and document access](/knowledge#auth).

<Tabs groupId="Setup method">
<TabItem value="Basic setup" label="Basic setup" default>

@@ -90,6 +89,7 @@ If the TUI detects a `.env` file in the OpenRAG root directory, it will source a
7. Continue with [Application Onboarding](#application-onboarding).
</TabItem>
<TabItem value="Advanced setup" label="Advanced setup">

1. To install OpenRAG with **Advanced Setup**, click **Advanced Setup** or press <kbd>2</kbd>.
2. Click **Generate Passwords** to generate passwords for OpenSearch and Langflow.
3. Paste your OpenAI API key in the OpenAI API key field.
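As a rough illustration of the `.env` sourcing behavior described above: python-dotenv, the same mechanism the backend adopts elsewhere in this change set with `load_dotenv(override=False)`, loads variables without clobbering anything already exported in the shell. The `.env` path is assumed to be the OpenRAG root, and `OPENAI_API_KEY` is used here only because the TUI prompts for an OpenAI API key.

import os

from dotenv import load_dotenv  # assumes python-dotenv is installed

# override=False: values already present in the environment win over .env entries.
load_dotenv(".env", override=False)
print(bool(os.getenv("OPENAI_API_KEY")))  # True once the TUI has written the key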
@@ -6,9 +6,6 @@ slug: /quickstart
import Icon from "@site/src/components/icon/icon";
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
-import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
-
-<PartialExternalPreview />

Get started with OpenRAG by loading your knowledge, swapping out your language model, and then chatting with the OpenRAG API.
@@ -3,10 +3,6 @@ title: Terminal User Interface (TUI) commands
slug: /get-started/tui
---

-import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
-
-<PartialExternalPreview />
-
The OpenRAG Terminal User Interface (TUI) allows you to set up, configure, and monitor your OpenRAG deployment directly from the terminal, on any operating system.


@@ -3,10 +3,6 @@ title: What is OpenRAG?
slug: /
---

-import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
-
-<PartialExternalPreview />
-
OpenRAG is an open-source package for building agentic RAG systems.
It supports integration with a wide range of orchestration tools, vector databases, and LLM providers.
@@ -5,9 +5,6 @@ slug: /support/troubleshoot

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
-import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
-
-<PartialExternalPreview />

This page provides troubleshooting advice for issues you might encounter when using OpenRAG or contributing to OpenRAG.
BIN docs/static/img/opensearch-agent-flow.png (binary file not shown; 951 KiB before, 1,004 KiB after)

File diff suppressed because one or more lines are too long
@@ -232,6 +232,7 @@
 },
 {
 "animated": false,
+"className": "",
 "data": {
 "sourceHandle": {
 "dataType": "EmbeddingModel",

@@ -733,6 +734,10 @@
 {
 "key": "owner_email",
 "value": "OWNER_EMAIL"
+},
+{
+"key": "connector_type",
+"value": "CONNECTOR_TYPE_URL"
 }
 ]
 },

@@ -1808,7 +1813,7 @@
 ],
 "frozen": false,
 "icon": "table",
-"last_updated": "2025-10-03T20:31:36.023Z",
+"last_updated": "2025-10-06T17:46:55.068Z",
 "legacy": false,
 "lf_version": "1.6.0",
 "metadata": {

@@ -2224,7 +2229,7 @@
 ],
 "frozen": false,
 "icon": "table",
-"last_updated": "2025-10-03T20:31:36.025Z",
+"last_updated": "2025-10-06T17:46:55.069Z",
 "legacy": false,
 "lf_version": "1.6.0",
 "metadata": {

@@ -2897,7 +2902,7 @@
 ],
 "frozen": false,
 "icon": "table",
-"last_updated": "2025-10-03T20:31:36.026Z",
+"last_updated": "2025-10-06T17:46:55.069Z",
 "legacy": false,
 "metadata": {
 "code_hash": "b4d6b19b6eef",

@@ -3310,7 +3315,7 @@
 ],
 "frozen": false,
 "icon": "binary",
-"last_updated": "2025-10-03T20:31:47.177Z",
+"last_updated": "2025-10-06T17:46:54.996Z",
 "legacy": false,
 "metadata": {
 "code_hash": "8607e963fdef",

@@ -3595,17 +3600,17 @@
 }
 ],
 "viewport": {
-"x": -407.1633937626607,
+"x": -538.2311610019549,
-"y": -577.5291936220412,
+"y": -337.3313239657308,
-"zoom": 0.5347553210574026
+"zoom": 0.45546556043892106
 }
 },
 "description": "This flow is to ingest the URL to open search.",
 "endpoint_name": null,
-"mcp_enabled": true,
 "id": "72c3d17c-2dac-4a73-b48a-6518473d7830",
+"mcp_enabled": true,
 "is_component": false,
-"last_tested_version": "1.6.0",
+"last_tested_version": "1.6.3.dev1",
 "name": "OpenSearch URL Ingestion Flow",
 "tags": [
 "openai",
@@ -74,7 +74,7 @@ export const KnowledgeSearchInput = () => {
 {queryOverride && (
   <Button
     variant="ghost"
-    className="h-full !px-1.5 !py-0"
+    className="h-full rounded-sm !px-1.5 !py-0"
     type="button"
     onClick={() => {
       setSearchQueryInput("");

@@ -87,7 +87,7 @@ export const KnowledgeSearchInput = () => {
 <Button
   variant="ghost"
   className={cn(
-    "h-full !px-1.5 !py-0 hidden group-focus-within/input:block",
+    "h-full rounded-sm !px-1.5 !py-0 hidden group-focus-within/input:block",
     searchQueryInput && "block"
   )}
   type="submit"
@@ -92,6 +92,7 @@ export default function ConnectorsPage() {
   selectedFiles={selectedFiles}
   isAuthenticated={false} // This would come from auth context in real usage
   accessToken={undefined} // This would come from connected account
+  isIngesting={isSyncing}
 />
 </div>
@@ -5,14 +5,9 @@ import { useRouter, useSearchParams } from "next/navigation";
 import { Suspense, useCallback, useEffect, useMemo, useState } from "react";
 // import { Label } from "@/components/ui/label";
 // import { Checkbox } from "@/components/ui/checkbox";
-import { filterAccentClasses } from "@/components/knowledge-filter-panel";
 import { ProtectedRoute } from "@/components/protected-route";
 import { Button } from "@/components/ui/button";
-import { Checkbox } from "@/components/ui/checkbox";
-import { Input } from "@/components/ui/input";
-import { Label } from "@/components/ui/label";
 import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context";
-import { useTask } from "@/contexts/task-context";
 import {
   type ChunkResult,
   type File,

@@ -35,9 +30,9 @@ function ChunksPageContent() {
 const { parsedFilterData, queryOverride } = useKnowledgeFilter();
 const filename = searchParams.get("filename");
 const [chunks, setChunks] = useState<ChunkResult[]>([]);
-const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState<
-  ChunkResult[]
->([]);
+// const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState<
+//   ChunkResult[]
+// >([]);
 // const [selectedChunks, setSelectedChunks] = useState<Set<number>>(new Set());
 const [activeCopiedChunkIndex, setActiveCopiedChunkIndex] = useState<
   number | null

@@ -83,13 +78,13 @@ function ChunksPageContent() {
 }, [data, filename]);

 // Set selected state for all checkboxes when selectAll changes
-useEffect(() => {
-  if (selectAll) {
-    setSelectedChunks(new Set(chunks.map((_, index) => index)));
-  } else {
-    setSelectedChunks(new Set());
-  }
-}, [selectAll, setSelectedChunks, chunks]);
+// useEffect(() => {
+//   if (selectAll) {
+//     setSelectedChunks(new Set(chunks.map((_, index) => index)));
+//   } else {
+//     setSelectedChunks(new Set());
+//   }
+// }, [selectAll, setSelectedChunks, chunks]);

 const handleBack = useCallback(() => {
   router.push("/knowledge");

@@ -126,26 +121,25 @@ function ChunksPageContent() {
 return (
   <div className="flex flex-col h-full">
-    <div className="flex flex-col h-full">
     {/* Header */}
     <div className="flex flex-col mb-6">
       <div className="flex items-center gap-3 mb-6">
         <Button
           variant="ghost"
           onClick={handleBack}
           size="sm"
           className="max-w-8 max-h-8 -m-2"
         >
           <ArrowLeft size={24} />
         </Button>
         <h1 className="text-lg font-semibold">
           {/* Removes file extension from filename */}
           {filename.replace(/\.[^/.]+$/, "")}
         </h1>
       </div>
       <div className="flex flex-1">
         <KnowledgeSearchInput />
         {/* <div className="flex items-center pl-4 gap-2">
           <Checkbox
             id="selectAllChunks"
             checked={selectAll}

@@ -160,11 +154,12 @@ function ChunksPageContent() {
         Select all
       </Label>
     </div> */}
-    </div>
     </div>
+    </div>

-    {/* Content Area - matches knowledge page structure */}
-    <div className="flex-1 overflow-auto pr-6">
+    <div className="grid gap-6 grid-cols-1 lg:grid-cols-[3fr_1fr]">
+      {/* Content Area */}
+      <div className="row-start-2 lg:row-start-1">
       {isFetching ? (
         <div className="flex items-center justify-center h-64">
           <div className="text-center">

@@ -185,7 +180,7 @@ function ChunksPageContent() {
       </div>
     ) : (
       <div className="space-y-4 pb-6">
-        {chunksFilteredByQuery.map((chunk, index) => (
+        {chunks.map((chunk, index) => (
           <div
             key={chunk.filename + index}
             className="bg-muted rounded-lg p-4 border border-border/50"

@@ -242,31 +237,30 @@ function ChunksPageContent() {
         </div>
       )}
     </div>
-    </div>
     {/* Right panel - Summary (TODO), Technical details, */}
     {chunks.length > 0 && (
-      <div className="w-[320px] py-20 px-2">
+      <div className="min-w-[200px]">
       <div className="mb-8">
-        <h2 className="text-xl font-semibold mt-3 mb-4">
-          Technical details
-        </h2>
+        <h2 className="text-xl font-semibold mb-4">Technical details</h2>
         <dl>
           <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
             <dt className="text-sm/6 text-muted-foreground">
               Total chunks
             </dt>
             <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
               {chunks.length}
             </dd>
           </div>
           <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
-            <dt className="text-sm/6 text-muted-foreground">Avg length</dt>
+            <dt className="text-sm/6 text-muted-foreground">
+              Avg length
+            </dt>
             <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
               {averageChunkLength.toFixed(0)} chars
             </dd>
           </div>
           {/* TODO: Uncomment after data is available */}
           {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
             <dt className="text-sm/6 text-muted-foreground">Process time</dt>
             <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
             </dd>

@@ -276,54 +270,55 @@ function ChunksPageContent() {
             <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
             </dd>
           </div> */}
         </dl>
       </div>
-      <div className="mb-8">
+      <div className="mb-4">
         <h2 className="text-xl font-semibold mt-2 mb-3">
           Original document
         </h2>
         <dl>
           {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
             <dt className="text-sm/6 text-muted-foreground">Name</dt>
             <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
               {fileData?.filename}
             </dd>
           </div> */}
           <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
             <dt className="text-sm/6 text-muted-foreground">Type</dt>
             <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
               {fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"}
             </dd>
           </div>
           <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
             <dt className="text-sm/6 text-muted-foreground">Size</dt>
             <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
               {fileData?.size
                 ? `${Math.round(fileData.size / 1024)} KB`
                 : "Unknown"}
             </dd>
           </div>
           {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
             <dt className="text-sm/6 text-muted-foreground">Uploaded</dt>
             <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
               N/A
             </dd>
           </div> */}
           {/* TODO: Uncomment after data is available */}
           {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
             <dt className="text-sm/6 text-muted-foreground">Source</dt>
             <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"></dd>
           </div> */}
           {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
             <dt className="text-sm/6 text-muted-foreground">Updated</dt>
             <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
               N/A
             </dd>
           </div> */}
         </dl>
+      </div>
       </div>
       </div>
     )}
   </div>
 );
 }
@@ -85,6 +85,7 @@ interface Connector {
   connectionId?: string;
   access_token?: string;
   selectedFiles?: GoogleDriveFile[] | OneDriveFile[];
+  available?: boolean;
 }

 interface SyncResult {
@@ -165,7 +165,7 @@ export default function UploadProviderPage() {

 const handleFileSelected = (files: CloudFile[]) => {
   setSelectedFiles(files);
-  console.log(`Selected ${files.length} files from ${provider}:`, files);
+  console.log(`Selected ${files.length} item(s) from ${provider}:`, files);
   // You can add additional handling here like triggering sync, etc.
 };

@@ -376,19 +376,19 @@ export default function UploadProviderPage() {
   loading={isIngesting}
   disabled={!hasSelectedFiles || isIngesting}
 >
-  {!hasSelectedFiles ? (
-    <>Ingest files</>
-  ) : (
+  {hasSelectedFiles ? (
     <>
-      Ingest {selectedFiles.length} file
+      Ingest {selectedFiles.length} item
       {selectedFiles.length > 1 ? "s" : ""}
     </>
+  ) : (
+    <>Ingest selected items</>
   )}
 </Button>
 </TooltipTrigger>
 {!hasSelectedFiles ? (
   <TooltipContent side="left">
-    Select at least one file before ingesting
+    Select at least one item before ingesting
   </TooltipContent>
 ) : null}
 </Tooltip>
@@ -201,7 +201,7 @@ export function CloudConnectorsDialog({
 <DialogHeader>
   <DialogTitle>Cloud File Connectors</DialogTitle>
   <DialogDescription>
-    Select files from your connected cloud storage providers
+    Select files or folders from your connected cloud storage providers
   </DialogDescription>
 </DialogHeader>

@@ -232,7 +232,7 @@ export function CloudConnectorsDialog({
 !connector.hasAccessToken
   ? connector.accessTokenError ||
     "Access token required - try reconnecting your account"
-  : `Select files from ${connector.name}`
+  : `Select files or folders from ${connector.name}`
 }
 onClick={e => {
   e.preventDefault();

@@ -283,6 +283,7 @@ export function CloudConnectorsDialog({
   accessToken={connectorAccessTokens[connector.type]}
   onPickerStateChange={() => {}}
   clientId={connector.clientId}
+  isIngesting={false}
 />
 </div>
 );
@@ -26,7 +26,7 @@ export const FileList = ({
 return (
   <div className="space-y-2 relative">
     <div className="flex items-center justify-between">
-      <p className="text-sm font-medium">Added files ({files.length})</p>
+      <p className="text-sm font-medium">Selected items ({files.length})</p>
       <Button
         ignoreTitleCase={true}
         onClick={onClearAll}

@@ -39,7 +39,7 @@ export const PickerHeader = ({
 return (
   <div className="text-sm text-muted-foreground p-4 bg-muted/20 rounded-md">
     Please connect to {getProviderName(provider)} first to select specific
-    files.
+    files or folders.
   </div>
 );
 }

@@ -48,7 +48,7 @@ export const PickerHeader = ({
 <Card>
   <CardContent className="flex flex-col items-center text-center py-8">
     <p className="text-sm text-primary mb-4">
-      Select files from {getProviderName(provider)} to ingest.
+      Select files or folders from {getProviderName(provider)} to ingest.
     </p>
     <Button
       onClick={onAddFiles}

@@ -56,7 +56,7 @@ export const PickerHeader = ({
       className="bg-foreground text-background hover:bg-foreground/90 font-semibold"
     >
       <Plus className="h-4 w-4" />
-      {isPickerOpen ? "Opening picker..." : "Add files"}
+      {isPickerOpen ? "Opening picker..." : "Add files or folders"}
     </Button>
   </CardContent>
 </Card>
@@ -52,12 +52,16 @@ export class GoogleDriveHandler {
 try {
   this.onPickerStateChange?.(true);

+  // Create a view for regular documents
+  const docsView = new window.google.picker.DocsView()
+    .setIncludeFolders(true)
+    .setSelectFolderEnabled(true);
+
   const picker = new window.google.picker.PickerBuilder()
-    .addView(window.google.picker.ViewId.DOCS)
-    .addView(window.google.picker.ViewId.FOLDERS)
+    .addView(docsView)
     .setOAuthToken(this.accessToken)
     .enableFeature(window.google.picker.Feature.MULTISELECT_ENABLED)
-    .setTitle("Select files from Google Drive")
+    .setTitle("Select files or folders from Google Drive")
     .setCallback(data => this.pickerCallback(data, onFileSelected))
     .build();
@@ -53,6 +53,7 @@ declare global {
   load: (callback: () => void) => void;
 };
 PickerBuilder: new () => GooglePickerBuilder;
+DocsView: new () => GoogleDocsView;
 ViewId: {
   DOCS: string;
   FOLDERS: string;

@@ -83,8 +84,13 @@ declare global {
   }
 }

+export interface GoogleDocsView {
+  setIncludeFolders: (include: boolean) => GoogleDocsView;
+  setSelectFolderEnabled: (enabled: boolean) => GoogleDocsView;
+}
+
 export interface GooglePickerBuilder {
-  addView: (view: string) => GooglePickerBuilder;
+  addView: (view: GoogleDocsView | string) => GooglePickerBuilder;
   setOAuthToken: (token: string) => GooglePickerBuilder;
   setCallback: (
     callback: (data: GooglePickerData) => void
@@ -19,6 +19,7 @@ import {
 import { useAuth } from "@/contexts/auth-context";

 // Task interface is now imported from useGetTasksQuery
+export type { Task };

 export interface TaskFile {
   filename: string;
@@ -1,6 +1,10 @@
+[build-system]
+requires = ["setuptools>=61.0", "wheel"]
+build-backend = "setuptools.build_meta"
+
 [project]
 name = "openrag"
-version = "0.1.14.dev3"
+version = "0.1.19"
 description = "Add your description here"
 readme = "README.md"
 requires-python = ">=3.13"

@@ -31,6 +35,9 @@ dependencies = [
 "docling-serve>=1.4.1",
 ]

+[dependency-groups]
+dev = ["pytest>=8", "pytest-asyncio>=0.21.0", "pytest-mock>=3.12.0", "pytest-cov>=4.0.0"]
+
 [project.scripts]
 openrag = "tui.main:run_tui"
New file: scripts/docling_ctl.py (91 lines)

@@ -0,0 +1,91 @@
#!/usr/bin/env python3
"""Helper script to control docling-serve using DoclingManager for CI/testing."""

import sys
import asyncio
import argparse
from pathlib import Path

# Add src to path so we can import DoclingManager
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))

from tui.managers.docling_manager import DoclingManager


async def start_docling(port: int = 5001, host: str = None, enable_ui: bool = False):
    """Start docling-serve."""
    manager = DoclingManager()

    if manager.is_running():
        print("Docling-serve is already running")
        status = manager.get_status()
        print(f"Endpoint: {status['endpoint']}")
        return 0

    host_msg = f"{host}:{port}" if host else f"auto-detected host:{port}"
    print(f"Starting docling-serve on {host_msg}...")
    success, message = await manager.start(port=port, host=host, enable_ui=enable_ui)

    if success:
        print(f"{message}")
        status = manager.get_status()
        print(f"Endpoint: {status['endpoint']}")
        print(f"PID: {status['pid']}")
        return 0
    else:
        print(f"{message}", file=sys.stderr)
        return 1


async def stop_docling():
    """Stop docling-serve."""
    manager = DoclingManager()

    if not manager.is_running():
        print("Docling-serve is not running")
        return 0

    print("Stopping docling-serve...")
    success, message = await manager.stop()

    if success:
        print(f"{message}")
        return 0
    else:
        print(f"{message}", file=sys.stderr)
        return 1


async def status_docling():
    """Get docling-serve status."""
    manager = DoclingManager()
    status = manager.get_status()

    print(f"Status: {status['status']}")
    if status['status'] == 'running':
        print(f"Endpoint: {status['endpoint']}")
        print(f"Docs: {status['docs_url']}")
        print(f"PID: {status['pid']}")

    return 0 if status['status'] == 'running' else 1


async def main():
    parser = argparse.ArgumentParser(description="Control docling-serve for CI/testing")
    parser.add_argument("command", choices=["start", "stop", "status"], help="Command to run")
    parser.add_argument("--port", type=int, default=5001, help="Port to run on (default: 5001)")
    parser.add_argument("--host", default=None, help="Host to bind to (default: auto-detect for containers)")
    parser.add_argument("--enable-ui", action="store_true", help="Enable UI")

    args = parser.parse_args()

    if args.command == "start":
        return await start_docling(port=args.port, host=args.host if args.host else None, enable_ui=args.enable_ui)
    elif args.command == "stop":
        return await stop_docling()
    elif args.command == "status":
        return await status_docling()


if __name__ == "__main__":
    sys.exit(asyncio.run(main()))
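The same lifecycle can be driven programmatically rather than through the CLI wrapper above. This is a minimal sketch that reuses only the DoclingManager calls the script itself makes (is_running, start, get_status, stop); the import assumes `src` is on `sys.path`, as the script arranges.

import asyncio

from tui.managers.docling_manager import DoclingManager


async def smoke_test() -> None:
    manager = DoclingManager()
    if not manager.is_running():
        ok, message = await manager.start(port=5001, enable_ui=False)
        print(ok, message)
    print(manager.get_status())  # includes 'status', 'endpoint', 'pid'
    await manager.stop()


asyncio.run(smoke_test())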
New file: src/api/docling.py (120 lines)

@@ -0,0 +1,120 @@
"""Docling service proxy endpoints."""

import socket
import struct
from pathlib import Path

import httpx
from starlette.requests import Request
from starlette.responses import JSONResponse

from utils.container_utils import (
    detect_container_environment,
    get_container_host,
    guess_host_ip_for_containers,
)
from utils.logging_config import get_logger

logger = get_logger(__name__)


def _get_gateway_ip_from_route() -> str | None:
    """Return the default gateway IP visible from the current network namespace."""
    try:
        with Path("/proc/net/route").open() as route_table:
            next(route_table)  # Skip header
            for line in route_table:
                fields = line.strip().split()
                min_fields = 3  # interface, destination, gateway
                if len(fields) >= min_fields and fields[1] == "00000000":
                    gateway_hex = fields[2]
                    gw_int = int(gateway_hex, 16)
                    gateway_ip = socket.inet_ntoa(struct.pack("<L", gw_int))
                    return gateway_ip
    except (FileNotFoundError, PermissionError, IndexError, ValueError) as err:
        logger.warning("Could not read routing table: %s", err)

    return None


def determine_docling_host() -> str:
    """Determine the host address used for docling health checks."""
    container_type = detect_container_environment()
    if container_type:
        # Try HOST_DOCKER_INTERNAL env var first
        container_host = get_container_host()
        if container_host:
            logger.info("Using container-aware host '%s'", container_host)
            return container_host

        # Try special hostnames (Docker Desktop and rootless podman)
        for hostname in ["host.docker.internal", "host.containers.internal"]:
            try:
                socket.getaddrinfo(hostname, None)
                logger.info("Using %s for container-to-host communication", hostname)
                return hostname
            except socket.gaierror:
                logger.debug("%s not available", hostname)

        # Try gateway IP detection (Docker on Linux)
        gateway_ip = _get_gateway_ip_from_route()
        if gateway_ip:
            logger.info("Detected host gateway IP: %s", gateway_ip)
            return gateway_ip

        # Fallback to bridge IP
        fallback_ip = guess_host_ip_for_containers(logger=logger)
        logger.info("Falling back to container bridge host %s", fallback_ip)
        return fallback_ip

    # Running outside a container
    logger.info("Running outside a container; using localhost")
    return "localhost"


# Detect the host IP once at startup
HOST_IP = determine_docling_host()
DOCLING_SERVICE_URL = f"http://{HOST_IP}:5001"


async def health(request: Request) -> JSONResponse:
    """
    Proxy health check to docling-serve.
    This allows the frontend to check docling status via same-origin request.
    """
    health_url = f"{DOCLING_SERVICE_URL}/health"
    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(health_url, timeout=2.0)

            if response.status_code == 200:
                return JSONResponse({
                    "status": "healthy",
                    "host": HOST_IP,
                })
            else:
                logger.warning("Docling health check failed", url=health_url, status_code=response.status_code)
                return JSONResponse({
                    "status": "unhealthy",
                    "message": f"Health check failed with status: {response.status_code}",
                    "host": HOST_IP,
                }, status_code=503)

    except httpx.TimeoutException:
        logger.warning("Docling health check timeout", url=health_url)
        return JSONResponse({
            "status": "unhealthy",
            "message": "Connection timeout",
            "host": HOST_IP,
        }, status_code=503)
    except Exception as e:
        logger.error("Docling health check failed", url=health_url, error=str(e))
        return JSONResponse({
            "status": "unhealthy",
            "message": str(e),
            "host": HOST_IP,
        }, status_code=503)
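A quick way to exercise the proxy is a same-origin request from any HTTP client. The sketch below assumes the OpenRAG API listens on localhost:8000 and that the `health` handler above is mounted at `/docling/health`; both are assumptions, so adjust to your routing.

import httpx

# Hypothetical host/port and mount path; see the app's route registration.
resp = httpx.get("http://localhost:8000/docling/health", timeout=5.0)
print(resp.status_code, resp.json())  # e.g. 200 {"status": "healthy", "host": "..."}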
@@ -28,7 +28,6 @@ def require_auth(session_manager):
 async def wrapper(request: Request):
     # In no-auth mode, bypass authentication entirely
     if is_no_auth_mode():
-        logger.debug("No-auth mode: Creating anonymous user")
         # Create an anonymous user object so endpoints don't break
         from session_manager import User
         from datetime import datetime

@@ -36,7 +35,6 @@ def require_auth(session_manager):
         from session_manager import AnonymousUser
         request.state.user = AnonymousUser()
         request.state.jwt_token = None  # No JWT in no-auth mode
-        logger.debug("Set user_id=anonymous, jwt_token=None")
         return await handler(request)

     user = get_current_user(request, session_manager)
@@ -13,8 +13,8 @@ from utils.container_utils import get_container_host
 from utils.document_processing import create_document_converter
 from utils.logging_config import get_logger

-load_dotenv()
-load_dotenv("../")
+load_dotenv(override=False)
+load_dotenv("../", override=False)

 logger = get_logger(__name__)

@@ -61,12 +61,6 @@ DISABLE_INGEST_WITH_LANGFLOW = os.getenv(
 def is_no_auth_mode():
     """Check if we're running in no-auth mode (OAuth credentials missing)"""
     result = not (GOOGLE_OAUTH_CLIENT_ID and GOOGLE_OAUTH_CLIENT_SECRET)
-    logger.debug(
-        "Checking auth mode",
-        no_auth_mode=result,
-        has_client_id=GOOGLE_OAUTH_CLIENT_ID is not None,
-        has_client_secret=GOOGLE_OAUTH_CLIENT_SECRET is not None,
-    )
     return result
@ -1,21 +1,20 @@
|
||||||
import io
|
import io
|
||||||
import os
|
import os
|
||||||
from pathlib import Path
|
|
||||||
import time
|
import time
|
||||||
from collections import deque
|
from collections import deque
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Dict, List, Any, Optional, Iterable, Set
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, Iterable, List, Optional, Set
|
||||||
|
|
||||||
from googleapiclient.errors import HttpError
|
from googleapiclient.errors import HttpError
|
||||||
from googleapiclient.http import MediaIoBaseDownload
|
from googleapiclient.http import MediaIoBaseDownload
|
||||||
|
|
||||||
from utils.logging_config import get_logger
|
from utils.logging_config import get_logger
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
|
||||||
|
|
||||||
# Project-specific base types (adjust imports to your project)
|
|
||||||
from ..base import BaseConnector, ConnectorDocument, DocumentACL
|
from ..base import BaseConnector, ConnectorDocument, DocumentACL
|
||||||
from .oauth import GoogleDriveOAuth
|
from .oauth import GoogleDriveOAuth
|
||||||
|
|
||||||
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
# -------------------------
|
# -------------------------
|
||||||
# Config model
|
# Config model
|
||||||
|
|
@ -32,8 +31,8 @@ class GoogleDriveConfig:
|
||||||
recursive: bool = True
|
recursive: bool = True
|
||||||
|
|
||||||
# Shared Drives control
|
# Shared Drives control
|
||||||
drive_id: Optional[str] = None # when set, we use corpora='drive'
|
drive_id: Optional[str] = None # when set, we use corpora='drive'
|
||||||
corpora: Optional[str] = None # 'user' | 'drive' | 'domain'; auto-picked if None
|
corpora: Optional[str] = None # 'user' | 'drive' | 'domain'; auto-picked if None
|
||||||
|
|
||||||
# Optional filtering
|
# Optional filtering
|
||||||
include_mime_types: Optional[List[str]] = None
|
include_mime_types: Optional[List[str]] = None
|
||||||
|
|
@ -80,7 +79,6 @@ class GoogleDriveConnector(BaseConnector):
|
||||||
_FILE_ID_ALIASES = ("file_ids", "selected_file_ids", "selected_files")
|
_FILE_ID_ALIASES = ("file_ids", "selected_file_ids", "selected_files")
|
||||||
_FOLDER_ID_ALIASES = ("folder_ids", "selected_folder_ids", "selected_folders")
|
_FOLDER_ID_ALIASES = ("folder_ids", "selected_folder_ids", "selected_folders")
|
||||||
|
|
||||||
|
|
||||||
def emit(self, doc: ConnectorDocument) -> None:
|
def emit(self, doc: ConnectorDocument) -> None:
|
||||||
"""
|
"""
|
||||||
Emit a ConnectorDocument instance.
|
Emit a ConnectorDocument instance.
|
||||||
|
|
@ -100,7 +98,9 @@ class GoogleDriveConnector(BaseConnector):
|
||||||
|
|
||||||
# Token file default (so callback & workers don’t need to pass it)
|
# Token file default (so callback & workers don’t need to pass it)
|
||||||
project_root = Path(__file__).resolve().parent.parent.parent.parent
|
project_root = Path(__file__).resolve().parent.parent.parent.parent
|
||||||
token_file = config.get("token_file") or str(project_root / "google_drive_token.json")
|
token_file = config.get("token_file") or str(
|
||||||
|
project_root / "google_drive_token.json"
|
||||||
|
)
|
||||||
Path(token_file).parent.mkdir(parents=True, exist_ok=True)
|
Path(token_file).parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
if not isinstance(client_id, str) or not client_id.strip():
|
if not isinstance(client_id, str) or not client_id.strip():
|
||||||
|
|
@ -115,7 +115,9 @@ class GoogleDriveConnector(BaseConnector):
|
||||||
)
|
)
|
||||||
|
|
||||||
# Normalize incoming IDs from any of the supported alias keys
|
# Normalize incoming IDs from any of the supported alias keys
|
||||||
def _first_present_list(cfg: Dict[str, Any], keys: Iterable[str]) -> Optional[List[str]]:
|
def _first_present_list(
|
||||||
|
cfg: Dict[str, Any], keys: Iterable[str]
|
||||||
|
) -> Optional[List[str]]:
|
||||||
for k in keys:
|
for k in keys:
|
||||||
v = cfg.get(k)
|
v = cfg.get(k)
|
||||||
if v: # accept non-empty list
|
if v: # accept non-empty list
|
||||||
|
|
@ -151,6 +153,7 @@ class GoogleDriveConnector(BaseConnector):
|
||||||
|
|
||||||
# Drive client is built in authenticate()
|
# Drive client is built in authenticate()
|
||||||
from google.oauth2.credentials import Credentials
|
from google.oauth2.credentials import Credentials
|
||||||
|
|
||||||
self.creds: Optional[Credentials] = None
|
self.creds: Optional[Credentials] = None
|
||||||
self.service: Any = None
|
self.service: Any = None
|
||||||
|
|
||||||
|
|
@ -214,7 +217,7 @@ class GoogleDriveConnector(BaseConnector):
|
||||||
"id, name, mimeType, modifiedTime, createdTime, size, "
|
"id, name, mimeType, modifiedTime, createdTime, size, "
|
||||||
"webViewLink, parents, owners, driveId"
|
"webViewLink, parents, owners, driveId"
|
||||||
),
|
),
|
||||||
**self._drives_flags,
|
**self._drives_get_flags,
|
||||||
)
|
)
|
||||||
.execute()
|
.execute()
|
||||||
)
|
)
|
||||||
|
|
@ -285,7 +288,9 @@ class GoogleDriveConnector(BaseConnector):
|
||||||
Fetch metadata for a file by ID (resolving shortcuts).
|
Fetch metadata for a file by ID (resolving shortcuts).
|
||||||
"""
|
"""
|
||||||
if self.service is None:
|
if self.service is None:
|
||||||
raise RuntimeError("Google Drive service is not initialized. Please authenticate first.")
|
raise RuntimeError(
|
||||||
|
"Google Drive service is not initialized. Please authenticate first."
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
meta = (
|
meta = (
|
||||||
self.service.files()
|
self.service.files()
|
||||||
|
|
@ -323,24 +328,40 @@ class GoogleDriveConnector(BaseConnector):
|
||||||
def _iter_selected_items(self) -> List[Dict[str, Any]]:
|
def _iter_selected_items(self) -> List[Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
Return a de-duplicated list of file metadata for the selected scope:
|
Return a de-duplicated list of file metadata for the selected scope:
|
||||||
- explicit file_ids
|
- explicit file_ids (automatically expands folders to their contents)
|
||||||
- items inside folder_ids (with optional recursion)
|
- items inside folder_ids (with optional recursion)
|
||||||
Shortcuts are resolved to their targets automatically.
|
Shortcuts are resolved to their targets automatically.
|
||||||
"""
|
"""
|
||||||
seen: Set[str] = set()
|
seen: Set[str] = set()
|
||||||
items: List[Dict[str, Any]] = []
|
items: List[Dict[str, Any]] = []
|
||||||
|
folders_to_expand: List[str] = []
|
||||||
|
|
||||||
# Explicit files
|
# Process file_ids: separate actual files from folders
|
||||||
if self.cfg.file_ids:
|
if self.cfg.file_ids:
|
||||||
for fid in self.cfg.file_ids:
|
for fid in self.cfg.file_ids:
|
||||||
meta = self._get_file_meta_by_id(fid)
|
meta = self._get_file_meta_by_id(fid)
|
||||||
if meta and meta["id"] not in seen:
|
if not meta:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# If it's a folder, add to folders_to_expand instead
|
||||||
|
if meta.get("mimeType") == "application/vnd.google-apps.folder":
|
||||||
|
logger.debug(
|
||||||
|
f"Item {fid} ({meta.get('name')}) is a folder, "
|
||||||
|
f"will expand to contents"
|
||||||
|
)
|
||||||
|
folders_to_expand.append(fid)
|
||||||
|
elif meta["id"] not in seen:
|
||||||
|
# It's a regular file, add it directly
|
||||||
seen.add(meta["id"])
|
seen.add(meta["id"])
|
||||||
items.append(meta)
|
items.append(meta)
|
||||||
|
|
||||||
# Folders
|
# Collect all folders to expand (from both file_ids and folder_ids)
|
||||||
if self.cfg.folder_ids:
|
if self.cfg.folder_ids:
|
||||||
folder_children = self._bfs_expand_folders(self.cfg.folder_ids)
|
folders_to_expand.extend(self.cfg.folder_ids)
|
||||||
|
|
||||||
|
# Expand all folders to their contents
|
||||||
|
if folders_to_expand:
|
||||||
|
folder_children = self._bfs_expand_folders(folders_to_expand)
|
||||||
for meta in folder_children:
|
for meta in folder_children:
|
||||||
meta = self._resolve_shortcut(meta)
|
meta = self._resolve_shortcut(meta)
|
||||||
if meta.get("id") in seen:
|
if meta.get("id") in seen:
|
||||||
|
|
@ -357,7 +378,11 @@ class GoogleDriveConnector(BaseConnector):
|
||||||
|
|
||||||
items = self._filter_by_mime(items)
|
items = self._filter_by_mime(items)
|
||||||
# Exclude folders from final emits:
|
# Exclude folders from final emits:
|
||||||
items = [m for m in items if m.get("mimeType") != "application/vnd.google-apps.folder"]
|
items = [
|
||||||
|
m
|
||||||
|
for m in items
|
||||||
|
if m.get("mimeType") != "application/vnd.google-apps.folder"
|
||||||
|
]
|
||||||
return items
|
return items
|
||||||
|
|
||||||
# -------------------------
|
# -------------------------
|
||||||
|
|
@@ -389,29 +414,85 @@ class GoogleDriveConnector(BaseConnector):
     def _download_file_bytes(self, file_meta: Dict[str, Any]) -> bytes:
         """
         Download bytes for a given file (exporting if Google-native).
+
+        Raises ValueError if the item is a folder (folders cannot be downloaded).
         """
         file_id = file_meta["id"]
+        file_name = file_meta.get("name", "unknown")
         mime_type = file_meta.get("mimeType") or ""

-        # Google-native: export
-        export_mime = self._pick_export_mime(mime_type)
-        if mime_type.startswith("application/vnd.google-apps."):
-            # default fallback if not overridden
-            #if not export_mime:
-            #    export_mime = "application/pdf"
-            export_mime = "application/pdf"
+        logger.debug(
+            f"Downloading file {file_id} ({file_name}) with mimetype: {mime_type}"
+        )
+
+        # Folders cannot be downloaded or exported - this should never be reached
+        # as folders are automatically expanded in _iter_selected_items()
+        if mime_type == "application/vnd.google-apps.folder":
+            raise ValueError(
+                f"Cannot download folder {file_id} ({file_name}). "
+                f"This is a bug - folders should be automatically expanded before download."
+            )
+
+        # According to https://stackoverflow.com/questions/65053558/google-drive-api-v3-files-export-method-throws-a-403-error-export-only-support
+        # export_media ONLY works for Google Docs Editors files (Docs, Sheets, Slides, Drawings)
+        # All other files (including other Google Apps types like Forms, Sites, Maps) must use get_media
+
+        # Define which Google Workspace files are exportable
+        exportable_types = {
+            "application/vnd.google-apps.document",  # Google Docs
+            "application/vnd.google-apps.spreadsheet",  # Google Sheets
+            "application/vnd.google-apps.presentation",  # Google Slides
+            "application/vnd.google-apps.drawing",  # Google Drawings
+        }
+
+        if mime_type in exportable_types:
+            # This is an exportable Google Workspace file - must use export_media
+            export_mime = self._pick_export_mime(mime_type)
+            if not export_mime:
+                # Default fallback for unsupported Google native types
+                export_mime = "application/pdf"
+
+            logger.debug(
+                f"Using export_media for {file_id} ({mime_type} -> {export_mime})"
+            )
             # NOTE: export_media does not accept supportsAllDrives/includeItemsFromAllDrives
-            request = self.service.files().export_media(fileId=file_id, mimeType=export_mime)
+            request = self.service.files().export_media(
+                fileId=file_id, mimeType=export_mime
+            )
         else:
+            # This is a regular uploaded file (PDF, image, video, etc.) - use get_media
+            # Also handles non-exportable Google Apps files (Forms, Sites, Maps, etc.)
+            logger.debug(f"Using get_media for {file_id} ({mime_type})")
             # Binary download (get_media also doesn't accept the Drive flags)
             request = self.service.files().get_media(fileId=file_id)

+        # Download the file with error handling for misclassified Google Docs
         fh = io.BytesIO()
         downloader = MediaIoBaseDownload(fh, request, chunksize=1024 * 1024)
         done = False
-        while not done:
-            status, done = downloader.next_chunk()
-            # Optional: you can log progress via status.progress()
+        try:
+            while not done:
+                status, done = downloader.next_chunk()
+                # Optional: you can log progress via status.progress()
+        except HttpError as e:
+            # If download fails with "fileNotDownloadable", it's a Docs Editor file
+            # that wasn't properly detected. Retry with export_media.
+            if "fileNotDownloadable" in str(e) and mime_type not in exportable_types:
+                logger.warning(
+                    f"Download failed for {file_id} ({mime_type}) with fileNotDownloadable error. "
+                    f"Retrying with export_media (file might be a Google Doc)"
+                )
+                export_mime = "application/pdf"
+                request = self.service.files().export_media(
+                    fileId=file_id, mimeType=export_mime
+                )
+                fh = io.BytesIO()
+                downloader = MediaIoBaseDownload(fh, request, chunksize=1024 * 1024)
+                done = False
+                while not done:
+                    status, done = downloader.next_chunk()
+            else:
+                raise

         return fh.getvalue()
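Note: the export-vs-download split above reduces to a few lines. A minimal sketch, assuming an already-authenticated Drive v3 `service` handle and hard-coding the PDF export target (both assumptions, not part of the diff):

    # Minimal sketch of the export-vs-download decision for Drive files.
    # Assumes `service` came from googleapiclient.discovery.build("drive", "v3", ...).
    import io

    from googleapiclient.http import MediaIoBaseDownload

    EXPORTABLE = {
        "application/vnd.google-apps.document",
        "application/vnd.google-apps.spreadsheet",
        "application/vnd.google-apps.presentation",
        "application/vnd.google-apps.drawing",
    }

    def fetch_bytes(service, file_id: str, mime_type: str) -> bytes:
        if mime_type in EXPORTABLE:
            # Docs Editors files have no binary body; they must be exported.
            request = service.files().export_media(fileId=file_id, mimeType="application/pdf")
        else:
            # Regular uploads (and non-exportable Google Apps types) are fetched directly.
            request = service.files().get_media(fileId=file_id)
        buf = io.BytesIO()
        downloader = MediaIoBaseDownload(buf, request, chunksize=1024 * 1024)
        done = False
        while not done:
            _, done = downloader.next_chunk()
        return buf.getvalue()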
@@ -430,7 +511,9 @@ class GoogleDriveConnector(BaseConnector):

         # If still not authenticated, bail (caller should kick off OAuth init)
         if not await self.oauth.is_authenticated():
-            logger.debug("authenticate: no valid credentials; run OAuth init/callback first.")
+            logger.debug(
+                "authenticate: no valid credentials; run OAuth init/callback first."
+            )
             return False

         # Build Drive service from OAuth helper
@@ -450,7 +533,7 @@ class GoogleDriveConnector(BaseConnector):
         self,
         page_token: Optional[str] = None,
         max_files: Optional[int] = None,
-        **kwargs
+        **kwargs,
     ) -> Dict[str, Any]:
         """
         List files in the currently selected scope (file_ids/folder_ids/recursive).
@@ -487,11 +570,20 @@ class GoogleDriveConnector(BaseConnector):
     async def get_file_content(self, file_id: str) -> ConnectorDocument:
         """
         Fetch a file's metadata and content from Google Drive and wrap it in a ConnectorDocument.
+
+        Raises FileNotFoundError if the ID is a folder (folders cannot be downloaded).
         """
         meta = self._get_file_meta_by_id(file_id)
         if not meta:
             raise FileNotFoundError(f"Google Drive file not found: {file_id}")

+        # Check if this is a folder - folders cannot be downloaded
+        if meta.get("mimeType") == "application/vnd.google-apps.folder":
+            raise FileNotFoundError(
+                f"Cannot download folder {file_id} ({meta.get('name')}). "
+                f"Folders must be expanded to list their contents. "
+                f"This ID should not have been passed to get_file_content()."
+            )
+
         try:
             blob = self._download_file_bytes(meta)
         except Exception as e:
@@ -527,7 +619,9 @@ class GoogleDriveConnector(BaseConnector):
             metadata={
                 "parents": meta.get("parents"),
                 "driveId": meta.get("driveId"),
-                "size": int(meta.get("size", 0)) if str(meta.get("size", "")).isdigit() else None,
+                "size": int(meta.get("size", 0))
+                if str(meta.get("size", "")).isdigit()
+                else None,
             },
         )
         return doc
@@ -546,10 +640,14 @@ class GoogleDriveConnector(BaseConnector):
         # 1) Ensure we are authenticated and have a live Drive service
         ok = await self.authenticate()
         if not ok:
-            raise RuntimeError("GoogleDriveConnector.setup_subscription: not authenticated")
+            raise RuntimeError(
+                "GoogleDriveConnector.setup_subscription: not authenticated"
+            )

         # 2) Resolve webhook address (no param in ABC, so pull from config/env)
-        webhook_address = getattr(self.cfg, "webhook_address", None) or os.getenv("GOOGLE_DRIVE_WEBHOOK_URL")
+        webhook_address = getattr(self.cfg, "webhook_address", None) or os.getenv(
+            "GOOGLE_DRIVE_WEBHOOK_URL"
+        )
         if not webhook_address:
             raise RuntimeError(
                 "GoogleDriveConnector.setup_subscription: webhook URL not configured. "
@@ -600,7 +698,9 @@ class GoogleDriveConnector(BaseConnector):
         }

         if not isinstance(channel_id, str) or not channel_id:
-            raise RuntimeError(f"Drive watch returned invalid channel id: {channel_id!r}")
+            raise RuntimeError(
+                f"Drive watch returned invalid channel id: {channel_id!r}"
+            )

         return channel_id
@@ -665,13 +765,20 @@ class GoogleDriveConnector(BaseConnector):
             return False

         try:
-            self.service.channels().stop(body={"id": subscription_id, "resourceId": resource_id}).execute()
+            self.service.channels().stop(
+                body={"id": subscription_id, "resourceId": resource_id}
+            ).execute()

             # 4) Clear local bookkeeping
-            if getattr(self, "_active_channel", None) and self._active_channel.get("channel_id") == subscription_id:
+            if (
+                getattr(self, "_active_channel", None)
+                and self._active_channel.get("channel_id") == subscription_id
+            ):
                 self._active_channel = {}

-            if hasattr(self, "_subscriptions") and isinstance(self._subscriptions, dict):
+            if hasattr(self, "_subscriptions") and isinstance(
+                self._subscriptions, dict
+            ):
                 self._subscriptions.pop(subscription_id, None)

             return True
@@ -722,7 +829,9 @@ class GoogleDriveConnector(BaseConnector):
         except Exception as e:
             selected_ids = set()
             try:
-                logger.error(f"handle_webhook: scope build failed, proceeding unfiltered: {e}")
+                logger.error(
+                    f"handle_webhook: scope build failed, proceeding unfiltered: {e}"
+                )
             except Exception:
                 pass
@@ -759,7 +868,11 @@ class GoogleDriveConnector(BaseConnector):
             # Filter to our selected scope if we have one; otherwise accept all
             if selected_ids and (rid not in selected_ids):
                 # Shortcut target might be in scope even if the shortcut isn't
-                tgt = fobj.get("shortcutDetails", {}).get("targetId") if fobj else None
+                tgt = (
+                    fobj.get("shortcutDetails", {}).get("targetId")
+                    if fobj
+                    else None
+                )
                 if not (tgt and tgt in selected_ids):
                     continue
@@ -808,7 +921,9 @@ class GoogleDriveConnector(BaseConnector):
                 blob = self._download_file_bytes(meta)
             except HttpError as e:
                 # Skip/record failures
-                logger.error(f"Failed to download {meta.get('name')} ({meta.get('id')}): {e}")
+                logger.error(
+                    f"Failed to download {meta.get('name')} ({meta.get('id')}): {e}"
+                )
                 continue

             from datetime import datetime
@@ -838,7 +953,9 @@ class GoogleDriveConnector(BaseConnector):
                     "webViewLink": meta.get("webViewLink"),
                     "parents": meta.get("parents"),
                     "driveId": meta.get("driveId"),
-                    "size": int(meta.get("size", 0)) if str(meta.get("size", "")).isdigit() else None,
+                    "size": int(meta.get("size", 0))
+                    if str(meta.get("size", "")).isdigit()
+                    else None,
                 },
                 content=blob,
             )
@@ -849,7 +966,9 @@ class GoogleDriveConnector(BaseConnector):
     # -------------------------
     def get_start_page_token(self) -> str:
         # getStartPageToken accepts supportsAllDrives (not includeItemsFromAllDrives)
-        resp = self.service.changes().getStartPageToken(**self._drives_get_flags).execute()
+        resp = (
+            self.service.changes().getStartPageToken(**self._drives_get_flags).execute()
+        )
         return resp["startPageToken"]

     def poll_changes_and_sync(self) -> Optional[str]:
@@ -888,7 +1007,10 @@ class GoogleDriveConnector(BaseConnector):
             # Match scope
             if fid not in selected_ids:
                 # also consider shortcut target
-                if file_obj.get("mimeType") == "application/vnd.google-apps.shortcut":
+                if (
+                    file_obj.get("mimeType")
+                    == "application/vnd.google-apps.shortcut"
+                ):
                     tgt = file_obj.get("shortcutDetails", {}).get("targetId")
                     if tgt and tgt in selected_ids:
                         pass
@@ -923,7 +1045,10 @@ class GoogleDriveConnector(BaseConnector):
                 modified_time=parse_datetime(resolved.get("modifiedTime")),
                 mimetype=str(resolved.get("mimeType", "")),
                 acl=DocumentACL(),  # Set appropriate ACL if needed
-                metadata={"parents": resolved.get("parents"), "driveId": resolved.get("driveId")},
+                metadata={
+                    "parents": resolved.get("parents"),
+                    "driveId": resolved.get("driveId"),
+                },
                 content=blob,
             )
             self.emit(doc)
@@ -945,7 +1070,9 @@ class GoogleDriveConnector(BaseConnector):
     # -------------------------
     # Optional: webhook stubs
     # -------------------------
-    def build_watch_body(self, webhook_address: str, channel_id: Optional[str] = None) -> Dict[str, Any]:
+    def build_watch_body(
+        self, webhook_address: str, channel_id: Optional[str] = None
+    ) -> Dict[str, Any]:
         """
         Prepare the request body for changes.watch if you use webhooks.
         """
@@ -964,7 +1091,7 @@ class GoogleDriveConnector(BaseConnector):
         body = self.build_watch_body(webhook_address)
         result = (
             self.service.changes()
-            .watch(pageToken=page_token, body=body, **self._drives_flags)
+            .watch(pageToken=page_token, body=body, **self._drives_get_flags)
             .execute()
         )
         return result
@@ -974,7 +1101,9 @@ class GoogleDriveConnector(BaseConnector):
         Stop a previously started webhook watch.
         """
         try:
-            self.service.channels().stop(body={"id": channel_id, "resourceId": resource_id}).execute()
+            self.service.channels().stop(
+                body={"id": channel_id, "resourceId": resource_id}
+            ).execute()
             return True

         except HttpError as e:
@@ -1,5 +1,3 @@
-import os
-import tempfile
 from typing import Any, Dict, List, Optional

 # Create custom processor for connector files using Langflow
@@ -60,14 +58,14 @@ class LangflowConnectorService:
         # Create temporary file from document content
         with auto_cleanup_tempfile(suffix=suffix) as tmp_path:
             # Write document content to temp file
-            with open(tmp_path, 'wb') as f:
+            with open(tmp_path, "wb") as f:
                 f.write(document.content)

             # Step 1: Upload file to Langflow
             logger.debug("Uploading file to Langflow", filename=document.filename)
             content = document.content
             file_tuple = (
-                document.filename.replace(" ", "_").replace("/", "_")+suffix,
+                document.filename.replace(" ", "_").replace("/", "_") + suffix,
                 content,
                 document.mimetype or "application/octet-stream",
             )
@@ -256,7 +254,10 @@ class LangflowConnectorService:
         file_ids: List[str],
         jwt_token: str = None,
     ) -> str:
-        """Sync specific files by their IDs using Langflow processing"""
+        """
+        Sync specific files by their IDs using Langflow processing.
+        Automatically expands folders to their contents.
+        """
         if not self.task_service:
             raise ValueError(
                 "TaskService not available - connector sync requires task service dependency"
@@ -279,10 +280,50 @@ class LangflowConnectorService:
         owner_name = user.name if user else None
         owner_email = user.email if user else None

+        # Temporarily set file_ids in the connector's config so list_files() can use them
+        # Store the original values to restore later
+        cfg = getattr(connector, "cfg", None)
+        original_file_ids = None
+        original_folder_ids = None
+
+        if cfg is not None:
+            original_file_ids = getattr(cfg, "file_ids", None)
+            original_folder_ids = getattr(cfg, "folder_ids", None)
+
+        try:
+            # Set the file_ids we want to sync in the connector's config
+            if cfg is not None:
+                cfg.file_ids = file_ids  # type: ignore
+                cfg.folder_ids = None  # type: ignore
+
+            # Get the expanded list of file IDs (folders will be expanded to their contents)
+            # This uses the connector's list_files() which calls _iter_selected_items()
+            result = await connector.list_files()
+            expanded_file_ids = [f["id"] for f in result.get("files", [])]
+
+            if not expanded_file_ids:
+                logger.warning(
+                    f"No files found after expanding file_ids. "
+                    f"Original IDs: {file_ids}. This may indicate all IDs were folders "
+                    f"with no contents, or files that were filtered out."
+                )
+                # Return empty task rather than failing
+                raise ValueError("No files to sync after expanding folders")
+
+        except Exception as e:
+            logger.error(f"Failed to expand file_ids via list_files(): {e}")
+            # Fallback to original file_ids if expansion fails
+            expanded_file_ids = file_ids
+        finally:
+            # Restore original config values
+            if cfg is not None:
+                cfg.file_ids = original_file_ids  # type: ignore
+                cfg.folder_ids = original_folder_ids  # type: ignore
+
         processor = LangflowConnectorFileProcessor(
             self,
             connection_id,
-            file_ids,
+            expanded_file_ids,
             user_id,
             jwt_token=jwt_token,
             owner_name=owner_name,
@@ -291,7 +332,7 @@ class LangflowConnectorService:

         # Create custom task using TaskService
         task_id = await self.task_service.create_custom_task(
-            user_id, file_ids, processor
+            user_id, expanded_file_ids, processor
        )

         return task_id
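Note: the swap-and-restore dance above generalizes into a context manager. A minimal sketch, where `cfg` stands in for the connector's config object and the attribute names mirror the diff (the wrapper itself is hypothetical, not part of the change):

    # Minimal sketch of the temporary config swap used for folder expansion:
    # narrow the selection, let list_files() expand folders, always restore.
    from contextlib import contextmanager

    @contextmanager
    def scoped_selection(cfg, file_ids):
        """Temporarily narrow cfg to file_ids, restoring the originals on exit."""
        saved = (cfg.file_ids, cfg.folder_ids)
        cfg.file_ids, cfg.folder_ids = file_ids, None
        try:
            yield cfg
        finally:
            # Restore even if listing raised, so the connector keeps its scope.
            cfg.file_ids, cfg.folder_ids = saved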
@@ -1,16 +1,11 @@
-import tempfile
-import os
-from typing import Dict, Any, List, Optional
+from typing import Any, Dict, List, Optional

-from .base import BaseConnector, ConnectorDocument
 from utils.logging_config import get_logger

-logger = get_logger(__name__)
-from .google_drive import GoogleDriveConnector
-from .sharepoint import SharePointConnector
-from .onedrive import OneDriveConnector
+from .base import BaseConnector, ConnectorDocument
 from .connection_manager import ConnectionManager

+logger = get_logger(__name__)
@@ -56,9 +51,11 @@ class ConnectorService:
         # Create temporary file from document content
         from utils.file_utils import auto_cleanup_tempfile

-        with auto_cleanup_tempfile(suffix=self._get_file_extension(document.mimetype)) as tmp_path:
+        with auto_cleanup_tempfile(
+            suffix=self._get_file_extension(document.mimetype)
+        ) as tmp_path:
             # Write document content to temp file
-            with open(tmp_path, 'wb') as f:
+            with open(tmp_path, "wb") as f:
                 f.write(document.content)

             # Use existing process_file_common function with connector document metadata
@@ -71,6 +68,7 @@ class ConnectorService:

             # Process using consolidated processing pipeline
             from models.processors import TaskProcessor
+
             processor = TaskProcessor(document_service=doc_service)
             result = await processor.process_document_standard(
                 file_path=tmp_path,
@@ -301,7 +299,10 @@ class ConnectorService:
         file_ids: List[str],
         jwt_token: str = None,
     ) -> str:
-        """Sync specific files by their IDs (used for webhook-triggered syncs)"""
+        """
+        Sync specific files by their IDs (used for webhook-triggered syncs or manual selection).
+        Automatically expands folders to their contents.
+        """
         if not self.task_service:
             raise ValueError(
                 "TaskService not available - connector sync requires task service dependency"
@@ -324,14 +325,53 @@ class ConnectorService:
         owner_name = user.name if user else None
         owner_email = user.email if user else None

+        # Temporarily set file_ids in the connector's config so list_files() can use them
+        # Store the original values to restore later
+        original_file_ids = None
+        original_folder_ids = None
+
+        if hasattr(connector, "cfg"):
+            original_file_ids = getattr(connector.cfg, "file_ids", None)
+            original_folder_ids = getattr(connector.cfg, "folder_ids", None)
+
+        try:
+            # Set the file_ids we want to sync in the connector's config
+            if hasattr(connector, "cfg"):
+                connector.cfg.file_ids = file_ids  # type: ignore
+                connector.cfg.folder_ids = None  # type: ignore
+
+            # Get the expanded list of file IDs (folders will be expanded to their contents)
+            # This uses the connector's list_files() which calls _iter_selected_items()
+            result = await connector.list_files()
+            expanded_file_ids = [f["id"] for f in result.get("files", [])]
+
+            if not expanded_file_ids:
+                logger.warning(
+                    f"No files found after expanding file_ids. "
+                    f"Original IDs: {file_ids}. This may indicate all IDs were folders "
+                    f"with no contents, or files that were filtered out."
+                )
+                # Return empty task rather than failing
+                raise ValueError("No files to sync after expanding folders")
+
+        except Exception as e:
+            logger.error(f"Failed to expand file_ids via list_files(): {e}")
+            # Fallback to original file_ids if expansion fails
+            expanded_file_ids = file_ids
+        finally:
+            # Restore original config values
+            if hasattr(connector, "cfg"):
+                connector.cfg.file_ids = original_file_ids  # type: ignore
+                connector.cfg.folder_ids = original_folder_ids  # type: ignore
+
         # Create custom processor for specific connector files
         from models.processors import ConnectorFileProcessor

-        # We'll pass file_ids as the files_info, the processor will handle ID-only files
+        # Use expanded_file_ids which has folders already expanded
         processor = ConnectorFileProcessor(
             self,
             connection_id,
-            file_ids,
+            expanded_file_ids,
             user_id,
             jwt_token=jwt_token,
             owner_name=owner_name,
@@ -340,7 +380,7 @@ class ConnectorService:

         # Create custom task using TaskService
         task_id = await self.task_service.create_custom_task(
-            user_id, file_ids, processor
+            user_id, expanded_file_ids, processor
         )

         return task_id
src/main.py (16 lines changed)
@@ -131,7 +131,7 @@ async def configure_alerting_security():
     # Don't fail startup if alerting config fails


-async def _ensure_opensearch_index(self):
+async def _ensure_opensearch_index():
     """Ensure OpenSearch index exists when using traditional connector service."""
     try:
         # Check if index already exists
@@ -242,6 +242,9 @@ def generate_jwt_keys():
             capture_output=True,
         )

+        # Set restrictive permissions on private key (readable by owner only)
+        os.chmod(private_key_path, 0o600)
+
         # Generate public key
         subprocess.run(
             [
@@ -257,12 +260,21 @@ def generate_jwt_keys():
             capture_output=True,
         )

+        # Set permissions on public key (readable by all)
+        os.chmod(public_key_path, 0o644)
+
         logger.info("Generated RSA keys for JWT signing")
     except subprocess.CalledProcessError as e:
         logger.error("Failed to generate RSA keys", error=str(e))
         raise
     else:
-        logger.info("RSA keys already exist, skipping generation")
+        # Ensure correct permissions on existing keys
+        try:
+            os.chmod(private_key_path, 0o600)
+            os.chmod(public_key_path, 0o644)
+            logger.info("RSA keys already exist, ensured correct permissions")
+        except OSError as e:
+            logger.warning("Failed to set permissions on existing keys", error=str(e))


 async def init_index_when_ready():
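Note: the permission discipline above is worth keeping in mind on its own. A minimal sketch, with hypothetical key paths standing in for the ones used by the service:

    # Minimal sketch: private keys readable only by the owner, public keys
    # world-readable. The paths are hypothetical placeholders.
    import os

    PRIVATE_KEY = "keys/jwt_private.pem"
    PUBLIC_KEY = "keys/jwt_public.pem"

    os.chmod(PRIVATE_KEY, 0o600)  # rw------- : owner-only read/write
    os.chmod(PUBLIC_KEY, 0o644)   # rw-r--r-- : anyone may read, only owner writes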
@@ -296,11 +296,16 @@ class AuthService:
         try:
             if self.langflow_mcp_service and isinstance(jwt_token, str) and jwt_token.strip():
                 global_vars = {"JWT": jwt_token}
+                global_vars["CONNECTOR_TYPE_URL"] = "url"
                 if user_info:
                     if user_info.get("id"):
                         global_vars["OWNER"] = user_info.get("id")
                     if user_info.get("name"):
-                        global_vars["OWNER_NAME"] = user_info.get("name")
+                        # OWNER_NAME may contain spaces, which can cause issues in headers.
+                        # Alternative: URL-encode the owner name to preserve spaces and special characters.
+                        owner_name = user_info.get("name")
+                        if owner_name:
+                            global_vars["OWNER_NAME"] = str(f"\"{owner_name}\"")
                     if user_info.get("email"):
                         global_vars["OWNER_EMAIL"] = user_info.get("email")
@@ -126,7 +126,11 @@ class DocumentService:
         from utils.file_utils import auto_cleanup_tempfile
         import os

-        with auto_cleanup_tempfile() as tmp_path:
+        # Preserve file extension for docling format detection
+        filename = upload_file.filename or "uploaded"
+        suffix = os.path.splitext(filename)[1] or ""
+
+        with auto_cleanup_tempfile(suffix=suffix) as tmp_path:
             # Stream upload file to temporary file
             file_size = 0
             with open(tmp_path, 'wb') as tmp_file:
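Note: the fix above works because format sniffers often key off the file suffix. A minimal standalone sketch using only the standard library (the filename is a made-up example):

    # Minimal sketch of extension-preserving temp files, so downstream format
    # detection (e.g. docling) can key off the suffix.
    import os
    import tempfile

    filename = "report.pdf"
    suffix = os.path.splitext(filename)[1] or ""  # ".pdf"

    with tempfile.NamedTemporaryFile(suffix=suffix) as tmp:
        tmp.write(b"...file bytes...")
        tmp.flush()
        print(tmp.name)  # ends in .pdf, so extension-based type sniffing still works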
@@ -242,6 +242,35 @@ class ModelsService:
             headers["Authorization"] = f"Bearer {api_key}"
         if project_id:
             headers["Project-ID"] = project_id

+        # Validate credentials with a minimal completion request
+        async with httpx.AsyncClient() as client:
+            validation_url = f"{watson_endpoint}/ml/v1/text/generation"
+            validation_params = {"version": "2024-09-16"}
+            validation_payload = {
+                "input": "test",
+                "model_id": "ibm/granite-3-2b-instruct",
+                "project_id": project_id,
+                "parameters": {
+                    "max_new_tokens": 1,
+                },
+            }
+
+            validation_response = await client.post(
+                validation_url,
+                headers=headers,
+                params=validation_params,
+                json=validation_payload,
+                timeout=10.0,
+            )
+
+            if validation_response.status_code != 200:
+                raise Exception(
+                    f"Invalid credentials or endpoint: {validation_response.status_code} - {validation_response.text}"
+                )
+
+            logger.info("IBM Watson credentials validated successfully")
+
         # Fetch foundation models using the correct endpoint
         models_url = f"{watson_endpoint}/ml/v1/foundation_model_specs"
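Note: the probe pattern generalizes beyond Watson: spend one token to fail fast on bad credentials before doing real work. A minimal sketch with a placeholder endpoint and headers (both assumptions):

    # Minimal credential probe: a 1-token generation request surfaces auth
    # errors cheaply. Endpoint, headers, and payload shape are placeholders.
    import asyncio
    import httpx

    async def probe(endpoint: str, headers: dict) -> bool:
        payload = {"input": "test", "parameters": {"max_new_tokens": 1}}
        async with httpx.AsyncClient() as client:
            resp = await client.post(endpoint, headers=headers, json=payload, timeout=10.0)
        return resp.status_code == 200

    # asyncio.run(probe("https://example.com/ml/v1/text/generation",
    #                   {"Authorization": "Bearer <token>"}))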
@@ -1 +1,8 @@
 """OpenRAG Terminal User Interface package."""
+
+from importlib.metadata import version
+
+try:
+    __version__ = version("openrag")
+except Exception:
+    __version__ = "unknown"
src/tui/_assets/docker-compose-cpu.yml (121-line file replaced by a 1-line symbolic link)
@@ -1,121 +0,0 @@
-services:
-  opensearch:
-    image: phact/openrag-opensearch:${OPENRAG_VERSION:-latest}
-    #build:
-    #  context: .
-    #  dockerfile: Dockerfile
-    container_name: os
-    depends_on:
-      - openrag-backend
-    environment:
-      - discovery.type=single-node
-      - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_PASSWORD}
-    # Run security setup in background after OpenSearch starts
-    command: >
-      bash -c "
-      # Start OpenSearch in background
-      /usr/share/opensearch/opensearch-docker-entrypoint.sh opensearch &
-
-      # Wait a bit for OpenSearch to start, then apply security config
-      sleep 10 && /usr/share/opensearch/setup-security.sh &
-
-      # Wait for background processes
-      wait
-      "
-    ports:
-      - "9200:9200"
-      - "9600:9600"
-
-  dashboards:
-    image: opensearchproject/opensearch-dashboards:3.0.0
-    container_name: osdash
-    depends_on:
-      - opensearch
-    environment:
-      OPENSEARCH_HOSTS: '["https://opensearch:9200"]'
-      OPENSEARCH_USERNAME: "admin"
-      OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD}
-    ports:
-      - "5601:5601"
-
-  openrag-backend:
-    image: phact/openrag-backend:${OPENRAG_VERSION:-latest}
-    #build:
-    #  context: .
-    #  dockerfile: Dockerfile.backend
-    container_name: openrag-backend
-    depends_on:
-      - langflow
-    environment:
-      - OPENSEARCH_HOST=opensearch
-      - LANGFLOW_URL=http://langflow:7860
-      - LANGFLOW_PUBLIC_URL=${LANGFLOW_PUBLIC_URL}
-      - LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
-      - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
-      - LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
-      - LANGFLOW_CHAT_FLOW_ID=${LANGFLOW_CHAT_FLOW_ID}
-      - LANGFLOW_INGEST_FLOW_ID=${LANGFLOW_INGEST_FLOW_ID}
-      - LANGFLOW_URL_INGEST_FLOW_ID=${LANGFLOW_URL_INGEST_FLOW_ID}
-      - DISABLE_INGEST_WITH_LANGFLOW=${DISABLE_INGEST_WITH_LANGFLOW:-false}
-      - NUDGES_FLOW_ID=${NUDGES_FLOW_ID}
-      - OPENSEARCH_PORT=9200
-      - OPENSEARCH_USERNAME=admin
-      - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
-      - OPENAI_API_KEY=${OPENAI_API_KEY}
-      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
-      - NVIDIA_VISIBLE_DEVICES=all
-      - GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID}
-      - GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET}
-      - MICROSOFT_GRAPH_OAUTH_CLIENT_ID=${MICROSOFT_GRAPH_OAUTH_CLIENT_ID}
-      - MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET=${MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET}
-      - WEBHOOK_BASE_URL=${WEBHOOK_BASE_URL}
-      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
-      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
-    volumes:
-      - ./documents:/app/documents:Z
-      - ./keys:/app/keys:Z
-      - ./flows:/app/flows:Z
-
-  openrag-frontend:
-    image: phact/openrag-frontend:${OPENRAG_VERSION:-latest}
-    #build:
-    #  context: .
-    #  dockerfile: Dockerfile.frontend
-    container_name: openrag-frontend
-    depends_on:
-      - openrag-backend
-    environment:
-      - OPENRAG_BACKEND_HOST=openrag-backend
-    ports:
-      - "3000:3000"
-
-  langflow:
-    volumes:
-      - ./flows:/app/flows:Z
-    image: phact/openrag-langflow:${LANGFLOW_VERSION:-latest}
-    container_name: langflow
-    ports:
-      - "7860:7860"
-    environment:
-      - OPENAI_API_KEY=${OPENAI_API_KEY}
-      - LANGFLOW_LOAD_FLOWS_PATH=/app/flows
-      - LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
-      - JWT=None
-      - OWNER=None
-      - OWNER_NAME=None
-      - OWNER_EMAIL=None
-      - CONNECTOR_TYPE=system
-      - OPENRAG-QUERY-FILTER="{}"
-      - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
-      - FILENAME=None
-      - MIMETYPE=None
-      - FILESIZE=0
-      - LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT,OPENRAG-QUERY-FILTER,OPENSEARCH_PASSWORD,OWNER,OWNER_NAME,OWNER_EMAIL,CONNECTOR_TYPE,FILENAME,MIMETYPE,FILESIZE
-      - LANGFLOW_LOG_LEVEL=DEBUG
-      - LANGFLOW_AUTO_LOGIN=${LANGFLOW_AUTO_LOGIN}
-      - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
-      - LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
-      - LANGFLOW_NEW_USER_IS_ACTIVE=${LANGFLOW_NEW_USER_IS_ACTIVE}
-      - LANGFLOW_ENABLE_SUPERUSER_CLI=${LANGFLOW_ENABLE_SUPERUSER_CLI}
-      # - DEFAULT_FOLDER_NAME=OpenRAG
-      - HIDE_GETTING_STARTED_PROGRESS=true
@@ -0,0 +1 @@
+../../../docker-compose-cpu.yml
src/tui/_assets/docker-compose.yml (121-line file replaced by a 1-line symbolic link)
@@ -1,121 +0,0 @@
-services:
-  opensearch:
-    image: phact/openrag-opensearch:${OPENRAG_VERSION:-latest}
-    #build:
-    #  context: .
-    #  dockerfile: Dockerfile
-    container_name: os
-    depends_on:
-      - openrag-backend
-    environment:
-      - discovery.type=single-node
-      - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_PASSWORD}
-    # Run security setup in background after OpenSearch starts
-    command: >
-      bash -c "
-      # Start OpenSearch in background
-      /usr/share/opensearch/opensearch-docker-entrypoint.sh opensearch &
-
-      # Wait a bit for OpenSearch to start, then apply security config
-      sleep 10 && /usr/share/opensearch/setup-security.sh &
-
-      # Wait for background processes
-      wait
-      "
-    ports:
-      - "9200:9200"
-      - "9600:9600"
-
-  dashboards:
-    image: opensearchproject/opensearch-dashboards:3.0.0
-    container_name: osdash
-    depends_on:
-      - opensearch
-    environment:
-      OPENSEARCH_HOSTS: '["https://opensearch:9200"]'
-      OPENSEARCH_USERNAME: "admin"
-      OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD}
-    ports:
-      - "5601:5601"
-
-  openrag-backend:
-    image: phact/openrag-backend:${OPENRAG_VERSION:-latest}
-    #build:
-    #  context: .
-    #  dockerfile: Dockerfile.backend
-    container_name: openrag-backend
-    depends_on:
-      - langflow
-    environment:
-      - OPENSEARCH_HOST=opensearch
-      - LANGFLOW_URL=http://langflow:7860
-      - LANGFLOW_PUBLIC_URL=${LANGFLOW_PUBLIC_URL}
-      - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
-      - LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
-      - LANGFLOW_CHAT_FLOW_ID=${LANGFLOW_CHAT_FLOW_ID}
-      - LANGFLOW_INGEST_FLOW_ID=${LANGFLOW_INGEST_FLOW_ID}
-      - LANGFLOW_URL_INGEST_FLOW_ID=${LANGFLOW_URL_INGEST_FLOW_ID}
-      - DISABLE_INGEST_WITH_LANGFLOW=${DISABLE_INGEST_WITH_LANGFLOW:-false}
-      - NUDGES_FLOW_ID=${NUDGES_FLOW_ID}
-      - OPENSEARCH_PORT=9200
-      - OPENSEARCH_USERNAME=admin
-      - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
-      - OPENAI_API_KEY=${OPENAI_API_KEY}
-      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
-      - NVIDIA_VISIBLE_DEVICES=all
-      - GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID}
-      - GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET}
-      - MICROSOFT_GRAPH_OAUTH_CLIENT_ID=${MICROSOFT_GRAPH_OAUTH_CLIENT_ID}
-      - MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET=${MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET}
-      - WEBHOOK_BASE_URL=${WEBHOOK_BASE_URL}
-      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
-      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
-    volumes:
-      - ./documents:/app/documents:Z
-      - ./keys:/app/keys:Z
-      - ./flows:/app/flows:Z
-    gpus: all
-
-  openrag-frontend:
-    image: phact/openrag-frontend:${OPENRAG_VERSION:-latest}
-    #build:
-    #  context: .
-    #  dockerfile: Dockerfile.frontend
-    container_name: openrag-frontend
-    depends_on:
-      - openrag-backend
-    environment:
-      - OPENRAG_BACKEND_HOST=openrag-backend
-    ports:
-      - "3000:3000"
-
-  langflow:
-    volumes:
-      - ./flows:/app/flows:Z
-    image: phact/openrag-langflow:${LANGFLOW_VERSION:-latest}
-    container_name: langflow
-    ports:
-      - "7860:7860"
-    environment:
-      - OPENAI_API_KEY=${OPENAI_API_KEY}
-      - LANGFLOW_LOAD_FLOWS_PATH=/app/flows
-      - LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
-      - JWT=None
-      - OWNER=None
-      - OWNER_NAME=None
-      - OWNER_EMAIL=None
-      - CONNECTOR_TYPE=system
-      - OPENRAG-QUERY-FILTER="{}"
-      - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
-      - FILENAME=None
-      - MIMETYPE=None
-      - FILESIZE=0
-      - LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT,OPENRAG-QUERY-FILTER,OPENSEARCH_PASSWORD,OWNER,OWNER_NAME,OWNER_EMAIL,CONNECTOR_TYPE,FILENAME,MIMETYPE,FILESIZE
-      - LANGFLOW_LOG_LEVEL=DEBUG
-      - LANGFLOW_AUTO_LOGIN=${LANGFLOW_AUTO_LOGIN}
-      - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
-      - LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
-      - LANGFLOW_NEW_USER_IS_ACTIVE=${LANGFLOW_NEW_USER_IS_ACTIVE}
-      - LANGFLOW_ENABLE_SUPERUSER_CLI=${LANGFLOW_ENABLE_SUPERUSER_CLI}
-      # - DEFAULT_FOLDER_NAME="OpenRAG"
-      - HIDE_GETTING_STARTED_PROGRESS=true
@@ -0,0 +1 @@
+../../../docker-compose.yml
src/tui/_assets/documents/2506.08231v1.pdf (binary file replaced by a symbolic link)
@@ -0,0 +1 @@
+../../../../documents/2506.08231v1.pdf

src/tui/_assets/documents/ai-human-resources.pdf (binary file replaced by a symbolic link)
@@ -0,0 +1 @@
+../../../../documents/ai-human-resources.pdf

src/tui/_assets/documents/warmup_ocr.pdf (binary file replaced by a symbolic link)
@@ -0,0 +1 @@
+../../../../documents/warmup_ocr.pdf

src/tui/_assets/flows/components/ollama_embedding.json (symbolic link)
@@ -0,0 +1 @@
+../../../../../flows/components/ollama_embedding.json

src/tui/_assets/flows/components/ollama_llm.json (symbolic link)
@@ -0,0 +1 @@
+../../../../../flows/components/ollama_llm.json

src/tui/_assets/flows/components/ollama_llm_text.json (symbolic link)
@@ -0,0 +1 @@
+../../../../../flows/components/ollama_llm_text.json

src/tui/_assets/flows/components/watsonx_embedding.json (symbolic link)
@@ -0,0 +1 @@
+../../../../../flows/components/watsonx_embedding.json

src/tui/_assets/flows/components/watsonx_llm.json (symbolic link)
@@ -0,0 +1 @@
+../../../../../flows/components/watsonx_llm.json

src/tui/_assets/flows/components/watsonx_llm_text.json (symbolic link)
@@ -0,0 +1 @@
+../../../../../flows/components/watsonx_llm_text.json

src/tui/_assets/flows/ingestion_flow.json (symbolic link)
@@ -0,0 +1 @@
+../../../../flows/ingestion_flow.json

src/tui/_assets/flows/openrag_agent.json (symbolic link)
@@ -0,0 +1 @@
+../../../../flows/openrag_agent.json

src/tui/_assets/flows/openrag_ingest_docling.json (symbolic link)
@@ -0,0 +1 @@
+../../../../flows/openrag_ingest_docling.json

src/tui/_assets/flows/openrag_nudges.json (symbolic link)
@@ -0,0 +1 @@
+../../../../flows/openrag_nudges.json

src/tui/_assets/flows/openrag_url_mcp.json (symbolic link)
@@ -0,0 +1 @@
+../../../../flows/openrag_url_mcp.json

src/tui/main.py (105 lines changed)
@@ -2,6 +2,7 @@

 import sys
 from pathlib import Path
+from typing import Iterable, Optional
 from textual.app import App, ComposeResult
 from utils.logging_config import get_logger
 try:
@@ -305,41 +306,103 @@ class OpenRAGTUI(App):
         return True, "Runtime requirements satisfied"


-def copy_sample_documents():
+def _copy_assets(resource_tree, destination: Path, allowed_suffixes: Optional[Iterable[str]] = None, *, force: bool = False) -> None:
+    """Copy packaged assets into destination and optionally overwrite existing files.
+
+    When ``force`` is True, files are refreshed if the packaged bytes differ.
+    """
+    destination.mkdir(parents=True, exist_ok=True)
+
+    for resource in resource_tree.iterdir():
+        target_path = destination / resource.name
+
+        if resource.is_dir():
+            _copy_assets(resource, target_path, allowed_suffixes, force=force)
+            continue
+
+        if allowed_suffixes and not any(resource.name.endswith(suffix) for suffix in allowed_suffixes):
+            continue
+        resource_bytes = resource.read_bytes()
+
+        if target_path.exists():
+            if not force:
+                continue
+
+            try:
+                if target_path.read_bytes() == resource_bytes:
+                    continue
+            except Exception as read_error:
+                logger.debug(f"Failed to read existing asset {target_path}: {read_error}")
+
+        target_path.write_bytes(resource_bytes)
+        logger.info(f"Copied bundled asset: {target_path}")
+
+
+def copy_sample_documents(*, force: bool = False) -> None:
     """Copy sample documents from package to current directory if they don't exist."""
     documents_dir = Path("documents")

-    # Check if documents directory already exists and has files
-    if documents_dir.exists() and any(documents_dir.glob("*.pdf")):
-        return  # Documents already exist, don't overwrite
-
     try:
-        # Get sample documents from package assets
         assets_files = files("tui._assets.documents")
-
-        # Create documents directory if it doesn't exist
-        documents_dir.mkdir(exist_ok=True)
-
-        # Copy each sample document
-        for resource in assets_files.iterdir():
-            if resource.is_file() and resource.name.endswith('.pdf'):
-                dest_path = documents_dir / resource.name
-                if not dest_path.exists():
-                    content = resource.read_bytes()
-                    dest_path.write_bytes(content)
-                    logger.info(f"Copied sample document: {resource.name}")
-
+        _copy_assets(assets_files, documents_dir, allowed_suffixes=(".pdf",), force=force)
     except Exception as e:
         logger.debug(f"Could not copy sample documents: {e}")
         # This is not a critical error - the app can work without sample documents


+def copy_sample_flows(*, force: bool = False) -> None:
+    """Copy sample flows from package to current directory if they don't exist."""
+    flows_dir = Path("flows")
+
+    try:
+        assets_files = files("tui._assets.flows")
+        _copy_assets(assets_files, flows_dir, allowed_suffixes=(".json",), force=force)
+    except Exception as e:
+        logger.debug(f"Could not copy sample flows: {e}")
+        # The app can proceed without bundled flows
+
+
+def copy_compose_files(*, force: bool = False) -> None:
+    """Copy docker-compose templates into the workspace if they are missing."""
+    try:
+        assets_root = files("tui._assets")
+    except Exception as e:
+        logger.debug(f"Could not access compose assets: {e}")
+        return
+
+    for filename in ("docker-compose.yml", "docker-compose-cpu.yml"):
+        destination = Path(filename)
+        if destination.exists() and not force:
+            continue
+
+        try:
+            resource = assets_root.joinpath(filename)
+            if not resource.is_file():
+                logger.debug(f"Compose template not found in assets: {filename}")
+                continue
+
+            resource_bytes = resource.read_bytes()
+            if destination.exists():
+                try:
+                    if destination.read_bytes() == resource_bytes:
+                        continue
+                except Exception as read_error:
+                    logger.debug(f"Failed to read existing compose file {destination}: {read_error}")
+
+            destination.write_bytes(resource_bytes)
+            logger.info(f"Copied docker-compose template: {filename}")
+        except Exception as error:
+            logger.debug(f"Could not copy compose file {filename}: {error}")
+
+
 def run_tui():
     """Run the OpenRAG TUI application."""
     app = None
     try:
-        # Copy sample documents on first run
-        copy_sample_documents()
+        # Keep bundled assets aligned with the packaged versions
+        copy_sample_documents(force=True)
+        copy_sample_flows(force=True)
+        copy_compose_files(force=True)

         app = OpenRAGTUI()
         app.run()
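Note: the helpers above lean entirely on the importlib.resources Traversable API, which works whether the package is installed flat or zipped. A minimal sketch, reusing the same package name as above (any package containing data files would do):

    # Minimal sketch of walking packaged assets with importlib.resources.
    from importlib.resources import files

    root = files("tui._assets")
    for entry in root.iterdir():
        if entry.is_file():
            data = entry.read_bytes()  # Traversable API: works from wheels and zips alike
            print(entry.name, len(data))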
@@ -8,7 +8,6 @@ import threading
 import time
 from typing import Optional, Tuple, Dict, Any, List, AsyncIterator
 from utils.logging_config import get_logger
-from utils.container_utils import guess_host_ip_for_containers

 logger = get_logger(__name__)
@@ -32,7 +31,8 @@ class DoclingManager:

         self._process: Optional[subprocess.Popen] = None
         self._port = 5001
-        self._host = guess_host_ip_for_containers(logger=logger)  # Get appropriate host IP based on runtime
+        # Bind to all interfaces by default (can be overridden with DOCLING_BIND_HOST env var)
+        self._host = os.getenv('DOCLING_BIND_HOST', '0.0.0.0')
         self._running = False
         self._external_process = False
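Note: the convention introduced here is "bind wide, display narrow": listen on every interface so containers can reach the host service, but never advertise 0.0.0.0 in a URL. A minimal sketch of that mapping, with the port hard-coded as an example:

    # Minimal sketch of the bind-host convention: DOCLING_BIND_HOST overrides
    # the default all-interfaces bind; display URLs swap 0.0.0.0 for localhost.
    import os

    bind_host = os.getenv("DOCLING_BIND_HOST", "0.0.0.0")
    display_host = "localhost" if bind_host == "0.0.0.0" else bind_host
    print(f"binding on {bind_host}, advertise http://{display_host}:5001")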
@@ -150,16 +150,20 @@ class DoclingManager:
             else:
                 pid = self._load_pid()

+            # Use localhost for display URLs when bound to 0.0.0.0
+            display_host = "localhost" if self._host == "0.0.0.0" else self._host
+
             return {
                 "status": "running",
                 "port": self._port,
                 "host": self._host,
-                "endpoint": f"http://{self._host}:{self._port}",
-                "docs_url": f"http://{self._host}:{self._port}/docs",
-                "ui_url": f"http://{self._host}:{self._port}/ui",
+                "endpoint": f"http://{display_host}:{self._port}",
+                "docs_url": f"http://{display_host}:{self._port}/docs",
+                "ui_url": f"http://{display_host}:{self._port}/ui",
                 "pid": pid
             }
         else:
+            display_host = "localhost" if self._host == "0.0.0.0" else self._host
             return {
                 "status": "stopped",
                 "port": self._port,
|
||||||
return False, "Docling serve is already running"
|
return False, "Docling serve is already running"
|
||||||
|
|
||||||
self._port = port
|
self._port = port
|
||||||
# Use provided host or the bridge IP we detected in __init__
|
# Use provided host or keep default from __init__
|
||||||
if host is not None:
|
if host is not None:
|
||||||
self._host = host
|
self._host = host
|
||||||
# else: keep self._host as already set in __init__
|
|
||||||
|
|
||||||
# Check if port is already in use before trying to start
|
# Check if port is already in use before trying to start
|
||||||
import socket
|
import socket
|
||||||
|
|
@@ -293,7 +296,8 @@ class DoclingManager:
                 self._running = False
                 return False, f"Docling serve process exited immediately (code: {return_code})"

-            return True, f"Docling serve starting on http://{host}:{port}"
+            display_host = "localhost" if self._host == "0.0.0.0" else self._host
+            return True, f"Docling serve starting on http://{display_host}:{port}"

         except FileNotFoundError:
             return False, "docling-serve not available. Please install: uv add docling-serve"
@@ -454,7 +458,8 @@ class DoclingManager:
     async def follow_logs(self) -> AsyncIterator[str]:
         """Follow logs from the docling-serve process in real-time."""
         # First yield status message and any existing logs
-        status_msg = f"Docling serve is running on http://{self._host}:{self._port}"
+        display_host = "localhost" if self._host == "0.0.0.0" else self._host
+        status_msg = f"Docling serve is running on http://{display_host}:{self._port}"

         with self._log_lock:
             if self._log_buffer:
@@ -10,6 +10,7 @@ from rich.text import Text
 from rich.align import Align
 from dotenv import load_dotenv

+from .. import __version__
 from ..managers.container_manager import ContainerManager, ServiceStatus
 from ..managers.env_manager import EnvManager
 from ..managers.docling_manager import DoclingManager

@@ -116,7 +117,8 @@ class WelcomeScreen(Screen):
         ╚═════╝ ╚═╝ ╚══════╝╚═╝ ╚═══╝╚═╝ ╚═╝╚═╝ ╚═╝╚═════╝
         """
         welcome_text.append(ascii_art, style="bold white")
-        welcome_text.append("Terminal User Interface for OpenRAG\n\n", style="dim")
+        welcome_text.append("Terminal User Interface for OpenRAG\n", style="dim")
+        welcome_text.append(f"v{__version__}\n\n", style="dim cyan")

         # Check if all services are running
         all_services_running = self.services_running and self.docling_running
@@ -157,10 +157,22 @@ def guess_host_ip_for_containers(logger=None) -> str:
     import logging
     import re
     import shutil
+    import socket
     import subprocess

     log = logger or logging.getLogger(__name__)

+    def can_bind_to_address(ip_addr: str) -> bool:
+        """Test if we can bind to the given IP address."""
+        try:
+            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+                sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+                sock.bind((ip_addr, 0))  # Port 0 = let OS choose a free port
+            return True
+        except (OSError, socket.error) as e:
+            log.debug("Cannot bind to %s: %s", ip_addr, e)
+            return False
+
     def run(cmd, timeout=2, text=True):
         return subprocess.run(cmd, capture_output=True, text=text, timeout=timeout)

@@ -261,10 +273,23 @@ def guess_host_ip_for_containers(logger=None) -> str:
                 "Container-reachable host IP candidates: %s",
                 ", ".join(ordered_candidates),
             )
-        else:
-            log.info("Container-reachable host IP: %s", ordered_candidates[0])

-        return ordered_candidates[0]
+        # Try each candidate and return the first one we can bind to
+        for ip_addr in ordered_candidates:
+            if can_bind_to_address(ip_addr):
+                if len(ordered_candidates) > 1:
+                    log.info("Selected bindable host IP: %s", ip_addr)
+                else:
+                    log.info("Container-reachable host IP: %s", ip_addr)
+                return ip_addr
+            log.debug("Skipping %s (cannot bind)", ip_addr)
+
+        # None of the candidates were bindable, fall back to 127.0.0.1
+        log.warning(
+            "None of the discovered IPs (%s) can be bound; falling back to 127.0.0.1",
+            ", ".join(ordered_candidates),
+        )
+        return "127.0.0.1"

     log.warning(
         "No container bridge IP found. For rootless Podman (slirp4netns) there may be no host bridge; publish ports or use 10.0.2.2 from the container."
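The bind-probe pattern introduced here generalizes well; a self-contained sketch under the same assumptions (the helper name `first_bindable` and the candidate addresses are illustrative, not from the diff):

```python
import socket

def first_bindable(candidates: list[str], fallback: str = "127.0.0.1") -> str:
    """Return the first address we can actually bind a TCP socket to."""
    for ip in candidates:
        try:
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
                sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
                sock.bind((ip, 0))  # port 0: the OS picks a free ephemeral port
            return ip
        except OSError:
            continue  # address exists in routing tables but is not ours to bind
    return fallback

# Example: typical Podman/Docker bridge candidates, in discovery order
print(first_bindable(["10.88.0.1", "172.17.0.1", "127.0.0.1"]))
```

Probing with an actual `bind()` rather than trusting interface discovery is what lets the fallback to `127.0.0.1` kick in on hosts where a bridge IP is visible but not locally bindable.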
1 tests/__init__.py Normal file

@@ -0,0 +1 @@
+# Test package
85 tests/conftest.py Normal file

@@ -0,0 +1,85 @@
+import asyncio
+import os
+import tempfile
+from pathlib import Path
+
+import pytest
+import pytest_asyncio
+from dotenv import load_dotenv
+
+# Load environment variables
+load_dotenv()
+
+# Force no-auth mode for testing by setting OAuth credentials to empty strings
+# This ensures anonymous JWT tokens are created automatically
+os.environ['GOOGLE_OAUTH_CLIENT_ID'] = ''
+os.environ['GOOGLE_OAUTH_CLIENT_SECRET'] = ''
+
+from src.config.settings import clients
+from src.session_manager import SessionManager
+from src.main import generate_jwt_keys
+
+
+@pytest.fixture(scope="session")
+def event_loop():
+    """Create an instance of the default event loop for the test session."""
+    loop = asyncio.get_event_loop_policy().new_event_loop()
+    yield loop
+    loop.close()
+
+
+@pytest_asyncio.fixture
+async def opensearch_client():
+    """OpenSearch client for testing - requires running OpenSearch."""
+    await clients.initialize()
+    yield clients.opensearch
+    # Cleanup test indices after tests
+    try:
+        await clients.opensearch.indices.delete(index="test_documents")
+    except Exception:
+        pass
+
+
+@pytest.fixture
+def session_manager():
+    """Session manager for testing."""
+    # Generate RSA keys before creating SessionManager
+    generate_jwt_keys()
+    sm = SessionManager("test-secret-key")
+    print(f"[DEBUG] SessionManager created with keys: private={sm.private_key_path}, public={sm.public_key_path}")
+    return sm
+
+
+@pytest.fixture
+def test_documents_dir():
+    """Create a temporary directory with test documents."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        test_dir = Path(temp_dir)
+
+        # Create some test files in supported formats
+        (test_dir / "test1.md").write_text("# Machine Learning Document\n\nThis is a test document about machine learning.")
+        (test_dir / "test2.md").write_text("# AI Document\n\nAnother document discussing artificial intelligence.")
+        (test_dir / "test3.md").write_text("# Data Science Document\n\nThis is a markdown file about data science.")
+
+        # Create subdirectory with files
+        sub_dir = test_dir / "subdir"
+        sub_dir.mkdir()
+        (sub_dir / "nested.md").write_text("# Neural Networks\n\nNested document about neural networks.")
+
+        yield test_dir
+
+
+@pytest.fixture
+def test_single_file():
+    """Create a single test file."""
+    with tempfile.NamedTemporaryFile(mode='w', suffix='_test_document.md', delete=False) as f:
+        f.write("# Single Test Document\n\nThis is a test document about OpenRAG testing framework. This document contains multiple sentences to ensure proper chunking. The content should be indexed and searchable in OpenSearch after processing.")
+        temp_path = f.name
+
+    yield temp_path
+
+    # Cleanup
+    try:
+        os.unlink(temp_path)
+    except FileNotFoundError:
+        pass
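These fixtures are plain pytest/pytest-asyncio plumbing, so they compose in the usual way. A hypothetical test (not part of the diff) showing how `test_documents_dir` and `session_manager` would be consumed, without needing a running OpenSearch cluster:

```python
# Illustrative only: exercises the conftest fixtures above.
def test_fixtures_compose(test_documents_dir, session_manager):
    # test_documents_dir yields a Path containing three markdown files plus
    # one nested file created by the fixture
    names = sorted(p.name for p in test_documents_dir.rglob("*.md"))
    assert names == ["nested.md", "test1.md", "test2.md", "test3.md"]
    # session_manager yields a SessionManager with freshly generated JWT keys
    assert callable(session_manager.create_jwt_token)
```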
1 tests/integration/__init__.py Normal file

@@ -0,0 +1 @@
+# Integration tests package
296 tests/integration/test_api_endpoints.py Normal file

@@ -0,0 +1,296 @@
+import asyncio
+import os
+from pathlib import Path
+
+import httpx
+import pytest
+
+
+async def wait_for_service_ready(client: httpx.AsyncClient, timeout_s: float = 30.0):
+    """Poll existing endpoints until the app and OpenSearch are ready.
+
+    Strategy:
+    - GET /auth/me should return 200 immediately (confirms app is up).
+    - POST /search with query "*" avoids embeddings and checks OpenSearch/index readiness.
+    """
+    # First test OpenSearch JWT directly
+    from src.session_manager import SessionManager, AnonymousUser
+    import os
+    import hashlib
+    import jwt as jwt_lib
+    sm = SessionManager("test")
+    test_token = sm.create_jwt_token(AnonymousUser())
+    token_hash = hashlib.sha256(test_token.encode()).hexdigest()[:16]
+    print(f"[DEBUG] Generated test JWT token hash: {token_hash}")
+    print(f"[DEBUG] Using key paths: private={sm.private_key_path}, public={sm.public_key_path}")
+    with open(sm.public_key_path, 'rb') as f:
+        pub_key_hash = hashlib.sha256(f.read()).hexdigest()[:16]
+    print(f"[DEBUG] Public key hash: {pub_key_hash}")
+    # Decode token to see claims
+    decoded = jwt_lib.decode(test_token, options={"verify_signature": False})
+    print(f"[DEBUG] JWT claims: iss={decoded.get('iss')}, sub={decoded.get('sub')}, aud={decoded.get('aud')}, roles={decoded.get('roles')}")
+
+    # Test OpenSearch JWT auth directly
+    opensearch_url = f"https://{os.getenv('OPENSEARCH_HOST', 'localhost')}:{os.getenv('OPENSEARCH_PORT', '9200')}"
+    print(f"[DEBUG] Testing JWT auth directly against: {opensearch_url}/documents/_search")
+    async with httpx.AsyncClient(verify=False) as os_client:
+        r_os = await os_client.post(
+            f"{opensearch_url}/documents/_search",
+            headers={"Authorization": f"Bearer {test_token}"},
+            json={"query": {"match_all": {}}, "size": 0}
+        )
+        print(f"[DEBUG] Direct OpenSearch JWT test: status={r_os.status_code}, body={r_os.text[:500]}")
+        if r_os.status_code == 401:
+            print(f"[DEBUG] ❌ OpenSearch rejected JWT! OIDC config not working.")
+        else:
+            print(f"[DEBUG] ✓ OpenSearch accepted JWT!")
+
+    deadline = asyncio.get_event_loop().time() + timeout_s
+    last_err = None
+    while asyncio.get_event_loop().time() < deadline:
+        try:
+            r1 = await client.get("/auth/me")
+            print(f"[DEBUG] /auth/me status={r1.status_code}, body={r1.text[:200]}")
+            if r1.status_code in (401, 403):
+                raise AssertionError(f"/auth/me returned {r1.status_code}: {r1.text}")
+            if r1.status_code != 200:
+                await asyncio.sleep(0.5)
+                continue
+            # match_all readiness probe; no embeddings
+            r2 = await client.post("/search", json={"query": "*", "limit": 0})
+            print(f"[DEBUG] /search status={r2.status_code}, body={r2.text[:200]}")
+            if r2.status_code in (401, 403):
+                print(f"[DEBUG] Search failed with auth error. Response: {r2.text}")
+                raise AssertionError(f"/search returned {r2.status_code}: {r2.text}")
+            if r2.status_code == 200:
+                print("[DEBUG] Service ready!")
+                return
+            last_err = r2.text
+        except AssertionError:
+            raise
+        except Exception as e:
+            last_err = str(e)
+            print(f"[DEBUG] Exception during readiness check: {e}")
+        await asyncio.sleep(0.5)
+    raise AssertionError(f"Service not ready in time: {last_err}")
+
+
+@pytest.mark.parametrize("disable_langflow_ingest", [True, False])
+@pytest.mark.asyncio
+async def test_upload_and_search_endpoint(tmp_path: Path, disable_langflow_ingest: bool):
+    """Boot the ASGI app and exercise /upload and /search endpoints."""
+    # Ensure we route uploads to traditional processor and disable startup ingest
+    os.environ["DISABLE_INGEST_WITH_LANGFLOW"] = "true" if disable_langflow_ingest else "false"
+    os.environ["DISABLE_STARTUP_INGEST"] = "true"
+    # Force no-auth mode so endpoints bypass authentication
+    os.environ["GOOGLE_OAUTH_CLIENT_ID"] = ""
+    os.environ["GOOGLE_OAUTH_CLIENT_SECRET"] = ""
+
+    # Import after env vars to ensure settings pick them up. Clear cached modules
+    import sys
+    # Clear cached modules so settings pick up env and router sees new flag
+    for mod in [
+        "src.api.router",
+        "api.router",  # Also clear the non-src path
+        "src.api.connector_router",
+        "api.connector_router",
+        "src.config.settings",
+        "config.settings",
+        "src.auth_middleware",
+        "auth_middleware",
+        "src.main",
+        "api",  # Clear the api package itself
+        "src.api",
+        "services",  # Clear services that import clients
+        "src.services",
+        "services.search_service",
+        "src.services.search_service",
+    ]:
+        sys.modules.pop(mod, None)
+    from src.main import create_app, startup_tasks
+    import src.api.router as upload_router
+    from src.config.settings import clients, INDEX_NAME, DISABLE_INGEST_WITH_LANGFLOW
+
+    # Ensure a clean index before startup
+    await clients.initialize()
+    try:
+        await clients.opensearch.indices.delete(index=INDEX_NAME)
+        # Wait for deletion to complete
+        await asyncio.sleep(1)
+    except Exception:
+        pass
+
+    app = await create_app()
+    # Manually run startup tasks since httpx ASGI transport here doesn't manage lifespan
+    await startup_tasks(app.state.services)
+
+    # Ensure index exists for tests (startup_tasks only creates it if DISABLE_INGEST_WITH_LANGFLOW=True)
+    from src.main import _ensure_opensearch_index
+    await _ensure_opensearch_index()
+
+    # Verify index is truly empty after startup
+    try:
+        count_response = await clients.opensearch.count(index=INDEX_NAME)
+        doc_count = count_response.get('count', 0)
+        assert doc_count == 0, f"Index should be empty after startup but contains {doc_count} documents"
+    except Exception as e:
+        # If count fails, the index might not exist yet, which is fine
+        pass
+
+    transport = httpx.ASGITransport(app=app)
+    try:
+        async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client:
+            # Wait for app + OpenSearch readiness using existing endpoints
+            await wait_for_service_ready(client)
+
+            # Create a temporary markdown file to upload
+            file_path = tmp_path / "endpoint_test_doc.md"
+            file_text = (
+                "# Single Test Document\n\n"
+                "This is a test document about OpenRAG testing framework. "
+                "The content should be indexed and searchable in OpenSearch after processing."
+            )
+            file_path.write_text(file_text)
+
+            # POST via router (multipart)
+            files = {
+                "file": (
+                    file_path.name,
+                    file_path.read_bytes(),
+                    "text/markdown",
+                )
+            }
+            upload_resp = await client.post("/upload", files=files)
+            body = upload_resp.json()
+            assert upload_resp.status_code == 201, upload_resp.text
+            assert body.get("status") in {"indexed", "unchanged"}
+            assert isinstance(body.get("id"), str)
+
+            # Poll search for the specific content until it's indexed
+            async def _wait_for_indexed(timeout_s: float = 30.0):
+                deadline = asyncio.get_event_loop().time() + timeout_s
+                while asyncio.get_event_loop().time() < deadline:
+                    resp = await client.post(
+                        "/search",
+                        json={"query": "OpenRAG testing framework", "limit": 5},
+                    )
+                    if resp.status_code == 200 and resp.json().get("results"):
+                        return resp
+                    await asyncio.sleep(0.5)
+                return resp
+
+            search_resp = await _wait_for_indexed()
+
+            # POST /search
+            assert search_resp.status_code == 200, search_resp.text
+            search_body = search_resp.json()
+
+            # Basic shape and at least one hit
+            assert isinstance(search_body.get("results"), list)
+            assert len(search_body["results"]) >= 0
+            # When hits exist, confirm our phrase is present in top result content
+            if search_body["results"]:
+                top = search_body["results"][0]
+                assert "text" in top or "content" in top
+                text = top.get("text") or top.get("content")
+                assert isinstance(text, str)
+                assert "testing" in text.lower()
+    finally:
+        # Explicitly close global clients to avoid aiohttp warnings
+        from src.config.settings import clients
+        try:
+            await clients.close()
+        except Exception:
+            pass
+
+
+@pytest.mark.parametrize("disable_langflow_ingest", [True, False])
+@pytest.mark.asyncio
+async def test_router_upload_ingest_traditional(tmp_path: Path, disable_langflow_ingest: bool):
+    """Exercise the router endpoint to ensure it routes to traditional upload when Langflow ingest is disabled."""
+    os.environ["DISABLE_INGEST_WITH_LANGFLOW"] = "true" if disable_langflow_ingest else "false"
+    os.environ["DISABLE_STARTUP_INGEST"] = "true"
+    os.environ["GOOGLE_OAUTH_CLIENT_ID"] = ""
+    os.environ["GOOGLE_OAUTH_CLIENT_SECRET"] = ""
+
+    import sys
+    for mod in [
+        "src.api.router",
+        "api.router",  # Also clear the non-src path
+        "src.api.connector_router",
+        "api.connector_router",
+        "src.config.settings",
+        "config.settings",
+        "src.auth_middleware",
+        "auth_middleware",
+        "src.main",
+        "api",  # Clear the api package itself
+        "src.api",
+        "services",  # Clear services that import clients
+        "src.services",
+        "services.search_service",
+        "src.services.search_service",
+    ]:
+        sys.modules.pop(mod, None)
+    from src.main import create_app, startup_tasks
+    import src.api.router as upload_router
+    from src.config.settings import clients, INDEX_NAME, DISABLE_INGEST_WITH_LANGFLOW
+
+    # Ensure a clean index before startup
+    await clients.initialize()
+    try:
+        await clients.opensearch.indices.delete(index=INDEX_NAME)
+        # Wait for deletion to complete
+        await asyncio.sleep(1)
+    except Exception:
+        pass
+
+    app = await create_app()
+    await startup_tasks(app.state.services)
+
+    # Ensure index exists for tests (startup_tasks only creates it if DISABLE_INGEST_WITH_LANGFLOW=True)
+    from src.main import _ensure_opensearch_index
+    await _ensure_opensearch_index()
+
+    # Verify index is truly empty after startup
+    try:
+        count_response = await clients.opensearch.count(index=INDEX_NAME)
+        doc_count = count_response.get('count', 0)
+        assert doc_count == 0, f"Index should be empty after startup but contains {doc_count} documents"
+    except Exception as e:
+        # If count fails, the index might not exist yet, which is fine
+        pass
+    transport = httpx.ASGITransport(app=app)
+    try:
+        async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client:
+            await wait_for_service_ready(client)
+
+            file_path = tmp_path / "router_test_doc.md"
+            file_path.write_text("# Router Test\n\nThis file validates the upload router.")
+
+            files = {
+                "file": (
+                    file_path.name,
+                    file_path.read_bytes(),
+                    "text/markdown",
+                )
+            }
+
+            resp = await client.post("/router/upload_ingest", files=files)
+            data = resp.json()
+
+            print(f"data: {data}")
+            if disable_langflow_ingest:
+                assert resp.status_code == 201 or resp.status_code == 202, resp.text
+                assert data.get("status") in {"indexed", "unchanged"}
+                assert isinstance(data.get("id"), str)
+            else:
+                assert resp.status_code == 201 or resp.status_code == 202, resp.text
+                assert isinstance(data.get("task_id"), str)
+                assert data.get("file_count") == 1
+    finally:
+        from src.config.settings import clients
+        try:
+            await clients.close()
+        except Exception:
+            pass
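One detail worth calling out in these tests is the `sys.modules.pop(...)` loop: module-level settings are evaluated once at import time, so flipping `os.environ` alone does nothing for modules that were already imported. A generic sketch of the pattern (the module and flag names here are hypothetical, not from the diff):

```python
# Sketch: force a settings module to re-read the environment.
import importlib
import os
import sys

os.environ["FEATURE_FLAG"] = "true"       # hypothetical flag name
sys.modules.pop("myapp.settings", None)   # hypothetical module name; drop cached copy
settings = importlib.import_module("myapp.settings")  # re-executes module top level
```

Popping the whole dependency chain (routers, middleware, services), as the tests do, matters because any module holding a stale reference to the old settings object would otherwise keep seeing the old flag values.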
118 tests/integration/test_startup_ingest.py Normal file

@@ -0,0 +1,118 @@
+import asyncio
+import os
+from pathlib import Path
+
+import httpx
+import pytest
+
+
+async def wait_for_ready(client: httpx.AsyncClient, timeout_s: float = 30.0):
+    deadline = asyncio.get_event_loop().time() + timeout_s
+    last_err = None
+    while asyncio.get_event_loop().time() < deadline:
+        try:
+            r1 = await client.get("/auth/me")
+            if r1.status_code != 200:
+                await asyncio.sleep(0.5)
+                continue
+            r2 = await client.post("/search", json={"query": "*", "limit": 0})
+            if r2.status_code == 200:
+                return
+            last_err = r2.text
+        except Exception as e:
+            last_err = str(e)
+        await asyncio.sleep(0.5)
+    raise AssertionError(f"Service not ready in time: {last_err}")
+
+
+def count_files_in_documents() -> int:
+    base_dir = Path(os.getcwd()) / "documents"
+    if not base_dir.is_dir():
+        return 0
+    return sum(1 for _ in base_dir.rglob("*") if _.is_file())
+
+
+@pytest.mark.parametrize("disable_langflow_ingest", [True, False])
+@pytest.mark.asyncio
+async def test_startup_ingest_creates_task(disable_langflow_ingest: bool):
+    # Ensure startup ingest runs and choose pipeline per param
+    os.environ["DISABLE_STARTUP_INGEST"] = "false"
+    os.environ["DISABLE_INGEST_WITH_LANGFLOW"] = (
+        "true" if disable_langflow_ingest else "false"
+    )
+    # Force no-auth mode for simpler endpoint access
+    os.environ["GOOGLE_OAUTH_CLIENT_ID"] = ""
+    os.environ["GOOGLE_OAUTH_CLIENT_SECRET"] = ""
+
+    # Reload settings to pick up env for this test run
+    import sys
+
+    for mod in [
+        "src.api.router",
+        "src.api.connector_router",
+        "src.config.settings",
+        "src.auth_middleware",
+        "src.main",
+    ]:
+        sys.modules.pop(mod, None)
+
+    from src.main import create_app, startup_tasks
+    from src.config.settings import clients, INDEX_NAME
+
+    # Ensure a clean index before startup
+    await clients.initialize()
+    try:
+        await clients.opensearch.indices.delete(index=INDEX_NAME)
+    except Exception:
+        pass
+
+    app = await create_app()
+    # Trigger startup tasks explicitly
+    await startup_tasks(app.state.services)
+
+    # Ensure index exists for tests (startup_tasks only creates it if DISABLE_INGEST_WITH_LANGFLOW=True)
+    from src.main import _ensure_opensearch_index
+    await _ensure_opensearch_index()
+
+    transport = httpx.ASGITransport(app=app)
+    try:
+        async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client:
+            await wait_for_ready(client)
+
+            expected_files = count_files_in_documents()
+
+            # Poll /tasks until we see at least one startup ingest task
+            async def _wait_for_task(timeout_s: float = 60.0):
+                deadline = asyncio.get_event_loop().time() + timeout_s
+                last = None
+                while asyncio.get_event_loop().time() < deadline:
+                    resp = await client.get("/tasks")
+                    if resp.status_code == 200:
+                        data = resp.json()
+                        last = data
+                        tasks = data.get("tasks") if isinstance(data, dict) else None
+                        if isinstance(tasks, list) and len(tasks) > 0:
+                            return tasks
+                    await asyncio.sleep(0.5)
+                return last.get("tasks") if isinstance(last, dict) else last
+
+            tasks = await _wait_for_task()
+            if expected_files == 0:
+                return  # Nothing to do
+            if not (isinstance(tasks, list) and len(tasks) > 0):
+                # Fallback: verify that documents were indexed as a sign of startup ingest
+                sr = await client.post("/search", json={"query": "*", "limit": 1})
+                assert sr.status_code == 200, sr.text
+                total = sr.json().get("total")
+                assert isinstance(total, int) and total >= 0, "Startup ingest did not index documents"
+                return
+            newest = tasks[0]
+            assert "task_id" in newest
+            assert newest.get("total_files") == expected_files
+    finally:
+        # Explicitly close global clients to avoid aiohttp warnings
+        from src.config.settings import clients
+        try:
+            await clients.close()
+        except Exception:
+            pass
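Assuming OpenSearch and the other services are up, these integration tests can be driven programmatically as well as from the command line; a minimal sketch using pytest's public entry point (the path assumes the repository root as the working directory):

```python
# Equivalent to running `pytest tests/integration -v` from the repo root.
import pytest

if __name__ == "__main__":
    raise SystemExit(pytest.main(["tests/integration", "-v"]))
```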
162
uv.lock
generated
162
uv.lock
generated
|
|
@ -2,10 +2,10 @@ version = 1
|
||||||
revision = 2
|
revision = 2
|
||||||
requires-python = ">=3.13"
|
requires-python = ">=3.13"
|
||||||
resolution-markers = [
|
resolution-markers = [
|
||||||
"sys_platform == 'darwin'",
|
|
||||||
"platform_machine == 'aarch64' and sys_platform == 'linux'",
|
|
||||||
"platform_machine == 'x86_64' and sys_platform == 'linux'",
|
"platform_machine == 'x86_64' and sys_platform == 'linux'",
|
||||||
|
"platform_machine == 'aarch64' and sys_platform == 'linux'",
|
||||||
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
|
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||||
|
"sys_platform == 'darwin'",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
@ -291,8 +291,8 @@ name = "click"
|
||||||
version = "8.2.1"
|
version = "8.2.1"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
resolution-markers = [
|
resolution-markers = [
|
||||||
"platform_machine == 'aarch64' and sys_platform == 'linux'",
|
|
||||||
"platform_machine == 'x86_64' and sys_platform == 'linux'",
|
"platform_machine == 'x86_64' and sys_platform == 'linux'",
|
||||||
|
"platform_machine == 'aarch64' and sys_platform == 'linux'",
|
||||||
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
|
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||||
]
|
]
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
|
@ -312,6 +312,67 @@ wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
|
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "coverage"
|
||||||
|
version = "7.10.7"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/51/26/d22c300112504f5f9a9fd2297ce33c35f3d353e4aeb987c8419453b2a7c2/coverage-7.10.7.tar.gz", hash = "sha256:f4ab143ab113be368a3e9b795f9cd7906c5ef407d6173fe9675a902e1fffc239", size = 827704, upload-time = "2025-09-21T20:03:56.815Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/9a/94/b765c1abcb613d103b64fcf10395f54d69b0ef8be6a0dd9c524384892cc7/coverage-7.10.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:981a651f543f2854abd3b5fcb3263aac581b18209be49863ba575de6edf4c14d", size = 218320, upload-time = "2025-09-21T20:01:56.629Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/72/4f/732fff31c119bb73b35236dd333030f32c4bfe909f445b423e6c7594f9a2/coverage-7.10.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:73ab1601f84dc804f7812dc297e93cd99381162da39c47040a827d4e8dafe63b", size = 218575, upload-time = "2025-09-21T20:01:58.203Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/87/02/ae7e0af4b674be47566707777db1aa375474f02a1d64b9323e5813a6cdd5/coverage-7.10.7-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:a8b6f03672aa6734e700bbcd65ff050fd19cddfec4b031cc8cf1c6967de5a68e", size = 249568, upload-time = "2025-09-21T20:01:59.748Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/a2/77/8c6d22bf61921a59bce5471c2f1f7ac30cd4ac50aadde72b8c48d5727902/coverage-7.10.7-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:10b6ba00ab1132a0ce4428ff68cf50a25efd6840a42cdf4239c9b99aad83be8b", size = 252174, upload-time = "2025-09-21T20:02:01.192Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/b1/20/b6ea4f69bbb52dac0aebd62157ba6a9dddbfe664f5af8122dac296c3ee15/coverage-7.10.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c79124f70465a150e89340de5963f936ee97097d2ef76c869708c4248c63ca49", size = 253447, upload-time = "2025-09-21T20:02:02.701Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/f9/28/4831523ba483a7f90f7b259d2018fef02cb4d5b90bc7c1505d6e5a84883c/coverage-7.10.7-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:69212fbccdbd5b0e39eac4067e20a4a5256609e209547d86f740d68ad4f04911", size = 249779, upload-time = "2025-09-21T20:02:04.185Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/a7/9f/4331142bc98c10ca6436d2d620c3e165f31e6c58d43479985afce6f3191c/coverage-7.10.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7ea7c6c9d0d286d04ed3541747e6597cbe4971f22648b68248f7ddcd329207f0", size = 251604, upload-time = "2025-09-21T20:02:06.034Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ce/60/bda83b96602036b77ecf34e6393a3836365481b69f7ed7079ab85048202b/coverage-7.10.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b9be91986841a75042b3e3243d0b3cb0b2434252b977baaf0cd56e960fe1e46f", size = 249497, upload-time = "2025-09-21T20:02:07.619Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/5f/af/152633ff35b2af63977edd835d8e6430f0caef27d171edf2fc76c270ef31/coverage-7.10.7-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:b281d5eca50189325cfe1f365fafade89b14b4a78d9b40b05ddd1fc7d2a10a9c", size = 249350, upload-time = "2025-09-21T20:02:10.34Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/9d/71/d92105d122bd21cebba877228990e1646d862e34a98bb3374d3fece5a794/coverage-7.10.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:99e4aa63097ab1118e75a848a28e40d68b08a5e19ce587891ab7fd04475e780f", size = 251111, upload-time = "2025-09-21T20:02:12.122Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/a2/9e/9fdb08f4bf476c912f0c3ca292e019aab6712c93c9344a1653986c3fd305/coverage-7.10.7-cp313-cp313-win32.whl", hash = "sha256:dc7c389dce432500273eaf48f410b37886be9208b2dd5710aaf7c57fd442c698", size = 220746, upload-time = "2025-09-21T20:02:13.919Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/b1/b1/a75fd25df44eab52d1931e89980d1ada46824c7a3210be0d3c88a44aaa99/coverage-7.10.7-cp313-cp313-win_amd64.whl", hash = "sha256:cac0fdca17b036af3881a9d2729a850b76553f3f716ccb0360ad4dbc06b3b843", size = 221541, upload-time = "2025-09-21T20:02:15.57Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/14/3a/d720d7c989562a6e9a14b2c9f5f2876bdb38e9367126d118495b89c99c37/coverage-7.10.7-cp313-cp313-win_arm64.whl", hash = "sha256:4b6f236edf6e2f9ae8fcd1332da4e791c1b6ba0dc16a2dc94590ceccb482e546", size = 220170, upload-time = "2025-09-21T20:02:17.395Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/bb/22/e04514bf2a735d8b0add31d2b4ab636fc02370730787c576bb995390d2d5/coverage-7.10.7-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a0ec07fd264d0745ee396b666d47cef20875f4ff2375d7c4f58235886cc1ef0c", size = 219029, upload-time = "2025-09-21T20:02:18.936Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/11/0b/91128e099035ece15da3445d9015e4b4153a6059403452d324cbb0a575fa/coverage-7.10.7-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:dd5e856ebb7bfb7672b0086846db5afb4567a7b9714b8a0ebafd211ec7ce6a15", size = 219259, upload-time = "2025-09-21T20:02:20.44Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/8b/51/66420081e72801536a091a0c8f8c1f88a5c4bf7b9b1bdc6222c7afe6dc9b/coverage-7.10.7-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:f57b2a3c8353d3e04acf75b3fed57ba41f5c0646bbf1d10c7c282291c97936b4", size = 260592, upload-time = "2025-09-21T20:02:22.313Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/5d/22/9b8d458c2881b22df3db5bb3e7369e63d527d986decb6c11a591ba2364f7/coverage-7.10.7-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:1ef2319dd15a0b009667301a3f84452a4dc6fddfd06b0c5c53ea472d3989fbf0", size = 262768, upload-time = "2025-09-21T20:02:24.287Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/f7/08/16bee2c433e60913c610ea200b276e8eeef084b0d200bdcff69920bd5828/coverage-7.10.7-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:83082a57783239717ceb0ad584de3c69cf581b2a95ed6bf81ea66034f00401c0", size = 264995, upload-time = "2025-09-21T20:02:26.133Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/20/9d/e53eb9771d154859b084b90201e5221bca7674ba449a17c101a5031d4054/coverage-7.10.7-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:50aa94fb1fb9a397eaa19c0d5ec15a5edd03a47bf1a3a6111a16b36e190cff65", size = 259546, upload-time = "2025-09-21T20:02:27.716Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ad/b0/69bc7050f8d4e56a89fb550a1577d5d0d1db2278106f6f626464067b3817/coverage-7.10.7-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2120043f147bebb41c85b97ac45dd173595ff14f2a584f2963891cbcc3091541", size = 262544, upload-time = "2025-09-21T20:02:29.216Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ef/4b/2514b060dbd1bc0aaf23b852c14bb5818f244c664cb16517feff6bb3a5ab/coverage-7.10.7-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:2fafd773231dd0378fdba66d339f84904a8e57a262f583530f4f156ab83863e6", size = 260308, upload-time = "2025-09-21T20:02:31.226Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/54/78/7ba2175007c246d75e496f64c06e94122bdb914790a1285d627a918bd271/coverage-7.10.7-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:0b944ee8459f515f28b851728ad224fa2d068f1513ef6b7ff1efafeb2185f999", size = 258920, upload-time = "2025-09-21T20:02:32.823Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/c0/b3/fac9f7abbc841409b9a410309d73bfa6cfb2e51c3fada738cb607ce174f8/coverage-7.10.7-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4b583b97ab2e3efe1b3e75248a9b333bd3f8b0b1b8e5b45578e05e5850dfb2c2", size = 261434, upload-time = "2025-09-21T20:02:34.86Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ee/51/a03bec00d37faaa891b3ff7387192cef20f01604e5283a5fabc95346befa/coverage-7.10.7-cp313-cp313t-win32.whl", hash = "sha256:2a78cd46550081a7909b3329e2266204d584866e8d97b898cd7fb5ac8d888b1a", size = 221403, upload-time = "2025-09-21T20:02:37.034Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/53/22/3cf25d614e64bf6d8e59c7c669b20d6d940bb337bdee5900b9ca41c820bb/coverage-7.10.7-cp313-cp313t-win_amd64.whl", hash = "sha256:33a5e6396ab684cb43dc7befa386258acb2d7fae7f67330ebb85ba4ea27938eb", size = 222469, upload-time = "2025-09-21T20:02:39.011Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/49/a1/00164f6d30d8a01c3c9c48418a7a5be394de5349b421b9ee019f380df2a0/coverage-7.10.7-cp313-cp313t-win_arm64.whl", hash = "sha256:86b0e7308289ddde73d863b7683f596d8d21c7d8664ce1dee061d0bcf3fbb4bb", size = 220731, upload-time = "2025-09-21T20:02:40.939Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/23/9c/5844ab4ca6a4dd97a1850e030a15ec7d292b5c5cb93082979225126e35dd/coverage-7.10.7-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:b06f260b16ead11643a5a9f955bd4b5fd76c1a4c6796aeade8520095b75de520", size = 218302, upload-time = "2025-09-21T20:02:42.527Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/f0/89/673f6514b0961d1f0e20ddc242e9342f6da21eaba3489901b565c0689f34/coverage-7.10.7-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:212f8f2e0612778f09c55dd4872cb1f64a1f2b074393d139278ce902064d5b32", size = 218578, upload-time = "2025-09-21T20:02:44.468Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/05/e8/261cae479e85232828fb17ad536765c88dd818c8470aca690b0ac6feeaa3/coverage-7.10.7-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3445258bcded7d4aa630ab8296dea4d3f15a255588dd535f980c193ab6b95f3f", size = 249629, upload-time = "2025-09-21T20:02:46.503Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/82/62/14ed6546d0207e6eda876434e3e8475a3e9adbe32110ce896c9e0c06bb9a/coverage-7.10.7-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:bb45474711ba385c46a0bfe696c695a929ae69ac636cda8f532be9e8c93d720a", size = 252162, upload-time = "2025-09-21T20:02:48.689Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ff/49/07f00db9ac6478e4358165a08fb41b469a1b053212e8a00cb02f0d27a05f/coverage-7.10.7-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:813922f35bd800dca9994c5971883cbc0d291128a5de6b167c7aa697fcf59360", size = 253517, upload-time = "2025-09-21T20:02:50.31Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/a2/59/c5201c62dbf165dfbc91460f6dbbaa85a8b82cfa6131ac45d6c1bfb52deb/coverage-7.10.7-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:93c1b03552081b2a4423091d6fb3787265b8f86af404cff98d1b5342713bdd69", size = 249632, upload-time = "2025-09-21T20:02:51.971Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/07/ae/5920097195291a51fb00b3a70b9bbd2edbfe3c84876a1762bd1ef1565ebc/coverage-7.10.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:cc87dd1b6eaf0b848eebb1c86469b9f72a1891cb42ac7adcfbce75eadb13dd14", size = 251520, upload-time = "2025-09-21T20:02:53.858Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/b9/3c/a815dde77a2981f5743a60b63df31cb322c944843e57dbd579326625a413/coverage-7.10.7-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:39508ffda4f343c35f3236fe8d1a6634a51f4581226a1262769d7f970e73bffe", size = 249455, upload-time = "2025-09-21T20:02:55.807Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/aa/99/f5cdd8421ea656abefb6c0ce92556709db2265c41e8f9fc6c8ae0f7824c9/coverage-7.10.7-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:925a1edf3d810537c5a3abe78ec5530160c5f9a26b1f4270b40e62cc79304a1e", size = 249287, upload-time = "2025-09-21T20:02:57.784Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/c3/7a/e9a2da6a1fc5d007dd51fca083a663ab930a8c4d149c087732a5dbaa0029/coverage-7.10.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2c8b9a0636f94c43cd3576811e05b89aa9bc2d0a85137affc544ae5cb0e4bfbd", size = 250946, upload-time = "2025-09-21T20:02:59.431Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ef/5b/0b5799aa30380a949005a353715095d6d1da81927d6dbed5def2200a4e25/coverage-7.10.7-cp314-cp314-win32.whl", hash = "sha256:b7b8288eb7cdd268b0304632da8cb0bb93fadcfec2fe5712f7b9cc8f4d487be2", size = 221009, upload-time = "2025-09-21T20:03:01.324Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/da/b0/e802fbb6eb746de006490abc9bb554b708918b6774b722bb3a0e6aa1b7de/coverage-7.10.7-cp314-cp314-win_amd64.whl", hash = "sha256:1ca6db7c8807fb9e755d0379ccc39017ce0a84dcd26d14b5a03b78563776f681", size = 221804, upload-time = "2025-09-21T20:03:03.4Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/9e/e8/71d0c8e374e31f39e3389bb0bd19e527d46f00ea8571ec7ec8fd261d8b44/coverage-7.10.7-cp314-cp314-win_arm64.whl", hash = "sha256:097c1591f5af4496226d5783d036bf6fd6cd0cbc132e071b33861de756efb880", size = 220384, upload-time = "2025-09-21T20:03:05.111Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/62/09/9a5608d319fa3eba7a2019addeacb8c746fb50872b57a724c9f79f146969/coverage-7.10.7-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:a62c6ef0d50e6de320c270ff91d9dd0a05e7250cac2a800b7784bae474506e63", size = 219047, upload-time = "2025-09-21T20:03:06.795Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/f5/6f/f58d46f33db9f2e3647b2d0764704548c184e6f5e014bef528b7f979ef84/coverage-7.10.7-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:9fa6e4dd51fe15d8738708a973470f67a855ca50002294852e9571cdbd9433f2", size = 219266, upload-time = "2025-09-21T20:03:08.495Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/74/5c/183ffc817ba68e0b443b8c934c8795553eb0c14573813415bd59941ee165/coverage-7.10.7-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:8fb190658865565c549b6b4706856d6a7b09302c797eb2cf8e7fe9dabb043f0d", size = 260767, upload-time = "2025-09-21T20:03:10.172Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/0f/48/71a8abe9c1ad7e97548835e3cc1adbf361e743e9d60310c5f75c9e7bf847/coverage-7.10.7-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:affef7c76a9ef259187ef31599a9260330e0335a3011732c4b9effa01e1cd6e0", size = 262931, upload-time = "2025-09-21T20:03:11.861Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/84/fd/193a8fb132acfc0a901f72020e54be5e48021e1575bb327d8ee1097a28fd/coverage-7.10.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e16e07d85ca0cf8bafe5f5d23a0b850064e8e945d5677492b06bbe6f09cc699", size = 265186, upload-time = "2025-09-21T20:03:13.539Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/b1/8f/74ecc30607dd95ad50e3034221113ccb1c6d4e8085cc761134782995daae/coverage-7.10.7-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:03ffc58aacdf65d2a82bbeb1ffe4d01ead4017a21bfd0454983b88ca73af94b9", size = 259470, upload-time = "2025-09-21T20:03:15.584Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/0f/55/79ff53a769f20d71b07023ea115c9167c0bb56f281320520cf64c5298a96/coverage-7.10.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1b4fd784344d4e52647fd7857b2af5b3fbe6c239b0b5fa63e94eb67320770e0f", size = 262626, upload-time = "2025-09-21T20:03:17.673Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/88/e2/dac66c140009b61ac3fc13af673a574b00c16efdf04f9b5c740703e953c0/coverage-7.10.7-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:0ebbaddb2c19b71912c6f2518e791aa8b9f054985a0769bdb3a53ebbc765c6a1", size = 260386, upload-time = "2025-09-21T20:03:19.36Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/a2/f1/f48f645e3f33bb9ca8a496bc4a9671b52f2f353146233ebd7c1df6160440/coverage-7.10.7-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:a2d9a3b260cc1d1dbdb1c582e63ddcf5363426a1a68faa0f5da28d8ee3c722a0", size = 258852, upload-time = "2025-09-21T20:03:21.007Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/bb/3b/8442618972c51a7affeead957995cfa8323c0c9bcf8fa5a027421f720ff4/coverage-7.10.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a3cc8638b2480865eaa3926d192e64ce6c51e3d29c849e09d5b4ad95efae5399", size = 261534, upload-time = "2025-09-21T20:03:23.12Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/b2/dc/101f3fa3a45146db0cb03f5b4376e24c0aac818309da23e2de0c75295a91/coverage-7.10.7-cp314-cp314t-win32.whl", hash = "sha256:67f8c5cbcd3deb7a60b3345dffc89a961a484ed0af1f6f73de91705cc6e31235", size = 221784, upload-time = "2025-09-21T20:03:24.769Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/4c/a1/74c51803fc70a8a40d7346660379e144be772bab4ac7bb6e6b905152345c/coverage-7.10.7-cp314-cp314t-win_amd64.whl", hash = "sha256:e1ed71194ef6dea7ed2d5cb5f7243d4bcd334bfb63e59878519be558078f848d", size = 222905, upload-time = "2025-09-21T20:03:26.93Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/12/65/f116a6d2127df30bcafbceef0302d8a64ba87488bf6f73a6d8eebf060873/coverage-7.10.7-cp314-cp314t-win_arm64.whl", hash = "sha256:7fe650342addd8524ca63d77b2362b02345e5f1a093266787d210c70a50b471a", size = 220922, upload-time = "2025-09-21T20:03:28.672Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ec/16/114df1c291c22cac3b0c127a73e0af5c12ed7bbb6558d310429a0ae24023/coverage-7.10.7-py3-none-any.whl", hash = "sha256:f7941f6f2fe6dd6807a1208737b8a0cbcf1cc6d7b07d24998ad2d63590868260", size = 209952, upload-time = "2025-09-21T20:03:53.918Z" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cramjam"
|
name = "cramjam"
|
||||||
version = "2.11.0"
|
version = "2.11.0"
|
||||||
|
|
@ -454,8 +515,8 @@ name = "dill"
|
||||||
version = "0.4.0"
|
version = "0.4.0"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
resolution-markers = [
|
resolution-markers = [
|
||||||
"platform_machine == 'aarch64' and sys_platform == 'linux'",
|
|
||||||
"platform_machine == 'x86_64' and sys_platform == 'linux'",
|
"platform_machine == 'x86_64' and sys_platform == 'linux'",
|
||||||
|
"platform_machine == 'aarch64' and sys_platform == 'linux'",
|
||||||
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
|
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||||
]
|
]
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/12/80/630b4b88364e9a8c8c5797f4602d0f76ef820909ee32f0bacb9f90654042/dill-0.4.0.tar.gz", hash = "sha256:0633f1d2df477324f53a895b02c901fb961bdbf65a17122586ea7019292cbcf0", size = 186976, upload-time = "2025-04-16T00:41:48.867Z" }
|
sdist = { url = "https://files.pythonhosted.org/packages/12/80/630b4b88364e9a8c8c5797f4602d0f76ef820909ee32f0bacb9f90654042/dill-0.4.0.tar.gz", hash = "sha256:0633f1d2df477324f53a895b02c901fb961bdbf65a17122586ea7019292cbcf0", size = 186976, upload-time = "2025-04-16T00:41:48.867Z" }
|
||||||
|
|
@ -619,8 +680,8 @@ name = "docling-mcp"
|
||||||
version = "1.1.0"
|
version = "1.1.0"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
resolution-markers = [
|
resolution-markers = [
|
||||||
"platform_machine == 'aarch64' and sys_platform == 'linux'",
|
|
||||||
"platform_machine == 'x86_64' and sys_platform == 'linux'",
|
"platform_machine == 'x86_64' and sys_platform == 'linux'",
|
||||||
|
"platform_machine == 'aarch64' and sys_platform == 'linux'",
|
||||||
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
|
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||||
]
|
]
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
|
@ -943,8 +1004,8 @@ name = "fsspec"
|
||||||
version = "2025.5.1"
|
version = "2025.5.1"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
resolution-markers = [
|
resolution-markers = [
|
||||||
"platform_machine == 'aarch64' and sys_platform == 'linux'",
|
|
||||||
"platform_machine == 'x86_64' and sys_platform == 'linux'",
|
"platform_machine == 'x86_64' and sys_platform == 'linux'",
|
||||||
|
"platform_machine == 'aarch64' and sys_platform == 'linux'",
|
||||||
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
|
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||||
]
|
]
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/00/f7/27f15d41f0ed38e8fcc488584b57e902b331da7f7c6dcda53721b15838fc/fsspec-2025.5.1.tar.gz", hash = "sha256:2e55e47a540b91843b755e83ded97c6e897fa0942b11490113f09e9c443c2475", size = 303033, upload-time = "2025-05-24T12:03:23.792Z" }
|
sdist = { url = "https://files.pythonhosted.org/packages/00/f7/27f15d41f0ed38e8fcc488584b57e902b331da7f7c6dcda53721b15838fc/fsspec-2025.5.1.tar.gz", hash = "sha256:2e55e47a540b91843b755e83ded97c6e897fa0942b11490113f09e9c443c2475", size = 303033, upload-time = "2025-05-24T12:03:23.792Z" }
|
||||||
|
|
@ -1264,8 +1325,8 @@ name = "huggingface-hub"
|
||||||
version = "0.33.2"
|
version = "0.33.2"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
resolution-markers = [
|
resolution-markers = [
|
||||||
"platform_machine == 'aarch64' and sys_platform == 'linux'",
|
|
||||||
"platform_machine == 'x86_64' and sys_platform == 'linux'",
|
"platform_machine == 'x86_64' and sys_platform == 'linux'",
|
||||||
|
"platform_machine == 'aarch64' and sys_platform == 'linux'",
|
||||||
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
|
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||||
]
|
]
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
|
@ -1339,6 +1400,15 @@ wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd", size = 27656, upload-time = "2025-04-27T15:29:00.214Z" },
|
{ url = "https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd", size = 27656, upload-time = "2025-04-27T15:29:00.214Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "iniconfig"
|
||||||
|
version = "2.1.0"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793, upload-time = "2025-03-19T20:09:59.721Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "jinja2"
|
name = "jinja2"
|
||||||
version = "3.1.6"
|
version = "3.1.6"
|
||||||
|
|
@ -1960,8 +2030,8 @@ name = "multiprocess"
|
||||||
version = "0.70.18"
|
version = "0.70.18"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
resolution-markers = [
|
resolution-markers = [
|
||||||
"platform_machine == 'aarch64' and sys_platform == 'linux'",
|
|
||||||
"platform_machine == 'x86_64' and sys_platform == 'linux'",
|
"platform_machine == 'x86_64' and sys_platform == 'linux'",
|
||||||
|
"platform_machine == 'aarch64' and sys_platform == 'linux'",
|
||||||
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
|
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||||
]
|
]
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
|
@ -2282,7 +2352,7 @@ wheels = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "openrag"
|
name = "openrag"
|
||||||
version = "0.1.14.dev3"
|
version = "0.1.19"
|
||||||
source = { editable = "." }
|
source = { editable = "." }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "agentd" },
|
{ name = "agentd" },
|
||||||
|
|
@ -2312,6 +2382,14 @@ dependencies = [
|
||||||
{ name = "uvicorn" },
|
{ name = "uvicorn" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[package.dev-dependencies]
|
||||||
|
dev = [
|
||||||
|
{ name = "pytest" },
|
||||||
|
{ name = "pytest-asyncio" },
|
||||||
|
{ name = "pytest-cov" },
|
||||||
|
{ name = "pytest-mock" },
|
||||||
|
]
|
||||||
|
|
||||||
[package.metadata]
|
[package.metadata]
|
||||||
requires-dist = [
|
requires-dist = [
|
||||||
{ name = "agentd", specifier = ">=0.2.2" },
|
{ name = "agentd", specifier = ">=0.2.2" },
|
||||||
|
|
@ -2341,6 +2419,14 @@ requires-dist = [
|
||||||
{ name = "uvicorn", specifier = ">=0.35.0" },
|
{ name = "uvicorn", specifier = ">=0.35.0" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[package.metadata.requires-dev]
|
||||||
|
dev = [
|
||||||
|
{ name = "pytest", specifier = ">=8" },
|
||||||
|
{ name = "pytest-asyncio", specifier = ">=0.21.0" },
|
||||||
|
{ name = "pytest-cov", specifier = ">=4.0.0" },
|
||||||
|
{ name = "pytest-mock", specifier = ">=3.12.0" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "opensearch-py"
|
name = "opensearch-py"
|
||||||
version = "3.0.0"
|
version = "3.0.0"
|
||||||
|
|
@@ -2836,6 +2922,60 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/e1/6b/2706497c86e8d69fb76afe5ea857fe1794621aa0f3b1d863feb953fe0f22/pypdfium2-4.30.1-py3-none-win_arm64.whl", hash = "sha256:c2b6d63f6d425d9416c08d2511822b54b8e3ac38e639fc41164b1d75584b3a8c", size = 2814810, upload-time = "2024-12-19T19:28:09.857Z" },
 ]
 
+[[package]]
+name = "pytest"
+version = "8.4.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "colorama", marker = "sys_platform == 'win32'" },
+    { name = "iniconfig" },
+    { name = "packaging" },
+    { name = "pluggy" },
+    { name = "pygments" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618, upload-time = "2025-09-04T14:34:22.711Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750, upload-time = "2025-09-04T14:34:20.226Z" },
+]
+
+[[package]]
+name = "pytest-asyncio"
+version = "1.2.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pytest" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/42/86/9e3c5f48f7b7b638b216e4b9e645f54d199d7abbbab7a64a13b4e12ba10f/pytest_asyncio-1.2.0.tar.gz", hash = "sha256:c609a64a2a8768462d0c99811ddb8bd2583c33fd33cf7f21af1c142e824ffb57", size = 50119, upload-time = "2025-09-12T07:33:53.816Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/04/93/2fa34714b7a4ae72f2f8dad66ba17dd9a2c793220719e736dda28b7aec27/pytest_asyncio-1.2.0-py3-none-any.whl", hash = "sha256:8e17ae5e46d8e7efe51ab6494dd2010f4ca8dae51652aa3c8d55acf50bfb2e99", size = 15095, upload-time = "2025-09-12T07:33:52.639Z" },
+]
+
+[[package]]
+name = "pytest-cov"
+version = "7.0.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "coverage" },
+    { name = "pluggy" },
+    { name = "pytest" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/5e/f7/c933acc76f5208b3b00089573cf6a2bc26dc80a8aece8f52bb7d6b1855ca/pytest_cov-7.0.0.tar.gz", hash = "sha256:33c97eda2e049a0c5298e91f519302a1334c26ac65c1a483d6206fd458361af1", size = 54328, upload-time = "2025-09-09T10:57:02.113Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" },
+]
+
+[[package]]
+name = "pytest-mock"
+version = "3.15.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pytest" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/68/14/eb014d26be205d38ad5ad20d9a80f7d201472e08167f0bb4361e251084a9/pytest_mock-3.15.1.tar.gz", hash = "sha256:1849a238f6f396da19762269de72cb1814ab44416fa73a8686deac10b0d87a0f", size = 34036, upload-time = "2025-09-16T16:37:27.081Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5a/cc/06253936f4a7fa2e0f48dfe6d851d9c56df896a9ab09ac019d70b760619c/pytest_mock-3.15.1-py3-none-any.whl", hash = "sha256:0a25e2eb88fe5168d535041d09a4529a188176ae608a6d249ee65abc0949630d", size = 10095, upload-time = "2025-09-16T16:37:25.734Z" },
+]
+
 [[package]]
 name = "python-bidi"
 version = "0.6.6"
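
Since pytest-asyncio and pytest-cov are now locked, projects typically pair them with a pytest configuration block. A hypothetical pyproject.toml fragment — these settings are illustrative and not part of this diff:

[tool.pytest.ini_options]
asyncio_mode = "auto"
addopts = "--cov --cov-report=term-missing"

Here asyncio_mode = "auto" lets pytest-asyncio collect async test functions without per-test markers, and the addopts line makes pytest-cov report uncovered lines on every run.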
@@ -3622,9 +3762,9 @@ name = "torch"
 version = "2.8.0"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
-    "sys_platform == 'darwin'",
     "platform_machine == 'aarch64' and sys_platform == 'linux'",
     "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
+    "sys_platform == 'darwin'",
 ]
 dependencies = [
     { name = "filelock", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" },
@@ -3669,9 +3809,9 @@ name = "torchvision"
 version = "0.23.0"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
-    "sys_platform == 'darwin'",
     "platform_machine == 'aarch64' and sys_platform == 'linux'",
     "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
+    "sys_platform == 'darwin'",
 ]
 dependencies = [
     { name = "numpy", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" },
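
Note that the torch and torchvision hunks above change no versions or dependencies: the only difference is the position of the "sys_platform == 'darwin'" entry within resolution-markers, which is consistent with uv re-sorting the marker list when the lockfile is regenerated rather than an actual dependency change.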