Merge branch 'main' into docs-remove-sync

Mendon Kissling 2025-10-08 12:20:14 -04:00, committed by GitHub
commit 922c7b7e48
52 changed files with 1293 additions and 521 deletions


@ -1,59 +0,0 @@
name: Build Langflow Responses Multi-Arch
on:
workflow_dispatch:
jobs:
build:
strategy:
fail-fast: false
matrix:
include:
- platform: linux/amd64
arch: amd64
runs-on: ubuntu-latest
- platform: linux/arm64
arch: arm64
runs-on: [self-hosted, linux, ARM64, langflow-ai-arm64-2]
runs-on: ${{ matrix.runs-on }}
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Build and push langflow (${{ matrix.arch }})
uses: docker/build-push-action@v5
with:
context: .
file: ./Dockerfile.langflow
platforms: ${{ matrix.platform }}
push: true
tags: phact/langflow:responses-${{ matrix.arch }}
cache-from: type=gha,scope=langflow-responses-${{ matrix.arch }}
cache-to: type=gha,mode=max,scope=langflow-responses-${{ matrix.arch }}
manifest:
needs: build
runs-on: ubuntu-latest
steps:
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Create and push multi-arch manifest
run: |
docker buildx imagetools create -t phact/langflow:responses \
phact/langflow:responses-amd64 \
phact/langflow:responses-arm64


@ -1,16 +1,95 @@
name: Build Multi-Architecture Docker Images
name: Release + Docker Images (multi-arch)
on:
push:
branches:
- main
paths:
- 'pyproject.toml'
workflow_dispatch:
inputs:
update_latest:
description: 'Update latest tags (production release)'
required: false
default: false
type: boolean
jobs:
build-python-packages:
runs-on: ubuntu-latest
outputs:
skip_release: ${{ steps.version.outputs.skip_release }}
version: ${{ steps.version.outputs.version }}
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.13'
- name: Install uv
uses: astral-sh/setup-uv@v3
- name: Extract version from pyproject.toml
id: version
run: |
VERSION=$(grep '^version = ' pyproject.toml | cut -d '"' -f 2)
echo "version=$VERSION" >> $GITHUB_OUTPUT
echo "Version: $VERSION"
# Check if tag already exists
if git rev-parse "v$VERSION" >/dev/null 2>&1; then
echo "Tag v$VERSION already exists, skipping release"
echo "skip_release=true" >> $GITHUB_OUTPUT
exit 0
fi
echo "skip_release=false" >> $GITHUB_OUTPUT
# Check if version is numeric (e.g., 0.1.16) vs prerelease (e.g., 0.1.16-rc1)
if [[ "$VERSION" =~ ^[0-9.-]+$ ]]; then
echo "is_prerelease=false" >> $GITHUB_OUTPUT
echo "Release type: Production"
else
echo "is_prerelease=true" >> $GITHUB_OUTPUT
echo "Release type: Prerelease"
fi
- name: Build wheel and source distribution
if: steps.version.outputs.skip_release != 'true'
run: |
uv build
- name: List built artifacts
if: steps.version.outputs.skip_release != 'true'
run: |
ls -la dist/
echo "Built artifacts:"
for file in dist/*; do
echo " - $(basename $file) ($(stat -c%s $file | numfmt --to=iec-i)B)"
done
- name: Upload build artifacts
if: steps.version.outputs.skip_release != 'true'
uses: actions/upload-artifact@v4
with:
name: python-packages
path: dist/
retention-days: 30
- name: Create Release
if: steps.version.outputs.skip_release != 'true'
uses: softprops/action-gh-release@v2
with:
tag_name: v${{ steps.version.outputs.version }}
name: Release ${{ steps.version.outputs.version }}
draft: false
prerelease: ${{ steps.version.outputs.is_prerelease }}
generate_release_notes: true
files: |
dist/*.whl
dist/*.tar.gz
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
build:
needs: build-python-packages
if: needs.build-python-packages.outputs.skip_release != 'true'
strategy:
fail-fast: false
matrix:
@ -106,9 +185,9 @@ jobs:
cache-to: type=gha,mode=max,scope=${{ matrix.image }}-${{ matrix.arch }}
manifest:
needs: build
needs: [build, build-python-packages]
runs-on: ubuntu-latest
if: github.event_name != 'pull_request'
if: github.event_name != 'pull_request' && needs.build-python-packages.outputs.skip_release != 'true'
steps:
- name: Checkout
uses: actions/checkout@v4
@ -146,8 +225,8 @@ jobs:
phact/openrag-opensearch:$VERSION-amd64 \
phact/openrag-opensearch:$VERSION-arm64
# Only update latest tags if version is numeric AND checkbox is checked
if [[ "$VERSION" =~ ^[0-9.-]+$ ]] && [[ "${{ github.event.inputs.update_latest }}" == "true" ]]; then
# Only update latest tags if version is numeric
if [[ "$VERSION" =~ ^[0-9.-]+$ ]]; then
echo "Updating latest tags for production release: $VERSION"
docker buildx imagetools create -t phact/openrag-backend:latest \
phact/openrag-backend:$VERSION-amd64 \
@ -165,5 +244,5 @@ jobs:
phact/openrag-opensearch:$VERSION-amd64 \
phact/openrag-opensearch:$VERSION-arm64
else
echo "Skipping latest tags - version: $VERSION, update_latest: ${{ github.event.inputs.update_latest }}"
echo "Skipping latest tags - version: $VERSION (not numeric)"
fi

.github/workflows/test-integration.yml

@ -0,0 +1,54 @@
name: Integration Tests
on:
pull_request:
push:
branches:
- main
jobs:
tests:
runs-on: [self-hosted, linux, ARM64, langflow-ai-arm64-40gb]
env:
# Prefer repository/environment variable first, then secret, then a sane fallback
OPENSEARCH_PASSWORD: ${{ vars.OPENSEARCH_PASSWORD || secrets.OPENSEARCH_PASSWORD || 'OpenRag#2025!' }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
steps:
- run: df -h
#- name: "node-cleanup"
#run: |
# sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache/CodeQL
# sudo docker image prune --all --force
# sudo docker builder prune -a
- run: df -h
- name: Checkout
uses: actions/checkout@v4
- name: Set up UV
uses: astral-sh/setup-uv@v3
with:
version: latest
- name: Python version
run: uv python install 3.13
- name: Install dependencies
run: uv sync
- name: Run integration tests
env:
OPENSEARCH_HOST: localhost
OPENSEARCH_PORT: 9200
OPENSEARCH_USERNAME: admin
OPENSEARCH_PASSWORD: ${{ env.OPENSEARCH_PASSWORD }}
LOG_LEVEL: DEBUG
# Force no-auth mode so tests bypass OAuth
GOOGLE_OAUTH_CLIENT_ID: ""
GOOGLE_OAUTH_CLIENT_SECRET: ""
# Disable startup ingest noise unless a test enables it
DISABLE_STARTUP_INGEST: "true"
run: |
make test-ci
echo "Keys directory after tests:"
ls -la keys/ || echo "No keys directory"

.gitignore

@ -18,6 +18,8 @@ wheels/
1001*.pdf
*.json
!flows/*.json
!src/tui/_assets/flows/*.json
!src/tui/_assets/flows/components/*.json
.DS_Store
config/

MANIFEST.in

@ -0,0 +1 @@
recursive-include src/tui/_assets *

Makefile

@ -1,7 +1,17 @@
# OpenRAG Development Makefile
# Provides easy commands for development workflow
.PHONY: help dev dev-cpu dev-local infra stop clean build logs shell-backend shell-frontend install test backend frontend install-be install-fe build-be build-fe logs-be logs-fe logs-lf logs-os shell-be shell-lf shell-os restart status health db-reset flow-upload quick setup
# Load variables from .env if present so `make` commands pick them up
ifneq (,$(wildcard .env))
include .env
# Export all simple KEY=VALUE pairs to the environment for child processes
export $(shell sed -n 's/^\([A-Za-z_][A-Za-z0-9_]*\)=.*/\1/p' .env)
endif
.PHONY: help dev dev-cpu dev-local infra stop clean build logs shell-backend shell-frontend install \
test test-integration test-ci \
backend frontend install-be install-fe build-be build-fe logs-be logs-fe logs-lf logs-os \
shell-be shell-lf shell-os restart status health db-reset flow-upload quick setup
# Default target
help:
@ -32,14 +42,16 @@ help:
@echo " shell-lf - Shell into langflow container"
@echo ""
@echo "Testing:"
@echo " test - Run backend tests"
@echo " test - Run all backend tests"
@echo " test-integration - Run integration tests (requires infra)"
@echo " test-ci - Start infra, run integration tests, tear down"
@echo " lint - Run linting checks"
@echo ""
# Development environments
dev:
@echo "🚀 Starting OpenRAG with GPU support..."
docker-compose up -d
docker compose up -d
@echo "✅ Services started!"
@echo " Backend: http://localhost:8000"
@echo " Frontend: http://localhost:3000"
@ -49,7 +61,7 @@ dev:
dev-cpu:
@echo "🚀 Starting OpenRAG with CPU only..."
docker-compose -f docker-compose-cpu.yml up -d
docker compose -f docker-compose-cpu.yml up -d
@echo "✅ Services started!"
@echo " Backend: http://localhost:8000"
@echo " Frontend: http://localhost:3000"
@ -59,7 +71,7 @@ dev-cpu:
dev-local:
@echo "🔧 Starting infrastructure only (for local development)..."
docker-compose up -d opensearch dashboards langflow
docker compose up -d opensearch dashboards langflow
@echo "✅ Infrastructure started!"
@echo " Langflow: http://localhost:7860"
@echo " OpenSearch: http://localhost:9200"
@ -69,7 +81,7 @@ dev-local:
infra:
@echo "🔧 Starting infrastructure services only..."
docker-compose up -d opensearch dashboards langflow
docker compose up -d opensearch dashboards langflow
@echo "✅ Infrastructure services started!"
@echo " Langflow: http://localhost:7860"
@echo " OpenSearch: http://localhost:9200"
@ -86,15 +98,15 @@ infra-cpu:
# Container management
stop:
@echo "🛑 Stopping all containers..."
docker-compose down
docker-compose -f docker-compose-cpu.yml down 2>/dev/null || true
docker compose down
docker compose -f docker-compose-cpu.yml down 2>/dev/null || true
restart: stop dev
clean: stop
@echo "🧹 Cleaning up containers and volumes..."
docker-compose down -v --remove-orphans
docker-compose -f docker-compose-cpu.yml down -v --remove-orphans 2>/dev/null || true
docker compose down -v --remove-orphans
docker compose -f docker-compose-cpu.yml down -v --remove-orphans 2>/dev/null || true
docker system prune -f
# Local development
@ -114,7 +126,7 @@ install: install-be install-fe
install-be:
@echo "📦 Installing backend dependencies..."
uv sync
uv sync --extra torch-cu128
install-fe:
@echo "📦 Installing frontend dependencies..."
@ -123,7 +135,7 @@ install-fe:
# Building
build:
@echo "🔨 Building Docker images..."
docker-compose build
docker compose build
build-be:
@echo "🔨 Building backend image..."
@ -136,41 +148,124 @@ build-fe:
# Logging and debugging
logs:
@echo "📋 Showing all container logs..."
docker-compose logs -f
docker compose logs -f
logs-be:
@echo "📋 Showing backend logs..."
docker-compose logs -f openrag-backend
docker compose logs -f openrag-backend
logs-fe:
@echo "📋 Showing frontend logs..."
docker-compose logs -f openrag-frontend
docker compose logs -f openrag-frontend
logs-lf:
@echo "📋 Showing langflow logs..."
docker-compose logs -f langflow
docker compose logs -f langflow
logs-os:
@echo "📋 Showing opensearch logs..."
docker-compose logs -f opensearch
docker compose logs -f opensearch
# Shell access
shell-be:
@echo "🐚 Opening shell in backend container..."
docker-compose exec openrag-backend /bin/bash
docker compose exec openrag-backend /bin/bash
shell-lf:
@echo "🐚 Opening shell in langflow container..."
docker-compose exec langflow /bin/bash
docker compose exec langflow /bin/bash
shell-os:
@echo "🐚 Opening shell in opensearch container..."
docker-compose exec opensearch /bin/bash
docker compose exec opensearch /bin/bash
# Testing and quality
test:
@echo "🧪 Running backend tests..."
uv run pytest
@echo "🧪 Running all backend tests..."
uv run pytest tests/ -v
test-integration:
@echo "🧪 Running integration tests (requires infrastructure)..."
@echo "💡 Make sure to run 'make infra' first!"
uv run pytest tests/integration/ -v
# CI-friendly integration test target: brings up infra, waits, runs tests, tears down
test-ci:
@set -e; \
echo "Installing test dependencies..."; \
uv sync --group dev; \
if [ ! -f keys/private_key.pem ]; then \
echo "Generating RSA keys for JWT signing..."; \
uv run python -c "from src.main import generate_jwt_keys; generate_jwt_keys()"; \
else \
echo "RSA keys already exist, ensuring correct permissions..."; \
chmod 600 keys/private_key.pem 2>/dev/null || true; \
chmod 644 keys/public_key.pem 2>/dev/null || true; \
fi; \
echo "Cleaning up old containers and volumes..."; \
docker compose -f docker-compose-cpu.yml down -v 2>/dev/null || true; \
echo "Pulling latest images..."; \
docker compose -f docker-compose-cpu.yml pull; \
echo "Starting infra (OpenSearch + Dashboards + Langflow) with CPU containers"; \
docker compose -f docker-compose-cpu.yml up -d opensearch dashboards langflow; \
echo "Starting docling-serve..."; \
DOCLING_ENDPOINT=$$(uv run python scripts/docling_ctl.py start --port 5001 | grep "Endpoint:" | awk '{print $$2}'); \
echo "Docling-serve started at $$DOCLING_ENDPOINT"; \
echo "Waiting for backend OIDC endpoint..."; \
for i in $$(seq 1 60); do \
docker exec openrag-backend curl -s http://localhost:8000/.well-known/openid-configuration >/dev/null 2>&1 && break || sleep 2; \
done; \
echo "Waiting for OpenSearch security config to be fully applied..."; \
for i in $$(seq 1 60); do \
if docker logs os 2>&1 | grep -q "Security configuration applied successfully"; then \
echo "✓ Security configuration applied"; \
break; \
fi; \
sleep 2; \
done; \
echo "Verifying OIDC authenticator is active in OpenSearch..."; \
AUTHC_CONFIG=$$(curl -k -s -u admin:$${OPENSEARCH_PASSWORD} https://localhost:9200/_opendistro/_security/api/securityconfig 2>/dev/null); \
if echo "$$AUTHC_CONFIG" | grep -q "openid_auth_domain"; then \
echo "✓ OIDC authenticator configured"; \
echo "$$AUTHC_CONFIG" | grep -A 5 "openid_auth_domain"; \
else \
echo "✗ OIDC authenticator NOT found in security config!"; \
echo "Security config:"; \
echo "$$AUTHC_CONFIG" | head -50; \
exit 1; \
fi; \
echo "Waiting for Langflow..."; \
for i in $$(seq 1 60); do \
curl -s http://localhost:7860/ >/dev/null 2>&1 && break || sleep 2; \
done; \
echo "Waiting for docling-serve at $$DOCLING_ENDPOINT..."; \
for i in $$(seq 1 60); do \
curl -s $${DOCLING_ENDPOINT}/health >/dev/null 2>&1 && break || sleep 2; \
done; \
echo "Running integration tests"; \
LOG_LEVEL=$${LOG_LEVEL:-DEBUG} \
GOOGLE_OAUTH_CLIENT_ID="" \
GOOGLE_OAUTH_CLIENT_SECRET="" \
OPENSEARCH_HOST=localhost OPENSEARCH_PORT=9200 \
OPENSEARCH_USERNAME=admin OPENSEARCH_PASSWORD=$${OPENSEARCH_PASSWORD} \
DISABLE_STARTUP_INGEST=$${DISABLE_STARTUP_INGEST:-true} \
uv run pytest tests/integration -vv -s -o log_cli=true --log-cli-level=DEBUG; \
TEST_RESULT=$$?; \
echo ""; \
echo "=== Post-test JWT diagnostics ==="; \
echo "Generating test JWT token..."; \
TEST_TOKEN=$$(uv run python -c "from src.session_manager import SessionManager, AnonymousUser; sm = SessionManager('test'); print(sm.create_jwt_token(AnonymousUser()))" 2>/dev/null || echo ""); \
if [ -n "$$TEST_TOKEN" ]; then \
echo "Testing JWT against OpenSearch..."; \
HTTP_CODE=$$(curl -k -s -w "%{http_code}" -o /tmp/os_diag.txt -H "Authorization: Bearer $$TEST_TOKEN" -H "Content-Type: application/json" https://localhost:9200/documents/_search -d '{"query":{"match_all":{}}}' 2>&1); \
echo "HTTP $$HTTP_CODE: $$(cat /tmp/os_diag.txt | head -c 150)"; \
fi; \
echo "================================="; \
echo ""; \
echo "Tearing down infra"; \
uv run python scripts/docling_ctl.py stop || true; \
docker compose down -v || true; \
exit $$TEST_RESULT
lint:
@echo "🔍 Running linting checks..."
@ -180,19 +275,19 @@ lint:
# Service status
status:
@echo "📊 Container status:"
@docker-compose ps 2>/dev/null || echo "No containers running"
@docker compose ps 2>/dev/null || echo "No containers running"
health:
@echo "🏥 Health check:"
@echo "Backend: $$(curl -s http://localhost:8000/health 2>/dev/null || echo 'Not responding')"
@echo "Langflow: $$(curl -s http://localhost:7860/health 2>/dev/null || echo 'Not responding')"
@echo "OpenSearch: $$(curl -s -k -u admin:$(shell grep OPENSEARCH_PASSWORD .env | cut -d= -f2) https://localhost:9200 2>/dev/null | jq -r .tagline 2>/dev/null || echo 'Not responding')"
@echo "OpenSearch: $$(curl -s -k -u admin:$${OPENSEARCH_PASSWORD} https://localhost:9200 2>/dev/null | jq -r .tagline 2>/dev/null || echo 'Not responding')"
# Database operations
db-reset:
@echo "🗄️ Resetting OpenSearch indices..."
curl -X DELETE "http://localhost:9200/documents" -u admin:$$(grep OPENSEARCH_PASSWORD .env | cut -d= -f2) || true
curl -X DELETE "http://localhost:9200/knowledge_filters" -u admin:$$(grep OPENSEARCH_PASSWORD .env | cut -d= -f2) || true
curl -X DELETE "http://localhost:9200/documents" -u admin:$${OPENSEARCH_PASSWORD} || true
curl -X DELETE "http://localhost:9200/knowledge_filters" -u admin:$${OPENSEARCH_PASSWORD} || true
@echo "Indices reset. Restart backend to recreate."
# Flow management
@ -215,4 +310,4 @@ setup:
@echo "⚙️ Setting up development environment..."
@if [ ! -f .env ]; then cp .env.example .env && echo "📝 Created .env from template"; fi
@$(MAKE) install
@echo "✅ Setup complete! Run 'make dev' to start."
@echo "✅ Setup complete! Run 'make dev' to start."


@ -1,4 +0,0 @@
:::info
OpenRAG is currently in public preview.
Development is ongoing, and the features and functionality are subject to change.
:::


@ -7,9 +7,6 @@ import Icon from "@site/src/components/icon/icon";
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import PartialModifyFlows from '@site/docs/_partial-modify-flows.mdx';
import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
<PartialExternalPreview />
OpenRAG leverages Langflow's Agent component to power the OpenRAG OpenSearch Agent flow.
@ -34,11 +31,11 @@ In an agentic context, tools are functions that the agent can run to perform tas
</details>
## Use the OpenRAG OpenSearch Agent flow
## Use the OpenRAG OpenSearch Agent flow {#flow}
If you've chatted with your knowledge in OpenRAG, you've already experienced the OpenRAG OpenSearch Agent chat flow.
To switch OpenRAG over to the [Langflow visual editor](https://docs.langflow.org/concepts-overview) and view the OpenRAG OpenSearch Agent flow, click <Icon name="Settings2" aria-hidden="true"/> **Settings**, and then click **Edit in Langflow**.
This flow contains seven components connected together to chat with your data:
This flow contains eight components connected together to chat with your data:
* The [**Agent** component](https://docs.langflow.org/agents) orchestrates the entire flow by deciding when to search the knowledge base, how to formulate search queries, and how to combine retrieved information with the user's question to generate a comprehensive response.
The **Agent** behaves according to the prompt in the **Agent Instructions** field.
@ -49,6 +46,7 @@ The **Agent** behaves according to the prompt in the **Agent Instructions** fiel
* The [**Text Input** component](https://docs.langflow.org/components-io) is populated with the global variable `OPENRAG-QUERY-FILTER`.
This filter is the [Knowledge filter](/knowledge#create-knowledge-filters), and filters which knowledge sources to search through.
* The **Agent** component's Output port is connected to the [**Chat Output** component](https://docs.langflow.org/components-io), which returns the final response to the user or application.
* An [**MCP Tools** component](https://docs.langflow.org/mcp-client) is connected to the Agent's **Tools** port. This component calls the [OpenSearch URL Ingestion flow](/ingestion#url-flow), which Langflow exposes as an MCP server, to fetch content from URLs and store it in OpenSearch.
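Because the flow ends at the **Chat Output** component, you can also call it programmatically. The following Python sketch is a minimal illustration that assumes Langflow's `/api/v1/run/{flow_id}` REST endpoint and a placeholder flow ID; adjust both for your deployment.

```python
import httpx  # HTTP client; also used in this project's test suite

LANGFLOW_URL = "http://localhost:7860"  # default Langflow port in this deployment
FLOW_ID = "your-chat-flow-id"           # hypothetical placeholder for the Agent flow ID

# Payload shape follows Langflow's run API for chat flows (an assumption here)
response = httpx.post(
    f"{LANGFLOW_URL}/api/v1/run/{FLOW_ID}",
    json={
        "input_value": "What does my knowledge base say about OpenSearch?",
        "input_type": "chat",
        "output_type": "chat",
    },
    timeout=60.0,
)
response.raise_for_status()
print(response.json())
```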
<PartialModifyFlows />


@ -7,9 +7,6 @@ import Icon from "@site/src/components/icon/icon";
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import PartialModifyFlows from '@site/docs/_partial-modify-flows.mdx';
import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
<PartialExternalPreview />
OpenRAG uses [Docling](https://docling-project.github.io/docling/) for its document ingestion pipeline.
More specifically, OpenRAG uses [Docling Serve](https://github.com/docling-project/docling-serve), which starts a `docling-serve` process on your local machine and runs Docling ingestion through an API service.
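As a quick sanity check that the API service is reachable, the Python sketch below polls the `/health` endpoint that this project's CI target also uses, assuming the default port `5001` from `scripts/docling_ctl.py`.

```python
import httpx

DOCLING_ENDPOINT = "http://localhost:5001"  # default port used by scripts/docling_ctl.py

# Same readiness probe as the repo's `make test-ci` target
response = httpx.get(f"{DOCLING_ENDPOINT}/health", timeout=5.0)
print(response.status_code, response.text)
```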
@ -50,4 +47,31 @@ If you want to use OpenRAG's built-in pipeline instead of Docling serve, set `DI
The built-in pipeline still uses the Docling processor, but uses it directly without the Docling Serve API.
For more information, see [`processors.py` in the OpenRAG repository](https://github.com/langflow-ai/openrag/blob/main/src/models/processors.py#L58).
## Knowledge ingestion flows
[Flows](https://docs.langflow.org/concepts-overview) in Langflow are functional representations of application workflows, with multiple [component](https://docs.langflow.org/concepts-components) nodes connected as single steps in a workflow.
The **OpenSearch Ingestion** flow is the default knowledge ingestion flow in OpenRAG: when you **Add Knowledge**, the OpenSearch Ingestion flow runs in the background, using **Docling Serve** to import and process your documents.
This flow contains ten components connected together to process and store documents in your knowledge base.
* The [**Docling Serve** component](https://docs.langflow.org/bundles-docling) processes input documents by connecting to your instance of Docling Serve.
* The [**Export DoclingDocument** component](https://docs.langflow.org/components-docling) exports the processed DoclingDocument to markdown format with the image export mode set to placeholder. This conversion turns the structured document data into a standardized format for further processing.
* Three [**DataFrame Operations** components](https://docs.langflow.org/components-processing#dataframe-operations) sequentially add `filename`, `file_size`, and `mimetype` metadata columns to the document data.
* The [**Split Text** component](https://docs.langflow.org/components-processing#split-text) splits the processed text into chunks with a chunk size of 1000 characters and an overlap of 200 characters.
* Four **Secret Input** components provide secure access to configuration variables: `CONNECTOR_TYPE`, `OWNER`, `OWNER_EMAIL`, and `OWNER_NAME`. These are runtime variables populated from OAuth login.
* The **Create Data** component combines the secret inputs into a structured data object that will be associated with the document embeddings.
* The [**Embedding Model** component](https://docs.langflow.org/components-embedding-models) generates vector embeddings using OpenAI's `text-embedding-3-small` model. The embedding model is selected during application onboarding and cannot be changed.
* The [**OpenSearch** component](https://docs.langflow.org/bundles-elastic#opensearch) stores the processed documents and their embeddings in the `documents` index at `https://opensearch:9200`. By default, the component is authenticated with a JWT token, but you can also select `basic` auth mode, and enter your OpenSearch admin username and password.
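To make the **Split Text** parameters concrete, here is a minimal character-based chunking sketch in Python with the same chunk size and overlap; it illustrates the technique, not the component's actual implementation.

```python
def split_text(text: str, chunk_size: int = 1000, overlap: int = 200) -> list[str]:
    """Split text into overlapping character chunks (illustrative only)."""
    step = chunk_size - overlap  # advance 800 characters per chunk
    return [text[i:i + chunk_size] for i in range(0, len(text), step)]

chunks = split_text("example sentence about data science. " * 200)
print(f"{len(chunks)} chunks, first chunk is {len(chunks[0])} characters")
```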
<PartialModifyFlows />
### OpenSearch URL Ingestion flow {#url-flow}
OpenRAG includes an additional knowledge ingestion flow, which the [**OpenRAG OpenSearch Agent flow**](/agents#flow) uses as an MCP tool.
The agent calls this component to fetch web content, and the results are ingested into OpenSearch.
For more on using MCP clients in Langflow, see [MCP clients](https://docs.langflow.org/mcp-client).\
To connect additional MCP servers to the MCP client, see [Connect to MCP servers from your application](https://docs.langflow.org/mcp-tutorial).


@ -7,9 +7,6 @@ import Icon from "@site/src/components/icon/icon";
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import PartialModifyFlows from '@site/docs/_partial-modify-flows.mdx';
import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
<PartialExternalPreview />
OpenRAG uses [OpenSearch](https://docs.opensearch.org/latest/) for its vector-backed knowledge store.
This is a specialized database for storing and retrieving embeddings, which helps your Agent efficiently find relevant information.
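For a sense of how retrieval against the knowledge store works, here is a hedged Python sketch of a k-NN query using the `opensearch-py` client. The `documents` index name matches OpenRAG's default; the `embedding` field name and the 1536-dimension query vector (the size of OpenAI's `text-embedding-3-small`) are assumptions for illustration.

```python
from opensearchpy import OpenSearch  # assumes the opensearch-py package is installed

client = OpenSearch(
    hosts=[{"host": "localhost", "port": 9200}],
    http_auth=("admin", "<OPENSEARCH_PASSWORD>"),  # placeholder credentials
    use_ssl=True,
    verify_certs=False,  # local deployments use self-signed certificates
)

# k-NN search against the documents index; "embedding" is a hypothetical field name
query = {
    "size": 5,
    "query": {"knn": {"embedding": {"vector": [0.1] * 1536, "k": 5}}},
}
for hit in client.search(index="documents", body=query)["hits"]["hits"]:
    print(hit["_score"], hit["_source"].get("filename"))
```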
@ -18,6 +15,7 @@ OpenSearch provides powerful hybrid search capabilities with enterprise-grade se
## Ingest knowledge
OpenRAG supports knowledge ingestion through direct file uploads and OAuth connectors.
To configure the knowledge ingestion pipeline parameters, see [Docling Ingestion](/ingestion).
### Direct file ingestion
@ -89,10 +87,6 @@ Documents are processed with the default **Knowledge Ingest** flow, so if you wa
<PartialModifyFlows />
### Knowledge ingestion settings
To configure the knowledge ingestion pipeline parameters, see [Docling Ingestion](/ingestion).
## Create knowledge filters
OpenRAG includes a knowledge filter system for organizing and managing document collections.


@ -4,9 +4,6 @@ slug: /get-started/docker
---
import PartialOnboarding from '@site/docs/_partial-onboarding.mdx';
import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
<PartialExternalPreview />
There are two different Docker Compose files.
They deploy the same applications and containers, but to different environments.


@ -6,9 +6,6 @@ slug: /install
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import PartialOnboarding from '@site/docs/_partial-onboarding.mdx';
import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
<PartialExternalPreview />
[Install the OpenRAG Python wheel](#install-python-wheel), and then run the [OpenRAG Terminal User Interface (TUI)](#setup) to start your OpenRAG deployment with a guided setup process.


@ -6,9 +6,6 @@ slug: /quickstart
import Icon from "@site/src/components/icon/icon";
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
<PartialExternalPreview />
Get started with OpenRAG by loading your knowledge, swapping out your language model, and then chatting with the OpenRAG API.


@ -3,10 +3,6 @@ title: Terminal User Interface (TUI) commands
slug: /get-started/tui
---
import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
<PartialExternalPreview />
The OpenRAG Terminal User Interface (TUI) allows you to set up, configure, and monitor your OpenRAG deployment directly from the terminal, on any operating system.
![OpenRAG TUI Interface](@site/static/img/OpenRAG_TUI_2025-09-10T13_04_11_757637.svg)


@ -3,10 +3,6 @@ title: What is OpenRAG?
slug: /
---
import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
<PartialExternalPreview />
OpenRAG is an open-source package for building agentic RAG systems.
It supports integration with a wide range of orchestration tools, vector databases, and LLM providers.


@ -5,9 +5,6 @@ slug: /support/troubleshoot
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
<PartialExternalPreview />
This page provides troubleshooting advice for issues you might encounter when using OpenRAG or contributing to OpenRAG.

Binary image file changed (951 KiB before, 1,004 KiB after); contents not shown.


@ -74,7 +74,7 @@ export const KnowledgeSearchInput = () => {
{queryOverride && (
<Button
variant="ghost"
className="h-full !px-1.5 !py-0"
className="h-full rounded-sm !px-1.5 !py-0"
type="button"
onClick={() => {
setSearchQueryInput("");
@ -87,7 +87,7 @@ export const KnowledgeSearchInput = () => {
<Button
variant="ghost"
className={cn(
"h-full !px-1.5 !py-0 hidden group-focus-within/input:block",
"h-full rounded-sm !px-1.5 !py-0 hidden group-focus-within/input:block",
searchQueryInput && "block"
)}
type="submit"


@ -326,4 +326,4 @@ export default function ProtectedAdminPage() {
<AdminPage />
</ProtectedRoute>
)
}


@ -5,14 +5,9 @@ import { useRouter, useSearchParams } from "next/navigation";
import { Suspense, useCallback, useEffect, useMemo, useState } from "react";
// import { Label } from "@/components/ui/label";
// import { Checkbox } from "@/components/ui/checkbox";
import { filterAccentClasses } from "@/components/knowledge-filter-panel";
import { ProtectedRoute } from "@/components/protected-route";
import { Button } from "@/components/ui/button";
import { Checkbox } from "@/components/ui/checkbox";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context";
import { useTask } from "@/contexts/task-context";
import {
type ChunkResult,
type File,
@ -35,9 +30,9 @@ function ChunksPageContent() {
const { parsedFilterData, queryOverride } = useKnowledgeFilter();
const filename = searchParams.get("filename");
const [chunks, setChunks] = useState<ChunkResult[]>([]);
const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState<
ChunkResult[]
>([]);
// const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState<
// ChunkResult[]
// >([]);
// const [selectedChunks, setSelectedChunks] = useState<Set<number>>(new Set());
const [activeCopiedChunkIndex, setActiveCopiedChunkIndex] = useState<
number | null
@ -126,26 +121,25 @@ function ChunksPageContent() {
return (
<div className="flex flex-col h-full">
<div className="flex flex-col h-full">
{/* Header */}
<div className="flex flex-col mb-6">
<div className="flex items-center gap-3 mb-6">
<Button
variant="ghost"
onClick={handleBack}
size="sm"
className="max-w-8 max-h-8 -m-2"
>
<ArrowLeft size={24} />
</Button>
<h1 className="text-lg font-semibold">
{/* Removes file extension from filename */}
{filename.replace(/\.[^/.]+$/, "")}
</h1>
</div>
<div className="flex flex-1">
<KnowledgeSearchInput />
{/* <div className="flex items-center pl-4 gap-2">
{/* Header */}
<div className="flex flex-col mb-6">
<div className="flex items-center gap-3 mb-6">
<Button
variant="ghost"
onClick={handleBack}
size="sm"
className="max-w-8 max-h-8 -m-2"
>
<ArrowLeft size={24} />
</Button>
<h1 className="text-lg font-semibold">
{/* Removes file extension from filename */}
{filename.replace(/\.[^/.]+$/, "")}
</h1>
</div>
<div className="flex flex-1">
<KnowledgeSearchInput />
{/* <div className="flex items-center pl-4 gap-2">
<Checkbox
id="selectAllChunks"
checked={selectAll}
@ -160,11 +154,12 @@ function ChunksPageContent() {
Select all
</Label>
</div> */}
</div>
</div>
</div>
{/* Content Area - matches knowledge page structure */}
<div className="flex-1 overflow-auto pr-6">
<div className="grid gap-6 grid-cols-1 lg:grid-cols-[3fr_1fr]">
{/* Content Area */}
<div className="row-start-2 lg:row-start-1">
{isFetching ? (
<div className="flex items-center justify-center h-64">
<div className="text-center">
@ -185,7 +180,7 @@ function ChunksPageContent() {
</div>
) : (
<div className="space-y-4 pb-6">
{chunksFilteredByQuery.map((chunk, index) => (
{chunks.map((chunk, index) => (
<div
key={chunk.filename + index}
className="bg-muted rounded-lg p-4 border border-border/50"
@ -242,31 +237,30 @@ function ChunksPageContent() {
</div>
)}
</div>
</div>
{/* Right panel - Summary (TODO), Technical details, */}
{chunks.length > 0 && (
<div className="w-[320px] py-20 px-2">
<div className="mb-8">
<h2 className="text-xl font-semibold mt-3 mb-4">
Technical details
</h2>
<dl>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">
Total chunks
</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{chunks.length}
</dd>
</div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Avg length</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{averageChunkLength.toFixed(0)} chars
</dd>
</div>
{/* TODO: Uncomment after data is available */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
{/* Right panel - Summary (TODO), Technical details, */}
{chunks.length > 0 && (
<div className="min-w-[200px]">
<div className="mb-8">
<h2 className="text-xl font-semibold mb-4">Technical details</h2>
<dl>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">
Total chunks
</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{chunks.length}
</dd>
</div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">
Avg length
</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{averageChunkLength.toFixed(0)} chars
</dd>
</div>
{/* TODO: Uncomment after data is available */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Process time</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
</dd>
@ -276,54 +270,55 @@ function ChunksPageContent() {
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
</dd>
</div> */}
</dl>
</div>
<div className="mb-8">
<h2 className="text-xl font-semibold mt-2 mb-3">
Original document
</h2>
<dl>
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
</dl>
</div>
<div className="mb-4">
<h2 className="text-xl font-semibold mt-2 mb-3">
Original document
</h2>
<dl>
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Name</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData?.filename}
</dd>
</div> */}
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Type</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"}
</dd>
</div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Size</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData?.size
? `${Math.round(fileData.size / 1024)} KB`
: "Unknown"}
</dd>
</div>
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Type</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"}
</dd>
</div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Size</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData?.size
? `${Math.round(fileData.size / 1024)} KB`
: "Unknown"}
</dd>
</div>
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Uploaded</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
N/A
</dd>
</div> */}
{/* TODO: Uncomment after data is available */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
{/* TODO: Uncomment after data is available */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Source</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"></dd>
</div> */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Updated</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
N/A
</dd>
</div> */}
</dl>
</dl>
</div>
</div>
</div>
)}
)}
</div>
</div>
);
}


@ -1,6 +1,10 @@
[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "openrag"
version = "0.1.15"
version = "0.1.19"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.13"
@ -31,6 +35,9 @@ dependencies = [
"docling-serve>=1.4.1",
]
[dependency-groups]
dev = ["pytest>=8", "pytest-asyncio>=0.21.0", "pytest-mock>=3.12.0", "pytest-cov>=4.0.0"]
[project.scripts]
openrag = "tui.main:run_tui"

scripts/docling_ctl.py

@ -0,0 +1,91 @@
#!/usr/bin/env python3
"""Helper script to control docling-serve using DoclingManager for CI/testing."""
import sys
import asyncio
import argparse
from pathlib import Path
# Add src to path so we can import DoclingManager
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
from tui.managers.docling_manager import DoclingManager
async def start_docling(port: int = 5001, host: str = None, enable_ui: bool = False):
"""Start docling-serve."""
manager = DoclingManager()
if manager.is_running():
print(f"Docling-serve is already running")
status = manager.get_status()
print(f"Endpoint: {status['endpoint']}")
return 0
host_msg = f"{host}:{port}" if host else f"auto-detected host:{port}"
print(f"Starting docling-serve on {host_msg}...")
success, message = await manager.start(port=port, host=host, enable_ui=enable_ui)
if success:
print(f"{message}")
status = manager.get_status()
print(f"Endpoint: {status['endpoint']}")
print(f"PID: {status['pid']}")
return 0
else:
print(f"{message}", file=sys.stderr)
return 1
async def stop_docling():
"""Stop docling-serve."""
manager = DoclingManager()
if not manager.is_running():
print("Docling-serve is not running")
return 0
print("Stopping docling-serve...")
success, message = await manager.stop()
if success:
print(f"{message}")
return 0
else:
print(f"{message}", file=sys.stderr)
return 1
async def status_docling():
"""Get docling-serve status."""
manager = DoclingManager()
status = manager.get_status()
print(f"Status: {status['status']}")
if status['status'] == 'running':
print(f"Endpoint: {status['endpoint']}")
print(f"Docs: {status['docs_url']}")
print(f"PID: {status['pid']}")
return 0 if status['status'] == 'running' else 1
async def main():
parser = argparse.ArgumentParser(description="Control docling-serve for CI/testing")
parser.add_argument("command", choices=["start", "stop", "status"], help="Command to run")
parser.add_argument("--port", type=int, default=5001, help="Port to run on (default: 5001)")
parser.add_argument("--host", default=None, help="Host to bind to (default: auto-detect for containers)")
parser.add_argument("--enable-ui", action="store_true", help="Enable UI")
args = parser.parse_args()
if args.command == "start":
return await start_docling(port=args.port, host=args.host if args.host else None, enable_ui=args.enable_ui)
elif args.command == "stop":
return await stop_docling()
elif args.command == "status":
return await status_docling()
if __name__ == "__main__":
sys.exit(asyncio.run(main()))


@ -28,7 +28,6 @@ def require_auth(session_manager):
async def wrapper(request: Request):
# In no-auth mode, bypass authentication entirely
if is_no_auth_mode():
logger.debug("No-auth mode: Creating anonymous user")
# Create an anonymous user object so endpoints don't break
from session_manager import User
from datetime import datetime
@ -36,7 +35,6 @@ def require_auth(session_manager):
from session_manager import AnonymousUser
request.state.user = AnonymousUser()
request.state.jwt_token = None # No JWT in no-auth mode
logger.debug("Set user_id=anonymous, jwt_token=None")
return await handler(request)
user = get_current_user(request, session_manager)


@ -13,8 +13,8 @@ from utils.container_utils import get_container_host
from utils.document_processing import create_document_converter
from utils.logging_config import get_logger
load_dotenv()
load_dotenv("../")
load_dotenv(override=False)
load_dotenv("../", override=False)
logger = get_logger(__name__)
@ -61,12 +61,6 @@ DISABLE_INGEST_WITH_LANGFLOW = os.getenv(
def is_no_auth_mode():
"""Check if we're running in no-auth mode (OAuth credentials missing)"""
result = not (GOOGLE_OAUTH_CLIENT_ID and GOOGLE_OAUTH_CLIENT_SECRET)
logger.debug(
"Checking auth mode",
no_auth_mode=result,
has_client_id=GOOGLE_OAUTH_CLIENT_ID is not None,
has_client_secret=GOOGLE_OAUTH_CLIENT_SECRET is not None,
)
return result


@ -131,7 +131,7 @@ async def configure_alerting_security():
# Don't fail startup if alerting config fails
async def _ensure_opensearch_index(self):
async def _ensure_opensearch_index():
"""Ensure OpenSearch index exists when using traditional connector service."""
try:
# Check if index already exists
@ -242,6 +242,9 @@ def generate_jwt_keys():
capture_output=True,
)
# Set restrictive permissions on private key (readable by owner only)
os.chmod(private_key_path, 0o600)
# Generate public key
subprocess.run(
[
@ -257,12 +260,21 @@ def generate_jwt_keys():
capture_output=True,
)
# Set permissions on public key (readable by all)
os.chmod(public_key_path, 0o644)
logger.info("Generated RSA keys for JWT signing")
except subprocess.CalledProcessError as e:
logger.error("Failed to generate RSA keys", error=str(e))
raise
else:
logger.info("RSA keys already exist, skipping generation")
# Ensure correct permissions on existing keys
try:
os.chmod(private_key_path, 0o600)
os.chmod(public_key_path, 0o644)
logger.info("RSA keys already exist, ensured correct permissions")
except OSError as e:
logger.warning("Failed to set permissions on existing keys", error=str(e))
async def init_index_when_ready():


@ -126,7 +126,11 @@ class DocumentService:
from utils.file_utils import auto_cleanup_tempfile
import os
with auto_cleanup_tempfile() as tmp_path:
# Preserve file extension for docling format detection
filename = upload_file.filename or "uploaded"
suffix = os.path.splitext(filename)[1] or ""
with auto_cleanup_tempfile(suffix=suffix) as tmp_path:
# Stream upload file to temporary file
file_size = 0
with open(tmp_path, 'wb') as tmp_file:


@ -1 +1,8 @@
"""OpenRAG Terminal User Interface package."""
from importlib.metadata import version
try:
__version__ = version("openrag")
except Exception:
__version__ = "unknown"


@ -1,122 +0,0 @@
services:
opensearch:
image: phact/openrag-opensearch:${OPENRAG_VERSION:-latest}
#build:
# context: .
# dockerfile: Dockerfile
container_name: os
depends_on:
- openrag-backend
environment:
- discovery.type=single-node
- OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_PASSWORD}
# Run security setup in background after OpenSearch starts
command: >
bash -c "
# Start OpenSearch in background
/usr/share/opensearch/opensearch-docker-entrypoint.sh opensearch &
# Wait a bit for OpenSearch to start, then apply security config
sleep 10 && /usr/share/opensearch/setup-security.sh &
# Wait for background processes
wait
"
ports:
- "9200:9200"
- "9600:9600"
dashboards:
image: opensearchproject/opensearch-dashboards:3.0.0
container_name: osdash
depends_on:
- opensearch
environment:
OPENSEARCH_HOSTS: '["https://opensearch:9200"]'
OPENSEARCH_USERNAME: "admin"
OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD}
ports:
- "5601:5601"
openrag-backend:
image: phact/openrag-backend:${OPENRAG_VERSION:-latest}
#build:
#context: .
#dockerfile: Dockerfile.backend
container_name: openrag-backend
depends_on:
- langflow
environment:
- OPENSEARCH_HOST=opensearch
- LANGFLOW_URL=http://langflow:7860
- LANGFLOW_PUBLIC_URL=${LANGFLOW_PUBLIC_URL}
- LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
- LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
- LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
- LANGFLOW_CHAT_FLOW_ID=${LANGFLOW_CHAT_FLOW_ID}
- LANGFLOW_INGEST_FLOW_ID=${LANGFLOW_INGEST_FLOW_ID}
- LANGFLOW_URL_INGEST_FLOW_ID=${LANGFLOW_URL_INGEST_FLOW_ID}
- DISABLE_INGEST_WITH_LANGFLOW=${DISABLE_INGEST_WITH_LANGFLOW:-false}
- NUDGES_FLOW_ID=${NUDGES_FLOW_ID}
- OPENSEARCH_PORT=9200
- OPENSEARCH_USERNAME=admin
- OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
- OPENAI_API_KEY=${OPENAI_API_KEY}
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
- NVIDIA_VISIBLE_DEVICES=all
- GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID}
- GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET}
- MICROSOFT_GRAPH_OAUTH_CLIENT_ID=${MICROSOFT_GRAPH_OAUTH_CLIENT_ID}
- MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET=${MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET}
- WEBHOOK_BASE_URL=${WEBHOOK_BASE_URL}
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
volumes:
- ./documents:/app/documents:Z
- ./keys:/app/keys:Z
- ./flows:/app/flows:Z
openrag-frontend:
image: phact/openrag-frontend:${OPENRAG_VERSION:-latest}
#build:
#context: .
#dockerfile: Dockerfile.frontend
container_name: openrag-frontend
depends_on:
- openrag-backend
environment:
- OPENRAG_BACKEND_HOST=openrag-backend
ports:
- "3000:3000"
langflow:
volumes:
- ./flows:/app/flows:Z
image: phact/openrag-langflow:${LANGFLOW_VERSION:-latest}
container_name: langflow
ports:
- "7860:7860"
environment:
- OPENAI_API_KEY=${OPENAI_API_KEY}
- LANGFLOW_LOAD_FLOWS_PATH=/app/flows
- LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
- JWT=None
- OWNER=None
- OWNER_NAME=None
- OWNER_EMAIL=None
- CONNECTOR_TYPE=system
- CONNECTOR_TYPE_URL=url
- OPENRAG-QUERY-FILTER="{}"
- OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
- FILENAME=None
- MIMETYPE=None
- FILESIZE=0
- LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT,OPENRAG-QUERY-FILTER,OPENSEARCH_PASSWORD,OWNER,OWNER_NAME,OWNER_EMAIL,CONNECTOR_TYPE,FILENAME,MIMETYPE,FILESIZE
- LANGFLOW_LOG_LEVEL=DEBUG
- LANGFLOW_AUTO_LOGIN=${LANGFLOW_AUTO_LOGIN}
- LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
- LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
- LANGFLOW_NEW_USER_IS_ACTIVE=${LANGFLOW_NEW_USER_IS_ACTIVE}
- LANGFLOW_ENABLE_SUPERUSER_CLI=${LANGFLOW_ENABLE_SUPERUSER_CLI}
# - DEFAULT_FOLDER_NAME=OpenRAG
- HIDE_GETTING_STARTED_PROGRESS=true


@ -0,0 +1 @@
../../../docker-compose-cpu.yml


@ -1,122 +0,0 @@
services:
opensearch:
image: phact/openrag-opensearch:${OPENRAG_VERSION:-latest}
#build:
#context: .
#dockerfile: Dockerfile
container_name: os
depends_on:
- openrag-backend
environment:
- discovery.type=single-node
- OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_PASSWORD}
# Run security setup in background after OpenSearch starts
command: >
bash -c "
# Start OpenSearch in background
/usr/share/opensearch/opensearch-docker-entrypoint.sh opensearch &
# Wait a bit for OpenSearch to start, then apply security config
sleep 10 && /usr/share/opensearch/setup-security.sh &
# Wait for background processes
wait
"
ports:
- "9200:9200"
- "9600:9600"
dashboards:
image: opensearchproject/opensearch-dashboards:3.0.0
container_name: osdash
depends_on:
- opensearch
environment:
OPENSEARCH_HOSTS: '["https://opensearch:9200"]'
OPENSEARCH_USERNAME: "admin"
OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD}
ports:
- "5601:5601"
openrag-backend:
image: phact/openrag-backend:${OPENRAG_VERSION:-latest}
#build:
#context: .
#dockerfile: Dockerfile.backend
container_name: openrag-backend
depends_on:
- langflow
environment:
- OPENSEARCH_HOST=opensearch
- LANGFLOW_URL=http://langflow:7860
- LANGFLOW_PUBLIC_URL=${LANGFLOW_PUBLIC_URL}
- LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
- LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
- LANGFLOW_CHAT_FLOW_ID=${LANGFLOW_CHAT_FLOW_ID}
- LANGFLOW_INGEST_FLOW_ID=${LANGFLOW_INGEST_FLOW_ID}
- LANGFLOW_URL_INGEST_FLOW_ID=${LANGFLOW_URL_INGEST_FLOW_ID}
- DISABLE_INGEST_WITH_LANGFLOW=${DISABLE_INGEST_WITH_LANGFLOW:-false}
- NUDGES_FLOW_ID=${NUDGES_FLOW_ID}
- OPENSEARCH_PORT=9200
- OPENSEARCH_USERNAME=admin
- OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
- OPENAI_API_KEY=${OPENAI_API_KEY}
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
- NVIDIA_VISIBLE_DEVICES=all
- GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID}
- GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET}
- MICROSOFT_GRAPH_OAUTH_CLIENT_ID=${MICROSOFT_GRAPH_OAUTH_CLIENT_ID}
- MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET=${MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET}
- WEBHOOK_BASE_URL=${WEBHOOK_BASE_URL}
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
volumes:
- ./documents:/app/documents:Z
- ./keys:/app/keys:Z
- ./flows:/app/flows:Z
gpus: all
openrag-frontend:
image: phact/openrag-frontend:${OPENRAG_VERSION:-latest}
#build:
#context: .
#dockerfile: Dockerfile.frontend
container_name: openrag-frontend
depends_on:
- openrag-backend
environment:
- OPENRAG_BACKEND_HOST=openrag-backend
ports:
- "3000:3000"
langflow:
volumes:
- ./flows:/app/flows:Z
image: phact/openrag-langflow:${LANGFLOW_VERSION:-latest}
container_name: langflow
ports:
- "7860:7860"
environment:
- OPENAI_API_KEY=${OPENAI_API_KEY}
- LANGFLOW_LOAD_FLOWS_PATH=/app/flows
- LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
- JWT=None
- OWNER=None
- OWNER_NAME=None
- OWNER_EMAIL=None
- CONNECTOR_TYPE=system
- CONNECTOR_TYPE_URL=url
- OPENRAG-QUERY-FILTER="{}"
- OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
- FILENAME=None
- MIMETYPE=None
- FILESIZE=0
- LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT,OPENRAG-QUERY-FILTER,OPENSEARCH_PASSWORD,OWNER,OWNER_NAME,OWNER_EMAIL,CONNECTOR_TYPE,FILENAME,MIMETYPE,FILESIZE
- LANGFLOW_LOG_LEVEL=DEBUG
- LANGFLOW_AUTO_LOGIN=${LANGFLOW_AUTO_LOGIN}
- LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
- LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
- LANGFLOW_NEW_USER_IS_ACTIVE=${LANGFLOW_NEW_USER_IS_ACTIVE}
- LANGFLOW_ENABLE_SUPERUSER_CLI=${LANGFLOW_ENABLE_SUPERUSER_CLI}
# - DEFAULT_FOLDER_NAME="OpenRAG"
- HIDE_GETTING_STARTED_PROGRESS=true


@ -0,0 +1 @@
../../../docker-compose.yml


@ -0,0 +1 @@
../../../../documents/2506.08231v1.pdf


@ -0,0 +1 @@
../../../../documents/ai-human-resources.pdf


@ -0,0 +1 @@
../../../../documents/warmup_ocr.pdf


@ -0,0 +1 @@
../../../../../flows/components/ollama_embedding.json


@ -0,0 +1 @@
../../../../../flows/components/ollama_llm.json


@ -0,0 +1 @@
../../../../../flows/components/ollama_llm_text.json


@ -0,0 +1 @@
../../../../../flows/components/watsonx_embedding.json


@ -0,0 +1 @@
../../../../../flows/components/watsonx_llm.json


@ -0,0 +1 @@
../../../../../flows/components/watsonx_llm_text.json


@ -0,0 +1 @@
../../../../flows/ingestion_flow.json


@ -0,0 +1 @@
../../../../flows/openrag_agent.json


@ -0,0 +1 @@
../../../../flows/openrag_ingest_docling.json


@ -0,0 +1 @@
../../../../flows/openrag_nudges.json


@ -0,0 +1 @@
../../../../flows/openrag_url_mcp.json


@ -2,6 +2,7 @@
import sys
from pathlib import Path
from typing import Iterable, Optional
from textual.app import App, ComposeResult
from utils.logging_config import get_logger
try:
@ -305,41 +306,103 @@ class OpenRAGTUI(App):
return True, "Runtime requirements satisfied"
def copy_sample_documents():
def _copy_assets(resource_tree, destination: Path, allowed_suffixes: Optional[Iterable[str]] = None, *, force: bool = False) -> None:
"""Copy packaged assets into destination and optionally overwrite existing files.
When ``force`` is True, files are refreshed if the packaged bytes differ.
"""
destination.mkdir(parents=True, exist_ok=True)
for resource in resource_tree.iterdir():
target_path = destination / resource.name
if resource.is_dir():
_copy_assets(resource, target_path, allowed_suffixes, force=force)
continue
if allowed_suffixes and not any(resource.name.endswith(suffix) for suffix in allowed_suffixes):
continue
resource_bytes = resource.read_bytes()
if target_path.exists():
if not force:
continue
try:
if target_path.read_bytes() == resource_bytes:
continue
except Exception as read_error:
logger.debug(f"Failed to read existing asset {target_path}: {read_error}")
target_path.write_bytes(resource_bytes)
logger.info(f"Copied bundled asset: {target_path}")
def copy_sample_documents(*, force: bool = False) -> None:
"""Copy sample documents from package to current directory if they don't exist."""
documents_dir = Path("documents")
# Check if documents directory already exists and has files
if documents_dir.exists() and any(documents_dir.glob("*.pdf")):
return # Documents already exist, don't overwrite
try:
# Get sample documents from package assets
assets_files = files("tui._assets.documents")
# Create documents directory if it doesn't exist
documents_dir.mkdir(exist_ok=True)
# Copy each sample document
for resource in assets_files.iterdir():
if resource.is_file() and resource.name.endswith('.pdf'):
dest_path = documents_dir / resource.name
if not dest_path.exists():
content = resource.read_bytes()
dest_path.write_bytes(content)
logger.info(f"Copied sample document: {resource.name}")
_copy_assets(assets_files, documents_dir, allowed_suffixes=(".pdf",), force=force)
except Exception as e:
logger.debug(f"Could not copy sample documents: {e}")
# This is not a critical error - the app can work without sample documents
def copy_sample_flows(*, force: bool = False) -> None:
"""Copy sample flows from package to current directory if they don't exist."""
flows_dir = Path("flows")
try:
assets_files = files("tui._assets.flows")
_copy_assets(assets_files, flows_dir, allowed_suffixes=(".json",), force=force)
except Exception as e:
logger.debug(f"Could not copy sample flows: {e}")
# The app can proceed without bundled flows
def copy_compose_files(*, force: bool = False) -> None:
"""Copy docker-compose templates into the workspace if they are missing."""
try:
assets_root = files("tui._assets")
except Exception as e:
logger.debug(f"Could not access compose assets: {e}")
return
for filename in ("docker-compose.yml", "docker-compose-cpu.yml"):
destination = Path(filename)
if destination.exists() and not force:
continue
try:
resource = assets_root.joinpath(filename)
if not resource.is_file():
logger.debug(f"Compose template not found in assets: {filename}")
continue
resource_bytes = resource.read_bytes()
if destination.exists():
try:
if destination.read_bytes() == resource_bytes:
continue
except Exception as read_error:
logger.debug(f"Failed to read existing compose file {destination}: {read_error}")
destination.write_bytes(resource_bytes)
logger.info(f"Copied docker-compose template: {filename}")
except Exception as error:
logger.debug(f"Could not copy compose file {filename}: {error}")
def run_tui():
"""Run the OpenRAG TUI application."""
app = None
try:
# Copy sample documents on first run
copy_sample_documents()
# Keep bundled assets aligned with the packaged versions
copy_sample_documents(force=True)
copy_sample_flows(force=True)
copy_compose_files(force=True)
app = OpenRAGTUI()
app.run()


@ -10,6 +10,7 @@ from rich.text import Text
from rich.align import Align
from dotenv import load_dotenv
from .. import __version__
from ..managers.container_manager import ContainerManager, ServiceStatus
from ..managers.env_manager import EnvManager
from ..managers.docling_manager import DoclingManager
@ -116,7 +117,8 @@ class WelcomeScreen(Screen):
"""
welcome_text.append(ascii_art, style="bold white")
welcome_text.append("Terminal User Interface for OpenRAG\n\n", style="dim")
welcome_text.append("Terminal User Interface for OpenRAG\n", style="dim")
welcome_text.append(f"v{__version__}\n\n", style="dim cyan")
# Check if all services are running
all_services_running = self.services_running and self.docling_running


@ -157,10 +157,22 @@ def guess_host_ip_for_containers(logger=None) -> str:
import logging
import re
import shutil
import socket
import subprocess
log = logger or logging.getLogger(__name__)
def can_bind_to_address(ip_addr: str) -> bool:
"""Test if we can bind to the given IP address."""
try:
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
sock.bind((ip_addr, 0)) # Port 0 = let OS choose a free port
return True
except (OSError, socket.error) as e:
log.debug("Cannot bind to %s: %s", ip_addr, e)
return False
def run(cmd, timeout=2, text=True):
return subprocess.run(cmd, capture_output=True, text=text, timeout=timeout)
@ -261,10 +273,23 @@ def guess_host_ip_for_containers(logger=None) -> str:
"Container-reachable host IP candidates: %s",
", ".join(ordered_candidates),
)
else:
log.info("Container-reachable host IP: %s", ordered_candidates[0])
return ordered_candidates[0]
# Try each candidate and return the first one we can bind to
for ip_addr in ordered_candidates:
if can_bind_to_address(ip_addr):
if len(ordered_candidates) > 1:
log.info("Selected bindable host IP: %s", ip_addr)
else:
log.info("Container-reachable host IP: %s", ip_addr)
return ip_addr
log.debug("Skipping %s (cannot bind)", ip_addr)
# None of the candidates were bindable, fall back to 127.0.0.1
log.warning(
"None of the discovered IPs (%s) can be bound; falling back to 127.0.0.1",
", ".join(ordered_candidates),
)
return "127.0.0.1"
log.warning(
"No container bridge IP found. For rootless Podman (slirp4netns) there may be no host bridge; publish ports or use 10.0.2.2 from the container."

tests/__init__.py Normal file

@@ -0,0 +1 @@
# Test package

tests/conftest.py Normal file

@@ -0,0 +1,85 @@
import asyncio
import os
import tempfile
from pathlib import Path
import pytest
import pytest_asyncio
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
# Force no-auth mode for testing by setting OAuth credentials to empty strings
# This ensures anonymous JWT tokens are created automatically
os.environ['GOOGLE_OAUTH_CLIENT_ID'] = ''
os.environ['GOOGLE_OAUTH_CLIENT_SECRET'] = ''
from src.config.settings import clients
from src.session_manager import SessionManager
from src.main import generate_jwt_keys
@pytest.fixture(scope="session")
def event_loop():
"""Create an instance of the default event loop for the test session."""
loop = asyncio.get_event_loop_policy().new_event_loop()
yield loop
loop.close()
@pytest_asyncio.fixture
async def opensearch_client():
"""OpenSearch client for testing - requires running OpenSearch."""
await clients.initialize()
yield clients.opensearch
# Cleanup test indices after tests
try:
await clients.opensearch.indices.delete(index="test_documents")
except Exception:
pass
@pytest.fixture
def session_manager():
"""Session manager for testing."""
# Generate RSA keys before creating SessionManager
generate_jwt_keys()
sm = SessionManager("test-secret-key")
print(f"[DEBUG] SessionManager created with keys: private={sm.private_key_path}, public={sm.public_key_path}")
return sm
@pytest.fixture
def test_documents_dir():
"""Create a temporary directory with test documents."""
with tempfile.TemporaryDirectory() as temp_dir:
test_dir = Path(temp_dir)
# Create some test files in supported formats
(test_dir / "test1.md").write_text("# Machine Learning Document\n\nThis is a test document about machine learning.")
(test_dir / "test2.md").write_text("# AI Document\n\nAnother document discussing artificial intelligence.")
(test_dir / "test3.md").write_text("# Data Science Document\n\nThis is a markdown file about data science.")
# Create subdirectory with files
sub_dir = test_dir / "subdir"
sub_dir.mkdir()
(sub_dir / "nested.md").write_text("# Neural Networks\n\nNested document about neural networks.")
yield test_dir
@pytest.fixture
def test_single_file():
"""Create a single test file."""
with tempfile.NamedTemporaryFile(mode='w', suffix='_test_document.md', delete=False) as f:
f.write("# Single Test Document\n\nThis is a test document about OpenRAG testing framework. This document contains multiple sentences to ensure proper chunking. The content should be indexed and searchable in OpenSearch after processing.")
temp_path = f.name
yield temp_path
# Cleanup
try:
os.unlink(temp_path)
except FileNotFoundError:
pass
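For illustration, a test consuming these fixtures could look like the sketch below; the test name and assertions are assumptions, not part of the committed suite, though create_jwt_token(AnonymousUser()) matches the API exercised elsewhere in this commit.
from src.session_manager import AnonymousUser

def test_anonymous_jwt_shape(session_manager):
    # The fixture generates RSA keys and returns a ready SessionManager.
    token = session_manager.create_jwt_token(AnonymousUser())
    assert isinstance(token, str)
    assert token.count(".") == 2  # compact JWS: header.payload.signature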


@@ -0,0 +1 @@
# Integration tests package


@@ -0,0 +1,296 @@
import asyncio
import os
from pathlib import Path
import httpx
import pytest
async def wait_for_service_ready(client: httpx.AsyncClient, timeout_s: float = 30.0):
"""Poll existing endpoints until the app and OpenSearch are ready.
Strategy:
- GET /auth/me should return 200 immediately (confirms app is up).
- POST /search with query "*" avoids embeddings and checks OpenSearch/index readiness.
"""
# First test OpenSearch JWT directly
from src.session_manager import SessionManager, AnonymousUser
import os
import hashlib
import jwt as jwt_lib
sm = SessionManager("test")
test_token = sm.create_jwt_token(AnonymousUser())
token_hash = hashlib.sha256(test_token.encode()).hexdigest()[:16]
print(f"[DEBUG] Generated test JWT token hash: {token_hash}")
print(f"[DEBUG] Using key paths: private={sm.private_key_path}, public={sm.public_key_path}")
with open(sm.public_key_path, 'rb') as f:
pub_key_hash = hashlib.sha256(f.read()).hexdigest()[:16]
print(f"[DEBUG] Public key hash: {pub_key_hash}")
# Decode token to see claims
decoded = jwt_lib.decode(test_token, options={"verify_signature": False})
print(f"[DEBUG] JWT claims: iss={decoded.get('iss')}, sub={decoded.get('sub')}, aud={decoded.get('aud')}, roles={decoded.get('roles')}")
# Test OpenSearch JWT auth directly
opensearch_url = f"https://{os.getenv('OPENSEARCH_HOST', 'localhost')}:{os.getenv('OPENSEARCH_PORT', '9200')}"
print(f"[DEBUG] Testing JWT auth directly against: {opensearch_url}/documents/_search")
async with httpx.AsyncClient(verify=False) as os_client:
r_os = await os_client.post(
f"{opensearch_url}/documents/_search",
headers={"Authorization": f"Bearer {test_token}"},
json={"query": {"match_all": {}}, "size": 0}
)
print(f"[DEBUG] Direct OpenSearch JWT test: status={r_os.status_code}, body={r_os.text[:500]}")
if r_os.status_code == 401:
print(f"[DEBUG] ❌ OpenSearch rejected JWT! OIDC config not working.")
else:
print(f"[DEBUG] ✓ OpenSearch accepted JWT!")
deadline = asyncio.get_event_loop().time() + timeout_s
last_err = None
while asyncio.get_event_loop().time() < deadline:
try:
r1 = await client.get("/auth/me")
print(f"[DEBUG] /auth/me status={r1.status_code}, body={r1.text[:200]}")
if r1.status_code in (401, 403):
raise AssertionError(f"/auth/me returned {r1.status_code}: {r1.text}")
if r1.status_code != 200:
await asyncio.sleep(0.5)
continue
# match_all readiness probe; no embeddings
r2 = await client.post("/search", json={"query": "*", "limit": 0})
print(f"[DEBUG] /search status={r2.status_code}, body={r2.text[:200]}")
if r2.status_code in (401, 403):
print(f"[DEBUG] Search failed with auth error. Response: {r2.text}")
raise AssertionError(f"/search returned {r2.status_code}: {r2.text}")
if r2.status_code == 200:
print("[DEBUG] Service ready!")
return
last_err = r2.text
except AssertionError:
raise
except Exception as e:
last_err = str(e)
print(f"[DEBUG] Exception during readiness check: {e}")
await asyncio.sleep(0.5)
raise AssertionError(f"Service not ready in time: {last_err}")
@pytest.mark.parametrize("disable_langflow_ingest", [True, False])
@pytest.mark.asyncio
async def test_upload_and_search_endpoint(tmp_path: Path, disable_langflow_ingest: bool):
"""Boot the ASGI app and exercise /upload and /search endpoints."""
# Route uploads per the parametrized Langflow flag and disable startup ingest
os.environ["DISABLE_INGEST_WITH_LANGFLOW"] = "true" if disable_langflow_ingest else "false"
os.environ["DISABLE_STARTUP_INGEST"] = "true"
# Force no-auth mode so endpoints bypass authentication
os.environ["GOOGLE_OAUTH_CLIENT_ID"] = ""
os.environ["GOOGLE_OAUTH_CLIENT_SECRET"] = ""
# Import after env vars are set so settings pick them up; clear cached
# modules so settings and the router see the new flags
import sys
for mod in [
"src.api.router",
"api.router", # Also clear the non-src path
"src.api.connector_router",
"api.connector_router",
"src.config.settings",
"config.settings",
"src.auth_middleware",
"auth_middleware",
"src.main",
"api", # Clear the api package itself
"src.api",
"services", # Clear services that import clients
"src.services",
"services.search_service",
"src.services.search_service",
]:
sys.modules.pop(mod, None)
from src.main import create_app, startup_tasks
import src.api.router as upload_router
from src.config.settings import clients, INDEX_NAME, DISABLE_INGEST_WITH_LANGFLOW
# Ensure a clean index before startup
await clients.initialize()
try:
await clients.opensearch.indices.delete(index=INDEX_NAME)
# Wait for deletion to complete
await asyncio.sleep(1)
except Exception:
pass
app = await create_app()
# Manually run startup tasks since httpx ASGI transport here doesn't manage lifespan
await startup_tasks(app.state.services)
# Ensure index exists for tests (startup_tasks only creates it if DISABLE_INGEST_WITH_LANGFLOW=True)
from src.main import _ensure_opensearch_index
await _ensure_opensearch_index()
# Verify index is truly empty after startup
try:
count_response = await clients.opensearch.count(index=INDEX_NAME)
doc_count = count_response.get('count', 0)
assert doc_count == 0, f"Index should be empty after startup but contains {doc_count} documents"
except Exception:
# If count fails, the index might not exist yet, which is fine
pass
transport = httpx.ASGITransport(app=app)
try:
async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client:
# Wait for app + OpenSearch readiness using existing endpoints
await wait_for_service_ready(client)
# Create a temporary markdown file to upload
file_path = tmp_path / "endpoint_test_doc.md"
file_text = (
"# Single Test Document\n\n"
"This is a test document about OpenRAG testing framework. "
"The content should be indexed and searchable in OpenSearch after processing."
)
file_path.write_text(file_text)
# POST via router (multipart)
files = {
"file": (
file_path.name,
file_path.read_bytes(),
"text/markdown",
)
}
upload_resp = await client.post("/upload", files=files)
body = upload_resp.json()
assert upload_resp.status_code == 201, upload_resp.text
assert body.get("status") in {"indexed", "unchanged"}
assert isinstance(body.get("id"), str)
# Poll search for the specific content until it's indexed
async def _wait_for_indexed(timeout_s: float = 30.0):
deadline = asyncio.get_event_loop().time() + timeout_s
while asyncio.get_event_loop().time() < deadline:
resp = await client.post(
"/search",
json={"query": "OpenRAG testing framework", "limit": 5},
)
if resp.status_code == 200 and resp.json().get("results"):
return resp
await asyncio.sleep(0.5)
return resp
search_resp = await _wait_for_indexed()
# POST /search
assert search_resp.status_code == 200, search_resp.text
search_body = search_resp.json()
# Basic response shape; results may legitimately be empty if indexing lags
assert isinstance(search_body.get("results"), list)
# When hits exist, confirm our phrase is present in top result content
if search_body["results"]:
top = search_body["results"][0]
assert "text" in top or "content" in top
text = top.get("text") or top.get("content")
assert isinstance(text, str)
assert "testing" in text.lower()
finally:
# Explicitly close global clients to avoid aiohttp warnings
from src.config.settings import clients
try:
await clients.close()
except Exception:
pass
@pytest.mark.parametrize("disable_langflow_ingest", [True, False])
@pytest.mark.asyncio
async def test_router_upload_ingest_traditional(tmp_path: Path, disable_langflow_ingest: bool):
"""Exercise the router endpoint to ensure it routes to traditional upload when Langflow ingest is disabled."""
os.environ["DISABLE_INGEST_WITH_LANGFLOW"] = "true" if disable_langflow_ingest else "false"
os.environ["DISABLE_STARTUP_INGEST"] = "true"
os.environ["GOOGLE_OAUTH_CLIENT_ID"] = ""
os.environ["GOOGLE_OAUTH_CLIENT_SECRET"] = ""
import sys
for mod in [
"src.api.router",
"api.router", # Also clear the non-src path
"src.api.connector_router",
"api.connector_router",
"src.config.settings",
"config.settings",
"src.auth_middleware",
"auth_middleware",
"src.main",
"api", # Clear the api package itself
"src.api",
"services", # Clear services that import clients
"src.services",
"services.search_service",
"src.services.search_service",
]:
sys.modules.pop(mod, None)
from src.main import create_app, startup_tasks
import src.api.router as upload_router
from src.config.settings import clients, INDEX_NAME, DISABLE_INGEST_WITH_LANGFLOW
# Ensure a clean index before startup
await clients.initialize()
try:
await clients.opensearch.indices.delete(index=INDEX_NAME)
# Wait for deletion to complete
await asyncio.sleep(1)
except Exception:
pass
app = await create_app()
await startup_tasks(app.state.services)
# Ensure index exists for tests (startup_tasks only creates it if DISABLE_INGEST_WITH_LANGFLOW=True)
from src.main import _ensure_opensearch_index
await _ensure_opensearch_index()
# Verify index is truly empty after startup
try:
count_response = await clients.opensearch.count(index=INDEX_NAME)
doc_count = count_response.get('count', 0)
assert doc_count == 0, f"Index should be empty after startup but contains {doc_count} documents"
except Exception:
# If count fails, the index might not exist yet, which is fine
pass
transport = httpx.ASGITransport(app=app)
try:
async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client:
await wait_for_service_ready(client)
file_path = tmp_path / "router_test_doc.md"
file_path.write_text("# Router Test\n\nThis file validates the upload router.")
files = {
"file": (
file_path.name,
file_path.read_bytes(),
"text/markdown",
)
}
resp = await client.post("/router/upload_ingest", files=files)
data = resp.json()
print(f"data: {data}")
if disable_langflow_ingest:
assert resp.status_code in (201, 202), resp.text
assert data.get("status") in {"indexed", "unchanged"}
assert isinstance(data.get("id"), str)
else:
assert resp.status_code in (201, 202), resp.text
assert isinstance(data.get("task_id"), str)
assert data.get("file_count") == 1
finally:
from src.config.settings import clients
try:
await clients.close()
except Exception:
pass


@@ -0,0 +1,118 @@
import asyncio
import os
from pathlib import Path
import httpx
import pytest
async def wait_for_ready(client: httpx.AsyncClient, timeout_s: float = 30.0):
deadline = asyncio.get_event_loop().time() + timeout_s
last_err = None
while asyncio.get_event_loop().time() < deadline:
try:
r1 = await client.get("/auth/me")
if r1.status_code != 200:
await asyncio.sleep(0.5)
continue
r2 = await client.post("/search", json={"query": "*", "limit": 0})
if r2.status_code == 200:
return
last_err = r2.text
except Exception as e:
last_err = str(e)
await asyncio.sleep(0.5)
raise AssertionError(f"Service not ready in time: {last_err}")
def count_files_in_documents() -> int:
base_dir = Path(os.getcwd()) / "documents"
if not base_dir.is_dir():
return 0
return sum(1 for path in base_dir.rglob("*") if path.is_file())
@pytest.mark.parametrize("disable_langflow_ingest", [True, False])
@pytest.mark.asyncio
async def test_startup_ingest_creates_task(disable_langflow_ingest: bool):
# Ensure startup ingest runs and choose pipeline per param
os.environ["DISABLE_STARTUP_INGEST"] = "false"
os.environ["DISABLE_INGEST_WITH_LANGFLOW"] = (
"true" if disable_langflow_ingest else "false"
)
# Force no-auth mode for simpler endpoint access
os.environ["GOOGLE_OAUTH_CLIENT_ID"] = ""
os.environ["GOOGLE_OAUTH_CLIENT_SECRET"] = ""
# Reload settings to pick up env for this test run
import sys
for mod in [
"src.api.router",
"src.api.connector_router",
"src.config.settings",
"src.auth_middleware",
"src.main",
]:
sys.modules.pop(mod, None)
from src.main import create_app, startup_tasks
from src.config.settings import clients, INDEX_NAME
# Ensure a clean index before startup
await clients.initialize()
try:
await clients.opensearch.indices.delete(index=INDEX_NAME)
except Exception:
pass
app = await create_app()
# Trigger startup tasks explicitly
await startup_tasks(app.state.services)
# Ensure index exists for tests (startup_tasks only creates it if DISABLE_INGEST_WITH_LANGFLOW=True)
from src.main import _ensure_opensearch_index
await _ensure_opensearch_index()
transport = httpx.ASGITransport(app=app)
try:
async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client:
await wait_for_ready(client)
expected_files = count_files_in_documents()
# Poll /tasks until we see at least one startup ingest task
async def _wait_for_task(timeout_s: float = 60.0):
deadline = asyncio.get_event_loop().time() + timeout_s
last = None
while asyncio.get_event_loop().time() < deadline:
resp = await client.get("/tasks")
if resp.status_code == 200:
data = resp.json()
last = data
tasks = data.get("tasks") if isinstance(data, dict) else None
if isinstance(tasks, list) and len(tasks) > 0:
return tasks
await asyncio.sleep(0.5)
return last.get("tasks") if isinstance(last, dict) else last
tasks = await _wait_for_task()
if expected_files == 0:
return # Nothing to do
if not (isinstance(tasks, list) and len(tasks) > 0):
# Fallback: verify that documents were indexed as a sign of startup ingest
sr = await client.post("/search", json={"query": "*", "limit": 1})
assert sr.status_code == 200, sr.text
total = sr.json().get("total")
assert isinstance(total, int) and total > 0, "Startup ingest did not index documents"
return
newest = tasks[0]
assert "task_id" in newest
assert newest.get("total_files") == expected_files
finally:
# Explicitly close global clients to avoid aiohttp warnings
from src.config.settings import clients
try:
await clients.close()
except Exception:
pass

uv.lock generated

@@ -2,10 +2,10 @@ version = 1
revision = 2
requires-python = ">=3.13"
resolution-markers = [
"sys_platform == 'darwin'",
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"platform_machine == 'x86_64' and sys_platform == 'linux'",
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
"sys_platform == 'darwin'",
]
[[package]]
@@ -291,8 +291,8 @@ name = "click"
version = "8.2.1"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"platform_machine == 'x86_64' and sys_platform == 'linux'",
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
]
dependencies = [
@@ -312,6 +312,67 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
]
[[package]]
name = "coverage"
version = "7.10.7"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/51/26/d22c300112504f5f9a9fd2297ce33c35f3d353e4aeb987c8419453b2a7c2/coverage-7.10.7.tar.gz", hash = "sha256:f4ab143ab113be368a3e9b795f9cd7906c5ef407d6173fe9675a902e1fffc239", size = 827704, upload-time = "2025-09-21T20:03:56.815Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/9a/94/b765c1abcb613d103b64fcf10395f54d69b0ef8be6a0dd9c524384892cc7/coverage-7.10.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:981a651f543f2854abd3b5fcb3263aac581b18209be49863ba575de6edf4c14d", size = 218320, upload-time = "2025-09-21T20:01:56.629Z" },
{ url = "https://files.pythonhosted.org/packages/72/4f/732fff31c119bb73b35236dd333030f32c4bfe909f445b423e6c7594f9a2/coverage-7.10.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:73ab1601f84dc804f7812dc297e93cd99381162da39c47040a827d4e8dafe63b", size = 218575, upload-time = "2025-09-21T20:01:58.203Z" },
{ url = "https://files.pythonhosted.org/packages/87/02/ae7e0af4b674be47566707777db1aa375474f02a1d64b9323e5813a6cdd5/coverage-7.10.7-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:a8b6f03672aa6734e700bbcd65ff050fd19cddfec4b031cc8cf1c6967de5a68e", size = 249568, upload-time = "2025-09-21T20:01:59.748Z" },
{ url = "https://files.pythonhosted.org/packages/a2/77/8c6d22bf61921a59bce5471c2f1f7ac30cd4ac50aadde72b8c48d5727902/coverage-7.10.7-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:10b6ba00ab1132a0ce4428ff68cf50a25efd6840a42cdf4239c9b99aad83be8b", size = 252174, upload-time = "2025-09-21T20:02:01.192Z" },
{ url = "https://files.pythonhosted.org/packages/b1/20/b6ea4f69bbb52dac0aebd62157ba6a9dddbfe664f5af8122dac296c3ee15/coverage-7.10.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c79124f70465a150e89340de5963f936ee97097d2ef76c869708c4248c63ca49", size = 253447, upload-time = "2025-09-21T20:02:02.701Z" },
{ url = "https://files.pythonhosted.org/packages/f9/28/4831523ba483a7f90f7b259d2018fef02cb4d5b90bc7c1505d6e5a84883c/coverage-7.10.7-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:69212fbccdbd5b0e39eac4067e20a4a5256609e209547d86f740d68ad4f04911", size = 249779, upload-time = "2025-09-21T20:02:04.185Z" },
{ url = "https://files.pythonhosted.org/packages/a7/9f/4331142bc98c10ca6436d2d620c3e165f31e6c58d43479985afce6f3191c/coverage-7.10.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7ea7c6c9d0d286d04ed3541747e6597cbe4971f22648b68248f7ddcd329207f0", size = 251604, upload-time = "2025-09-21T20:02:06.034Z" },
{ url = "https://files.pythonhosted.org/packages/ce/60/bda83b96602036b77ecf34e6393a3836365481b69f7ed7079ab85048202b/coverage-7.10.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b9be91986841a75042b3e3243d0b3cb0b2434252b977baaf0cd56e960fe1e46f", size = 249497, upload-time = "2025-09-21T20:02:07.619Z" },
{ url = "https://files.pythonhosted.org/packages/5f/af/152633ff35b2af63977edd835d8e6430f0caef27d171edf2fc76c270ef31/coverage-7.10.7-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:b281d5eca50189325cfe1f365fafade89b14b4a78d9b40b05ddd1fc7d2a10a9c", size = 249350, upload-time = "2025-09-21T20:02:10.34Z" },
{ url = "https://files.pythonhosted.org/packages/9d/71/d92105d122bd21cebba877228990e1646d862e34a98bb3374d3fece5a794/coverage-7.10.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:99e4aa63097ab1118e75a848a28e40d68b08a5e19ce587891ab7fd04475e780f", size = 251111, upload-time = "2025-09-21T20:02:12.122Z" },
{ url = "https://files.pythonhosted.org/packages/a2/9e/9fdb08f4bf476c912f0c3ca292e019aab6712c93c9344a1653986c3fd305/coverage-7.10.7-cp313-cp313-win32.whl", hash = "sha256:dc7c389dce432500273eaf48f410b37886be9208b2dd5710aaf7c57fd442c698", size = 220746, upload-time = "2025-09-21T20:02:13.919Z" },
{ url = "https://files.pythonhosted.org/packages/b1/b1/a75fd25df44eab52d1931e89980d1ada46824c7a3210be0d3c88a44aaa99/coverage-7.10.7-cp313-cp313-win_amd64.whl", hash = "sha256:cac0fdca17b036af3881a9d2729a850b76553f3f716ccb0360ad4dbc06b3b843", size = 221541, upload-time = "2025-09-21T20:02:15.57Z" },
{ url = "https://files.pythonhosted.org/packages/14/3a/d720d7c989562a6e9a14b2c9f5f2876bdb38e9367126d118495b89c99c37/coverage-7.10.7-cp313-cp313-win_arm64.whl", hash = "sha256:4b6f236edf6e2f9ae8fcd1332da4e791c1b6ba0dc16a2dc94590ceccb482e546", size = 220170, upload-time = "2025-09-21T20:02:17.395Z" },
{ url = "https://files.pythonhosted.org/packages/bb/22/e04514bf2a735d8b0add31d2b4ab636fc02370730787c576bb995390d2d5/coverage-7.10.7-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a0ec07fd264d0745ee396b666d47cef20875f4ff2375d7c4f58235886cc1ef0c", size = 219029, upload-time = "2025-09-21T20:02:18.936Z" },
{ url = "https://files.pythonhosted.org/packages/11/0b/91128e099035ece15da3445d9015e4b4153a6059403452d324cbb0a575fa/coverage-7.10.7-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:dd5e856ebb7bfb7672b0086846db5afb4567a7b9714b8a0ebafd211ec7ce6a15", size = 219259, upload-time = "2025-09-21T20:02:20.44Z" },
{ url = "https://files.pythonhosted.org/packages/8b/51/66420081e72801536a091a0c8f8c1f88a5c4bf7b9b1bdc6222c7afe6dc9b/coverage-7.10.7-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:f57b2a3c8353d3e04acf75b3fed57ba41f5c0646bbf1d10c7c282291c97936b4", size = 260592, upload-time = "2025-09-21T20:02:22.313Z" },
{ url = "https://files.pythonhosted.org/packages/5d/22/9b8d458c2881b22df3db5bb3e7369e63d527d986decb6c11a591ba2364f7/coverage-7.10.7-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:1ef2319dd15a0b009667301a3f84452a4dc6fddfd06b0c5c53ea472d3989fbf0", size = 262768, upload-time = "2025-09-21T20:02:24.287Z" },
{ url = "https://files.pythonhosted.org/packages/f7/08/16bee2c433e60913c610ea200b276e8eeef084b0d200bdcff69920bd5828/coverage-7.10.7-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:83082a57783239717ceb0ad584de3c69cf581b2a95ed6bf81ea66034f00401c0", size = 264995, upload-time = "2025-09-21T20:02:26.133Z" },
{ url = "https://files.pythonhosted.org/packages/20/9d/e53eb9771d154859b084b90201e5221bca7674ba449a17c101a5031d4054/coverage-7.10.7-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:50aa94fb1fb9a397eaa19c0d5ec15a5edd03a47bf1a3a6111a16b36e190cff65", size = 259546, upload-time = "2025-09-21T20:02:27.716Z" },
{ url = "https://files.pythonhosted.org/packages/ad/b0/69bc7050f8d4e56a89fb550a1577d5d0d1db2278106f6f626464067b3817/coverage-7.10.7-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2120043f147bebb41c85b97ac45dd173595ff14f2a584f2963891cbcc3091541", size = 262544, upload-time = "2025-09-21T20:02:29.216Z" },
{ url = "https://files.pythonhosted.org/packages/ef/4b/2514b060dbd1bc0aaf23b852c14bb5818f244c664cb16517feff6bb3a5ab/coverage-7.10.7-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:2fafd773231dd0378fdba66d339f84904a8e57a262f583530f4f156ab83863e6", size = 260308, upload-time = "2025-09-21T20:02:31.226Z" },
{ url = "https://files.pythonhosted.org/packages/54/78/7ba2175007c246d75e496f64c06e94122bdb914790a1285d627a918bd271/coverage-7.10.7-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:0b944ee8459f515f28b851728ad224fa2d068f1513ef6b7ff1efafeb2185f999", size = 258920, upload-time = "2025-09-21T20:02:32.823Z" },
{ url = "https://files.pythonhosted.org/packages/c0/b3/fac9f7abbc841409b9a410309d73bfa6cfb2e51c3fada738cb607ce174f8/coverage-7.10.7-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4b583b97ab2e3efe1b3e75248a9b333bd3f8b0b1b8e5b45578e05e5850dfb2c2", size = 261434, upload-time = "2025-09-21T20:02:34.86Z" },
{ url = "https://files.pythonhosted.org/packages/ee/51/a03bec00d37faaa891b3ff7387192cef20f01604e5283a5fabc95346befa/coverage-7.10.7-cp313-cp313t-win32.whl", hash = "sha256:2a78cd46550081a7909b3329e2266204d584866e8d97b898cd7fb5ac8d888b1a", size = 221403, upload-time = "2025-09-21T20:02:37.034Z" },
{ url = "https://files.pythonhosted.org/packages/53/22/3cf25d614e64bf6d8e59c7c669b20d6d940bb337bdee5900b9ca41c820bb/coverage-7.10.7-cp313-cp313t-win_amd64.whl", hash = "sha256:33a5e6396ab684cb43dc7befa386258acb2d7fae7f67330ebb85ba4ea27938eb", size = 222469, upload-time = "2025-09-21T20:02:39.011Z" },
{ url = "https://files.pythonhosted.org/packages/49/a1/00164f6d30d8a01c3c9c48418a7a5be394de5349b421b9ee019f380df2a0/coverage-7.10.7-cp313-cp313t-win_arm64.whl", hash = "sha256:86b0e7308289ddde73d863b7683f596d8d21c7d8664ce1dee061d0bcf3fbb4bb", size = 220731, upload-time = "2025-09-21T20:02:40.939Z" },
{ url = "https://files.pythonhosted.org/packages/23/9c/5844ab4ca6a4dd97a1850e030a15ec7d292b5c5cb93082979225126e35dd/coverage-7.10.7-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:b06f260b16ead11643a5a9f955bd4b5fd76c1a4c6796aeade8520095b75de520", size = 218302, upload-time = "2025-09-21T20:02:42.527Z" },
{ url = "https://files.pythonhosted.org/packages/f0/89/673f6514b0961d1f0e20ddc242e9342f6da21eaba3489901b565c0689f34/coverage-7.10.7-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:212f8f2e0612778f09c55dd4872cb1f64a1f2b074393d139278ce902064d5b32", size = 218578, upload-time = "2025-09-21T20:02:44.468Z" },
{ url = "https://files.pythonhosted.org/packages/05/e8/261cae479e85232828fb17ad536765c88dd818c8470aca690b0ac6feeaa3/coverage-7.10.7-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3445258bcded7d4aa630ab8296dea4d3f15a255588dd535f980c193ab6b95f3f", size = 249629, upload-time = "2025-09-21T20:02:46.503Z" },
{ url = "https://files.pythonhosted.org/packages/82/62/14ed6546d0207e6eda876434e3e8475a3e9adbe32110ce896c9e0c06bb9a/coverage-7.10.7-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:bb45474711ba385c46a0bfe696c695a929ae69ac636cda8f532be9e8c93d720a", size = 252162, upload-time = "2025-09-21T20:02:48.689Z" },
{ url = "https://files.pythonhosted.org/packages/ff/49/07f00db9ac6478e4358165a08fb41b469a1b053212e8a00cb02f0d27a05f/coverage-7.10.7-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:813922f35bd800dca9994c5971883cbc0d291128a5de6b167c7aa697fcf59360", size = 253517, upload-time = "2025-09-21T20:02:50.31Z" },
{ url = "https://files.pythonhosted.org/packages/a2/59/c5201c62dbf165dfbc91460f6dbbaa85a8b82cfa6131ac45d6c1bfb52deb/coverage-7.10.7-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:93c1b03552081b2a4423091d6fb3787265b8f86af404cff98d1b5342713bdd69", size = 249632, upload-time = "2025-09-21T20:02:51.971Z" },
{ url = "https://files.pythonhosted.org/packages/07/ae/5920097195291a51fb00b3a70b9bbd2edbfe3c84876a1762bd1ef1565ebc/coverage-7.10.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:cc87dd1b6eaf0b848eebb1c86469b9f72a1891cb42ac7adcfbce75eadb13dd14", size = 251520, upload-time = "2025-09-21T20:02:53.858Z" },
{ url = "https://files.pythonhosted.org/packages/b9/3c/a815dde77a2981f5743a60b63df31cb322c944843e57dbd579326625a413/coverage-7.10.7-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:39508ffda4f343c35f3236fe8d1a6634a51f4581226a1262769d7f970e73bffe", size = 249455, upload-time = "2025-09-21T20:02:55.807Z" },
{ url = "https://files.pythonhosted.org/packages/aa/99/f5cdd8421ea656abefb6c0ce92556709db2265c41e8f9fc6c8ae0f7824c9/coverage-7.10.7-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:925a1edf3d810537c5a3abe78ec5530160c5f9a26b1f4270b40e62cc79304a1e", size = 249287, upload-time = "2025-09-21T20:02:57.784Z" },
{ url = "https://files.pythonhosted.org/packages/c3/7a/e9a2da6a1fc5d007dd51fca083a663ab930a8c4d149c087732a5dbaa0029/coverage-7.10.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2c8b9a0636f94c43cd3576811e05b89aa9bc2d0a85137affc544ae5cb0e4bfbd", size = 250946, upload-time = "2025-09-21T20:02:59.431Z" },
{ url = "https://files.pythonhosted.org/packages/ef/5b/0b5799aa30380a949005a353715095d6d1da81927d6dbed5def2200a4e25/coverage-7.10.7-cp314-cp314-win32.whl", hash = "sha256:b7b8288eb7cdd268b0304632da8cb0bb93fadcfec2fe5712f7b9cc8f4d487be2", size = 221009, upload-time = "2025-09-21T20:03:01.324Z" },
{ url = "https://files.pythonhosted.org/packages/da/b0/e802fbb6eb746de006490abc9bb554b708918b6774b722bb3a0e6aa1b7de/coverage-7.10.7-cp314-cp314-win_amd64.whl", hash = "sha256:1ca6db7c8807fb9e755d0379ccc39017ce0a84dcd26d14b5a03b78563776f681", size = 221804, upload-time = "2025-09-21T20:03:03.4Z" },
{ url = "https://files.pythonhosted.org/packages/9e/e8/71d0c8e374e31f39e3389bb0bd19e527d46f00ea8571ec7ec8fd261d8b44/coverage-7.10.7-cp314-cp314-win_arm64.whl", hash = "sha256:097c1591f5af4496226d5783d036bf6fd6cd0cbc132e071b33861de756efb880", size = 220384, upload-time = "2025-09-21T20:03:05.111Z" },
{ url = "https://files.pythonhosted.org/packages/62/09/9a5608d319fa3eba7a2019addeacb8c746fb50872b57a724c9f79f146969/coverage-7.10.7-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:a62c6ef0d50e6de320c270ff91d9dd0a05e7250cac2a800b7784bae474506e63", size = 219047, upload-time = "2025-09-21T20:03:06.795Z" },
{ url = "https://files.pythonhosted.org/packages/f5/6f/f58d46f33db9f2e3647b2d0764704548c184e6f5e014bef528b7f979ef84/coverage-7.10.7-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:9fa6e4dd51fe15d8738708a973470f67a855ca50002294852e9571cdbd9433f2", size = 219266, upload-time = "2025-09-21T20:03:08.495Z" },
{ url = "https://files.pythonhosted.org/packages/74/5c/183ffc817ba68e0b443b8c934c8795553eb0c14573813415bd59941ee165/coverage-7.10.7-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:8fb190658865565c549b6b4706856d6a7b09302c797eb2cf8e7fe9dabb043f0d", size = 260767, upload-time = "2025-09-21T20:03:10.172Z" },
{ url = "https://files.pythonhosted.org/packages/0f/48/71a8abe9c1ad7e97548835e3cc1adbf361e743e9d60310c5f75c9e7bf847/coverage-7.10.7-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:affef7c76a9ef259187ef31599a9260330e0335a3011732c4b9effa01e1cd6e0", size = 262931, upload-time = "2025-09-21T20:03:11.861Z" },
{ url = "https://files.pythonhosted.org/packages/84/fd/193a8fb132acfc0a901f72020e54be5e48021e1575bb327d8ee1097a28fd/coverage-7.10.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e16e07d85ca0cf8bafe5f5d23a0b850064e8e945d5677492b06bbe6f09cc699", size = 265186, upload-time = "2025-09-21T20:03:13.539Z" },
{ url = "https://files.pythonhosted.org/packages/b1/8f/74ecc30607dd95ad50e3034221113ccb1c6d4e8085cc761134782995daae/coverage-7.10.7-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:03ffc58aacdf65d2a82bbeb1ffe4d01ead4017a21bfd0454983b88ca73af94b9", size = 259470, upload-time = "2025-09-21T20:03:15.584Z" },
{ url = "https://files.pythonhosted.org/packages/0f/55/79ff53a769f20d71b07023ea115c9167c0bb56f281320520cf64c5298a96/coverage-7.10.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1b4fd784344d4e52647fd7857b2af5b3fbe6c239b0b5fa63e94eb67320770e0f", size = 262626, upload-time = "2025-09-21T20:03:17.673Z" },
{ url = "https://files.pythonhosted.org/packages/88/e2/dac66c140009b61ac3fc13af673a574b00c16efdf04f9b5c740703e953c0/coverage-7.10.7-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:0ebbaddb2c19b71912c6f2518e791aa8b9f054985a0769bdb3a53ebbc765c6a1", size = 260386, upload-time = "2025-09-21T20:03:19.36Z" },
{ url = "https://files.pythonhosted.org/packages/a2/f1/f48f645e3f33bb9ca8a496bc4a9671b52f2f353146233ebd7c1df6160440/coverage-7.10.7-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:a2d9a3b260cc1d1dbdb1c582e63ddcf5363426a1a68faa0f5da28d8ee3c722a0", size = 258852, upload-time = "2025-09-21T20:03:21.007Z" },
{ url = "https://files.pythonhosted.org/packages/bb/3b/8442618972c51a7affeead957995cfa8323c0c9bcf8fa5a027421f720ff4/coverage-7.10.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a3cc8638b2480865eaa3926d192e64ce6c51e3d29c849e09d5b4ad95efae5399", size = 261534, upload-time = "2025-09-21T20:03:23.12Z" },
{ url = "https://files.pythonhosted.org/packages/b2/dc/101f3fa3a45146db0cb03f5b4376e24c0aac818309da23e2de0c75295a91/coverage-7.10.7-cp314-cp314t-win32.whl", hash = "sha256:67f8c5cbcd3deb7a60b3345dffc89a961a484ed0af1f6f73de91705cc6e31235", size = 221784, upload-time = "2025-09-21T20:03:24.769Z" },
{ url = "https://files.pythonhosted.org/packages/4c/a1/74c51803fc70a8a40d7346660379e144be772bab4ac7bb6e6b905152345c/coverage-7.10.7-cp314-cp314t-win_amd64.whl", hash = "sha256:e1ed71194ef6dea7ed2d5cb5f7243d4bcd334bfb63e59878519be558078f848d", size = 222905, upload-time = "2025-09-21T20:03:26.93Z" },
{ url = "https://files.pythonhosted.org/packages/12/65/f116a6d2127df30bcafbceef0302d8a64ba87488bf6f73a6d8eebf060873/coverage-7.10.7-cp314-cp314t-win_arm64.whl", hash = "sha256:7fe650342addd8524ca63d77b2362b02345e5f1a093266787d210c70a50b471a", size = 220922, upload-time = "2025-09-21T20:03:28.672Z" },
{ url = "https://files.pythonhosted.org/packages/ec/16/114df1c291c22cac3b0c127a73e0af5c12ed7bbb6558d310429a0ae24023/coverage-7.10.7-py3-none-any.whl", hash = "sha256:f7941f6f2fe6dd6807a1208737b8a0cbcf1cc6d7b07d24998ad2d63590868260", size = 209952, upload-time = "2025-09-21T20:03:53.918Z" },
]
[[package]]
name = "cramjam"
version = "2.11.0"
@@ -454,8 +515,8 @@ name = "dill"
version = "0.4.0"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"platform_machine == 'x86_64' and sys_platform == 'linux'",
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
]
sdist = { url = "https://files.pythonhosted.org/packages/12/80/630b4b88364e9a8c8c5797f4602d0f76ef820909ee32f0bacb9f90654042/dill-0.4.0.tar.gz", hash = "sha256:0633f1d2df477324f53a895b02c901fb961bdbf65a17122586ea7019292cbcf0", size = 186976, upload-time = "2025-04-16T00:41:48.867Z" }
@@ -619,8 +680,8 @@ name = "docling-mcp"
version = "1.1.0"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"platform_machine == 'x86_64' and sys_platform == 'linux'",
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
]
dependencies = [
@@ -943,8 +1004,8 @@ name = "fsspec"
version = "2025.5.1"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"platform_machine == 'x86_64' and sys_platform == 'linux'",
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
]
sdist = { url = "https://files.pythonhosted.org/packages/00/f7/27f15d41f0ed38e8fcc488584b57e902b331da7f7c6dcda53721b15838fc/fsspec-2025.5.1.tar.gz", hash = "sha256:2e55e47a540b91843b755e83ded97c6e897fa0942b11490113f09e9c443c2475", size = 303033, upload-time = "2025-05-24T12:03:23.792Z" }
@@ -1264,8 +1325,8 @@ name = "huggingface-hub"
version = "0.33.2"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"platform_machine == 'x86_64' and sys_platform == 'linux'",
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
]
dependencies = [
@@ -1339,6 +1400,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd", size = 27656, upload-time = "2025-04-27T15:29:00.214Z" },
]
[[package]]
name = "iniconfig"
version = "2.1.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793, upload-time = "2025-03-19T20:09:59.721Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" },
]
[[package]]
name = "jinja2"
version = "3.1.6"
@@ -1960,8 +2030,8 @@ name = "multiprocess"
version = "0.70.18"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"platform_machine == 'x86_64' and sys_platform == 'linux'",
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
]
dependencies = [
@@ -2282,7 +2352,7 @@ wheels = [
[[package]]
name = "openrag"
version = "0.1.14.dev3"
version = "0.1.19"
source = { editable = "." }
dependencies = [
{ name = "agentd" },
@@ -2312,6 +2382,14 @@ dependencies = [
{ name = "uvicorn" },
]
[package.dev-dependencies]
dev = [
{ name = "pytest" },
{ name = "pytest-asyncio" },
{ name = "pytest-cov" },
{ name = "pytest-mock" },
]
[package.metadata]
requires-dist = [
{ name = "agentd", specifier = ">=0.2.2" },
@@ -2341,6 +2419,14 @@ requires-dist = [
{ name = "uvicorn", specifier = ">=0.35.0" },
]
[package.metadata.requires-dev]
dev = [
{ name = "pytest", specifier = ">=8" },
{ name = "pytest-asyncio", specifier = ">=0.21.0" },
{ name = "pytest-cov", specifier = ">=4.0.0" },
{ name = "pytest-mock", specifier = ">=3.12.0" },
]
[[package]]
name = "opensearch-py"
version = "3.0.0"
@@ -2836,6 +2922,60 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/e1/6b/2706497c86e8d69fb76afe5ea857fe1794621aa0f3b1d863feb953fe0f22/pypdfium2-4.30.1-py3-none-win_arm64.whl", hash = "sha256:c2b6d63f6d425d9416c08d2511822b54b8e3ac38e639fc41164b1d75584b3a8c", size = 2814810, upload-time = "2024-12-19T19:28:09.857Z" },
]
[[package]]
name = "pytest"
version = "8.4.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "colorama", marker = "sys_platform == 'win32'" },
{ name = "iniconfig" },
{ name = "packaging" },
{ name = "pluggy" },
{ name = "pygments" },
]
sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618, upload-time = "2025-09-04T14:34:22.711Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750, upload-time = "2025-09-04T14:34:20.226Z" },
]
[[package]]
name = "pytest-asyncio"
version = "1.2.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pytest" },
]
sdist = { url = "https://files.pythonhosted.org/packages/42/86/9e3c5f48f7b7b638b216e4b9e645f54d199d7abbbab7a64a13b4e12ba10f/pytest_asyncio-1.2.0.tar.gz", hash = "sha256:c609a64a2a8768462d0c99811ddb8bd2583c33fd33cf7f21af1c142e824ffb57", size = 50119, upload-time = "2025-09-12T07:33:53.816Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/04/93/2fa34714b7a4ae72f2f8dad66ba17dd9a2c793220719e736dda28b7aec27/pytest_asyncio-1.2.0-py3-none-any.whl", hash = "sha256:8e17ae5e46d8e7efe51ab6494dd2010f4ca8dae51652aa3c8d55acf50bfb2e99", size = 15095, upload-time = "2025-09-12T07:33:52.639Z" },
]
[[package]]
name = "pytest-cov"
version = "7.0.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "coverage" },
{ name = "pluggy" },
{ name = "pytest" },
]
sdist = { url = "https://files.pythonhosted.org/packages/5e/f7/c933acc76f5208b3b00089573cf6a2bc26dc80a8aece8f52bb7d6b1855ca/pytest_cov-7.0.0.tar.gz", hash = "sha256:33c97eda2e049a0c5298e91f519302a1334c26ac65c1a483d6206fd458361af1", size = 54328, upload-time = "2025-09-09T10:57:02.113Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" },
]
[[package]]
name = "pytest-mock"
version = "3.15.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pytest" },
]
sdist = { url = "https://files.pythonhosted.org/packages/68/14/eb014d26be205d38ad5ad20d9a80f7d201472e08167f0bb4361e251084a9/pytest_mock-3.15.1.tar.gz", hash = "sha256:1849a238f6f396da19762269de72cb1814ab44416fa73a8686deac10b0d87a0f", size = 34036, upload-time = "2025-09-16T16:37:27.081Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/5a/cc/06253936f4a7fa2e0f48dfe6d851d9c56df896a9ab09ac019d70b760619c/pytest_mock-3.15.1-py3-none-any.whl", hash = "sha256:0a25e2eb88fe5168d535041d09a4529a188176ae608a6d249ee65abc0949630d", size = 10095, upload-time = "2025-09-16T16:37:25.734Z" },
]
[[package]]
name = "python-bidi"
version = "0.6.6"
@@ -3622,9 +3762,9 @@ name = "torch"
version = "2.8.0"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"sys_platform == 'darwin'",
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
"sys_platform == 'darwin'",
]
dependencies = [
{ name = "filelock", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" },
@@ -3669,9 +3809,9 @@ name = "torchvision"
version = "0.23.0"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"sys_platform == 'darwin'",
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
"sys_platform == 'darwin'",
]
dependencies = [
{ name = "numpy", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" },