Merge branch 'main' into docs-opensearch-jwt-values

commit c90366f8fc
Mendon Kissling, 2025-10-08 12:50:17 -04:00, committed by GitHub (GPG key ID: B5690EEEBB952194)
52 changed files with 1293 additions and 533 deletions

@ -1,59 +0,0 @@
name: Build Langflow Responses Multi-Arch
on:
workflow_dispatch:
jobs:
build:
strategy:
fail-fast: false
matrix:
include:
- platform: linux/amd64
arch: amd64
runs-on: ubuntu-latest
- platform: linux/arm64
arch: arm64
runs-on: [self-hosted, linux, ARM64, langflow-ai-arm64-2]
runs-on: ${{ matrix.runs-on }}
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Build and push langflow (${{ matrix.arch }})
uses: docker/build-push-action@v5
with:
context: .
file: ./Dockerfile.langflow
platforms: ${{ matrix.platform }}
push: true
tags: phact/langflow:responses-${{ matrix.arch }}
cache-from: type=gha,scope=langflow-responses-${{ matrix.arch }}
cache-to: type=gha,mode=max,scope=langflow-responses-${{ matrix.arch }}
manifest:
needs: build
runs-on: ubuntu-latest
steps:
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Create and push multi-arch manifest
run: |
docker buildx imagetools create -t phact/langflow:responses \
phact/langflow:responses-amd64 \
phact/langflow:responses-arm64

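The `manifest` job above stitches the per-architecture images into a single multi-arch tag. A minimal Python sketch of that step, assuming the per-arch tags already exist on Docker Hub (the workflow itself runs the CLI directly):

import subprocess

def create_multiarch_manifest(repo: str, tag: str, arches: list[str]) -> None:
    """Combine per-architecture image tags into one multi-arch manifest tag."""
    cmd = ["docker", "buildx", "imagetools", "create", "-t", f"{repo}:{tag}"]
    cmd += [f"{repo}:{tag}-{arch}" for arch in arches]
    subprocess.run(cmd, check=True)

# Mirrors the workflow step: phact/langflow:responses from -amd64 and -arm64.
create_multiarch_manifest("phact/langflow", "responses", ["amd64", "arm64"])
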
@ -1,16 +1,95 @@
name: Build Multi-Architecture Docker Images
name: Release + Docker Images (multi-arch)
on:
push:
branches:
- main
paths:
- 'pyproject.toml'
workflow_dispatch:
inputs:
update_latest:
description: 'Update latest tags (production release)'
required: false
default: false
type: boolean
jobs:
build-python-packages:
runs-on: ubuntu-latest
outputs:
skip_release: ${{ steps.version.outputs.skip_release }}
version: ${{ steps.version.outputs.version }}
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.13'
- name: Install uv
uses: astral-sh/setup-uv@v3
- name: Extract version from pyproject.toml
id: version
run: |
VERSION=$(grep '^version = ' pyproject.toml | cut -d '"' -f 2)
echo "version=$VERSION" >> $GITHUB_OUTPUT
echo "Version: $VERSION"
# Check if tag already exists
if git rev-parse "v$VERSION" >/dev/null 2>&1; then
echo "Tag v$VERSION already exists, skipping release"
echo "skip_release=true" >> $GITHUB_OUTPUT
exit 0
fi
echo "skip_release=false" >> $GITHUB_OUTPUT
# Check if version is numeric (e.g., 0.1.16) vs prerelease (e.g., 0.1.16-rc1)
if [[ "$VERSION" =~ ^[0-9.-]+$ ]]; then
echo "is_prerelease=false" >> $GITHUB_OUTPUT
echo "Release type: Production"
else
echo "is_prerelease=true" >> $GITHUB_OUTPUT
echo "Release type: Prerelease"
fi
- name: Build wheel and source distribution
if: steps.version.outputs.skip_release != 'true'
run: |
uv build
- name: List built artifacts
if: steps.version.outputs.skip_release != 'true'
run: |
ls -la dist/
echo "Built artifacts:"
for file in dist/*; do
echo " - $(basename $file) ($(stat -c%s $file | numfmt --to=iec-i)B)"
done
- name: Upload build artifacts
if: steps.version.outputs.skip_release != 'true'
uses: actions/upload-artifact@v4
with:
name: python-packages
path: dist/
retention-days: 30
- name: Create Release
if: steps.version.outputs.skip_release != 'true'
uses: softprops/action-gh-release@v2
with:
tag_name: v${{ steps.version.outputs.version }}
name: Release ${{ steps.version.outputs.version }}
draft: false
prerelease: ${{ steps.version.outputs.is_prerelease }}
generate_release_notes: true
files: |
dist/*.whl
dist/*.tar.gz
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
build:
needs: build-python-packages
if: needs.build-python-packages.outputs.skip_release != 'true'
strategy:
fail-fast: false
matrix:
@ -106,9 +185,9 @@ jobs:
cache-to: type=gha,mode=max,scope=${{ matrix.image }}-${{ matrix.arch }}
manifest:
needs: build
needs: [build, build-python-packages]
runs-on: ubuntu-latest
if: github.event_name != 'pull_request'
if: github.event_name != 'pull_request' && needs.build-python-packages.outputs.skip_release != 'true'
steps:
- name: Checkout
uses: actions/checkout@v4
@ -146,8 +225,8 @@ jobs:
phact/openrag-opensearch:$VERSION-amd64 \
phact/openrag-opensearch:$VERSION-arm64
# Only update latest tags if version is numeric AND checkbox is checked
if [[ "$VERSION" =~ ^[0-9.-]+$ ]] && [[ "${{ github.event.inputs.update_latest }}" == "true" ]]; then
# Only update latest tags if version is numeric
if [[ "$VERSION" =~ ^[0-9.-]+$ ]]; then
echo "Updating latest tags for production release: $VERSION"
docker buildx imagetools create -t phact/openrag-backend:latest \
phact/openrag-backend:$VERSION-amd64 \
@ -165,5 +244,5 @@ jobs:
phact/openrag-opensearch:$VERSION-amd64 \
phact/openrag-opensearch:$VERSION-arm64
else
echo "Skipping latest tags - version: $VERSION, update_latest: ${{ github.event.inputs.update_latest }}"
echo "Skipping latest tags - version: $VERSION (not numeric)"
fi

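The version step above drives the whole release: an existing `v$VERSION` tag short-circuits the run, and the `^[0-9.-]+$` pattern decides production versus prerelease. A Python sketch of the same classification, for illustration only (the workflow does this in bash):

import re
import subprocess

def classify_version(version: str) -> dict[str, bool]:
    """Mirror the workflow's checks: skip if already tagged; numeric => production."""
    tag_exists = subprocess.run(
        ["git", "rev-parse", f"v{version}"], capture_output=True
    ).returncode == 0
    # Same character class as the workflow: digits, dots, and hyphens only.
    is_production = re.fullmatch(r"[0-9.-]+", version) is not None
    return {"skip_release": tag_exists, "is_prerelease": not is_production}

print(classify_version("0.1.19"))      # production release
print(classify_version("0.1.19-rc1"))  # prerelease: letters fail the pattern
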
.github/workflows/test-integration.yml (new file, 54 lines)

@ -0,0 +1,54 @@
name: Integration Tests
on:
pull_request:
push:
branches:
- main
jobs:
tests:
runs-on: [self-hosted, linux, ARM64, langflow-ai-arm64-40gb]
env:
# Prefer repository/environment variable first, then secret, then a sane fallback
OPENSEARCH_PASSWORD: ${{ vars.OPENSEARCH_PASSWORD || secrets.OPENSEARCH_PASSWORD || 'OpenRag#2025!' }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
steps:
- run: df -h
#- name: "node-cleanup"
#run: |
# sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache/CodeQL
# sudo docker image prune --all --force
# sudo docker builder prune -a
- run: df -h
- name: Checkout
uses: actions/checkout@v4
- name: Set up UV
uses: astral-sh/setup-uv@v3
with:
version: latest
- name: Python version
run: uv python install 3.13
- name: Install dependencies
run: uv sync
- name: Run integration tests
env:
OPENSEARCH_HOST: localhost
OPENSEARCH_PORT: 9200
OPENSEARCH_USERNAME: admin
OPENSEARCH_PASSWORD: ${{ env.OPENSEARCH_PASSWORD }}
LOG_LEVEL: DEBUG
# Force no-auth mode so tests bypass OAuth
GOOGLE_OAUTH_CLIENT_ID: ""
GOOGLE_OAUTH_CLIENT_SECRET: ""
# Disable startup ingest noise unless a test enables it
DISABLE_STARTUP_INGEST: "true"
run: |
make test-ci
echo "Keys directory after tests:"
ls -la keys/ || echo "No keys directory"

.gitignore (2 lines changed)

@ -18,6 +18,8 @@ wheels/
1001*.pdf
*.json
!flows/*.json
!src/tui/_assets/flows/*.json
!src/tui/_assets/flows/components/*.json
.DS_Store
config/

MANIFEST.in (new file, 1 line)

@ -0,0 +1 @@
recursive-include src/tui/_assets *

Makefile (149 lines changed)

@ -1,7 +1,17 @@
# OpenRAG Development Makefile
# Provides easy commands for development workflow
.PHONY: help dev dev-cpu dev-local infra stop clean build logs shell-backend shell-frontend install test backend frontend install-be install-fe build-be build-fe logs-be logs-fe logs-lf logs-os shell-be shell-lf shell-os restart status health db-reset flow-upload quick setup
# Load variables from .env if present so `make` commands pick them up
ifneq (,$(wildcard .env))
include .env
# Export all simple KEY=VALUE pairs to the environment for child processes
export $(shell sed -n 's/^\([A-Za-z_][A-Za-z0-9_]*\)=.*/\1/p' .env)
endif
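# For example, a .env line `OPENSEARCH_PASSWORD=secret` becomes a Make variable
# via the include above, and the sed expression extracts the name
# `OPENSEARCH_PASSWORD` so that `export` passes the value through to recipe
# shells (the curl and pytest commands below). Lines that are not simple
# KEY=VALUE pairs are ignored.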
.PHONY: help dev dev-cpu dev-local infra stop clean build logs shell-backend shell-frontend install \
test test-integration test-ci \
backend frontend install-be install-fe build-be build-fe logs-be logs-fe logs-lf logs-os \
shell-be shell-lf shell-os restart status health db-reset flow-upload quick setup
# Default target
help:
@ -32,14 +42,16 @@ help:
@echo " shell-lf - Shell into langflow container"
@echo ""
@echo "Testing:"
@echo " test - Run backend tests"
@echo " test - Run all backend tests"
@echo " test-integration - Run integration tests (requires infra)"
@echo " test-ci - Start infra, run integration tests, tear down"
@echo " lint - Run linting checks"
@echo ""
# Development environments
dev:
@echo "🚀 Starting OpenRAG with GPU support..."
docker-compose up -d
docker compose up -d
@echo "✅ Services started!"
@echo " Backend: http://localhost:8000"
@echo " Frontend: http://localhost:3000"
@ -49,7 +61,7 @@ dev:
dev-cpu:
@echo "🚀 Starting OpenRAG with CPU only..."
docker-compose -f docker-compose-cpu.yml up -d
docker compose -f docker-compose-cpu.yml up -d
@echo "✅ Services started!"
@echo " Backend: http://localhost:8000"
@echo " Frontend: http://localhost:3000"
@ -59,7 +71,7 @@ dev-cpu:
dev-local:
@echo "🔧 Starting infrastructure only (for local development)..."
docker-compose up -d opensearch dashboards langflow
docker compose up -d opensearch dashboards langflow
@echo "✅ Infrastructure started!"
@echo " Langflow: http://localhost:7860"
@echo " OpenSearch: http://localhost:9200"
@ -69,7 +81,7 @@ dev-local:
infra:
@echo "🔧 Starting infrastructure services only..."
docker-compose up -d opensearch dashboards langflow
docker compose up -d opensearch dashboards langflow
@echo "✅ Infrastructure services started!"
@echo " Langflow: http://localhost:7860"
@echo " OpenSearch: http://localhost:9200"
@ -86,15 +98,15 @@ infra-cpu:
# Container management
stop:
@echo "🛑 Stopping all containers..."
docker-compose down
docker-compose -f docker-compose-cpu.yml down 2>/dev/null || true
docker compose down
docker compose -f docker-compose-cpu.yml down 2>/dev/null || true
restart: stop dev
clean: stop
@echo "🧹 Cleaning up containers and volumes..."
docker-compose down -v --remove-orphans
docker-compose -f docker-compose-cpu.yml down -v --remove-orphans 2>/dev/null || true
docker compose down -v --remove-orphans
docker compose -f docker-compose-cpu.yml down -v --remove-orphans 2>/dev/null || true
docker system prune -f
# Local development
@ -114,7 +126,7 @@ install: install-be install-fe
install-be:
@echo "📦 Installing backend dependencies..."
uv sync
uv sync --extra torch-cu128
install-fe:
@echo "📦 Installing frontend dependencies..."
@ -123,7 +135,7 @@ install-fe:
# Building
build:
@echo "🔨 Building Docker images..."
docker-compose build
docker compose build
build-be:
@echo "🔨 Building backend image..."
@ -136,41 +148,124 @@ build-fe:
# Logging and debugging
logs:
@echo "📋 Showing all container logs..."
docker-compose logs -f
docker compose logs -f
logs-be:
@echo "📋 Showing backend logs..."
docker-compose logs -f openrag-backend
docker compose logs -f openrag-backend
logs-fe:
@echo "📋 Showing frontend logs..."
docker-compose logs -f openrag-frontend
docker compose logs -f openrag-frontend
logs-lf:
@echo "📋 Showing langflow logs..."
docker-compose logs -f langflow
docker compose logs -f langflow
logs-os:
@echo "📋 Showing opensearch logs..."
docker-compose logs -f opensearch
docker compose logs -f opensearch
# Shell access
shell-be:
@echo "🐚 Opening shell in backend container..."
docker-compose exec openrag-backend /bin/bash
docker compose exec openrag-backend /bin/bash
shell-lf:
@echo "🐚 Opening shell in langflow container..."
docker-compose exec langflow /bin/bash
docker compose exec langflow /bin/bash
shell-os:
@echo "🐚 Opening shell in opensearch container..."
docker-compose exec opensearch /bin/bash
docker compose exec opensearch /bin/bash
# Testing and quality
test:
@echo "🧪 Running backend tests..."
uv run pytest
@echo "🧪 Running all backend tests..."
uv run pytest tests/ -v
test-integration:
@echo "🧪 Running integration tests (requires infrastructure)..."
@echo "💡 Make sure to run 'make infra' first!"
uv run pytest tests/integration/ -v
# CI-friendly integration test target: brings up infra, waits, runs tests, tears down
test-ci:
@set -e; \
echo "Installing test dependencies..."; \
uv sync --group dev; \
if [ ! -f keys/private_key.pem ]; then \
echo "Generating RSA keys for JWT signing..."; \
uv run python -c "from src.main import generate_jwt_keys; generate_jwt_keys()"; \
else \
echo "RSA keys already exist, ensuring correct permissions..."; \
chmod 600 keys/private_key.pem 2>/dev/null || true; \
chmod 644 keys/public_key.pem 2>/dev/null || true; \
fi; \
echo "Cleaning up old containers and volumes..."; \
docker compose -f docker-compose-cpu.yml down -v 2>/dev/null || true; \
echo "Pulling latest images..."; \
docker compose -f docker-compose-cpu.yml pull; \
echo "Starting infra (OpenSearch + Dashboards + Langflow) with CPU containers"; \
docker compose -f docker-compose-cpu.yml up -d opensearch dashboards langflow; \
echo "Starting docling-serve..."; \
DOCLING_ENDPOINT=$$(uv run python scripts/docling_ctl.py start --port 5001 | grep "Endpoint:" | awk '{print $$2}'); \
echo "Docling-serve started at $$DOCLING_ENDPOINT"; \
echo "Waiting for backend OIDC endpoint..."; \
for i in $$(seq 1 60); do \
docker exec openrag-backend curl -s http://localhost:8000/.well-known/openid-configuration >/dev/null 2>&1 && break || sleep 2; \
done; \
echo "Waiting for OpenSearch security config to be fully applied..."; \
for i in $$(seq 1 60); do \
if docker logs os 2>&1 | grep -q "Security configuration applied successfully"; then \
echo "✓ Security configuration applied"; \
break; \
fi; \
sleep 2; \
done; \
echo "Verifying OIDC authenticator is active in OpenSearch..."; \
AUTHC_CONFIG=$$(curl -k -s -u admin:$${OPENSEARCH_PASSWORD} https://localhost:9200/_opendistro/_security/api/securityconfig 2>/dev/null); \
if echo "$$AUTHC_CONFIG" | grep -q "openid_auth_domain"; then \
echo "✓ OIDC authenticator configured"; \
echo "$$AUTHC_CONFIG" | grep -A 5 "openid_auth_domain"; \
else \
echo "✗ OIDC authenticator NOT found in security config!"; \
echo "Security config:"; \
echo "$$AUTHC_CONFIG" | head -50; \
exit 1; \
fi; \
echo "Waiting for Langflow..."; \
for i in $$(seq 1 60); do \
curl -s http://localhost:7860/ >/dev/null 2>&1 && break || sleep 2; \
done; \
echo "Waiting for docling-serve at $$DOCLING_ENDPOINT..."; \
for i in $$(seq 1 60); do \
curl -s $${DOCLING_ENDPOINT}/health >/dev/null 2>&1 && break || sleep 2; \
done; \
echo "Running integration tests"; \
LOG_LEVEL=$${LOG_LEVEL:-DEBUG} \
GOOGLE_OAUTH_CLIENT_ID="" \
GOOGLE_OAUTH_CLIENT_SECRET="" \
OPENSEARCH_HOST=localhost OPENSEARCH_PORT=9200 \
OPENSEARCH_USERNAME=admin OPENSEARCH_PASSWORD=$${OPENSEARCH_PASSWORD} \
DISABLE_STARTUP_INGEST=$${DISABLE_STARTUP_INGEST:-true} \
uv run pytest tests/integration -vv -s -o log_cli=true --log-cli-level=DEBUG; \
TEST_RESULT=$$?; \
echo ""; \
echo "=== Post-test JWT diagnostics ==="; \
echo "Generating test JWT token..."; \
TEST_TOKEN=$$(uv run python -c "from src.session_manager import SessionManager, AnonymousUser; sm = SessionManager('test'); print(sm.create_jwt_token(AnonymousUser()))" 2>/dev/null || echo ""); \
if [ -n "$$TEST_TOKEN" ]; then \
echo "Testing JWT against OpenSearch..."; \
HTTP_CODE=$$(curl -k -s -w "%{http_code}" -o /tmp/os_diag.txt -H "Authorization: Bearer $$TEST_TOKEN" -H "Content-Type: application/json" https://localhost:9200/documents/_search -d '{"query":{"match_all":{}}}' 2>&1); \
echo "HTTP $$HTTP_CODE: $$(cat /tmp/os_diag.txt | head -c 150)"; \
fi; \
echo "================================="; \
echo ""; \
echo "Tearing down infra"; \
uv run python scripts/docling_ctl.py stop || true; \
docker compose down -v || true; \
exit $$TEST_RESULT
lint:
@echo "🔍 Running linting checks..."
@ -180,19 +275,19 @@ lint:
# Service status
status:
@echo "📊 Container status:"
@docker-compose ps 2>/dev/null || echo "No containers running"
@docker compose ps 2>/dev/null || echo "No containers running"
health:
@echo "🏥 Health check:"
@echo "Backend: $$(curl -s http://localhost:8000/health 2>/dev/null || echo 'Not responding')"
@echo "Langflow: $$(curl -s http://localhost:7860/health 2>/dev/null || echo 'Not responding')"
@echo "OpenSearch: $$(curl -s -k -u admin:$(shell grep OPENSEARCH_PASSWORD .env | cut -d= -f2) https://localhost:9200 2>/dev/null | jq -r .tagline 2>/dev/null || echo 'Not responding')"
@echo "OpenSearch: $$(curl -s -k -u admin:$${OPENSEARCH_PASSWORD} https://localhost:9200 2>/dev/null | jq -r .tagline 2>/dev/null || echo 'Not responding')"
# Database operations
db-reset:
@echo "🗄️ Resetting OpenSearch indices..."
curl -X DELETE "http://localhost:9200/documents" -u admin:$$(grep OPENSEARCH_PASSWORD .env | cut -d= -f2) || true
curl -X DELETE "http://localhost:9200/knowledge_filters" -u admin:$$(grep OPENSEARCH_PASSWORD .env | cut -d= -f2) || true
curl -X DELETE "http://localhost:9200/documents" -u admin:$${OPENSEARCH_PASSWORD} || true
curl -X DELETE "http://localhost:9200/knowledge_filters" -u admin:$${OPENSEARCH_PASSWORD} || true
@echo "Indices reset. Restart backend to recreate."
# Flow management
@ -215,4 +310,4 @@ setup:
@echo "⚙️ Setting up development environment..."
@if [ ! -f .env ]; then cp .env.example .env && echo "📝 Created .env from template"; fi
@$(MAKE) install
@echo "✅ Setup complete! Run 'make dev' to start."
@echo "✅ Setup complete! Run 'make dev' to start."

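`test-ci` gates each phase on readiness by polling with `curl` in `seq` loops. A compact Python equivalent of one such wait, shown against the same backend OIDC endpoint the Makefile checks (illustrative sketch only):

import time
import urllib.request

def wait_for_http(url: str, attempts: int = 60, delay: float = 2.0) -> bool:
    """Poll a URL until it responds, mirroring the Makefile's seq/curl loops."""
    for _ in range(attempts):
        try:
            with urllib.request.urlopen(url, timeout=2):
                return True
        except OSError:
            time.sleep(delay)
    return False

# The endpoint test-ci waits on before running the integration suite.
ready = wait_for_http("http://localhost:8000/.well-known/openid-configuration")
print("backend ready" if ready else "backend never came up")
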
@ -1,4 +0,0 @@
:::info
OpenRAG is currently in public preview.
Development is ongoing, and the features and functionality are subject to change.
:::

@ -7,9 +7,6 @@ import Icon from "@site/src/components/icon/icon";
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import PartialModifyFlows from '@site/docs/_partial-modify-flows.mdx';
import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
<PartialExternalPreview />
OpenRAG leverages Langflow's Agent component to power the OpenRAG OpenSearch Agent flow.
@ -34,11 +31,11 @@ In an agentic context, tools are functions that the agent can run to perform tas
</details>
## Use the OpenRAG OpenSearch Agent flow
## Use the OpenRAG OpenSearch Agent flow {#flow}
If you've chatted with your knowledge in OpenRAG, you've already experienced the OpenRAG OpenSearch Agent chat flow.
To switch OpenRAG over to the [Langflow visual editor](https://docs.langflow.org/concepts-overview) and view the OpenRAG OpenSearch Agent flow, click <Icon name="Settings2" aria-hidden="true"/> **Settings**, and then click **Edit in Langflow**.
This flow contains seven components connected together to chat with your data:
This flow contains eight components connected together to chat with your data:
* The [**Agent** component](https://docs.langflow.org/agents) orchestrates the entire flow by deciding when to search the knowledge base, how to formulate search queries, and how to combine retrieved information with the user's question to generate a comprehensive response.
The **Agent** behaves according to the prompt in the **Agent Instructions** field.
@ -49,6 +46,7 @@ The **Agent** behaves according to the prompt in the **Agent Instructions** fiel
* The [**Text Input** component](https://docs.langflow.org/components-io) is populated with the global variable `OPENRAG-QUERY-FILTER`.
This filter is the [Knowledge filter](/knowledge#create-knowledge-filters), and it controls which knowledge sources are searched.
* The **Agent** component's Output port is connected to the [**Chat Output** component](https://docs.langflow.org/components-io), which returns the final response to the user or application.
* An [**MCP Tools** component](https://docs.langflow.org/mcp-client) is connected to the Agent's **Tools** port. This component calls the [OpenSearch URL Ingestion flow](/ingestion#url-flow), which Langflow exposes as an MCP server, to fetch content from URLs and store it in OpenSearch.
<PartialModifyFlows />

@ -7,9 +7,6 @@ import Icon from "@site/src/components/icon/icon";
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import PartialModifyFlows from '@site/docs/_partial-modify-flows.mdx';
import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
<PartialExternalPreview />
OpenRAG uses [Docling](https://docling-project.github.io/docling/) for its document ingestion pipeline.
More specifically, OpenRAG uses [Docling Serve](https://github.com/docling-project/docling-serve), which starts a `docling-serve` process on your local machine and runs Docling ingestion through an API service.
@ -50,4 +47,31 @@ If you want to use OpenRAG's built-in pipeline instead of Docling serve, set `DI
The built-in pipeline still uses the Docling processor, but uses it directly without the Docling Serve API.
For more information, see [`processors.py` in the OpenRAG repository](https://github.com/langflow-ai/openrag/blob/main/src/models/processors.py#L58).
## Knowledge ingestion flows
[Flows](https://docs.langflow.org/concepts-overview) in Langflow are functional representations of application workflows, with multiple [component](https://docs.langflow.org/concepts-components) nodes connected as single steps in a workflow.
The **OpenSearch Ingestion** flow is the default knowledge ingestion flow in OpenRAG: when you **Add Knowledge**, OpenRAG runs this flow in the background, using **Docling Serve** to import and process your documents.
This flow contains ten components connected together to process and store documents in your knowledge base:
* The [**Docling Serve** component](https://docs.langflow.org/bundles-docling) processes input documents by connecting to your instance of Docling Serve.
* The [**Export DoclingDocument** component](https://docs.langflow.org/components-docling) exports the processed DoclingDocument to markdown format with image export mode set to placeholder. This conversion turns the structured document data into a standardized format for further processing.
* Three [**DataFrame Operations** components](https://docs.langflow.org/components-processing#dataframe-operations) sequentially add `filename`, `file_size`, and `mimetype` metadata columns to the document data.
* The [**Split Text** component](https://docs.langflow.org/components-processing#split-text) splits the processed text into chunks with a chunk size of 1000 characters and an overlap of 200 characters, as shown in the sketch after this list.
* Four **Secret Input** components provide secure access to configuration variables: `CONNECTOR_TYPE`, `OWNER`, `OWNER_EMAIL`, and `OWNER_NAME`. These are runtime variables populated from OAuth login.
* The **Create Data** component combines the secret inputs into a structured data object that will be associated with the document embeddings.
* The [**Embedding Model** component](https://docs.langflow.org/components-embedding-models) generates vector embeddings using OpenAI's `text-embedding-3-small` model. The embedding model is selected during application onboarding and cannot be changed.
* The [**OpenSearch** component](https://docs.langflow.org/bundles-elastic#opensearch) stores the processed documents and their embeddings in the `documents` index at `https://opensearch:9200`. By default, the component authenticates with a JWT token, but you can also select `basic` auth mode and enter your OpenSearch admin username and password.
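As referenced in the **Split Text** item above, chunking is character-based with overlap. A minimal sketch of that behavior, assuming plain fixed-size splitting (the Langflow component can also split on separators):

def split_text(text: str, chunk_size: int = 1000, overlap: int = 200) -> list[str]:
    """Return fixed-size chunks; each repeats the previous chunk's last `overlap` characters."""
    step = chunk_size - overlap
    return [text[i : i + chunk_size] for i in range(0, max(len(text) - overlap, 1), step)]

chunks = split_text("word " * 600)  # ~3000 characters -> 4 chunks of <= 1000 characters
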
<PartialModifyFlows />
### OpenSearch URL Ingestion flow {#url-flow}
OpenRAG includes an additional knowledge ingestion flow, which the [**OpenSearch Agent flow**](/agents#flow) uses as an MCP tool.
The agent calls this flow to fetch web content, and the results are ingested into OpenSearch.
For more on using MCP clients in Langflow, see [MCP clients](https://docs.langflow.org/mcp-client).\
To connect additional MCP servers to the MCP client, see [Connect to MCP servers from your application](https://docs.langflow.org/mcp-tutorial).

@ -7,9 +7,6 @@ import Icon from "@site/src/components/icon/icon";
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import PartialModifyFlows from '@site/docs/_partial-modify-flows.mdx';
import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
<PartialExternalPreview />
OpenRAG uses [OpenSearch](https://docs.opensearch.org/latest/) for its vector-backed knowledge store.
This is a specialized database for storing and retrieving embeddings, which helps your Agent efficiently find relevant information.
@ -26,6 +23,7 @@ OpenRAG supports two authentication modes based on how you [install OpenRAG](/in
## Ingest knowledge
OpenRAG supports knowledge ingestion through direct file uploads and OAuth connectors.
To configure the knowledge ingestion pipeline parameters, see [Docling Ingestion](/ingestion).
### Direct file ingestion
@ -86,18 +84,6 @@ You can select multiples.
The ingestion process may take some time, depending on the size of your documents.
4. When ingestion is complete, your documents are available in the Knowledge screen.
### Sync cloud connectors
Your connected data sources are found in the <Icon name="Settings2" aria-hidden="true"/> **Settings** page.
When you click **Sync Now** for a connected cloud service like Google Drive, OpenRAG scans your connected Google Drive account to find files that match your sync criteria. Sync criteria are controlled in **Sync Settings** on the same page. You can sync all files, or select a maximum number of files to sync.
For each file found, OpenRAG downloads, converts, and embeds the processed content into OpenSearch.
You can monitor the sync progress in the <Icon name="Bell" aria-hidden="true"/> **Tasks** sidebar.
Once processing is complete, the synced documents become available in your knowledge base and can be searched through the chat interface or Knowledge page.
## Explore knowledge
The **Knowledge** page lists the documents OpenRAG has ingested into the OpenSearch vector database's `documents` index.
@ -109,10 +95,6 @@ Documents are processed with the default **Knowledge Ingest** flow, so if you wa
<PartialModifyFlows />
### Knowledge ingestion settings
To configure the knowledge ingestion pipeline parameters, see [Docling Ingestion](/ingestion).
## Create knowledge filters
OpenRAG includes a knowledge filter system for organizing and managing document collections.

@ -4,9 +4,6 @@ slug: /get-started/docker
---
import PartialOnboarding from '@site/docs/_partial-onboarding.mdx';
import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
<PartialExternalPreview />
There are two different Docker Compose files.
They deploy the same applications and containers, but to different environments.

@ -6,9 +6,6 @@ slug: /install
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import PartialOnboarding from '@site/docs/_partial-onboarding.mdx';
import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
<PartialExternalPreview />
[Install the OpenRAG Python wheel](#install-python-wheel), and then run the [OpenRAG Terminal User Interface (TUI)](#setup) to start your OpenRAG deployment with a guided setup process.

@ -6,9 +6,6 @@ slug: /quickstart
import Icon from "@site/src/components/icon/icon";
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
<PartialExternalPreview />
Get started with OpenRAG by loading your knowledge, swapping out your language model, and then chatting with the OpenRAG API.

@ -3,10 +3,6 @@ title: Terminal User Interface (TUI) commands
slug: /get-started/tui
---
import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
<PartialExternalPreview />
The OpenRAG Terminal User Interface (TUI) allows you to set up, configure, and monitor your OpenRAG deployment directly from the terminal, on any operating system.
![OpenRAG TUI Interface](@site/static/img/OpenRAG_TUI_2025-09-10T13_04_11_757637.svg)

@ -3,10 +3,6 @@ title: What is OpenRAG?
slug: /
---
import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
<PartialExternalPreview />
OpenRAG is an open-source package for building agentic RAG systems.
It supports integration with a wide range of orchestration tools, vector databases, and LLM providers.

@ -5,9 +5,6 @@ slug: /support/troubleshoot
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
<PartialExternalPreview />
This page provides troubleshooting advice for issues you might encounter when using OpenRAG or contributing to OpenRAG.

Binary image file changed; not shown (951 KiB before, 1,004 KiB after).

@ -74,7 +74,7 @@ export const KnowledgeSearchInput = () => {
{queryOverride && (
<Button
variant="ghost"
className="h-full !px-1.5 !py-0"
className="h-full rounded-sm !px-1.5 !py-0"
type="button"
onClick={() => {
setSearchQueryInput("");
@ -87,7 +87,7 @@ export const KnowledgeSearchInput = () => {
<Button
variant="ghost"
className={cn(
"h-full !px-1.5 !py-0 hidden group-focus-within/input:block",
"h-full rounded-sm !px-1.5 !py-0 hidden group-focus-within/input:block",
searchQueryInput && "block"
)}
type="submit"

@ -326,4 +326,4 @@ export default function ProtectedAdminPage() {
<AdminPage />
</ProtectedRoute>
)
}

@ -5,14 +5,9 @@ import { useRouter, useSearchParams } from "next/navigation";
import { Suspense, useCallback, useEffect, useMemo, useState } from "react";
// import { Label } from "@/components/ui/label";
// import { Checkbox } from "@/components/ui/checkbox";
import { filterAccentClasses } from "@/components/knowledge-filter-panel";
import { ProtectedRoute } from "@/components/protected-route";
import { Button } from "@/components/ui/button";
import { Checkbox } from "@/components/ui/checkbox";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context";
import { useTask } from "@/contexts/task-context";
import {
type ChunkResult,
type File,
@ -35,9 +30,9 @@ function ChunksPageContent() {
const { parsedFilterData, queryOverride } = useKnowledgeFilter();
const filename = searchParams.get("filename");
const [chunks, setChunks] = useState<ChunkResult[]>([]);
const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState<
ChunkResult[]
>([]);
// const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState<
// ChunkResult[]
// >([]);
// const [selectedChunks, setSelectedChunks] = useState<Set<number>>(new Set());
const [activeCopiedChunkIndex, setActiveCopiedChunkIndex] = useState<
number | null
@ -126,26 +121,25 @@ function ChunksPageContent() {
return (
<div className="flex flex-col h-full">
<div className="flex flex-col h-full">
{/* Header */}
<div className="flex flex-col mb-6">
<div className="flex items-center gap-3 mb-6">
<Button
variant="ghost"
onClick={handleBack}
size="sm"
className="max-w-8 max-h-8 -m-2"
>
<ArrowLeft size={24} />
</Button>
<h1 className="text-lg font-semibold">
{/* Removes file extension from filename */}
{filename.replace(/\.[^/.]+$/, "")}
</h1>
</div>
<div className="flex flex-1">
<KnowledgeSearchInput />
{/* <div className="flex items-center pl-4 gap-2">
{/* Header */}
<div className="flex flex-col mb-6">
<div className="flex items-center gap-3 mb-6">
<Button
variant="ghost"
onClick={handleBack}
size="sm"
className="max-w-8 max-h-8 -m-2"
>
<ArrowLeft size={24} />
</Button>
<h1 className="text-lg font-semibold">
{/* Removes file extension from filename */}
{filename.replace(/\.[^/.]+$/, "")}
</h1>
</div>
<div className="flex flex-1">
<KnowledgeSearchInput />
{/* <div className="flex items-center pl-4 gap-2">
<Checkbox
id="selectAllChunks"
checked={selectAll}
@ -160,11 +154,12 @@ function ChunksPageContent() {
Select all
</Label>
</div> */}
</div>
</div>
</div>
{/* Content Area - matches knowledge page structure */}
<div className="flex-1 overflow-auto pr-6">
<div className="grid gap-6 grid-cols-1 lg:grid-cols-[3fr_1fr]">
{/* Content Area */}
<div className="row-start-2 lg:row-start-1">
{isFetching ? (
<div className="flex items-center justify-center h-64">
<div className="text-center">
@ -185,7 +180,7 @@ function ChunksPageContent() {
</div>
) : (
<div className="space-y-4 pb-6">
{chunksFilteredByQuery.map((chunk, index) => (
{chunks.map((chunk, index) => (
<div
key={chunk.filename + index}
className="bg-muted rounded-lg p-4 border border-border/50"
@ -242,31 +237,30 @@ function ChunksPageContent() {
</div>
)}
</div>
</div>
{/* Right panel - Summary (TODO), Technical details, */}
{chunks.length > 0 && (
<div className="w-[320px] py-20 px-2">
<div className="mb-8">
<h2 className="text-xl font-semibold mt-3 mb-4">
Technical details
</h2>
<dl>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">
Total chunks
</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{chunks.length}
</dd>
</div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Avg length</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{averageChunkLength.toFixed(0)} chars
</dd>
</div>
{/* TODO: Uncomment after data is available */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
{/* Right panel - Summary (TODO), Technical details, */}
{chunks.length > 0 && (
<div className="min-w-[200px]">
<div className="mb-8">
<h2 className="text-xl font-semibold mb-4">Technical details</h2>
<dl>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">
Total chunks
</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{chunks.length}
</dd>
</div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">
Avg length
</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{averageChunkLength.toFixed(0)} chars
</dd>
</div>
{/* TODO: Uncomment after data is available */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Process time</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
</dd>
@ -276,54 +270,55 @@ function ChunksPageContent() {
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
</dd>
</div> */}
</dl>
</div>
<div className="mb-8">
<h2 className="text-xl font-semibold mt-2 mb-3">
Original document
</h2>
<dl>
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
</dl>
</div>
<div className="mb-4">
<h2 className="text-xl font-semibold mt-2 mb-3">
Original document
</h2>
<dl>
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Name</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData?.filename}
</dd>
</div> */}
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Type</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"}
</dd>
</div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Size</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData?.size
? `${Math.round(fileData.size / 1024)} KB`
: "Unknown"}
</dd>
</div>
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Type</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"}
</dd>
</div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Size</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData?.size
? `${Math.round(fileData.size / 1024)} KB`
: "Unknown"}
</dd>
</div>
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Uploaded</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
N/A
</dd>
</div> */}
{/* TODO: Uncomment after data is available */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
{/* TODO: Uncomment after data is available */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Source</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"></dd>
</div> */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Updated</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
N/A
</dd>
</div> */}
</dl>
</dl>
</div>
</div>
</div>
)}
)}
</div>
</div>
);
}

@ -1,6 +1,10 @@
[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "openrag"
version = "0.1.15"
version = "0.1.19"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.13"
@ -31,6 +35,9 @@ dependencies = [
"docling-serve>=1.4.1",
]
[dependency-groups]
dev = ["pytest>=8", "pytest-asyncio>=0.21.0", "pytest-mock>=3.12.0", "pytest-cov>=4.0.0"]
[project.scripts]
openrag = "tui.main:run_tui"

scripts/docling_ctl.py (new file, 91 lines)

@ -0,0 +1,91 @@
#!/usr/bin/env python3
"""Helper script to control docling-serve using DoclingManager for CI/testing."""
import sys
import asyncio
import argparse
from pathlib import Path
# Add src to path so we can import DoclingManager
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
from tui.managers.docling_manager import DoclingManager
async def start_docling(port: int = 5001, host: str | None = None, enable_ui: bool = False):
"""Start docling-serve."""
manager = DoclingManager()
if manager.is_running():
print(f"Docling-serve is already running")
status = manager.get_status()
print(f"Endpoint: {status['endpoint']}")
return 0
host_msg = f"{host}:{port}" if host else f"auto-detected host:{port}"
print(f"Starting docling-serve on {host_msg}...")
success, message = await manager.start(port=port, host=host, enable_ui=enable_ui)
if success:
print(f"{message}")
status = manager.get_status()
print(f"Endpoint: {status['endpoint']}")
print(f"PID: {status['pid']}")
return 0
else:
print(f"{message}", file=sys.stderr)
return 1
async def stop_docling():
"""Stop docling-serve."""
manager = DoclingManager()
if not manager.is_running():
print("Docling-serve is not running")
return 0
print("Stopping docling-serve...")
success, message = await manager.stop()
if success:
print(f"{message}")
return 0
else:
print(f"{message}", file=sys.stderr)
return 1
async def status_docling():
"""Get docling-serve status."""
manager = DoclingManager()
status = manager.get_status()
print(f"Status: {status['status']}")
if status['status'] == 'running':
print(f"Endpoint: {status['endpoint']}")
print(f"Docs: {status['docs_url']}")
print(f"PID: {status['pid']}")
return 0 if status['status'] == 'running' else 1
async def main():
parser = argparse.ArgumentParser(description="Control docling-serve for CI/testing")
parser.add_argument("command", choices=["start", "stop", "status"], help="Command to run")
parser.add_argument("--port", type=int, default=5001, help="Port to run on (default: 5001)")
parser.add_argument("--host", default=None, help="Host to bind to (default: auto-detect for containers)")
parser.add_argument("--enable-ui", action="store_true", help="Enable UI")
args = parser.parse_args()
if args.command == "start":
return await start_docling(port=args.port, host=args.host if args.host else None, enable_ui=args.enable_ui)
elif args.command == "stop":
return await stop_docling()
elif args.command == "status":
return await status_docling()
if __name__ == "__main__":
sys.exit(asyncio.run(main()))
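
For contexts that skip this CLI wrapper, the same manager can be driven directly. A small sketch based only on the methods used above, assuming `src` is on `sys.path` as in this script:

import asyncio
from tui.managers.docling_manager import DoclingManager

async def restart_docling() -> None:
    manager = DoclingManager()
    success, message = await manager.start(port=5001)  # same CI port as above
    print(message)
    if success:
        print(manager.get_status()["endpoint"])
        await manager.stop()

asyncio.run(restart_docling())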

@ -28,7 +28,6 @@ def require_auth(session_manager):
async def wrapper(request: Request):
# In no-auth mode, bypass authentication entirely
if is_no_auth_mode():
logger.debug("No-auth mode: Creating anonymous user")
# Create an anonymous user object so endpoints don't break
from session_manager import User
from datetime import datetime
@ -36,7 +35,6 @@ def require_auth(session_manager):
from session_manager import AnonymousUser
request.state.user = AnonymousUser()
request.state.jwt_token = None # No JWT in no-auth mode
logger.debug("Set user_id=anonymous, jwt_token=None")
return await handler(request)
user = get_current_user(request, session_manager)

@ -13,8 +13,8 @@ from utils.container_utils import get_container_host
from utils.document_processing import create_document_converter
from utils.logging_config import get_logger
load_dotenv()
load_dotenv("../")
load_dotenv(override=False)
load_dotenv("../", override=False)
logger = get_logger(__name__)
@ -61,12 +61,6 @@ DISABLE_INGEST_WITH_LANGFLOW = os.getenv(
def is_no_auth_mode():
"""Check if we're running in no-auth mode (OAuth credentials missing)"""
result = not (GOOGLE_OAUTH_CLIENT_ID and GOOGLE_OAUTH_CLIENT_SECRET)
logger.debug(
"Checking auth mode",
no_auth_mode=result,
has_client_id=GOOGLE_OAUTH_CLIENT_ID is not None,
has_client_secret=GOOGLE_OAUTH_CLIENT_SECRET is not None,
)
return result

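With `override=False`, python-dotenv leaves any variable already set in the process environment untouched, so values exported by the shell or CI win over `.env` defaults. A small sketch of that precedence:

import os
from dotenv import load_dotenv

os.environ["OPENSEARCH_HOST"] = "localhost"  # e.g. exported by CI
load_dotenv(override=False)  # a conflicting .env value is ignored
print(os.environ["OPENSEARCH_HOST"])  # still "localhost"
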
@ -131,7 +131,7 @@ async def configure_alerting_security():
# Don't fail startup if alerting config fails
async def _ensure_opensearch_index(self):
async def _ensure_opensearch_index():
"""Ensure OpenSearch index exists when using traditional connector service."""
try:
# Check if index already exists
@ -242,6 +242,9 @@ def generate_jwt_keys():
capture_output=True,
)
# Set restrictive permissions on private key (readable by owner only)
os.chmod(private_key_path, 0o600)
# Generate public key
subprocess.run(
[
@ -257,12 +260,21 @@ def generate_jwt_keys():
capture_output=True,
)
# Set permissions on public key (readable by all)
os.chmod(public_key_path, 0o644)
logger.info("Generated RSA keys for JWT signing")
except subprocess.CalledProcessError as e:
logger.error("Failed to generate RSA keys", error=str(e))
raise
else:
logger.info("RSA keys already exist, skipping generation")
# Ensure correct permissions on existing keys
try:
os.chmod(private_key_path, 0o600)
os.chmod(public_key_path, 0o644)
logger.info("RSA keys already exist, ensured correct permissions")
except OSError as e:
logger.warning("Failed to set permissions on existing keys", error=str(e))
async def init_index_when_ready():

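The `chmod` calls above pin the conventional key permissions: owner-only (0600) for the private key, world-readable (0644) for the public key. A sketch of a check that could verify them, using the `keys/` paths from the Makefile (hypothetical helper, not part of the codebase):

import os
import stat

def key_permissions_ok(private_path: str, public_path: str) -> bool:
    """Private key must be 0600 and public key 0644, matching generate_jwt_keys."""
    priv = stat.S_IMODE(os.stat(private_path).st_mode)
    pub = stat.S_IMODE(os.stat(public_path).st_mode)
    return priv == 0o600 and pub == 0o644

print(key_permissions_ok("keys/private_key.pem", "keys/public_key.pem"))
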
@ -126,7 +126,11 @@ class DocumentService:
from utils.file_utils import auto_cleanup_tempfile
import os
with auto_cleanup_tempfile() as tmp_path:
# Preserve file extension for docling format detection
filename = upload_file.filename or "uploaded"
suffix = os.path.splitext(filename)[1] or ""
with auto_cleanup_tempfile(suffix=suffix) as tmp_path:
# Stream upload file to temporary file
file_size = 0
with open(tmp_path, 'wb') as tmp_file:

@ -1 +1,8 @@
"""OpenRAG Terminal User Interface package."""
from importlib.metadata import version
try:
__version__ = version("openrag")
except Exception:
__version__ = "unknown"

@ -1,122 +0,0 @@
services:
opensearch:
image: phact/openrag-opensearch:${OPENRAG_VERSION:-latest}
#build:
# context: .
# dockerfile: Dockerfile
container_name: os
depends_on:
- openrag-backend
environment:
- discovery.type=single-node
- OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_PASSWORD}
# Run security setup in background after OpenSearch starts
command: >
bash -c "
# Start OpenSearch in background
/usr/share/opensearch/opensearch-docker-entrypoint.sh opensearch &
# Wait a bit for OpenSearch to start, then apply security config
sleep 10 && /usr/share/opensearch/setup-security.sh &
# Wait for background processes
wait
"
ports:
- "9200:9200"
- "9600:9600"
dashboards:
image: opensearchproject/opensearch-dashboards:3.0.0
container_name: osdash
depends_on:
- opensearch
environment:
OPENSEARCH_HOSTS: '["https://opensearch:9200"]'
OPENSEARCH_USERNAME: "admin"
OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD}
ports:
- "5601:5601"
openrag-backend:
image: phact/openrag-backend:${OPENRAG_VERSION:-latest}
#build:
#context: .
#dockerfile: Dockerfile.backend
container_name: openrag-backend
depends_on:
- langflow
environment:
- OPENSEARCH_HOST=opensearch
- LANGFLOW_URL=http://langflow:7860
- LANGFLOW_PUBLIC_URL=${LANGFLOW_PUBLIC_URL}
- LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
- LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
- LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
- LANGFLOW_CHAT_FLOW_ID=${LANGFLOW_CHAT_FLOW_ID}
- LANGFLOW_INGEST_FLOW_ID=${LANGFLOW_INGEST_FLOW_ID}
- LANGFLOW_URL_INGEST_FLOW_ID=${LANGFLOW_URL_INGEST_FLOW_ID}
- DISABLE_INGEST_WITH_LANGFLOW=${DISABLE_INGEST_WITH_LANGFLOW:-false}
- NUDGES_FLOW_ID=${NUDGES_FLOW_ID}
- OPENSEARCH_PORT=9200
- OPENSEARCH_USERNAME=admin
- OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
- OPENAI_API_KEY=${OPENAI_API_KEY}
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
- NVIDIA_VISIBLE_DEVICES=all
- GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID}
- GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET}
- MICROSOFT_GRAPH_OAUTH_CLIENT_ID=${MICROSOFT_GRAPH_OAUTH_CLIENT_ID}
- MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET=${MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET}
- WEBHOOK_BASE_URL=${WEBHOOK_BASE_URL}
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
volumes:
- ./documents:/app/documents:Z
- ./keys:/app/keys:Z
- ./flows:/app/flows:Z
openrag-frontend:
image: phact/openrag-frontend:${OPENRAG_VERSION:-latest}
#build:
#context: .
#dockerfile: Dockerfile.frontend
container_name: openrag-frontend
depends_on:
- openrag-backend
environment:
- OPENRAG_BACKEND_HOST=openrag-backend
ports:
- "3000:3000"
langflow:
volumes:
- ./flows:/app/flows:Z
image: phact/openrag-langflow:${LANGFLOW_VERSION:-latest}
container_name: langflow
ports:
- "7860:7860"
environment:
- OPENAI_API_KEY=${OPENAI_API_KEY}
- LANGFLOW_LOAD_FLOWS_PATH=/app/flows
- LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
- JWT=None
- OWNER=None
- OWNER_NAME=None
- OWNER_EMAIL=None
- CONNECTOR_TYPE=system
- CONNECTOR_TYPE_URL=url
- OPENRAG-QUERY-FILTER="{}"
- OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
- FILENAME=None
- MIMETYPE=None
- FILESIZE=0
- LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT,OPENRAG-QUERY-FILTER,OPENSEARCH_PASSWORD,OWNER,OWNER_NAME,OWNER_EMAIL,CONNECTOR_TYPE,FILENAME,MIMETYPE,FILESIZE
- LANGFLOW_LOG_LEVEL=DEBUG
- LANGFLOW_AUTO_LOGIN=${LANGFLOW_AUTO_LOGIN}
- LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
- LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
- LANGFLOW_NEW_USER_IS_ACTIVE=${LANGFLOW_NEW_USER_IS_ACTIVE}
- LANGFLOW_ENABLE_SUPERUSER_CLI=${LANGFLOW_ENABLE_SUPERUSER_CLI}
# - DEFAULT_FOLDER_NAME=OpenRAG
- HIDE_GETTING_STARTED_PROGRESS=true

@ -0,0 +1 @@
../../../docker-compose-cpu.yml

@ -1,122 +0,0 @@
services:
opensearch:
image: phact/openrag-opensearch:${OPENRAG_VERSION:-latest}
#build:
#context: .
#dockerfile: Dockerfile
container_name: os
depends_on:
- openrag-backend
environment:
- discovery.type=single-node
- OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_PASSWORD}
# Run security setup in background after OpenSearch starts
command: >
bash -c "
# Start OpenSearch in background
/usr/share/opensearch/opensearch-docker-entrypoint.sh opensearch &
# Wait a bit for OpenSearch to start, then apply security config
sleep 10 && /usr/share/opensearch/setup-security.sh &
# Wait for background processes
wait
"
ports:
- "9200:9200"
- "9600:9600"
dashboards:
image: opensearchproject/opensearch-dashboards:3.0.0
container_name: osdash
depends_on:
- opensearch
environment:
OPENSEARCH_HOSTS: '["https://opensearch:9200"]'
OPENSEARCH_USERNAME: "admin"
OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD}
ports:
- "5601:5601"
openrag-backend:
image: phact/openrag-backend:${OPENRAG_VERSION:-latest}
#build:
#context: .
#dockerfile: Dockerfile.backend
container_name: openrag-backend
depends_on:
- langflow
environment:
- OPENSEARCH_HOST=opensearch
- LANGFLOW_URL=http://langflow:7860
- LANGFLOW_PUBLIC_URL=${LANGFLOW_PUBLIC_URL}
- LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
- LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
- LANGFLOW_CHAT_FLOW_ID=${LANGFLOW_CHAT_FLOW_ID}
- LANGFLOW_INGEST_FLOW_ID=${LANGFLOW_INGEST_FLOW_ID}
- LANGFLOW_URL_INGEST_FLOW_ID=${LANGFLOW_URL_INGEST_FLOW_ID}
- DISABLE_INGEST_WITH_LANGFLOW=${DISABLE_INGEST_WITH_LANGFLOW:-false}
- NUDGES_FLOW_ID=${NUDGES_FLOW_ID}
- OPENSEARCH_PORT=9200
- OPENSEARCH_USERNAME=admin
- OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
- OPENAI_API_KEY=${OPENAI_API_KEY}
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
- NVIDIA_VISIBLE_DEVICES=all
- GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID}
- GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET}
- MICROSOFT_GRAPH_OAUTH_CLIENT_ID=${MICROSOFT_GRAPH_OAUTH_CLIENT_ID}
- MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET=${MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET}
- WEBHOOK_BASE_URL=${WEBHOOK_BASE_URL}
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
volumes:
- ./documents:/app/documents:Z
- ./keys:/app/keys:Z
- ./flows:/app/flows:Z
gpus: all
openrag-frontend:
image: phact/openrag-frontend:${OPENRAG_VERSION:-latest}
#build:
#context: .
#dockerfile: Dockerfile.frontend
container_name: openrag-frontend
depends_on:
- openrag-backend
environment:
- OPENRAG_BACKEND_HOST=openrag-backend
ports:
- "3000:3000"
langflow:
volumes:
- ./flows:/app/flows:Z
image: phact/openrag-langflow:${LANGFLOW_VERSION:-latest}
container_name: langflow
ports:
- "7860:7860"
environment:
- OPENAI_API_KEY=${OPENAI_API_KEY}
- LANGFLOW_LOAD_FLOWS_PATH=/app/flows
- LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
- JWT=None
- OWNER=None
- OWNER_NAME=None
- OWNER_EMAIL=None
- CONNECTOR_TYPE=system
- CONNECTOR_TYPE_URL=url
- OPENRAG-QUERY-FILTER="{}"
- OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
- FILENAME=None
- MIMETYPE=None
- FILESIZE=0
- LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT,OPENRAG-QUERY-FILTER,OPENSEARCH_PASSWORD,OWNER,OWNER_NAME,OWNER_EMAIL,CONNECTOR_TYPE,FILENAME,MIMETYPE,FILESIZE
- LANGFLOW_LOG_LEVEL=DEBUG
- LANGFLOW_AUTO_LOGIN=${LANGFLOW_AUTO_LOGIN}
- LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
- LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
- LANGFLOW_NEW_USER_IS_ACTIVE=${LANGFLOW_NEW_USER_IS_ACTIVE}
- LANGFLOW_ENABLE_SUPERUSER_CLI=${LANGFLOW_ENABLE_SUPERUSER_CLI}
# - DEFAULT_FOLDER_NAME="OpenRAG"
- HIDE_GETTING_STARTED_PROGRESS=true

@ -0,0 +1 @@
../../../docker-compose.yml

@ -0,0 +1 @@
../../../../documents/2506.08231v1.pdf

@ -0,0 +1 @@
../../../../documents/ai-human-resources.pdf

@ -0,0 +1 @@
../../../../documents/warmup_ocr.pdf

@ -0,0 +1 @@
../../../../../flows/components/ollama_embedding.json

@ -0,0 +1 @@
../../../../../flows/components/ollama_llm.json

@ -0,0 +1 @@
../../../../../flows/components/ollama_llm_text.json

@ -0,0 +1 @@
../../../../../flows/components/watsonx_embedding.json

@ -0,0 +1 @@
../../../../../flows/components/watsonx_llm.json

@ -0,0 +1 @@
../../../../../flows/components/watsonx_llm_text.json

@ -0,0 +1 @@
../../../../flows/ingestion_flow.json

@ -0,0 +1 @@
../../../../flows/openrag_agent.json

@ -0,0 +1 @@
../../../../flows/openrag_ingest_docling.json

@ -0,0 +1 @@
../../../../flows/openrag_nudges.json

@ -0,0 +1 @@
../../../../flows/openrag_url_mcp.json

@ -2,6 +2,7 @@
import sys
from pathlib import Path
from typing import Iterable, Optional
from textual.app import App, ComposeResult
from utils.logging_config import get_logger
try:
@ -305,41 +306,103 @@ class OpenRAGTUI(App):
return True, "Runtime requirements satisfied"
def copy_sample_documents():
def _copy_assets(resource_tree, destination: Path, allowed_suffixes: Optional[Iterable[str]] = None, *, force: bool = False) -> None:
"""Copy packaged assets into destination and optionally overwrite existing files.
When ``force`` is True, files are refreshed if the packaged bytes differ.
"""
destination.mkdir(parents=True, exist_ok=True)
for resource in resource_tree.iterdir():
target_path = destination / resource.name
if resource.is_dir():
_copy_assets(resource, target_path, allowed_suffixes, force=force)
continue
if allowed_suffixes and not any(resource.name.endswith(suffix) for suffix in allowed_suffixes):
continue
resource_bytes = resource.read_bytes()
if target_path.exists():
if not force:
continue
try:
if target_path.read_bytes() == resource_bytes:
continue
except Exception as read_error:
logger.debug(f"Failed to read existing asset {target_path}: {read_error}")
target_path.write_bytes(resource_bytes)
logger.info(f"Copied bundled asset: {target_path}")
def copy_sample_documents(*, force: bool = False) -> None:
"""Copy sample documents from package to current directory if they don't exist."""
documents_dir = Path("documents")
# Check if documents directory already exists and has files
if documents_dir.exists() and any(documents_dir.glob("*.pdf")):
return # Documents already exist, don't overwrite
try:
# Get sample documents from package assets
assets_files = files("tui._assets.documents")
# Create documents directory if it doesn't exist
documents_dir.mkdir(exist_ok=True)
# Copy each sample document
for resource in assets_files.iterdir():
if resource.is_file() and resource.name.endswith('.pdf'):
dest_path = documents_dir / resource.name
if not dest_path.exists():
content = resource.read_bytes()
dest_path.write_bytes(content)
logger.info(f"Copied sample document: {resource.name}")
_copy_assets(assets_files, documents_dir, allowed_suffixes=(".pdf",), force=force)
except Exception as e:
logger.debug(f"Could not copy sample documents: {e}")
# This is not a critical error - the app can work without sample documents
def copy_sample_flows(*, force: bool = False) -> None:
"""Copy sample flows from package to current directory if they don't exist."""
flows_dir = Path("flows")
try:
assets_files = files("tui._assets.flows")
_copy_assets(assets_files, flows_dir, allowed_suffixes=(".json",), force=force)
except Exception as e:
logger.debug(f"Could not copy sample flows: {e}")
# The app can proceed without bundled flows
def copy_compose_files(*, force: bool = False) -> None:
"""Copy docker-compose templates into the workspace if they are missing."""
try:
assets_root = files("tui._assets")
except Exception as e:
logger.debug(f"Could not access compose assets: {e}")
return
for filename in ("docker-compose.yml", "docker-compose-cpu.yml"):
destination = Path(filename)
if destination.exists() and not force:
continue
try:
resource = assets_root.joinpath(filename)
if not resource.is_file():
logger.debug(f"Compose template not found in assets: {filename}")
continue
resource_bytes = resource.read_bytes()
if destination.exists():
try:
if destination.read_bytes() == resource_bytes:
continue
except Exception as read_error:
logger.debug(f"Failed to read existing compose file {destination}: {read_error}")
destination.write_bytes(resource_bytes)
logger.info(f"Copied docker-compose template: {filename}")
except Exception as error:
logger.debug(f"Could not copy compose file {filename}: {error}")
def run_tui():
"""Run the OpenRAG TUI application."""
app = None
try:
# Copy sample documents on first run
copy_sample_documents()
# Keep bundled assets aligned with the packaged versions
copy_sample_documents(force=True)
copy_sample_flows(force=True)
copy_compose_files(force=True)
app = OpenRAGTUI()
app.run()

@ -10,6 +10,7 @@ from rich.text import Text
from rich.align import Align
from dotenv import load_dotenv
from .. import __version__
from ..managers.container_manager import ContainerManager, ServiceStatus
from ..managers.env_manager import EnvManager
from ..managers.docling_manager import DoclingManager
@ -116,7 +117,8 @@ class WelcomeScreen(Screen):
"""
welcome_text.append(ascii_art, style="bold white")
welcome_text.append("Terminal User Interface for OpenRAG\n\n", style="dim")
welcome_text.append("Terminal User Interface for OpenRAG\n", style="dim")
welcome_text.append(f"v{__version__}\n\n", style="dim cyan")
# Check if all services are running
all_services_running = self.services_running and self.docling_running

@ -157,10 +157,22 @@ def guess_host_ip_for_containers(logger=None) -> str:
import logging
import re
import shutil
import socket
import subprocess
log = logger or logging.getLogger(__name__)
def can_bind_to_address(ip_addr: str) -> bool:
"""Test if we can bind to the given IP address."""
try:
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
sock.bind((ip_addr, 0)) # Port 0 = let OS choose a free port
return True
except (OSError, socket.error) as e:
log.debug("Cannot bind to %s: %s", ip_addr, e)
return False
def run(cmd, timeout=2, text=True):
return subprocess.run(cmd, capture_output=True, text=text, timeout=timeout)
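The port-0 bind probe above can be exercised on its own; a minimal standalone sketch of the same technique (the probe addresses are illustrative, not from this codebase):

import socket

def probe(ip_addr: str) -> bool:
    # Binding to port 0 asks the OS for any free port, so success means
    # the address is locally bindable; the same check as can_bind_to_address.
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            sock.bind((ip_addr, 0))
        return True
    except OSError:
        return False

print(probe("127.0.0.1"))    # expected: True
print(probe("203.0.113.9"))  # expected: False unless the host owns this TEST-NET address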
@ -261,10 +273,23 @@ def guess_host_ip_for_containers(logger=None) -> str:
"Container-reachable host IP candidates: %s",
", ".join(ordered_candidates),
)
# Try each candidate and return the first one we can bind to
for ip_addr in ordered_candidates:
if can_bind_to_address(ip_addr):
if len(ordered_candidates) > 1:
log.info("Selected bindable host IP: %s", ip_addr)
else:
log.info("Container-reachable host IP: %s", ip_addr)
return ip_addr
log.debug("Skipping %s (cannot bind)", ip_addr)
# None of the candidates were bindable, fall back to 127.0.0.1
log.warning(
"None of the discovered IPs (%s) can be bound; falling back to 127.0.0.1",
", ".join(ordered_candidates),
)
return "127.0.0.1"
log.warning(
"No container bridge IP found. For rootless Podman (slirp4netns) there may be no host bridge; publish ports or use 10.0.2.2 from the container."

1
tests/__init__.py Normal file
View file

@ -0,0 +1 @@
# Test package

85
tests/conftest.py Normal file
View file

@ -0,0 +1,85 @@
import asyncio
import os
import tempfile
from pathlib import Path
import pytest
import pytest_asyncio
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
# Force no-auth mode for testing by setting OAuth credentials to empty strings
# This ensures anonymous JWT tokens are created automatically
os.environ['GOOGLE_OAUTH_CLIENT_ID'] = ''
os.environ['GOOGLE_OAUTH_CLIENT_SECRET'] = ''
from src.config.settings import clients
from src.session_manager import SessionManager
from src.main import generate_jwt_keys
@pytest.fixture(scope="session")
def event_loop():
"""Create an instance of the default event loop for the test session."""
loop = asyncio.get_event_loop_policy().new_event_loop()
yield loop
loop.close()
@pytest_asyncio.fixture
async def opensearch_client():
"""OpenSearch client for testing - requires running OpenSearch."""
await clients.initialize()
yield clients.opensearch
# Cleanup test indices after tests
try:
await clients.opensearch.indices.delete(index="test_documents")
except Exception:
pass
@pytest.fixture
def session_manager():
"""Session manager for testing."""
# Generate RSA keys before creating SessionManager
generate_jwt_keys()
sm = SessionManager("test-secret-key")
print(f"[DEBUG] SessionManager created with keys: private={sm.private_key_path}, public={sm.public_key_path}")
return sm
@pytest.fixture
def test_documents_dir():
"""Create a temporary directory with test documents."""
with tempfile.TemporaryDirectory() as temp_dir:
test_dir = Path(temp_dir)
# Create some test files in supported formats
(test_dir / "test1.md").write_text("# Machine Learning Document\n\nThis is a test document about machine learning.")
(test_dir / "test2.md").write_text("# AI Document\n\nAnother document discussing artificial intelligence.")
(test_dir / "test3.md").write_text("# Data Science Document\n\nThis is a markdown file about data science.")
# Create subdirectory with files
sub_dir = test_dir / "subdir"
sub_dir.mkdir()
(sub_dir / "nested.md").write_text("# Neural Networks\n\nNested document about neural networks.")
yield test_dir
@pytest.fixture
def test_single_file():
"""Create a single test file."""
with tempfile.NamedTemporaryFile(mode='w', suffix='_test_document.md', delete=False) as f:
f.write("# Single Test Document\n\nThis is a test document about OpenRAG testing framework. This document contains multiple sentences to ensure proper chunking. The content should be indexed and searchable in OpenSearch after processing.")
temp_path = f.name
yield temp_path
# Cleanup
try:
os.unlink(temp_path)
except FileNotFoundError:
pass
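For orientation, a hypothetical test consuming these fixtures could look like the sketch below (the test name and assertions are illustrative, not part of the suite):

import pytest

@pytest.mark.asyncio
async def test_fixture_smoke(opensearch_client, test_documents_dir, session_manager):
    # opensearch_client is initialized by its fixture; info() is a cheap connectivity check
    info = await opensearch_client.info()
    assert "version" in info
    # the temporary documents tree contains the markdown files created above
    assert (test_documents_dir / "test1.md").exists()
    # the session manager was constructed with freshly generated RSA keys
    assert session_manager.private_key_path is not None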

View file

@ -0,0 +1 @@
# Integration tests package

View file

@ -0,0 +1,296 @@
import asyncio
import os
from pathlib import Path
import httpx
import pytest
async def wait_for_service_ready(client: httpx.AsyncClient, timeout_s: float = 30.0):
"""Poll existing endpoints until the app and OpenSearch are ready.
Strategy:
- GET /auth/me should return 200 immediately (confirms app is up).
- POST /search with query "*" avoids embeddings and checks OpenSearch/index readiness.
"""
# First test OpenSearch JWT directly
from src.session_manager import SessionManager, AnonymousUser
import hashlib
import jwt as jwt_lib
sm = SessionManager("test")
test_token = sm.create_jwt_token(AnonymousUser())
token_hash = hashlib.sha256(test_token.encode()).hexdigest()[:16]
print(f"[DEBUG] Generated test JWT token hash: {token_hash}")
print(f"[DEBUG] Using key paths: private={sm.private_key_path}, public={sm.public_key_path}")
with open(sm.public_key_path, 'rb') as f:
pub_key_hash = hashlib.sha256(f.read()).hexdigest()[:16]
print(f"[DEBUG] Public key hash: {pub_key_hash}")
# Decode token to see claims
decoded = jwt_lib.decode(test_token, options={"verify_signature": False})
print(f"[DEBUG] JWT claims: iss={decoded.get('iss')}, sub={decoded.get('sub')}, aud={decoded.get('aud')}, roles={decoded.get('roles')}")
# Test OpenSearch JWT auth directly
opensearch_url = f"https://{os.getenv('OPENSEARCH_HOST', 'localhost')}:{os.getenv('OPENSEARCH_PORT', '9200')}"
print(f"[DEBUG] Testing JWT auth directly against: {opensearch_url}/documents/_search")
async with httpx.AsyncClient(verify=False) as os_client:
r_os = await os_client.post(
f"{opensearch_url}/documents/_search",
headers={"Authorization": f"Bearer {test_token}"},
json={"query": {"match_all": {}}, "size": 0}
)
print(f"[DEBUG] Direct OpenSearch JWT test: status={r_os.status_code}, body={r_os.text[:500]}")
if r_os.status_code == 401:
print("[DEBUG] ❌ OpenSearch rejected JWT! OIDC config not working.")
else:
print("[DEBUG] ✓ OpenSearch accepted JWT!")
deadline = asyncio.get_event_loop().time() + timeout_s
last_err = None
while asyncio.get_event_loop().time() < deadline:
try:
r1 = await client.get("/auth/me")
print(f"[DEBUG] /auth/me status={r1.status_code}, body={r1.text[:200]}")
if r1.status_code in (401, 403):
raise AssertionError(f"/auth/me returned {r1.status_code}: {r1.text}")
if r1.status_code != 200:
await asyncio.sleep(0.5)
continue
# match_all readiness probe; no embeddings
r2 = await client.post("/search", json={"query": "*", "limit": 0})
print(f"[DEBUG] /search status={r2.status_code}, body={r2.text[:200]}")
if r2.status_code in (401, 403):
print(f"[DEBUG] Search failed with auth error. Response: {r2.text}")
raise AssertionError(f"/search returned {r2.status_code}: {r2.text}")
if r2.status_code == 200:
print("[DEBUG] Service ready!")
return
last_err = r2.text
except AssertionError:
raise
except Exception as e:
last_err = str(e)
print(f"[DEBUG] Exception during readiness check: {e}")
await asyncio.sleep(0.5)
raise AssertionError(f"Service not ready in time: {last_err}")
@pytest.mark.parametrize("disable_langflow_ingest", [True, False])
@pytest.mark.asyncio
async def test_upload_and_search_endpoint(tmp_path: Path, disable_langflow_ingest: bool):
"""Boot the ASGI app and exercise /upload and /search endpoints."""
# Choose the ingest pipeline per test param and disable startup ingest
os.environ["DISABLE_INGEST_WITH_LANGFLOW"] = "true" if disable_langflow_ingest else "false"
os.environ["DISABLE_STARTUP_INGEST"] = "true"
# Force no-auth mode so endpoints bypass authentication
os.environ["GOOGLE_OAUTH_CLIENT_ID"] = ""
os.environ["GOOGLE_OAUTH_CLIENT_SECRET"] = ""
# Import after env vars so settings pick them up; clear cached modules
# so the router and settings see the new flags
import sys
for mod in [
"src.api.router",
"api.router", # Also clear the non-src path
"src.api.connector_router",
"api.connector_router",
"src.config.settings",
"config.settings",
"src.auth_middleware",
"auth_middleware",
"src.main",
"api", # Clear the api package itself
"src.api",
"services", # Clear services that import clients
"src.services",
"services.search_service",
"src.services.search_service",
]:
sys.modules.pop(mod, None)
from src.main import create_app, startup_tasks
import src.api.router as upload_router
from src.config.settings import clients, INDEX_NAME, DISABLE_INGEST_WITH_LANGFLOW
# Ensure a clean index before startup
await clients.initialize()
try:
await clients.opensearch.indices.delete(index=INDEX_NAME)
# Wait for deletion to complete
await asyncio.sleep(1)
except Exception:
pass
app = await create_app()
# Manually run startup tasks since httpx ASGI transport here doesn't manage lifespan
await startup_tasks(app.state.services)
# Ensure index exists for tests (startup_tasks only creates it if DISABLE_INGEST_WITH_LANGFLOW=True)
from src.main import _ensure_opensearch_index
await _ensure_opensearch_index()
# Verify index is truly empty after startup
try:
count_response = await clients.opensearch.count(index=INDEX_NAME)
doc_count = count_response.get('count', 0)
assert doc_count == 0, f"Index should be empty after startup but contains {doc_count} documents"
except Exception:
# If count fails, the index might not exist yet, which is fine
pass
transport = httpx.ASGITransport(app=app)
try:
async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client:
# Wait for app + OpenSearch readiness using existing endpoints
await wait_for_service_ready(client)
# Create a temporary markdown file to upload
file_path = tmp_path / "endpoint_test_doc.md"
file_text = (
"# Single Test Document\n\n"
"This is a test document about OpenRAG testing framework. "
"The content should be indexed and searchable in OpenSearch after processing."
)
file_path.write_text(file_text)
# POST via router (multipart)
files = {
"file": (
file_path.name,
file_path.read_bytes(),
"text/markdown",
)
}
upload_resp = await client.post("/upload", files=files)
body = upload_resp.json()
assert upload_resp.status_code == 201, upload_resp.text
assert body.get("status") in {"indexed", "unchanged"}
assert isinstance(body.get("id"), str)
# Poll search for the specific content until it's indexed
async def _wait_for_indexed(timeout_s: float = 30.0):
deadline = asyncio.get_event_loop().time() + timeout_s
while asyncio.get_event_loop().time() < deadline:
resp = await client.post(
"/search",
json={"query": "OpenRAG testing framework", "limit": 5},
)
if resp.status_code == 200 and resp.json().get("results"):
return resp
await asyncio.sleep(0.5)
return resp
search_resp = await _wait_for_indexed()
# POST /search
assert search_resp.status_code == 200, search_resp.text
search_body = search_resp.json()
# Basic response shape; the results list may legitimately be empty
assert isinstance(search_body.get("results"), list)
# When hits exist, confirm our phrase is present in top result content
if search_body["results"]:
top = search_body["results"][0]
assert "text" in top or "content" in top
text = top.get("text") or top.get("content")
assert isinstance(text, str)
assert "testing" in text.lower()
finally:
# Explicitly close global clients to avoid aiohttp warnings
from src.config.settings import clients
try:
await clients.close()
except Exception:
pass
@pytest.mark.parametrize("disable_langflow_ingest", [True, False])
@pytest.mark.asyncio
async def test_router_upload_ingest_traditional(tmp_path: Path, disable_langflow_ingest: bool):
"""Exercise the router endpoint to ensure it routes to traditional upload when Langflow ingest is disabled."""
os.environ["DISABLE_INGEST_WITH_LANGFLOW"] = "true" if disable_langflow_ingest else "false"
os.environ["DISABLE_STARTUP_INGEST"] = "true"
os.environ["GOOGLE_OAUTH_CLIENT_ID"] = ""
os.environ["GOOGLE_OAUTH_CLIENT_SECRET"] = ""
import sys
for mod in [
"src.api.router",
"api.router", # Also clear the non-src path
"src.api.connector_router",
"api.connector_router",
"src.config.settings",
"config.settings",
"src.auth_middleware",
"auth_middleware",
"src.main",
"api", # Clear the api package itself
"src.api",
"services", # Clear services that import clients
"src.services",
"services.search_service",
"src.services.search_service",
]:
sys.modules.pop(mod, None)
from src.main import create_app, startup_tasks
import src.api.router as upload_router
from src.config.settings import clients, INDEX_NAME, DISABLE_INGEST_WITH_LANGFLOW
# Ensure a clean index before startup
await clients.initialize()
try:
await clients.opensearch.indices.delete(index=INDEX_NAME)
# Wait for deletion to complete
await asyncio.sleep(1)
except Exception:
pass
app = await create_app()
await startup_tasks(app.state.services)
# Ensure index exists for tests (startup_tasks only creates it if DISABLE_INGEST_WITH_LANGFLOW=True)
from src.main import _ensure_opensearch_index
await _ensure_opensearch_index()
# Verify index is truly empty after startup
try:
count_response = await clients.opensearch.count(index=INDEX_NAME)
doc_count = count_response.get('count', 0)
assert doc_count == 0, f"Index should be empty after startup but contains {doc_count} documents"
except Exception:
# If count fails, the index might not exist yet, which is fine
pass
transport = httpx.ASGITransport(app=app)
try:
async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client:
await wait_for_service_ready(client)
file_path = tmp_path / "router_test_doc.md"
file_path.write_text("# Router Test\n\nThis file validates the upload router.")
files = {
"file": (
file_path.name,
file_path.read_bytes(),
"text/markdown",
)
}
resp = await client.post("/router/upload_ingest", files=files)
data = resp.json()
print(f"data: {data}")
if disable_langflow_ingest:
assert resp.status_code in (201, 202), resp.text
assert data.get("status") in {"indexed", "unchanged"}
assert isinstance(data.get("id"), str)
else:
assert resp.status_code in (201, 202), resp.text
assert isinstance(data.get("task_id"), str)
assert data.get("file_count") == 1
finally:
from src.config.settings import clients
try:
await clients.close()
except Exception:
pass

View file

@ -0,0 +1,118 @@
import asyncio
import os
from pathlib import Path
import httpx
import pytest
async def wait_for_ready(client: httpx.AsyncClient, timeout_s: float = 30.0):
deadline = asyncio.get_event_loop().time() + timeout_s
last_err = None
while asyncio.get_event_loop().time() < deadline:
try:
r1 = await client.get("/auth/me")
if r1.status_code != 200:
await asyncio.sleep(0.5)
continue
r2 = await client.post("/search", json={"query": "*", "limit": 0})
if r2.status_code == 200:
return
last_err = r2.text
except Exception as e:
last_err = str(e)
await asyncio.sleep(0.5)
raise AssertionError(f"Service not ready in time: {last_err}")
def count_files_in_documents() -> int:
base_dir = Path.cwd() / "documents"
if not base_dir.is_dir():
return 0
return sum(1 for p in base_dir.rglob("*") if p.is_file())
@pytest.mark.parametrize("disable_langflow_ingest", [True, False])
@pytest.mark.asyncio
async def test_startup_ingest_creates_task(disable_langflow_ingest: bool):
# Ensure startup ingest runs and choose pipeline per param
os.environ["DISABLE_STARTUP_INGEST"] = "false"
os.environ["DISABLE_INGEST_WITH_LANGFLOW"] = (
"true" if disable_langflow_ingest else "false"
)
# Force no-auth mode for simpler endpoint access
os.environ["GOOGLE_OAUTH_CLIENT_ID"] = ""
os.environ["GOOGLE_OAUTH_CLIENT_SECRET"] = ""
# Reload settings to pick up env for this test run
import sys
for mod in [
"src.api.router",
"src.api.connector_router",
"src.config.settings",
"src.auth_middleware",
"src.main",
]:
sys.modules.pop(mod, None)
from src.main import create_app, startup_tasks
from src.config.settings import clients, INDEX_NAME
# Ensure a clean index before startup
await clients.initialize()
try:
await clients.opensearch.indices.delete(index=INDEX_NAME)
except Exception:
pass
app = await create_app()
# Trigger startup tasks explicitly
await startup_tasks(app.state.services)
# Ensure index exists for tests (startup_tasks only creates it if DISABLE_INGEST_WITH_LANGFLOW=True)
from src.main import _ensure_opensearch_index
await _ensure_opensearch_index()
transport = httpx.ASGITransport(app=app)
try:
async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client:
await wait_for_ready(client)
expected_files = count_files_in_documents()
# Poll /tasks until we see at least one startup ingest task
async def _wait_for_task(timeout_s: float = 60.0):
deadline = asyncio.get_event_loop().time() + timeout_s
last = None
while asyncio.get_event_loop().time() < deadline:
resp = await client.get("/tasks")
if resp.status_code == 200:
data = resp.json()
last = data
tasks = data.get("tasks") if isinstance(data, dict) else None
if isinstance(tasks, list) and len(tasks) > 0:
return tasks
await asyncio.sleep(0.5)
return last.get("tasks") if isinstance(last, dict) else last
tasks = await _wait_for_task()
if expected_files == 0:
return # Nothing to do
if not (isinstance(tasks, list) and len(tasks) > 0):
# Fallback: verify that documents were indexed as a sign of startup ingest
sr = await client.post("/search", json={"query": "*", "limit": 1})
assert sr.status_code == 200, sr.text
total = sr.json().get("total")
assert isinstance(total, int) and total >= 0, "Startup ingest did not index documents"
return
newest = tasks[0]
assert "task_id" in newest
assert newest.get("total_files") == expected_files
finally:
# Explicitly close global clients to avoid aiohttp warnings
from src.config.settings import clients
try:
await clients.close()
except Exception:
pass

162
uv.lock generated
View file

@ -2,10 +2,10 @@ version = 1
revision = 2
requires-python = ">=3.13"
resolution-markers = [
"sys_platform == 'darwin'",
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"platform_machine == 'x86_64' and sys_platform == 'linux'",
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
"sys_platform == 'darwin'",
]
[[package]]
@ -291,8 +291,8 @@ name = "click"
version = "8.2.1"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"platform_machine == 'x86_64' and sys_platform == 'linux'",
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
]
dependencies = [
@ -312,6 +312,67 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
]
[[package]]
name = "coverage"
version = "7.10.7"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/51/26/d22c300112504f5f9a9fd2297ce33c35f3d353e4aeb987c8419453b2a7c2/coverage-7.10.7.tar.gz", hash = "sha256:f4ab143ab113be368a3e9b795f9cd7906c5ef407d6173fe9675a902e1fffc239", size = 827704, upload-time = "2025-09-21T20:03:56.815Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/9a/94/b765c1abcb613d103b64fcf10395f54d69b0ef8be6a0dd9c524384892cc7/coverage-7.10.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:981a651f543f2854abd3b5fcb3263aac581b18209be49863ba575de6edf4c14d", size = 218320, upload-time = "2025-09-21T20:01:56.629Z" },
{ url = "https://files.pythonhosted.org/packages/72/4f/732fff31c119bb73b35236dd333030f32c4bfe909f445b423e6c7594f9a2/coverage-7.10.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:73ab1601f84dc804f7812dc297e93cd99381162da39c47040a827d4e8dafe63b", size = 218575, upload-time = "2025-09-21T20:01:58.203Z" },
{ url = "https://files.pythonhosted.org/packages/87/02/ae7e0af4b674be47566707777db1aa375474f02a1d64b9323e5813a6cdd5/coverage-7.10.7-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:a8b6f03672aa6734e700bbcd65ff050fd19cddfec4b031cc8cf1c6967de5a68e", size = 249568, upload-time = "2025-09-21T20:01:59.748Z" },
{ url = "https://files.pythonhosted.org/packages/a2/77/8c6d22bf61921a59bce5471c2f1f7ac30cd4ac50aadde72b8c48d5727902/coverage-7.10.7-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:10b6ba00ab1132a0ce4428ff68cf50a25efd6840a42cdf4239c9b99aad83be8b", size = 252174, upload-time = "2025-09-21T20:02:01.192Z" },
{ url = "https://files.pythonhosted.org/packages/b1/20/b6ea4f69bbb52dac0aebd62157ba6a9dddbfe664f5af8122dac296c3ee15/coverage-7.10.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c79124f70465a150e89340de5963f936ee97097d2ef76c869708c4248c63ca49", size = 253447, upload-time = "2025-09-21T20:02:02.701Z" },
{ url = "https://files.pythonhosted.org/packages/f9/28/4831523ba483a7f90f7b259d2018fef02cb4d5b90bc7c1505d6e5a84883c/coverage-7.10.7-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:69212fbccdbd5b0e39eac4067e20a4a5256609e209547d86f740d68ad4f04911", size = 249779, upload-time = "2025-09-21T20:02:04.185Z" },
{ url = "https://files.pythonhosted.org/packages/a7/9f/4331142bc98c10ca6436d2d620c3e165f31e6c58d43479985afce6f3191c/coverage-7.10.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7ea7c6c9d0d286d04ed3541747e6597cbe4971f22648b68248f7ddcd329207f0", size = 251604, upload-time = "2025-09-21T20:02:06.034Z" },
{ url = "https://files.pythonhosted.org/packages/ce/60/bda83b96602036b77ecf34e6393a3836365481b69f7ed7079ab85048202b/coverage-7.10.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b9be91986841a75042b3e3243d0b3cb0b2434252b977baaf0cd56e960fe1e46f", size = 249497, upload-time = "2025-09-21T20:02:07.619Z" },
{ url = "https://files.pythonhosted.org/packages/5f/af/152633ff35b2af63977edd835d8e6430f0caef27d171edf2fc76c270ef31/coverage-7.10.7-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:b281d5eca50189325cfe1f365fafade89b14b4a78d9b40b05ddd1fc7d2a10a9c", size = 249350, upload-time = "2025-09-21T20:02:10.34Z" },
{ url = "https://files.pythonhosted.org/packages/9d/71/d92105d122bd21cebba877228990e1646d862e34a98bb3374d3fece5a794/coverage-7.10.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:99e4aa63097ab1118e75a848a28e40d68b08a5e19ce587891ab7fd04475e780f", size = 251111, upload-time = "2025-09-21T20:02:12.122Z" },
{ url = "https://files.pythonhosted.org/packages/a2/9e/9fdb08f4bf476c912f0c3ca292e019aab6712c93c9344a1653986c3fd305/coverage-7.10.7-cp313-cp313-win32.whl", hash = "sha256:dc7c389dce432500273eaf48f410b37886be9208b2dd5710aaf7c57fd442c698", size = 220746, upload-time = "2025-09-21T20:02:13.919Z" },
{ url = "https://files.pythonhosted.org/packages/b1/b1/a75fd25df44eab52d1931e89980d1ada46824c7a3210be0d3c88a44aaa99/coverage-7.10.7-cp313-cp313-win_amd64.whl", hash = "sha256:cac0fdca17b036af3881a9d2729a850b76553f3f716ccb0360ad4dbc06b3b843", size = 221541, upload-time = "2025-09-21T20:02:15.57Z" },
{ url = "https://files.pythonhosted.org/packages/14/3a/d720d7c989562a6e9a14b2c9f5f2876bdb38e9367126d118495b89c99c37/coverage-7.10.7-cp313-cp313-win_arm64.whl", hash = "sha256:4b6f236edf6e2f9ae8fcd1332da4e791c1b6ba0dc16a2dc94590ceccb482e546", size = 220170, upload-time = "2025-09-21T20:02:17.395Z" },
{ url = "https://files.pythonhosted.org/packages/bb/22/e04514bf2a735d8b0add31d2b4ab636fc02370730787c576bb995390d2d5/coverage-7.10.7-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a0ec07fd264d0745ee396b666d47cef20875f4ff2375d7c4f58235886cc1ef0c", size = 219029, upload-time = "2025-09-21T20:02:18.936Z" },
{ url = "https://files.pythonhosted.org/packages/11/0b/91128e099035ece15da3445d9015e4b4153a6059403452d324cbb0a575fa/coverage-7.10.7-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:dd5e856ebb7bfb7672b0086846db5afb4567a7b9714b8a0ebafd211ec7ce6a15", size = 219259, upload-time = "2025-09-21T20:02:20.44Z" },
{ url = "https://files.pythonhosted.org/packages/8b/51/66420081e72801536a091a0c8f8c1f88a5c4bf7b9b1bdc6222c7afe6dc9b/coverage-7.10.7-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:f57b2a3c8353d3e04acf75b3fed57ba41f5c0646bbf1d10c7c282291c97936b4", size = 260592, upload-time = "2025-09-21T20:02:22.313Z" },
{ url = "https://files.pythonhosted.org/packages/5d/22/9b8d458c2881b22df3db5bb3e7369e63d527d986decb6c11a591ba2364f7/coverage-7.10.7-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:1ef2319dd15a0b009667301a3f84452a4dc6fddfd06b0c5c53ea472d3989fbf0", size = 262768, upload-time = "2025-09-21T20:02:24.287Z" },
{ url = "https://files.pythonhosted.org/packages/f7/08/16bee2c433e60913c610ea200b276e8eeef084b0d200bdcff69920bd5828/coverage-7.10.7-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:83082a57783239717ceb0ad584de3c69cf581b2a95ed6bf81ea66034f00401c0", size = 264995, upload-time = "2025-09-21T20:02:26.133Z" },
{ url = "https://files.pythonhosted.org/packages/20/9d/e53eb9771d154859b084b90201e5221bca7674ba449a17c101a5031d4054/coverage-7.10.7-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:50aa94fb1fb9a397eaa19c0d5ec15a5edd03a47bf1a3a6111a16b36e190cff65", size = 259546, upload-time = "2025-09-21T20:02:27.716Z" },
{ url = "https://files.pythonhosted.org/packages/ad/b0/69bc7050f8d4e56a89fb550a1577d5d0d1db2278106f6f626464067b3817/coverage-7.10.7-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2120043f147bebb41c85b97ac45dd173595ff14f2a584f2963891cbcc3091541", size = 262544, upload-time = "2025-09-21T20:02:29.216Z" },
{ url = "https://files.pythonhosted.org/packages/ef/4b/2514b060dbd1bc0aaf23b852c14bb5818f244c664cb16517feff6bb3a5ab/coverage-7.10.7-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:2fafd773231dd0378fdba66d339f84904a8e57a262f583530f4f156ab83863e6", size = 260308, upload-time = "2025-09-21T20:02:31.226Z" },
{ url = "https://files.pythonhosted.org/packages/54/78/7ba2175007c246d75e496f64c06e94122bdb914790a1285d627a918bd271/coverage-7.10.7-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:0b944ee8459f515f28b851728ad224fa2d068f1513ef6b7ff1efafeb2185f999", size = 258920, upload-time = "2025-09-21T20:02:32.823Z" },
{ url = "https://files.pythonhosted.org/packages/c0/b3/fac9f7abbc841409b9a410309d73bfa6cfb2e51c3fada738cb607ce174f8/coverage-7.10.7-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4b583b97ab2e3efe1b3e75248a9b333bd3f8b0b1b8e5b45578e05e5850dfb2c2", size = 261434, upload-time = "2025-09-21T20:02:34.86Z" },
{ url = "https://files.pythonhosted.org/packages/ee/51/a03bec00d37faaa891b3ff7387192cef20f01604e5283a5fabc95346befa/coverage-7.10.7-cp313-cp313t-win32.whl", hash = "sha256:2a78cd46550081a7909b3329e2266204d584866e8d97b898cd7fb5ac8d888b1a", size = 221403, upload-time = "2025-09-21T20:02:37.034Z" },
{ url = "https://files.pythonhosted.org/packages/53/22/3cf25d614e64bf6d8e59c7c669b20d6d940bb337bdee5900b9ca41c820bb/coverage-7.10.7-cp313-cp313t-win_amd64.whl", hash = "sha256:33a5e6396ab684cb43dc7befa386258acb2d7fae7f67330ebb85ba4ea27938eb", size = 222469, upload-time = "2025-09-21T20:02:39.011Z" },
{ url = "https://files.pythonhosted.org/packages/49/a1/00164f6d30d8a01c3c9c48418a7a5be394de5349b421b9ee019f380df2a0/coverage-7.10.7-cp313-cp313t-win_arm64.whl", hash = "sha256:86b0e7308289ddde73d863b7683f596d8d21c7d8664ce1dee061d0bcf3fbb4bb", size = 220731, upload-time = "2025-09-21T20:02:40.939Z" },
{ url = "https://files.pythonhosted.org/packages/23/9c/5844ab4ca6a4dd97a1850e030a15ec7d292b5c5cb93082979225126e35dd/coverage-7.10.7-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:b06f260b16ead11643a5a9f955bd4b5fd76c1a4c6796aeade8520095b75de520", size = 218302, upload-time = "2025-09-21T20:02:42.527Z" },
{ url = "https://files.pythonhosted.org/packages/f0/89/673f6514b0961d1f0e20ddc242e9342f6da21eaba3489901b565c0689f34/coverage-7.10.7-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:212f8f2e0612778f09c55dd4872cb1f64a1f2b074393d139278ce902064d5b32", size = 218578, upload-time = "2025-09-21T20:02:44.468Z" },
{ url = "https://files.pythonhosted.org/packages/05/e8/261cae479e85232828fb17ad536765c88dd818c8470aca690b0ac6feeaa3/coverage-7.10.7-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3445258bcded7d4aa630ab8296dea4d3f15a255588dd535f980c193ab6b95f3f", size = 249629, upload-time = "2025-09-21T20:02:46.503Z" },
{ url = "https://files.pythonhosted.org/packages/82/62/14ed6546d0207e6eda876434e3e8475a3e9adbe32110ce896c9e0c06bb9a/coverage-7.10.7-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:bb45474711ba385c46a0bfe696c695a929ae69ac636cda8f532be9e8c93d720a", size = 252162, upload-time = "2025-09-21T20:02:48.689Z" },
{ url = "https://files.pythonhosted.org/packages/ff/49/07f00db9ac6478e4358165a08fb41b469a1b053212e8a00cb02f0d27a05f/coverage-7.10.7-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:813922f35bd800dca9994c5971883cbc0d291128a5de6b167c7aa697fcf59360", size = 253517, upload-time = "2025-09-21T20:02:50.31Z" },
{ url = "https://files.pythonhosted.org/packages/a2/59/c5201c62dbf165dfbc91460f6dbbaa85a8b82cfa6131ac45d6c1bfb52deb/coverage-7.10.7-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:93c1b03552081b2a4423091d6fb3787265b8f86af404cff98d1b5342713bdd69", size = 249632, upload-time = "2025-09-21T20:02:51.971Z" },
{ url = "https://files.pythonhosted.org/packages/07/ae/5920097195291a51fb00b3a70b9bbd2edbfe3c84876a1762bd1ef1565ebc/coverage-7.10.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:cc87dd1b6eaf0b848eebb1c86469b9f72a1891cb42ac7adcfbce75eadb13dd14", size = 251520, upload-time = "2025-09-21T20:02:53.858Z" },
{ url = "https://files.pythonhosted.org/packages/b9/3c/a815dde77a2981f5743a60b63df31cb322c944843e57dbd579326625a413/coverage-7.10.7-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:39508ffda4f343c35f3236fe8d1a6634a51f4581226a1262769d7f970e73bffe", size = 249455, upload-time = "2025-09-21T20:02:55.807Z" },
{ url = "https://files.pythonhosted.org/packages/aa/99/f5cdd8421ea656abefb6c0ce92556709db2265c41e8f9fc6c8ae0f7824c9/coverage-7.10.7-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:925a1edf3d810537c5a3abe78ec5530160c5f9a26b1f4270b40e62cc79304a1e", size = 249287, upload-time = "2025-09-21T20:02:57.784Z" },
{ url = "https://files.pythonhosted.org/packages/c3/7a/e9a2da6a1fc5d007dd51fca083a663ab930a8c4d149c087732a5dbaa0029/coverage-7.10.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2c8b9a0636f94c43cd3576811e05b89aa9bc2d0a85137affc544ae5cb0e4bfbd", size = 250946, upload-time = "2025-09-21T20:02:59.431Z" },
{ url = "https://files.pythonhosted.org/packages/ef/5b/0b5799aa30380a949005a353715095d6d1da81927d6dbed5def2200a4e25/coverage-7.10.7-cp314-cp314-win32.whl", hash = "sha256:b7b8288eb7cdd268b0304632da8cb0bb93fadcfec2fe5712f7b9cc8f4d487be2", size = 221009, upload-time = "2025-09-21T20:03:01.324Z" },
{ url = "https://files.pythonhosted.org/packages/da/b0/e802fbb6eb746de006490abc9bb554b708918b6774b722bb3a0e6aa1b7de/coverage-7.10.7-cp314-cp314-win_amd64.whl", hash = "sha256:1ca6db7c8807fb9e755d0379ccc39017ce0a84dcd26d14b5a03b78563776f681", size = 221804, upload-time = "2025-09-21T20:03:03.4Z" },
{ url = "https://files.pythonhosted.org/packages/9e/e8/71d0c8e374e31f39e3389bb0bd19e527d46f00ea8571ec7ec8fd261d8b44/coverage-7.10.7-cp314-cp314-win_arm64.whl", hash = "sha256:097c1591f5af4496226d5783d036bf6fd6cd0cbc132e071b33861de756efb880", size = 220384, upload-time = "2025-09-21T20:03:05.111Z" },
{ url = "https://files.pythonhosted.org/packages/62/09/9a5608d319fa3eba7a2019addeacb8c746fb50872b57a724c9f79f146969/coverage-7.10.7-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:a62c6ef0d50e6de320c270ff91d9dd0a05e7250cac2a800b7784bae474506e63", size = 219047, upload-time = "2025-09-21T20:03:06.795Z" },
{ url = "https://files.pythonhosted.org/packages/f5/6f/f58d46f33db9f2e3647b2d0764704548c184e6f5e014bef528b7f979ef84/coverage-7.10.7-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:9fa6e4dd51fe15d8738708a973470f67a855ca50002294852e9571cdbd9433f2", size = 219266, upload-time = "2025-09-21T20:03:08.495Z" },
{ url = "https://files.pythonhosted.org/packages/74/5c/183ffc817ba68e0b443b8c934c8795553eb0c14573813415bd59941ee165/coverage-7.10.7-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:8fb190658865565c549b6b4706856d6a7b09302c797eb2cf8e7fe9dabb043f0d", size = 260767, upload-time = "2025-09-21T20:03:10.172Z" },
{ url = "https://files.pythonhosted.org/packages/0f/48/71a8abe9c1ad7e97548835e3cc1adbf361e743e9d60310c5f75c9e7bf847/coverage-7.10.7-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:affef7c76a9ef259187ef31599a9260330e0335a3011732c4b9effa01e1cd6e0", size = 262931, upload-time = "2025-09-21T20:03:11.861Z" },
{ url = "https://files.pythonhosted.org/packages/84/fd/193a8fb132acfc0a901f72020e54be5e48021e1575bb327d8ee1097a28fd/coverage-7.10.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e16e07d85ca0cf8bafe5f5d23a0b850064e8e945d5677492b06bbe6f09cc699", size = 265186, upload-time = "2025-09-21T20:03:13.539Z" },
{ url = "https://files.pythonhosted.org/packages/b1/8f/74ecc30607dd95ad50e3034221113ccb1c6d4e8085cc761134782995daae/coverage-7.10.7-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:03ffc58aacdf65d2a82bbeb1ffe4d01ead4017a21bfd0454983b88ca73af94b9", size = 259470, upload-time = "2025-09-21T20:03:15.584Z" },
{ url = "https://files.pythonhosted.org/packages/0f/55/79ff53a769f20d71b07023ea115c9167c0bb56f281320520cf64c5298a96/coverage-7.10.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1b4fd784344d4e52647fd7857b2af5b3fbe6c239b0b5fa63e94eb67320770e0f", size = 262626, upload-time = "2025-09-21T20:03:17.673Z" },
{ url = "https://files.pythonhosted.org/packages/88/e2/dac66c140009b61ac3fc13af673a574b00c16efdf04f9b5c740703e953c0/coverage-7.10.7-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:0ebbaddb2c19b71912c6f2518e791aa8b9f054985a0769bdb3a53ebbc765c6a1", size = 260386, upload-time = "2025-09-21T20:03:19.36Z" },
{ url = "https://files.pythonhosted.org/packages/a2/f1/f48f645e3f33bb9ca8a496bc4a9671b52f2f353146233ebd7c1df6160440/coverage-7.10.7-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:a2d9a3b260cc1d1dbdb1c582e63ddcf5363426a1a68faa0f5da28d8ee3c722a0", size = 258852, upload-time = "2025-09-21T20:03:21.007Z" },
{ url = "https://files.pythonhosted.org/packages/bb/3b/8442618972c51a7affeead957995cfa8323c0c9bcf8fa5a027421f720ff4/coverage-7.10.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a3cc8638b2480865eaa3926d192e64ce6c51e3d29c849e09d5b4ad95efae5399", size = 261534, upload-time = "2025-09-21T20:03:23.12Z" },
{ url = "https://files.pythonhosted.org/packages/b2/dc/101f3fa3a45146db0cb03f5b4376e24c0aac818309da23e2de0c75295a91/coverage-7.10.7-cp314-cp314t-win32.whl", hash = "sha256:67f8c5cbcd3deb7a60b3345dffc89a961a484ed0af1f6f73de91705cc6e31235", size = 221784, upload-time = "2025-09-21T20:03:24.769Z" },
{ url = "https://files.pythonhosted.org/packages/4c/a1/74c51803fc70a8a40d7346660379e144be772bab4ac7bb6e6b905152345c/coverage-7.10.7-cp314-cp314t-win_amd64.whl", hash = "sha256:e1ed71194ef6dea7ed2d5cb5f7243d4bcd334bfb63e59878519be558078f848d", size = 222905, upload-time = "2025-09-21T20:03:26.93Z" },
{ url = "https://files.pythonhosted.org/packages/12/65/f116a6d2127df30bcafbceef0302d8a64ba87488bf6f73a6d8eebf060873/coverage-7.10.7-cp314-cp314t-win_arm64.whl", hash = "sha256:7fe650342addd8524ca63d77b2362b02345e5f1a093266787d210c70a50b471a", size = 220922, upload-time = "2025-09-21T20:03:28.672Z" },
{ url = "https://files.pythonhosted.org/packages/ec/16/114df1c291c22cac3b0c127a73e0af5c12ed7bbb6558d310429a0ae24023/coverage-7.10.7-py3-none-any.whl", hash = "sha256:f7941f6f2fe6dd6807a1208737b8a0cbcf1cc6d7b07d24998ad2d63590868260", size = 209952, upload-time = "2025-09-21T20:03:53.918Z" },
]
[[package]]
name = "cramjam"
version = "2.11.0"
@ -454,8 +515,8 @@ name = "dill"
version = "0.4.0"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"platform_machine == 'x86_64' and sys_platform == 'linux'",
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
]
sdist = { url = "https://files.pythonhosted.org/packages/12/80/630b4b88364e9a8c8c5797f4602d0f76ef820909ee32f0bacb9f90654042/dill-0.4.0.tar.gz", hash = "sha256:0633f1d2df477324f53a895b02c901fb961bdbf65a17122586ea7019292cbcf0", size = 186976, upload-time = "2025-04-16T00:41:48.867Z" }
@ -619,8 +680,8 @@ name = "docling-mcp"
version = "1.1.0"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"platform_machine == 'x86_64' and sys_platform == 'linux'",
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
]
dependencies = [
@ -943,8 +1004,8 @@ name = "fsspec"
version = "2025.5.1"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"platform_machine == 'x86_64' and sys_platform == 'linux'",
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
]
sdist = { url = "https://files.pythonhosted.org/packages/00/f7/27f15d41f0ed38e8fcc488584b57e902b331da7f7c6dcda53721b15838fc/fsspec-2025.5.1.tar.gz", hash = "sha256:2e55e47a540b91843b755e83ded97c6e897fa0942b11490113f09e9c443c2475", size = 303033, upload-time = "2025-05-24T12:03:23.792Z" }
@ -1264,8 +1325,8 @@ name = "huggingface-hub"
version = "0.33.2"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"platform_machine == 'x86_64' and sys_platform == 'linux'",
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
]
dependencies = [
@ -1339,6 +1400,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd", size = 27656, upload-time = "2025-04-27T15:29:00.214Z" },
]
[[package]]
name = "iniconfig"
version = "2.1.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793, upload-time = "2025-03-19T20:09:59.721Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" },
]
[[package]]
name = "jinja2"
version = "3.1.6"
@ -1960,8 +2030,8 @@ name = "multiprocess"
version = "0.70.18"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"platform_machine == 'x86_64' and sys_platform == 'linux'",
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
]
dependencies = [
@ -2282,7 +2352,7 @@ wheels = [
[[package]]
name = "openrag"
version = "0.1.14.dev3"
version = "0.1.19"
source = { editable = "." }
dependencies = [
{ name = "agentd" },
@ -2312,6 +2382,14 @@ dependencies = [
{ name = "uvicorn" },
]
[package.dev-dependencies]
dev = [
{ name = "pytest" },
{ name = "pytest-asyncio" },
{ name = "pytest-cov" },
{ name = "pytest-mock" },
]
[package.metadata]
requires-dist = [
{ name = "agentd", specifier = ">=0.2.2" },
@ -2341,6 +2419,14 @@ requires-dist = [
{ name = "uvicorn", specifier = ">=0.35.0" },
]
[package.metadata.requires-dev]
dev = [
{ name = "pytest", specifier = ">=8" },
{ name = "pytest-asyncio", specifier = ">=0.21.0" },
{ name = "pytest-cov", specifier = ">=4.0.0" },
{ name = "pytest-mock", specifier = ">=3.12.0" },
]
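With these dev dependencies declared, the suite can presumably be run through uv, e.g. (flags illustrative):

uv run pytest tests/ -v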
[[package]]
name = "opensearch-py"
version = "3.0.0"
@ -2836,6 +2922,60 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/e1/6b/2706497c86e8d69fb76afe5ea857fe1794621aa0f3b1d863feb953fe0f22/pypdfium2-4.30.1-py3-none-win_arm64.whl", hash = "sha256:c2b6d63f6d425d9416c08d2511822b54b8e3ac38e639fc41164b1d75584b3a8c", size = 2814810, upload-time = "2024-12-19T19:28:09.857Z" },
]
[[package]]
name = "pytest"
version = "8.4.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "colorama", marker = "sys_platform == 'win32'" },
{ name = "iniconfig" },
{ name = "packaging" },
{ name = "pluggy" },
{ name = "pygments" },
]
sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618, upload-time = "2025-09-04T14:34:22.711Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750, upload-time = "2025-09-04T14:34:20.226Z" },
]
[[package]]
name = "pytest-asyncio"
version = "1.2.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pytest" },
]
sdist = { url = "https://files.pythonhosted.org/packages/42/86/9e3c5f48f7b7b638b216e4b9e645f54d199d7abbbab7a64a13b4e12ba10f/pytest_asyncio-1.2.0.tar.gz", hash = "sha256:c609a64a2a8768462d0c99811ddb8bd2583c33fd33cf7f21af1c142e824ffb57", size = 50119, upload-time = "2025-09-12T07:33:53.816Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/04/93/2fa34714b7a4ae72f2f8dad66ba17dd9a2c793220719e736dda28b7aec27/pytest_asyncio-1.2.0-py3-none-any.whl", hash = "sha256:8e17ae5e46d8e7efe51ab6494dd2010f4ca8dae51652aa3c8d55acf50bfb2e99", size = 15095, upload-time = "2025-09-12T07:33:52.639Z" },
]
[[package]]
name = "pytest-cov"
version = "7.0.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "coverage" },
{ name = "pluggy" },
{ name = "pytest" },
]
sdist = { url = "https://files.pythonhosted.org/packages/5e/f7/c933acc76f5208b3b00089573cf6a2bc26dc80a8aece8f52bb7d6b1855ca/pytest_cov-7.0.0.tar.gz", hash = "sha256:33c97eda2e049a0c5298e91f519302a1334c26ac65c1a483d6206fd458361af1", size = 54328, upload-time = "2025-09-09T10:57:02.113Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" },
]
[[package]]
name = "pytest-mock"
version = "3.15.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pytest" },
]
sdist = { url = "https://files.pythonhosted.org/packages/68/14/eb014d26be205d38ad5ad20d9a80f7d201472e08167f0bb4361e251084a9/pytest_mock-3.15.1.tar.gz", hash = "sha256:1849a238f6f396da19762269de72cb1814ab44416fa73a8686deac10b0d87a0f", size = 34036, upload-time = "2025-09-16T16:37:27.081Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/5a/cc/06253936f4a7fa2e0f48dfe6d851d9c56df896a9ab09ac019d70b760619c/pytest_mock-3.15.1-py3-none-any.whl", hash = "sha256:0a25e2eb88fe5168d535041d09a4529a188176ae608a6d249ee65abc0949630d", size = 10095, upload-time = "2025-09-16T16:37:25.734Z" },
]
[[package]]
name = "python-bidi"
version = "0.6.6"
@ -3622,9 +3762,9 @@ name = "torch"
version = "2.8.0"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"sys_platform == 'darwin'",
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
"sys_platform == 'darwin'",
]
dependencies = [
{ name = "filelock", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" },
@ -3669,9 +3809,9 @@ name = "torchvision"
version = "0.23.0"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"sys_platform == 'darwin'",
"platform_machine == 'aarch64' and sys_platform == 'linux'",
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
"sys_platform == 'darwin'",
]
dependencies = [
{ name = "numpy", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" },