This commit is contained in:
phact 2025-10-08 11:16:35 -04:00
commit af137b39c4
30 changed files with 340 additions and 436 deletions

View file

@ -1,59 +0,0 @@
# Builds the phact/langflow:responses image for amd64 and arm64 in parallel,
# then stitches the per-arch tags into a single multi-arch manifest.
# Manually triggered only (workflow_dispatch).
name: Build Langflow Responses Multi-Arch

on:
  workflow_dispatch:

jobs:
  build:
    strategy:
      # Let each architecture finish independently so one failing runner
      # does not cancel the other build.
      fail-fast: false
      matrix:
        include:
          - platform: linux/amd64
            arch: amd64
            runs-on: ubuntu-latest
          - platform: linux/arm64
            arch: arm64
            # Dedicated self-hosted ARM64 runner pool.
            runs-on: [self-hosted, linux, ARM64, langflow-ai-arm64-2]
    runs-on: ${{ matrix.runs-on }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}
      - name: Build and push langflow (${{ matrix.arch }})
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./Dockerfile.langflow
          platforms: ${{ matrix.platform }}
          push: true
          # Per-arch tag; combined into :responses by the manifest job below.
          tags: phact/langflow:responses-${{ matrix.arch }}
          # GitHub Actions cache, scoped per arch so the builds don't clobber
          # each other's layers.
          cache-from: type=gha,scope=langflow-responses-${{ matrix.arch }}
          cache-to: type=gha,mode=max,scope=langflow-responses-${{ matrix.arch }}

  manifest:
    needs: build
    runs-on: ubuntu-latest
    steps:
      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}
      - name: Create and push multi-arch manifest
        run: |
          docker buildx imagetools create -t phact/langflow:responses \
            phact/langflow:responses-amd64 \
            phact/langflow:responses-arm64

View file

@ -1,16 +1,95 @@
name: Build Multi-Architecture Docker Images
name: Release + Docker Images (multi-arch)
on:
push:
branches:
- main
paths:
- 'pyproject.toml'
workflow_dispatch:
inputs:
update_latest:
description: 'Update latest tags (production release)'
required: false
default: false
type: boolean
jobs:
build-python-packages:
runs-on: ubuntu-latest
outputs:
skip_release: ${{ steps.version.outputs.skip_release }}
version: ${{ steps.version.outputs.version }}
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.13'
- name: Install uv
uses: astral-sh/setup-uv@v3
- name: Extract version from pyproject.toml
id: version
run: |
VERSION=$(grep '^version = ' pyproject.toml | cut -d '"' -f 2)
echo "version=$VERSION" >> $GITHUB_OUTPUT
echo "Version: $VERSION"
# Check if tag already exists
if git rev-parse "v$VERSION" >/dev/null 2>&1; then
echo "Tag v$VERSION already exists, skipping release"
echo "skip_release=true" >> $GITHUB_OUTPUT
exit 0
fi
echo "skip_release=false" >> $GITHUB_OUTPUT
# Check if version is numeric (e.g., 0.1.16) vs prerelease (e.g., 0.1.16-rc1)
if [[ "$VERSION" =~ ^[0-9.-]+$ ]]; then
echo "is_prerelease=false" >> $GITHUB_OUTPUT
echo "Release type: Production"
else
echo "is_prerelease=true" >> $GITHUB_OUTPUT
echo "Release type: Prerelease"
fi
- name: Build wheel and source distribution
if: steps.version.outputs.skip_release != 'true'
run: |
uv build
- name: List built artifacts
if: steps.version.outputs.skip_release != 'true'
run: |
ls -la dist/
echo "Built artifacts:"
for file in dist/*; do
echo " - $(basename $file) ($(stat -c%s $file | numfmt --to=iec-i)B)"
done
- name: Upload build artifacts
if: steps.version.outputs.skip_release != 'true'
uses: actions/upload-artifact@v4
with:
name: python-packages
path: dist/
retention-days: 30
- name: Create Release
if: steps.version.outputs.skip_release != 'true'
uses: softprops/action-gh-release@v2
with:
tag_name: v${{ steps.version.outputs.version }}
name: Release ${{ steps.version.outputs.version }}
draft: false
prerelease: ${{ steps.version.outputs.is_prerelease }}
generate_release_notes: true
files: |
dist/*.whl
dist/*.tar.gz
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
build:
needs: build-python-packages
if: needs.build-python-packages.outputs.skip_release != 'true'
strategy:
fail-fast: false
matrix:
@ -106,9 +185,9 @@ jobs:
cache-to: type=gha,mode=max,scope=${{ matrix.image }}-${{ matrix.arch }}
manifest:
needs: build
needs: [build, build-python-packages]
runs-on: ubuntu-latest
if: github.event_name != 'pull_request'
if: github.event_name != 'pull_request' && needs.build-python-packages.outputs.skip_release != 'true'
steps:
- name: Checkout
uses: actions/checkout@v4
@ -146,8 +225,8 @@ jobs:
phact/openrag-opensearch:$VERSION-amd64 \
phact/openrag-opensearch:$VERSION-arm64
# Only update latest tags if version is numeric AND checkbox is checked
if [[ "$VERSION" =~ ^[0-9.-]+$ ]] && [[ "${{ github.event.inputs.update_latest }}" == "true" ]]; then
# Only update latest tags if version is numeric
if [[ "$VERSION" =~ ^[0-9.-]+$ ]]; then
echo "Updating latest tags for production release: $VERSION"
docker buildx imagetools create -t phact/openrag-backend:latest \
phact/openrag-backend:$VERSION-amd64 \
@ -165,5 +244,5 @@ jobs:
phact/openrag-opensearch:$VERSION-amd64 \
phact/openrag-opensearch:$VERSION-arm64
else
echo "Skipping latest tags - version: $VERSION, update_latest: ${{ github.event.inputs.update_latest }}"
echo "Skipping latest tags - version: $VERSION (not numeric)"
fi

2
.gitignore vendored
View file

@ -18,6 +18,8 @@ wheels/
1001*.pdf
*.json
!flows/*.json
!src/tui/_assets/flows/*.json
!src/tui/_assets/flows/components/*.json
.DS_Store
config/

1
MANIFEST.in Normal file
View file

@ -0,0 +1 @@
recursive-include src/tui/_assets *

View file

@ -34,11 +34,11 @@ In an agentic context, tools are functions that the agent can run to perform tas
</details>
## Use the OpenRAG OpenSearch Agent flow
## Use the OpenRAG OpenSearch Agent flow {#flow}
If you've chatted with your knowledge in OpenRAG, you've already experienced the OpenRAG OpenSearch Agent chat flow.
To switch OpenRAG over to the [Langflow visual editor](https://docs.langflow.org/concepts-overview) and view the OpenRAG OpenSearch Agentflow, click <Icon name="Settings2" aria-hidden="true"/> **Settings**, and then click **Edit in Langflow**.
This flow contains seven components connected together to chat with your data:
This flow contains eight components connected together to chat with your data:
* The [**Agent** component](https://docs.langflow.org/agents) orchestrates the entire flow by deciding when to search the knowledge base, how to formulate search queries, and how to combine retrieved information with the user's question to generate a comprehensive response.
The **Agent** behaves according to the prompt in the **Agent Instructions** field.
@ -49,6 +49,7 @@ The **Agent** behaves according to the prompt in the **Agent Instructions** fiel
* The [**Text Input** component](https://docs.langflow.org/components-io) is populated with the global variable `OPENRAG-QUERY-FILTER`.
This filter is the [Knowledge filter](/knowledge#create-knowledge-filters), and filters which knowledge sources to search through.
* The **Agent** component's Output port is connected to the [**Chat Output** component](https://docs.langflow.org/components-io), which returns the final response to the user or application.
* An [**MCP Tools** component](https://docs.langflow.org/mcp-client) is connected to the Agent's **Tools** port. This component calls the [OpenSearch URL Ingestion flow](/ingestion#url-flow), which Langflow uses as an MCP server to fetch content from URLs and store in OpenSearch.
<PartialModifyFlows />

View file

@ -50,4 +50,31 @@ If you want to use OpenRAG's built-in pipeline instead of Docling serve, set `DI
The built-in pipeline still uses the Docling processor, but uses it directly without the Docling Serve API.
For more information, see [`processors.py` in the OpenRAG repository](https://github.com/langflow-ai/openrag/blob/main/src/models/processors.py#L58).
For more information, see [`processors.py` in the OpenRAG repository](https://github.com/langflow-ai/openrag/blob/main/src/models/processors.py#L58).
## Knowledge ingestion flows
[Flows](https://docs.langflow.org/concepts-overview) in Langflow are functional representations of application workflows, with multiple [component](https://docs.langflow.org/concepts-components) nodes connected as single steps in a workflow.
The **OpenSearch Ingestion** flow is the default knowledge ingestion flow in OpenRAG: when you **Add Knowledge** in OpenRAG, you run the OpenSearch Ingestion flow in the background. The flow ingests documents using **Docling Serve** to import and process documents.
This flow contains ten components connected together to process and store documents in your knowledge base.
* The [**Docling Serve** component](https://docs.langflow.org/bundles-docling) processes input documents by connecting to your instance of Docling Serve.
* The [**Export DoclingDocument** component](https://docs.langflow.org/components-docling) exports the processed DoclingDocument to markdown format with image export mode set to placeholder. This conversion makes the structured document data into a standardized format for further processing.
* Three [**DataFrame Operations** components](https://docs.langflow.org/components-processing#dataframe-operations) sequentially add metadata columns to the document data of `filename`, `file_size`, and `mimetype`.
* The [**Split Text** component](https://docs.langflow.org/components-processing#split-text) splits the processed text into chunks with a chunk size of 1000 characters and an overlap of 200 characters.
* Four **Secret Input** components provide secure access to configuration variables: `CONNECTOR_TYPE`, `OWNER`, `OWNER_EMAIL`, and `OWNER_NAME`. These are runtime variables populated from OAuth login.
* The **Create Data** component combines the secret inputs into a structured data object that will be associated with the document embeddings.
* The [**Embedding Model** component](https://docs.langflow.org/components-embedding-models) generates vector embeddings using OpenAI's `text-embedding-3-small` model. The embedding model is selected during application onboarding and cannot be changed.
* The [**OpenSearch** component](https://docs.langflow.org/bundles-elastic#opensearch) stores the processed documents and their embeddings in the `documents` index at `https://opensearch:9200`. By default, the component is authenticated with a JWT token, but you can also select `basic` auth mode, and enter your OpenSearch admin username and password.
<PartialModifyFlows />
### OpenSearch URL Ingestion flow {#url-flow}
An additional knowledge ingestion flow is included in OpenRAG, where it is used as an MCP tool by the [**OpenSearch Agent flow**](/agents#flow).
The agent calls this component to fetch web content, and the results are ingested into OpenSearch.
For more on using MCP clients in Langflow, see [MCP clients](https://docs.langflow.org/mcp-client).\
To connect additional MCP servers to the MCP client, see [Connect to MCP servers from your application](https://docs.langflow.org/mcp-tutorial).

View file

@ -18,6 +18,7 @@ OpenSearch provides powerful hybrid search capabilities with enterprise-grade se
## Ingest knowledge
OpenRAG supports knowledge ingestion through direct file uploads and OAuth connectors.
To configure the knowledge ingestion pipeline parameters, see [Docling Ingestion](/ingestion).
### Direct file ingestion
@ -101,10 +102,6 @@ Documents are processed with the default **Knowledge Ingest** flow, so if you wa
<PartialModifyFlows />
### Knowledge ingestion settings
To configure the knowledge ingestion pipeline parameters, see [Docling Ingestion](/ingestion).
## Create knowledge filters
OpenRAG includes a knowledge filter system for organizing and managing document collections.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 951 KiB

After

Width:  |  Height:  |  Size: 1,004 KiB

View file

@ -74,7 +74,7 @@ export const KnowledgeSearchInput = () => {
{queryOverride && (
<Button
variant="ghost"
className="h-full !px-1.5 !py-0"
className="h-full rounded-sm !px-1.5 !py-0"
type="button"
onClick={() => {
setSearchQueryInput("");
@ -87,7 +87,7 @@ export const KnowledgeSearchInput = () => {
<Button
variant="ghost"
className={cn(
"h-full !px-1.5 !py-0 hidden group-focus-within/input:block",
"h-full rounded-sm !px-1.5 !py-0 hidden group-focus-within/input:block",
searchQueryInput && "block"
)}
type="submit"

View file

@ -5,14 +5,9 @@ import { useRouter, useSearchParams } from "next/navigation";
import { Suspense, useCallback, useEffect, useMemo, useState } from "react";
// import { Label } from "@/components/ui/label";
// import { Checkbox } from "@/components/ui/checkbox";
import { filterAccentClasses } from "@/components/knowledge-filter-panel";
import { ProtectedRoute } from "@/components/protected-route";
import { Button } from "@/components/ui/button";
import { Checkbox } from "@/components/ui/checkbox";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context";
import { useTask } from "@/contexts/task-context";
import {
type ChunkResult,
type File,
@ -35,9 +30,9 @@ function ChunksPageContent() {
const { parsedFilterData, queryOverride } = useKnowledgeFilter();
const filename = searchParams.get("filename");
const [chunks, setChunks] = useState<ChunkResult[]>([]);
const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState<
ChunkResult[]
>([]);
// const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState<
// ChunkResult[]
// >([]);
// const [selectedChunks, setSelectedChunks] = useState<Set<number>>(new Set());
const [activeCopiedChunkIndex, setActiveCopiedChunkIndex] = useState<
number | null
@ -126,26 +121,25 @@ function ChunksPageContent() {
return (
<div className="flex flex-col h-full">
<div className="flex flex-col h-full">
{/* Header */}
<div className="flex flex-col mb-6">
<div className="flex items-center gap-3 mb-6">
<Button
variant="ghost"
onClick={handleBack}
size="sm"
className="max-w-8 max-h-8 -m-2"
>
<ArrowLeft size={24} />
</Button>
<h1 className="text-lg font-semibold">
{/* Removes file extension from filename */}
{filename.replace(/\.[^/.]+$/, "")}
</h1>
</div>
<div className="flex flex-1">
<KnowledgeSearchInput />
{/* <div className="flex items-center pl-4 gap-2">
{/* Header */}
<div className="flex flex-col mb-6">
<div className="flex items-center gap-3 mb-6">
<Button
variant="ghost"
onClick={handleBack}
size="sm"
className="max-w-8 max-h-8 -m-2"
>
<ArrowLeft size={24} />
</Button>
<h1 className="text-lg font-semibold">
{/* Removes file extension from filename */}
{filename.replace(/\.[^/.]+$/, "")}
</h1>
</div>
<div className="flex flex-1">
<KnowledgeSearchInput />
{/* <div className="flex items-center pl-4 gap-2">
<Checkbox
id="selectAllChunks"
checked={selectAll}
@ -160,11 +154,12 @@ function ChunksPageContent() {
Select all
</Label>
</div> */}
</div>
</div>
</div>
{/* Content Area - matches knowledge page structure */}
<div className="flex-1 overflow-auto pr-6">
<div className="grid gap-6 grid-cols-1 lg:grid-cols-[3fr_1fr]">
{/* Content Area */}
<div className="row-start-2 lg:row-start-1">
{isFetching ? (
<div className="flex items-center justify-center h-64">
<div className="text-center">
@ -185,7 +180,7 @@ function ChunksPageContent() {
</div>
) : (
<div className="space-y-4 pb-6">
{chunksFilteredByQuery.map((chunk, index) => (
{chunks.map((chunk, index) => (
<div
key={chunk.filename + index}
className="bg-muted rounded-lg p-4 border border-border/50"
@ -242,31 +237,30 @@ function ChunksPageContent() {
</div>
)}
</div>
</div>
{/* Right panel - Summary (TODO), Technical details, */}
{chunks.length > 0 && (
<div className="w-[320px] py-20 px-2">
<div className="mb-8">
<h2 className="text-xl font-semibold mt-3 mb-4">
Technical details
</h2>
<dl>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">
Total chunks
</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{chunks.length}
</dd>
</div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Avg length</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{averageChunkLength.toFixed(0)} chars
</dd>
</div>
{/* TODO: Uncomment after data is available */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
{/* Right panel - Summary (TODO), Technical details, */}
{chunks.length > 0 && (
<div className="min-w-[200px]">
<div className="mb-8">
<h2 className="text-xl font-semibold mb-4">Technical details</h2>
<dl>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">
Total chunks
</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{chunks.length}
</dd>
</div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">
Avg length
</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{averageChunkLength.toFixed(0)} chars
</dd>
</div>
{/* TODO: Uncomment after data is available */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Process time</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
</dd>
@ -276,54 +270,55 @@ function ChunksPageContent() {
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
</dd>
</div> */}
</dl>
</div>
<div className="mb-8">
<h2 className="text-xl font-semibold mt-2 mb-3">
Original document
</h2>
<dl>
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
</dl>
</div>
<div className="mb-4">
<h2 className="text-xl font-semibold mt-2 mb-3">
Original document
</h2>
<dl>
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Name</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData?.filename}
</dd>
</div> */}
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Type</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"}
</dd>
</div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Size</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData?.size
? `${Math.round(fileData.size / 1024)} KB`
: "Unknown"}
</dd>
</div>
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Type</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"}
</dd>
</div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Size</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData?.size
? `${Math.round(fileData.size / 1024)} KB`
: "Unknown"}
</dd>
</div>
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Uploaded</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
N/A
</dd>
</div> */}
{/* TODO: Uncomment after data is available */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
{/* TODO: Uncomment after data is available */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Source</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"></dd>
</div> */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Updated</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
N/A
</dd>
</div> */}
</dl>
</dl>
</div>
</div>
</div>
)}
)}
</div>
</div>
);
}

View file

@ -1,6 +1,10 @@
[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "openrag"
version = "0.1.15"
version = "0.1.18"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.13"

View file

@ -1,122 +0,0 @@
# CPU-only OpenRAG stack: OpenSearch + Dashboards, the OpenRAG backend and
# frontend, and a Langflow instance that loads the bundled flows.
services:
  opensearch:
    image: phact/openrag-opensearch:${OPENRAG_VERSION:-latest}
    # build:
    #   context: .
    #   dockerfile: Dockerfile
    container_name: os
    depends_on:
      - openrag-backend
    environment:
      - discovery.type=single-node
      - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_PASSWORD}
    # Run security setup in background after OpenSearch starts.
    # NOTE: the body lines are more-indented than `bash -c "`, so the folded
    # scalar (>) preserves their newlines and bash sees one command per line.
    command: >
      bash -c "
        # Start OpenSearch in background
        /usr/share/opensearch/opensearch-docker-entrypoint.sh opensearch &

        # Wait a bit for OpenSearch to start, then apply security config
        sleep 10 && /usr/share/opensearch/setup-security.sh &

        # Wait for background processes
        wait
      "
    ports:
      - "9200:9200"
      - "9600:9600"

  dashboards:
    image: opensearchproject/opensearch-dashboards:3.0.0
    container_name: osdash
    depends_on:
      - opensearch
    environment:
      OPENSEARCH_HOSTS: '["https://opensearch:9200"]'
      OPENSEARCH_USERNAME: "admin"
      OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD}
    ports:
      - "5601:5601"

  openrag-backend:
    image: phact/openrag-backend:${OPENRAG_VERSION:-latest}
    # build:
    #   context: .
    #   dockerfile: Dockerfile.backend
    container_name: openrag-backend
    depends_on:
      - langflow
    environment:
      - OPENSEARCH_HOST=opensearch
      - LANGFLOW_URL=http://langflow:7860
      - LANGFLOW_PUBLIC_URL=${LANGFLOW_PUBLIC_URL}
      - LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
      - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
      - LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
      - LANGFLOW_CHAT_FLOW_ID=${LANGFLOW_CHAT_FLOW_ID}
      - LANGFLOW_INGEST_FLOW_ID=${LANGFLOW_INGEST_FLOW_ID}
      - LANGFLOW_URL_INGEST_FLOW_ID=${LANGFLOW_URL_INGEST_FLOW_ID}
      - DISABLE_INGEST_WITH_LANGFLOW=${DISABLE_INGEST_WITH_LANGFLOW:-false}
      - NUDGES_FLOW_ID=${NUDGES_FLOW_ID}
      - OPENSEARCH_PORT=9200
      - OPENSEARCH_USERNAME=admin
      - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
      - OPENAI_API_KEY=${OPENAI_API_KEY}
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
      - NVIDIA_VISIBLE_DEVICES=all
      - GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID}
      - GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET}
      - MICROSOFT_GRAPH_OAUTH_CLIENT_ID=${MICROSOFT_GRAPH_OAUTH_CLIENT_ID}
      - MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET=${MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET}
      - WEBHOOK_BASE_URL=${WEBHOOK_BASE_URL}
      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
    volumes:
      # :Z relabels for SELinux hosts (Fedora/RHEL + Podman).
      - ./documents:/app/documents:Z
      - ./keys:/app/keys:Z
      - ./flows:/app/flows:Z

  openrag-frontend:
    image: phact/openrag-frontend:${OPENRAG_VERSION:-latest}
    # build:
    #   context: .
    #   dockerfile: Dockerfile.frontend
    container_name: openrag-frontend
    depends_on:
      - openrag-backend
    environment:
      - OPENRAG_BACKEND_HOST=openrag-backend
    ports:
      - "3000:3000"

  langflow:
    volumes:
      - ./flows:/app/flows:Z
    image: phact/openrag-langflow:${LANGFLOW_VERSION:-latest}
    container_name: langflow
    ports:
      - "7860:7860"
    environment:
      - OPENAI_API_KEY=${OPENAI_API_KEY}
      - LANGFLOW_LOAD_FLOWS_PATH=/app/flows
      - LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
      # Placeholder values; overwritten per-request from OAuth login via
      # LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT below.
      - JWT=None
      - OWNER=None
      - OWNER_NAME=None
      - OWNER_EMAIL=None
      - CONNECTOR_TYPE=system
      - CONNECTOR_TYPE_URL=url
      - OPENRAG-QUERY-FILTER="{}"
      - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
      - FILENAME=None
      - MIMETYPE=None
      - FILESIZE=0
      - LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT,OPENRAG-QUERY-FILTER,OPENSEARCH_PASSWORD,OWNER,OWNER_NAME,OWNER_EMAIL,CONNECTOR_TYPE,FILENAME,MIMETYPE,FILESIZE
      - LANGFLOW_LOG_LEVEL=DEBUG
      - LANGFLOW_AUTO_LOGIN=${LANGFLOW_AUTO_LOGIN}
      - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
      - LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
      - LANGFLOW_NEW_USER_IS_ACTIVE=${LANGFLOW_NEW_USER_IS_ACTIVE}
      - LANGFLOW_ENABLE_SUPERUSER_CLI=${LANGFLOW_ENABLE_SUPERUSER_CLI}
      # - DEFAULT_FOLDER_NAME=OpenRAG
      - HIDE_GETTING_STARTED_PROGRESS=true

View file

@ -0,0 +1 @@
../docker-compose-cpu.yml

View file

@ -1,122 +0,0 @@
# GPU-enabled OpenRAG stack: identical to the CPU compose file except the
# backend requests all host GPUs (`gpus: all`).
services:
  opensearch:
    image: phact/openrag-opensearch:${OPENRAG_VERSION:-latest}
    # build:
    #   context: .
    #   dockerfile: Dockerfile
    container_name: os
    depends_on:
      - openrag-backend
    environment:
      - discovery.type=single-node
      - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_PASSWORD}
    # Run security setup in background after OpenSearch starts.
    # NOTE: the body lines are more-indented than `bash -c "`, so the folded
    # scalar (>) preserves their newlines and bash sees one command per line.
    command: >
      bash -c "
        # Start OpenSearch in background
        /usr/share/opensearch/opensearch-docker-entrypoint.sh opensearch &

        # Wait a bit for OpenSearch to start, then apply security config
        sleep 10 && /usr/share/opensearch/setup-security.sh &

        # Wait for background processes
        wait
      "
    ports:
      - "9200:9200"
      - "9600:9600"

  dashboards:
    image: opensearchproject/opensearch-dashboards:3.0.0
    container_name: osdash
    depends_on:
      - opensearch
    environment:
      OPENSEARCH_HOSTS: '["https://opensearch:9200"]'
      OPENSEARCH_USERNAME: "admin"
      OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD}
    ports:
      - "5601:5601"

  openrag-backend:
    image: phact/openrag-backend:${OPENRAG_VERSION:-latest}
    # build:
    #   context: .
    #   dockerfile: Dockerfile.backend
    container_name: openrag-backend
    depends_on:
      - langflow
    environment:
      - OPENSEARCH_HOST=opensearch
      - LANGFLOW_URL=http://langflow:7860
      - LANGFLOW_PUBLIC_URL=${LANGFLOW_PUBLIC_URL}
      # NOTE(review): the CPU compose also passes LANGFLOW_SECRET_KEY to the
      # backend; it is absent here — confirm whether the omission is intended.
      - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
      - LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
      - LANGFLOW_CHAT_FLOW_ID=${LANGFLOW_CHAT_FLOW_ID}
      - LANGFLOW_INGEST_FLOW_ID=${LANGFLOW_INGEST_FLOW_ID}
      - LANGFLOW_URL_INGEST_FLOW_ID=${LANGFLOW_URL_INGEST_FLOW_ID}
      - DISABLE_INGEST_WITH_LANGFLOW=${DISABLE_INGEST_WITH_LANGFLOW:-false}
      - NUDGES_FLOW_ID=${NUDGES_FLOW_ID}
      - OPENSEARCH_PORT=9200
      - OPENSEARCH_USERNAME=admin
      - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
      - OPENAI_API_KEY=${OPENAI_API_KEY}
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
      - NVIDIA_VISIBLE_DEVICES=all
      - GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID}
      - GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET}
      - MICROSOFT_GRAPH_OAUTH_CLIENT_ID=${MICROSOFT_GRAPH_OAUTH_CLIENT_ID}
      - MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET=${MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET}
      - WEBHOOK_BASE_URL=${WEBHOOK_BASE_URL}
      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
    volumes:
      # :Z relabels for SELinux hosts (Fedora/RHEL + Podman).
      - ./documents:/app/documents:Z
      - ./keys:/app/keys:Z
      - ./flows:/app/flows:Z
    # Expose all host GPUs to the backend (Compose v2.30+ / NVIDIA toolkit).
    gpus: all

  openrag-frontend:
    image: phact/openrag-frontend:${OPENRAG_VERSION:-latest}
    # build:
    #   context: .
    #   dockerfile: Dockerfile.frontend
    container_name: openrag-frontend
    depends_on:
      - openrag-backend
    environment:
      - OPENRAG_BACKEND_HOST=openrag-backend
    ports:
      - "3000:3000"

  langflow:
    volumes:
      - ./flows:/app/flows:Z
    image: phact/openrag-langflow:${LANGFLOW_VERSION:-latest}
    container_name: langflow
    ports:
      - "7860:7860"
    environment:
      - OPENAI_API_KEY=${OPENAI_API_KEY}
      - LANGFLOW_LOAD_FLOWS_PATH=/app/flows
      - LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
      # Placeholder values; overwritten per-request from OAuth login via
      # LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT below.
      - JWT=None
      - OWNER=None
      - OWNER_NAME=None
      - OWNER_EMAIL=None
      - CONNECTOR_TYPE=system
      - CONNECTOR_TYPE_URL=url
      - OPENRAG-QUERY-FILTER="{}"
      - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
      - FILENAME=None
      - MIMETYPE=None
      - FILESIZE=0
      - LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT,OPENRAG-QUERY-FILTER,OPENSEARCH_PASSWORD,OWNER,OWNER_NAME,OWNER_EMAIL,CONNECTOR_TYPE,FILENAME,MIMETYPE,FILESIZE
      - LANGFLOW_LOG_LEVEL=DEBUG
      - LANGFLOW_AUTO_LOGIN=${LANGFLOW_AUTO_LOGIN}
      - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
      - LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
      - LANGFLOW_NEW_USER_IS_ACTIVE=${LANGFLOW_NEW_USER_IS_ACTIVE}
      - LANGFLOW_ENABLE_SUPERUSER_CLI=${LANGFLOW_ENABLE_SUPERUSER_CLI}
      # - DEFAULT_FOLDER_NAME="OpenRAG"
      - HIDE_GETTING_STARTED_PROGRESS=true

View file

@ -0,0 +1 @@
../docker-compose.yml

View file

@ -0,0 +1 @@
../../../../documents/2506.08231v1.pdf

View file

@ -0,0 +1 @@
../../../../documents/ai-human-resources.pdf

View file

@ -0,0 +1 @@
../../../../documents/warmup_ocr.pdf

View file

@ -0,0 +1 @@
../../../../../flows/components/ollama_embedding.json

View file

@ -0,0 +1 @@
../../../../../flows/components/ollama_llm.json

View file

@ -0,0 +1 @@
../../../../../flows/components/ollama_llm_text.json

View file

@ -0,0 +1 @@
../../../../../flows/components/watsonx_embedding.json

View file

@ -0,0 +1 @@
../../../../../flows/components/watsonx_llm.json

View file

@ -0,0 +1 @@
../../../../../flows/components/watsonx_llm_text.json

View file

@ -0,0 +1 @@
../../../../flows/ingestion_flow.json

View file

@ -0,0 +1 @@
../../../../flows/openrag_agent.json

View file

@ -0,0 +1 @@
../../../../flows/openrag_ingest_docling.json

View file

@ -0,0 +1 @@
../../../../flows/openrag_nudges.json

View file

@ -0,0 +1 @@
../../../../flows/openrag_url_mcp.json

View file

@ -2,6 +2,7 @@
import sys
from pathlib import Path
from typing import Iterable, Optional
from textual.app import App, ComposeResult
from utils.logging_config import get_logger
try:
@ -305,41 +306,103 @@ class OpenRAGTUI(App):
return True, "Runtime requirements satisfied"
def copy_sample_documents():
def _copy_assets(resource_tree, destination: Path, allowed_suffixes: Optional[Iterable[str]] = None, *, force: bool = False) -> None:
"""Copy packaged assets into destination and optionally overwrite existing files.
When ``force`` is True, files are refreshed if the packaged bytes differ.
"""
destination.mkdir(parents=True, exist_ok=True)
for resource in resource_tree.iterdir():
target_path = destination / resource.name
if resource.is_dir():
_copy_assets(resource, target_path, allowed_suffixes, force=force)
continue
if allowed_suffixes and not any(resource.name.endswith(suffix) for suffix in allowed_suffixes):
continue
resource_bytes = resource.read_bytes()
if target_path.exists():
if not force:
continue
try:
if target_path.read_bytes() == resource_bytes:
continue
except Exception as read_error:
logger.debug(f"Failed to read existing asset {target_path}: {read_error}")
target_path.write_bytes(resource_bytes)
logger.info(f"Copied bundled asset: {target_path}")
def copy_sample_documents(*, force: bool = False) -> None:
"""Copy sample documents from package to current directory if they don't exist."""
documents_dir = Path("documents")
# Check if documents directory already exists and has files
if documents_dir.exists() and any(documents_dir.glob("*.pdf")):
return # Documents already exist, don't overwrite
try:
# Get sample documents from package assets
assets_files = files("tui._assets.documents")
# Create documents directory if it doesn't exist
documents_dir.mkdir(exist_ok=True)
# Copy each sample document
for resource in assets_files.iterdir():
if resource.is_file() and resource.name.endswith('.pdf'):
dest_path = documents_dir / resource.name
if not dest_path.exists():
content = resource.read_bytes()
dest_path.write_bytes(content)
logger.info(f"Copied sample document: {resource.name}")
_copy_assets(assets_files, documents_dir, allowed_suffixes=(".pdf",), force=force)
except Exception as e:
logger.debug(f"Could not copy sample documents: {e}")
# This is not a critical error - the app can work without sample documents
def copy_sample_flows(*, force: bool = False) -> None:
"""Copy sample flows from package to current directory if they don't exist."""
flows_dir = Path("flows")
try:
assets_files = files("tui._assets.flows")
_copy_assets(assets_files, flows_dir, allowed_suffixes=(".json",), force=force)
except Exception as e:
logger.debug(f"Could not copy sample flows: {e}")
# The app can proceed without bundled flows
def copy_compose_files(*, force: bool = False) -> None:
"""Copy docker-compose templates into the workspace if they are missing."""
try:
assets_root = files("tui._assets")
except Exception as e:
logger.debug(f"Could not access compose assets: {e}")
return
for filename in ("docker-compose.yml", "docker-compose-cpu.yml"):
destination = Path(filename)
if destination.exists() and not force:
continue
try:
resource = assets_root.joinpath(filename)
if not resource.is_file():
logger.debug(f"Compose template not found in assets: {filename}")
continue
resource_bytes = resource.read_bytes()
if destination.exists():
try:
if destination.read_bytes() == resource_bytes:
continue
except Exception as read_error:
logger.debug(f"Failed to read existing compose file {destination}: {read_error}")
destination.write_bytes(resource_bytes)
logger.info(f"Copied docker-compose template: {filename}")
except Exception as error:
logger.debug(f"Could not copy compose file {filename}: {error}")
def run_tui():
"""Run the OpenRAG TUI application."""
app = None
try:
# Copy sample documents on first run
copy_sample_documents()
# Keep bundled assets aligned with the packaged versions
copy_sample_documents(force=True)
copy_sample_flows(force=True)
copy_compose_files(force=True)
app = OpenRAGTUI()
app.run()

View file

@ -157,10 +157,22 @@ def guess_host_ip_for_containers(logger=None) -> str:
import logging
import re
import shutil
import socket
import subprocess
log = logger or logging.getLogger(__name__)
def can_bind_to_address(ip_addr: str) -> bool:
"""Test if we can bind to the given IP address."""
try:
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
sock.bind((ip_addr, 0)) # Port 0 = let OS choose a free port
return True
except (OSError, socket.error) as e:
log.debug("Cannot bind to %s: %s", ip_addr, e)
return False
def run(cmd, timeout=2, text=True):
return subprocess.run(cmd, capture_output=True, text=text, timeout=timeout)
@ -261,10 +273,23 @@ def guess_host_ip_for_containers(logger=None) -> str:
"Container-reachable host IP candidates: %s",
", ".join(ordered_candidates),
)
else:
log.info("Container-reachable host IP: %s", ordered_candidates[0])
return ordered_candidates[0]
# Try each candidate and return the first one we can bind to
for ip_addr in ordered_candidates:
if can_bind_to_address(ip_addr):
if len(ordered_candidates) > 1:
log.info("Selected bindable host IP: %s", ip_addr)
else:
log.info("Container-reachable host IP: %s", ip_addr)
return ip_addr
log.debug("Skipping %s (cannot bind)", ip_addr)
# None of the candidates were bindable, fall back to 127.0.0.1
log.warning(
"None of the discovered IPs (%s) can be bound; falling back to 127.0.0.1",
", ".join(ordered_candidates),
)
return "127.0.0.1"
log.warning(
"No container bridge IP found. For rootless Podman (slirp4netns) there may be no host bridge; publish ports or use 10.0.2.2 from the container."

2
uv.lock generated
View file

@ -2352,7 +2352,7 @@ wheels = [
[[package]]
name = "openrag"
version = "0.1.15"
version = "0.1.18"
source = { editable = "." }
dependencies = [
{ name = "agentd" },