This commit is contained in:
phact 2025-10-08 11:16:35 -04:00
commit af137b39c4
30 changed files with 340 additions and 436 deletions

View file

@ -1,59 +0,0 @@
# Builds the Langflow "responses" image once per CPU architecture, pushes each
# as phact/langflow:responses-<arch>, then joins them under a single
# phact/langflow:responses multi-arch tag.
name: Build Langflow Responses Multi-Arch
on:
# Manual trigger only; no push or schedule events are configured.
workflow_dispatch:
jobs:
# One build per matrix entry; each entry selects its own runner.
build:
strategy:
# A failure on one architecture does not cancel the other build.
fail-fast: false
matrix:
include:
- platform: linux/amd64
arch: amd64
runs-on: ubuntu-latest
- platform: linux/arm64
arch: arm64
# arm64 is built on a self-hosted runner selected by these labels.
runs-on: [self-hosted, linux, ARM64, langflow-ai-arm64-2]
runs-on: ${{ matrix.runs-on }}
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Build and push langflow (${{ matrix.arch }})
uses: docker/build-push-action@v5
with:
context: .
file: ./Dockerfile.langflow
platforms: ${{ matrix.platform }}
push: true
# Architecture-specific tag; the manifest job references these by name.
tags: phact/langflow:responses-${{ matrix.arch }}
# GitHub Actions cache, scoped per architecture.
cache-from: type=gha,scope=langflow-responses-${{ matrix.arch }}
cache-to: type=gha,mode=max,scope=langflow-responses-${{ matrix.arch }}
# Runs after every matrix build has finished (needs: build).
manifest:
needs: build
runs-on: ubuntu-latest
steps:
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Create and push multi-arch manifest
# Combines the two per-arch tags pushed above into phact/langflow:responses.
run: |
docker buildx imagetools create -t phact/langflow:responses \
phact/langflow:responses-amd64 \
phact/langflow:responses-arm64

View file

@ -1,16 +1,95 @@
name: Build Multi-Architecture Docker Images name: Release + Docker Images (multi-arch)
on: on:
push:
branches:
- main
paths:
- 'pyproject.toml'
workflow_dispatch: workflow_dispatch:
inputs:
update_latest:
description: 'Update latest tags (production release)'
required: false
default: false
type: boolean
jobs: jobs:
build-python-packages:
  # Reads the project version from pyproject.toml, builds the wheel and sdist
  # with uv, uploads them as a workflow artifact, and publishes a GitHub
  # release tagged v<version>. Exposes `version` and `skip_release` so that
  # downstream jobs can skip work when the version was already released.
  runs-on: ubuntu-latest
  permissions:
    # softprops/action-gh-release creates the tag and release with
    # GITHUB_TOKEN, which requires write access to repository contents.
    contents: write
  outputs:
    skip_release: ${{ steps.version.outputs.skip_release }}
    version: ${{ steps.version.outputs.version }}
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        # The default checkout is shallow and fetches no tags, so the
        # "tag already exists" check below could never succeed. Fetch full
        # history (which includes tags) to make that check meaningful.
        fetch-depth: 0
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.13'
    - name: Install uv
      uses: astral-sh/setup-uv@v3
    - name: Extract version from pyproject.toml
      id: version
      run: |
        # Take only the first `version = "..."` line so that a later table
        # with its own `version` key cannot produce a multi-line value.
        VERSION=$(grep -m 1 '^version = ' pyproject.toml | cut -d '"' -f 2)
        if [ -z "$VERSION" ]; then
          echo "::error::Could not read version from pyproject.toml"
          exit 1
        fi
        echo "version=$VERSION" >> "$GITHUB_OUTPUT"
        echo "Version: $VERSION"
        # Check if tag already exists (restricted to refs/tags so a branch
        # or other ref with the same name is not mistaken for a release)
        if git rev-parse -q --verify "refs/tags/v$VERSION" >/dev/null 2>&1; then
          echo "Tag v$VERSION already exists, skipping release"
          echo "skip_release=true" >> "$GITHUB_OUTPUT"
          exit 0
        fi
        echo "skip_release=false" >> "$GITHUB_OUTPUT"
        # Check if version is numeric (e.g., 0.1.16) vs prerelease (e.g., 0.1.16-rc1)
        if [[ "$VERSION" =~ ^[0-9.-]+$ ]]; then
          echo "is_prerelease=false" >> "$GITHUB_OUTPUT"
          echo "Release type: Production"
        else
          echo "is_prerelease=true" >> "$GITHUB_OUTPUT"
          echo "Release type: Prerelease"
        fi
    - name: Build wheel and source distribution
      if: steps.version.outputs.skip_release != 'true'
      run: |
        uv build
    - name: List built artifacts
      if: steps.version.outputs.skip_release != 'true'
      run: |
        ls -la dist/
        echo "Built artifacts:"
        for file in dist/*; do
          # Quote paths so file names containing spaces are handled correctly.
          echo "  - $(basename "$file") ($(stat -c%s "$file" | numfmt --to=iec-i)B)"
        done
    - name: Upload build artifacts
      if: steps.version.outputs.skip_release != 'true'
      uses: actions/upload-artifact@v4
      with:
        name: python-packages
        path: dist/
        retention-days: 30
    - name: Create Release
      if: steps.version.outputs.skip_release != 'true'
      uses: softprops/action-gh-release@v2
      with:
        tag_name: v${{ steps.version.outputs.version }}
        name: Release ${{ steps.version.outputs.version }}
        draft: false
        prerelease: ${{ steps.version.outputs.is_prerelease }}
        generate_release_notes: true
        files: |
          dist/*.whl
          dist/*.tar.gz
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
build: build:
needs: build-python-packages
if: needs.build-python-packages.outputs.skip_release != 'true'
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
@ -106,9 +185,9 @@ jobs:
cache-to: type=gha,mode=max,scope=${{ matrix.image }}-${{ matrix.arch }} cache-to: type=gha,mode=max,scope=${{ matrix.image }}-${{ matrix.arch }}
manifest: manifest:
needs: build needs: [build, build-python-packages]
runs-on: ubuntu-latest runs-on: ubuntu-latest
if: github.event_name != 'pull_request' if: github.event_name != 'pull_request' && needs.build-python-packages.outputs.skip_release != 'true'
steps: steps:
- name: Checkout - name: Checkout
uses: actions/checkout@v4 uses: actions/checkout@v4
@ -146,8 +225,8 @@ jobs:
phact/openrag-opensearch:$VERSION-amd64 \ phact/openrag-opensearch:$VERSION-amd64 \
phact/openrag-opensearch:$VERSION-arm64 phact/openrag-opensearch:$VERSION-arm64
# Only update latest tags if version is numeric AND checkbox is checked # Only update latest tags if version is numeric
if [[ "$VERSION" =~ ^[0-9.-]+$ ]] && [[ "${{ github.event.inputs.update_latest }}" == "true" ]]; then if [[ "$VERSION" =~ ^[0-9.-]+$ ]]; then
echo "Updating latest tags for production release: $VERSION" echo "Updating latest tags for production release: $VERSION"
docker buildx imagetools create -t phact/openrag-backend:latest \ docker buildx imagetools create -t phact/openrag-backend:latest \
phact/openrag-backend:$VERSION-amd64 \ phact/openrag-backend:$VERSION-amd64 \
@ -165,5 +244,5 @@ jobs:
phact/openrag-opensearch:$VERSION-amd64 \ phact/openrag-opensearch:$VERSION-amd64 \
phact/openrag-opensearch:$VERSION-arm64 phact/openrag-opensearch:$VERSION-arm64
else else
echo "Skipping latest tags - version: $VERSION, update_latest: ${{ github.event.inputs.update_latest }}" echo "Skipping latest tags - version: $VERSION (not numeric)"
fi fi

2
.gitignore vendored
View file

@ -18,6 +18,8 @@ wheels/
1001*.pdf 1001*.pdf
*.json *.json
!flows/*.json !flows/*.json
!src/tui/_assets/flows/*.json
!src/tui/_assets/flows/components/*.json
.DS_Store .DS_Store
config/ config/

1
MANIFEST.in Normal file
View file

@ -0,0 +1 @@
recursive-include src/tui/_assets *

View file

@ -34,11 +34,11 @@ In an agentic context, tools are functions that the agent can run to perform tas
</details> </details>
## Use the OpenRAG OpenSearch Agent flow ## Use the OpenRAG OpenSearch Agent flow {#flow}
If you've chatted with your knowledge in OpenRAG, you've already experienced the OpenRAG OpenSearch Agent chat flow. If you've chatted with your knowledge in OpenRAG, you've already experienced the OpenRAG OpenSearch Agent chat flow.
To switch OpenRAG over to the [Langflow visual editor](https://docs.langflow.org/concepts-overview) and view the OpenRAG OpenSearch Agent flow, click <Icon name="Settings2" aria-hidden="true"/> **Settings**, and then click **Edit in Langflow**. To switch OpenRAG over to the [Langflow visual editor](https://docs.langflow.org/concepts-overview) and view the OpenRAG OpenSearch Agent flow, click <Icon name="Settings2" aria-hidden="true"/> **Settings**, and then click **Edit in Langflow**.
This flow contains seven components connected together to chat with your data: This flow contains eight components connected together to chat with your data:
* The [**Agent** component](https://docs.langflow.org/agents) orchestrates the entire flow by deciding when to search the knowledge base, how to formulate search queries, and how to combine retrieved information with the user's question to generate a comprehensive response. * The [**Agent** component](https://docs.langflow.org/agents) orchestrates the entire flow by deciding when to search the knowledge base, how to formulate search queries, and how to combine retrieved information with the user's question to generate a comprehensive response.
The **Agent** behaves according to the prompt in the **Agent Instructions** field. The **Agent** behaves according to the prompt in the **Agent Instructions** field.
@ -49,6 +49,7 @@ The **Agent** behaves according to the prompt in the **Agent Instructions** fiel
* The [**Text Input** component](https://docs.langflow.org/components-io) is populated with the global variable `OPENRAG-QUERY-FILTER`. * The [**Text Input** component](https://docs.langflow.org/components-io) is populated with the global variable `OPENRAG-QUERY-FILTER`.
This filter is the [Knowledge filter](/knowledge#create-knowledge-filters), and filters which knowledge sources to search through. This filter is the [Knowledge filter](/knowledge#create-knowledge-filters), and filters which knowledge sources to search through.
* The **Agent** component's Output port is connected to the [**Chat Output** component](https://docs.langflow.org/components-io), which returns the final response to the user or application. * The **Agent** component's Output port is connected to the [**Chat Output** component](https://docs.langflow.org/components-io), which returns the final response to the user or application.
* An [**MCP Tools** component](https://docs.langflow.org/mcp-client) is connected to the Agent's **Tools** port. This component calls the [OpenSearch URL Ingestion flow](/ingestion#url-flow), which Langflow uses as an MCP server to fetch content from URLs and store it in OpenSearch.
<PartialModifyFlows /> <PartialModifyFlows />

View file

@ -50,4 +50,31 @@ If you want to use OpenRAG's built-in pipeline instead of Docling serve, set `DI
The built-in pipeline still uses the Docling processor, but uses it directly without the Docling Serve API. The built-in pipeline still uses the Docling processor, but uses it directly without the Docling Serve API.
For more information, see [`processors.py` in the OpenRAG repository](https://github.com/langflow-ai/openrag/blob/main/src/models/processors.py#L58). For more information, see [`processors.py` in the OpenRAG repository](https://github.com/langflow-ai/openrag/blob/main/src/models/processors.py#L58).
## Knowledge ingestion flows
[Flows](https://docs.langflow.org/concepts-overview) in Langflow are functional representations of application workflows, with multiple [component](https://docs.langflow.org/concepts-components) nodes connected as single steps in a workflow.
The **OpenSearch Ingestion** flow is the default knowledge ingestion flow in OpenRAG: when you **Add Knowledge** in OpenRAG, you run the OpenSearch Ingestion flow in the background. The flow ingests documents using **Docling Serve** to import and process documents.
This flow contains ten components connected together to process and store documents in your knowledge base.
* The [**Docling Serve** component](https://docs.langflow.org/bundles-docling) processes input documents by connecting to your instance of Docling Serve.
* The [**Export DoclingDocument** component](https://docs.langflow.org/components-docling) exports the processed DoclingDocument to markdown format with image export mode set to placeholder. This conversion turns the structured document data into a standardized format for further processing.
* Three [**DataFrame Operations** components](https://docs.langflow.org/components-processing#dataframe-operations) sequentially add the metadata columns `filename`, `file_size`, and `mimetype` to the document data.
* The [**Split Text** component](https://docs.langflow.org/components-processing#split-text) splits the processed text into chunks with a chunk size of 1000 characters and an overlap of 200 characters.
* Four **Secret Input** components provide secure access to configuration variables: `CONNECTOR_TYPE`, `OWNER`, `OWNER_EMAIL`, and `OWNER_NAME`. These are runtime variables populated from OAuth login.
* The **Create Data** component combines the secret inputs into a structured data object that will be associated with the document embeddings.
* The [**Embedding Model** component](https://docs.langflow.org/components-embedding-models) generates vector embeddings using OpenAI's `text-embedding-3-small` model. The embedding model is selected during application onboarding and cannot be changed.
* The [**OpenSearch** component](https://docs.langflow.org/bundles-elastic#opensearch) stores the processed documents and their embeddings in the `documents` index at `https://opensearch:9200`. By default, the component is authenticated with a JWT token, but you can also select `basic` auth mode, and enter your OpenSearch admin username and password.
<PartialModifyFlows />
### OpenSearch URL Ingestion flow {#url-flow}
An additional knowledge ingestion flow is included in OpenRAG, where it is used as an MCP tool by the [**OpenRAG OpenSearch Agent flow**](/agents#flow).
The agent calls this component to fetch web content, and the results are ingested into OpenSearch.
For more on using MCP clients in Langflow, see [MCP clients](https://docs.langflow.org/mcp-client).\
To connect additional MCP servers to the MCP client, see [Connect to MCP servers from your application](https://docs.langflow.org/mcp-tutorial).

View file

@ -18,6 +18,7 @@ OpenSearch provides powerful hybrid search capabilities with enterprise-grade se
## Ingest knowledge ## Ingest knowledge
OpenRAG supports knowledge ingestion through direct file uploads and OAuth connectors. OpenRAG supports knowledge ingestion through direct file uploads and OAuth connectors.
To configure the knowledge ingestion pipeline parameters, see [Docling Ingestion](/ingestion).
### Direct file ingestion ### Direct file ingestion
@ -101,10 +102,6 @@ Documents are processed with the default **Knowledge Ingest** flow, so if you wa
<PartialModifyFlows /> <PartialModifyFlows />
### Knowledge ingestion settings
To configure the knowledge ingestion pipeline parameters, see [Docling Ingestion](/ingestion).
## Create knowledge filters ## Create knowledge filters
OpenRAG includes a knowledge filter system for organizing and managing document collections. OpenRAG includes a knowledge filter system for organizing and managing document collections.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 951 KiB

After

Width:  |  Height:  |  Size: 1,004 KiB

View file

@ -74,7 +74,7 @@ export const KnowledgeSearchInput = () => {
{queryOverride && ( {queryOverride && (
<Button <Button
variant="ghost" variant="ghost"
className="h-full !px-1.5 !py-0" className="h-full rounded-sm !px-1.5 !py-0"
type="button" type="button"
onClick={() => { onClick={() => {
setSearchQueryInput(""); setSearchQueryInput("");
@ -87,7 +87,7 @@ export const KnowledgeSearchInput = () => {
<Button <Button
variant="ghost" variant="ghost"
className={cn( className={cn(
"h-full !px-1.5 !py-0 hidden group-focus-within/input:block", "h-full rounded-sm !px-1.5 !py-0 hidden group-focus-within/input:block",
searchQueryInput && "block" searchQueryInput && "block"
)} )}
type="submit" type="submit"

View file

@ -5,14 +5,9 @@ import { useRouter, useSearchParams } from "next/navigation";
import { Suspense, useCallback, useEffect, useMemo, useState } from "react"; import { Suspense, useCallback, useEffect, useMemo, useState } from "react";
// import { Label } from "@/components/ui/label"; // import { Label } from "@/components/ui/label";
// import { Checkbox } from "@/components/ui/checkbox"; // import { Checkbox } from "@/components/ui/checkbox";
import { filterAccentClasses } from "@/components/knowledge-filter-panel";
import { ProtectedRoute } from "@/components/protected-route"; import { ProtectedRoute } from "@/components/protected-route";
import { Button } from "@/components/ui/button"; import { Button } from "@/components/ui/button";
import { Checkbox } from "@/components/ui/checkbox";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context"; import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context";
import { useTask } from "@/contexts/task-context";
import { import {
type ChunkResult, type ChunkResult,
type File, type File,
@ -35,9 +30,9 @@ function ChunksPageContent() {
const { parsedFilterData, queryOverride } = useKnowledgeFilter(); const { parsedFilterData, queryOverride } = useKnowledgeFilter();
const filename = searchParams.get("filename"); const filename = searchParams.get("filename");
const [chunks, setChunks] = useState<ChunkResult[]>([]); const [chunks, setChunks] = useState<ChunkResult[]>([]);
const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState< // const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState<
ChunkResult[] // ChunkResult[]
>([]); // >([]);
// const [selectedChunks, setSelectedChunks] = useState<Set<number>>(new Set()); // const [selectedChunks, setSelectedChunks] = useState<Set<number>>(new Set());
const [activeCopiedChunkIndex, setActiveCopiedChunkIndex] = useState< const [activeCopiedChunkIndex, setActiveCopiedChunkIndex] = useState<
number | null number | null
@ -126,26 +121,25 @@ function ChunksPageContent() {
return ( return (
<div className="flex flex-col h-full"> <div className="flex flex-col h-full">
<div className="flex flex-col h-full"> {/* Header */}
{/* Header */} <div className="flex flex-col mb-6">
<div className="flex flex-col mb-6"> <div className="flex items-center gap-3 mb-6">
<div className="flex items-center gap-3 mb-6"> <Button
<Button variant="ghost"
variant="ghost" onClick={handleBack}
onClick={handleBack} size="sm"
size="sm" className="max-w-8 max-h-8 -m-2"
className="max-w-8 max-h-8 -m-2" >
> <ArrowLeft size={24} />
<ArrowLeft size={24} /> </Button>
</Button> <h1 className="text-lg font-semibold">
<h1 className="text-lg font-semibold"> {/* Removes file extension from filename */}
{/* Removes file extension from filename */} {filename.replace(/\.[^/.]+$/, "")}
{filename.replace(/\.[^/.]+$/, "")} </h1>
</h1> </div>
</div> <div className="flex flex-1">
<div className="flex flex-1"> <KnowledgeSearchInput />
<KnowledgeSearchInput /> {/* <div className="flex items-center pl-4 gap-2">
{/* <div className="flex items-center pl-4 gap-2">
<Checkbox <Checkbox
id="selectAllChunks" id="selectAllChunks"
checked={selectAll} checked={selectAll}
@ -160,11 +154,12 @@ function ChunksPageContent() {
Select all Select all
</Label> </Label>
</div> */} </div> */}
</div>
</div> </div>
</div>
{/* Content Area - matches knowledge page structure */} <div className="grid gap-6 grid-cols-1 lg:grid-cols-[3fr_1fr]">
<div className="flex-1 overflow-auto pr-6"> {/* Content Area */}
<div className="row-start-2 lg:row-start-1">
{isFetching ? ( {isFetching ? (
<div className="flex items-center justify-center h-64"> <div className="flex items-center justify-center h-64">
<div className="text-center"> <div className="text-center">
@ -185,7 +180,7 @@ function ChunksPageContent() {
</div> </div>
) : ( ) : (
<div className="space-y-4 pb-6"> <div className="space-y-4 pb-6">
{chunksFilteredByQuery.map((chunk, index) => ( {chunks.map((chunk, index) => (
<div <div
key={chunk.filename + index} key={chunk.filename + index}
className="bg-muted rounded-lg p-4 border border-border/50" className="bg-muted rounded-lg p-4 border border-border/50"
@ -242,31 +237,30 @@ function ChunksPageContent() {
</div> </div>
)} )}
</div> </div>
</div> {/* Right panel - Summary (TODO), Technical details, */}
{/* Right panel - Summary (TODO), Technical details, */} {chunks.length > 0 && (
{chunks.length > 0 && ( <div className="min-w-[200px]">
<div className="w-[320px] py-20 px-2"> <div className="mb-8">
<div className="mb-8"> <h2 className="text-xl font-semibold mb-4">Technical details</h2>
<h2 className="text-xl font-semibold mt-3 mb-4"> <dl>
Technical details <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
</h2> <dt className="text-sm/6 text-muted-foreground">
<dl> Total chunks
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> </dt>
<dt className="text-sm/6 text-muted-foreground"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
Total chunks {chunks.length}
</dt> </dd>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> </div>
{chunks.length} <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
</dd> <dt className="text-sm/6 text-muted-foreground">
</div> Avg length
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> </dt>
<dt className="text-sm/6 text-muted-foreground">Avg length</dt> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> {averageChunkLength.toFixed(0)} chars
{averageChunkLength.toFixed(0)} chars </dd>
</dd> </div>
</div> {/* TODO: Uncomment after data is available */}
{/* TODO: Uncomment after data is available */} {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Process time</dt> <dt className="text-sm/6 text-muted-foreground">Process time</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
</dd> </dd>
@ -276,54 +270,55 @@ function ChunksPageContent() {
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
</dd> </dd>
</div> */} </div> */}
</dl> </dl>
</div> </div>
<div className="mb-8"> <div className="mb-4">
<h2 className="text-xl font-semibold mt-2 mb-3"> <h2 className="text-xl font-semibold mt-2 mb-3">
Original document Original document
</h2> </h2>
<dl> <dl>
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Name</dt> <dt className="text-sm/6 text-muted-foreground">Name</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData?.filename} {fileData?.filename}
</dd> </dd>
</div> */} </div> */}
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Type</dt> <dt className="text-sm/6 text-muted-foreground">Type</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"} {fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"}
</dd> </dd>
</div> </div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Size</dt> <dt className="text-sm/6 text-muted-foreground">Size</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData?.size {fileData?.size
? `${Math.round(fileData.size / 1024)} KB` ? `${Math.round(fileData.size / 1024)} KB`
: "Unknown"} : "Unknown"}
</dd> </dd>
</div> </div>
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Uploaded</dt> <dt className="text-sm/6 text-muted-foreground">Uploaded</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
N/A N/A
</dd> </dd>
</div> */} </div> */}
{/* TODO: Uncomment after data is available */} {/* TODO: Uncomment after data is available */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Source</dt> <dt className="text-sm/6 text-muted-foreground">Source</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"></dd> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"></dd>
</div> */} </div> */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Updated</dt> <dt className="text-sm/6 text-muted-foreground">Updated</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
N/A N/A
</dd> </dd>
</div> */} </div> */}
</dl> </dl>
</div>
</div> </div>
</div> )}
)} </div>
</div> </div>
); );
} }

View file

@ -1,6 +1,10 @@
[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"
[project] [project]
name = "openrag" name = "openrag"
version = "0.1.15" version = "0.1.18"
description = "Add your description here" description = "Add your description here"
readme = "README.md" readme = "README.md"
requires-python = ">=3.13" requires-python = ">=3.13"

View file

@ -1,122 +0,0 @@
# Compose stack for OpenRAG: OpenSearch, OpenSearch Dashboards, the OpenRAG
# backend and frontend, and Langflow. Image tags default to `latest` unless
# OPENRAG_VERSION / LANGFLOW_VERSION are set in the environment.
services:
# Search/vector store. Published on host ports 9200 and 9600.
opensearch:
image: phact/openrag-opensearch:${OPENRAG_VERSION:-latest}
#build:
# context: .
# dockerfile: Dockerfile
container_name: os
# NOTE(review): OpenSearch is started after the backend here, while the
# backend is configured to connect to OpenSearch - confirm this ordering is
# intentional.
depends_on:
- openrag-backend
environment:
- discovery.type=single-node
- OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_PASSWORD}
# Run security setup in background after OpenSearch starts
command: >
bash -c "
# Start OpenSearch in background
/usr/share/opensearch/opensearch-docker-entrypoint.sh opensearch &
# Wait a bit for OpenSearch to start, then apply security config
sleep 10 && /usr/share/opensearch/setup-security.sh &
# Wait for background processes
wait
"
ports:
- "9200:9200"
- "9600:9600"
# OpenSearch Dashboards UI on host port 5601, logging in as `admin`.
dashboards:
image: opensearchproject/opensearch-dashboards:3.0.0
container_name: osdash
depends_on:
- opensearch
environment:
OPENSEARCH_HOSTS: '["https://opensearch:9200"]'
OPENSEARCH_USERNAME: "admin"
OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD}
ports:
- "5601:5601"
# OpenRAG API service. No host port is published; other services reach it
# by service name on the Compose network.
openrag-backend:
image: phact/openrag-backend:${OPENRAG_VERSION:-latest}
#build:
#context: .
#dockerfile: Dockerfile.backend
container_name: openrag-backend
depends_on:
- langflow
environment:
- OPENSEARCH_HOST=opensearch
- LANGFLOW_URL=http://langflow:7860
- LANGFLOW_PUBLIC_URL=${LANGFLOW_PUBLIC_URL}
- LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
- LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
- LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
- LANGFLOW_CHAT_FLOW_ID=${LANGFLOW_CHAT_FLOW_ID}
- LANGFLOW_INGEST_FLOW_ID=${LANGFLOW_INGEST_FLOW_ID}
- LANGFLOW_URL_INGEST_FLOW_ID=${LANGFLOW_URL_INGEST_FLOW_ID}
# Defaults to `false` when the variable is unset or empty.
- DISABLE_INGEST_WITH_LANGFLOW=${DISABLE_INGEST_WITH_LANGFLOW:-false}
- NUDGES_FLOW_ID=${NUDGES_FLOW_ID}
- OPENSEARCH_PORT=9200
- OPENSEARCH_USERNAME=admin
- OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
- OPENAI_API_KEY=${OPENAI_API_KEY}
# NOTE(review): NVIDIA_* variables are set but this service declares no
# GPU reservation in this file - confirm they are wanted here.
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
- NVIDIA_VISIBLE_DEVICES=all
- GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID}
- GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET}
- MICROSOFT_GRAPH_OAUTH_CLIENT_ID=${MICROSOFT_GRAPH_OAUTH_CLIENT_ID}
- MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET=${MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET}
- WEBHOOK_BASE_URL=${WEBHOOK_BASE_URL}
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
# Host directories bind-mounted into /app; `:Z` asks Docker to apply a
# private SELinux label to the mounted content.
volumes:
- ./documents:/app/documents:Z
- ./keys:/app/keys:Z
- ./flows:/app/flows:Z
# Web UI on host port 3000; talks to the backend by service name.
openrag-frontend:
image: phact/openrag-frontend:${OPENRAG_VERSION:-latest}
#build:
#context: .
#dockerfile: Dockerfile.frontend
container_name: openrag-frontend
depends_on:
- openrag-backend
environment:
- OPENRAG_BACKEND_HOST=openrag-backend
ports:
- "3000:3000"
# Langflow on host port 7860; loads flows from the shared ./flows mount.
langflow:
volumes:
- ./flows:/app/flows:Z
image: phact/openrag-langflow:${LANGFLOW_VERSION:-latest}
container_name: langflow
ports:
- "7860:7860"
environment:
- OPENAI_API_KEY=${OPENAI_API_KEY}
- LANGFLOW_LOAD_FLOWS_PATH=/app/flows
- LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
# Placeholder values for the variables listed in
# LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT below.
- JWT=None
- OWNER=None
- OWNER_NAME=None
- OWNER_EMAIL=None
- CONNECTOR_TYPE=system
- CONNECTOR_TYPE_URL=url
# NOTE(review): in list-style `environment` entries the double quotes are
# presumably passed through as part of the value ("{}" rather than {}) -
# confirm this is what the flow expects.
- OPENRAG-QUERY-FILTER="{}"
- OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
- FILENAME=None
- MIMETYPE=None
- FILESIZE=0
# NOTE(review): CONNECTOR_TYPE_URL is defined above but is not in this list -
# confirm whether it should be.
- LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT,OPENRAG-QUERY-FILTER,OPENSEARCH_PASSWORD,OWNER,OWNER_NAME,OWNER_EMAIL,CONNECTOR_TYPE,FILENAME,MIMETYPE,FILESIZE
- LANGFLOW_LOG_LEVEL=DEBUG
- LANGFLOW_AUTO_LOGIN=${LANGFLOW_AUTO_LOGIN}
- LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
- LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
- LANGFLOW_NEW_USER_IS_ACTIVE=${LANGFLOW_NEW_USER_IS_ACTIVE}
- LANGFLOW_ENABLE_SUPERUSER_CLI=${LANGFLOW_ENABLE_SUPERUSER_CLI}
# - DEFAULT_FOLDER_NAME=OpenRAG
- HIDE_GETTING_STARTED_PROGRESS=true

View file

@ -0,0 +1 @@
../docker-compose-cpu.yml

View file

@ -1,122 +0,0 @@
# Compose stack for OpenRAG with GPU access for the backend: OpenSearch,
# OpenSearch Dashboards, the OpenRAG backend and frontend, and Langflow.
# Image tags default to `latest` unless OPENRAG_VERSION / LANGFLOW_VERSION
# are set in the environment.
services:
# Search/vector store. Published on host ports 9200 and 9600.
opensearch:
image: phact/openrag-opensearch:${OPENRAG_VERSION:-latest}
#build:
#context: .
#dockerfile: Dockerfile
container_name: os
# NOTE(review): OpenSearch is started after the backend here, while the
# backend is configured to connect to OpenSearch - confirm this ordering is
# intentional.
depends_on:
- openrag-backend
environment:
- discovery.type=single-node
- OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_PASSWORD}
# Run security setup in background after OpenSearch starts
command: >
bash -c "
# Start OpenSearch in background
/usr/share/opensearch/opensearch-docker-entrypoint.sh opensearch &
# Wait a bit for OpenSearch to start, then apply security config
sleep 10 && /usr/share/opensearch/setup-security.sh &
# Wait for background processes
wait
"
ports:
- "9200:9200"
- "9600:9600"
# OpenSearch Dashboards UI on host port 5601, logging in as `admin`.
dashboards:
image: opensearchproject/opensearch-dashboards:3.0.0
container_name: osdash
depends_on:
- opensearch
environment:
OPENSEARCH_HOSTS: '["https://opensearch:9200"]'
OPENSEARCH_USERNAME: "admin"
OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD}
ports:
- "5601:5601"
# OpenRAG API service. No host port is published; other services reach it
# by service name on the Compose network.
openrag-backend:
image: phact/openrag-backend:${OPENRAG_VERSION:-latest}
#build:
#context: .
#dockerfile: Dockerfile.backend
container_name: openrag-backend
depends_on:
- langflow
environment:
- OPENSEARCH_HOST=opensearch
- LANGFLOW_URL=http://langflow:7860
- LANGFLOW_PUBLIC_URL=${LANGFLOW_PUBLIC_URL}
# NOTE(review): LANGFLOW_SECRET_KEY is passed to the langflow service below
# but not to this service - confirm the backend does not need it.
- LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
- LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
- LANGFLOW_CHAT_FLOW_ID=${LANGFLOW_CHAT_FLOW_ID}
- LANGFLOW_INGEST_FLOW_ID=${LANGFLOW_INGEST_FLOW_ID}
- LANGFLOW_URL_INGEST_FLOW_ID=${LANGFLOW_URL_INGEST_FLOW_ID}
# Defaults to `false` when the variable is unset or empty.
- DISABLE_INGEST_WITH_LANGFLOW=${DISABLE_INGEST_WITH_LANGFLOW:-false}
- NUDGES_FLOW_ID=${NUDGES_FLOW_ID}
- OPENSEARCH_PORT=9200
- OPENSEARCH_USERNAME=admin
- OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
- OPENAI_API_KEY=${OPENAI_API_KEY}
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
- NVIDIA_VISIBLE_DEVICES=all
- GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID}
- GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET}
- MICROSOFT_GRAPH_OAUTH_CLIENT_ID=${MICROSOFT_GRAPH_OAUTH_CLIENT_ID}
- MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET=${MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET}
- WEBHOOK_BASE_URL=${WEBHOOK_BASE_URL}
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
# Host directories bind-mounted into /app; `:Z` asks Docker to apply a
# private SELinux label to the mounted content.
volumes:
- ./documents:/app/documents:Z
- ./keys:/app/keys:Z
- ./flows:/app/flows:Z
# Requests access to all host GPUs for this container.
gpus: all
# Web UI on host port 3000; talks to the backend by service name.
openrag-frontend:
image: phact/openrag-frontend:${OPENRAG_VERSION:-latest}
#build:
#context: .
#dockerfile: Dockerfile.frontend
container_name: openrag-frontend
depends_on:
- openrag-backend
environment:
- OPENRAG_BACKEND_HOST=openrag-backend
ports:
- "3000:3000"
# Langflow on host port 7860; loads flows from the shared ./flows mount.
langflow:
volumes:
- ./flows:/app/flows:Z
image: phact/openrag-langflow:${LANGFLOW_VERSION:-latest}
container_name: langflow
ports:
- "7860:7860"
environment:
- OPENAI_API_KEY=${OPENAI_API_KEY}
- LANGFLOW_LOAD_FLOWS_PATH=/app/flows
- LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
# Placeholder values for the variables listed in
# LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT below.
- JWT=None
- OWNER=None
- OWNER_NAME=None
- OWNER_EMAIL=None
- CONNECTOR_TYPE=system
- CONNECTOR_TYPE_URL=url
# NOTE(review): in list-style `environment` entries the double quotes are
# presumably passed through as part of the value ("{}" rather than {}) -
# confirm this is what the flow expects.
- OPENRAG-QUERY-FILTER="{}"
- OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
- FILENAME=None
- MIMETYPE=None
- FILESIZE=0
# NOTE(review): CONNECTOR_TYPE_URL is defined above but is not in this list -
# confirm whether it should be.
- LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT,OPENRAG-QUERY-FILTER,OPENSEARCH_PASSWORD,OWNER,OWNER_NAME,OWNER_EMAIL,CONNECTOR_TYPE,FILENAME,MIMETYPE,FILESIZE
- LANGFLOW_LOG_LEVEL=DEBUG
- LANGFLOW_AUTO_LOGIN=${LANGFLOW_AUTO_LOGIN}
- LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
- LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
- LANGFLOW_NEW_USER_IS_ACTIVE=${LANGFLOW_NEW_USER_IS_ACTIVE}
- LANGFLOW_ENABLE_SUPERUSER_CLI=${LANGFLOW_ENABLE_SUPERUSER_CLI}
# - DEFAULT_FOLDER_NAME="OpenRAG"
- HIDE_GETTING_STARTED_PROGRESS=true

View file

@ -0,0 +1 @@
../docker-compose.yml

View file

@ -0,0 +1 @@
../../../../documents/2506.08231v1.pdf

View file

@ -0,0 +1 @@
../../../../documents/ai-human-resources.pdf

View file

@ -0,0 +1 @@
../../../../documents/warmup_ocr.pdf

View file

@ -0,0 +1 @@
../../../../../flows/components/ollama_embedding.json

View file

@ -0,0 +1 @@
../../../../../flows/components/ollama_llm.json

View file

@ -0,0 +1 @@
../../../../../flows/components/ollama_llm_text.json

View file

@ -0,0 +1 @@
../../../../../flows/components/watsonx_embedding.json

View file

@ -0,0 +1 @@
../../../../../flows/components/watsonx_llm.json

View file

@ -0,0 +1 @@
../../../../../flows/components/watsonx_llm_text.json

View file

@ -0,0 +1 @@
../../../../flows/ingestion_flow.json

View file

@ -0,0 +1 @@
../../../../flows/openrag_agent.json

View file

@ -0,0 +1 @@
../../../../flows/openrag_ingest_docling.json

View file

@ -0,0 +1 @@
../../../../flows/openrag_nudges.json

View file

@ -0,0 +1 @@
../../../../flows/openrag_url_mcp.json

View file

@ -2,6 +2,7 @@
import sys import sys
from pathlib import Path from pathlib import Path
from typing import Iterable, Optional
from textual.app import App, ComposeResult from textual.app import App, ComposeResult
from utils.logging_config import get_logger from utils.logging_config import get_logger
try: try:
@ -305,41 +306,103 @@ class OpenRAGTUI(App):
return True, "Runtime requirements satisfied" return True, "Runtime requirements satisfied"
def _copy_assets(resource_tree, destination: Path, allowed_suffixes: Optional[Iterable[str]] = None, *, force: bool = False) -> None:
    """Recursively copy packaged assets from ``resource_tree`` into ``destination``.

    Args:
        resource_tree: Directory-like resource exposing ``iterdir()``; each entry
            must provide ``name``, ``is_dir()`` and ``read_bytes()``.
        destination: Target directory. Created (with parents) when missing.
        allowed_suffixes: When given and non-empty, only files whose name ends
            with one of these suffixes are copied. Any iterable is accepted,
            including a one-shot generator; a bare string is treated as a
            single suffix. ``None`` or an empty iterable disables filtering.
        force: When False, files that already exist in ``destination`` are left
            untouched. When True, an existing file is rewritten if its bytes
            differ from the packaged copy (or if it cannot be read).
    """
    # Normalise the filter once. The filter is reused for every file and every
    # sub-directory, so a generator would be exhausted after the first file and
    # a bare string would be iterated character by character.
    if allowed_suffixes is None:
        suffixes = ()
    elif isinstance(allowed_suffixes, str):
        suffixes = (allowed_suffixes,)
    else:
        suffixes = tuple(allowed_suffixes)

    destination.mkdir(parents=True, exist_ok=True)

    for resource in resource_tree.iterdir():
        target_path = destination / resource.name

        if resource.is_dir():
            _copy_assets(resource, target_path, suffixes, force=force)
            continue

        # str.endswith accepts a tuple of candidate suffixes.
        if suffixes and not resource.name.endswith(suffixes):
            continue

        target_exists = target_path.exists()
        if target_exists and not force:
            # Skip before touching the packaged bytes: nothing will be written.
            continue

        resource_bytes = resource.read_bytes()

        if target_exists:
            try:
                if target_path.read_bytes() == resource_bytes:
                    continue
            except Exception as read_error:
                # An unreadable target falls through and is overwritten below.
                logger.debug(f"Failed to read existing asset {target_path}: {read_error}")

        target_path.write_bytes(resource_bytes)
        logger.info(f"Copied bundled asset: {target_path}")
def copy_sample_documents(*, force: bool = False) -> None:
    """Populate ``./documents`` with the PDF samples bundled in ``tui._assets.documents``.

    Files missing from the directory are copied. With ``force=True``, existing
    files whose bytes differ from the packaged copy are refreshed as well.
    Any failure is logged at debug level and swallowed: the app can work
    without sample documents.
    """
    target_dir = Path("documents")

    try:
        _copy_assets(
            files("tui._assets.documents"),
            target_dir,
            allowed_suffixes=(".pdf",),
            force=force,
        )
    except Exception as e:
        # Best effort only; missing samples are not a critical error.
        logger.debug(f"Could not copy sample documents: {e}")
def copy_sample_flows(*, force: bool = False) -> None:
    """Populate ``./flows`` with the JSON flows bundled in ``tui._assets.flows``.

    Files missing from the directory are copied. With ``force=True``, existing
    files whose bytes differ from the packaged copy are refreshed as well.
    Any failure is logged at debug level and swallowed: the app can proceed
    without bundled flows.
    """
    target_dir = Path("flows")

    try:
        _copy_assets(
            files("tui._assets.flows"),
            target_dir,
            allowed_suffixes=(".json",),
            force=force,
        )
    except Exception as e:
        # Best effort only; the app can proceed without bundled flows.
        logger.debug(f"Could not copy sample flows: {e}")
def copy_compose_files(*, force: bool = False) -> None:
    """Place the bundled docker-compose templates in the current directory.

    Handles ``docker-compose.yml`` and ``docker-compose-cpu.yml`` from the
    ``tui._assets`` package. A template is written when the file is missing.
    With ``force=True`` an existing file is also rewritten if its bytes differ
    from the packaged template (or if it cannot be read). Every failure is
    logged at debug level and never raised.
    """
    try:
        assets_root = files("tui._assets")
    except Exception as e:
        logger.debug(f"Could not access compose assets: {e}")
        return

    compose_names = ("docker-compose.yml", "docker-compose-cpu.yml")
    for filename in compose_names:
        destination = Path(filename)

        already_present = destination.exists()
        if already_present and not force:
            continue

        try:
            template = assets_root.joinpath(filename)
            if not template.is_file():
                logger.debug(f"Compose template not found in assets: {filename}")
                continue

            packaged_bytes = template.read_bytes()

            # Re-check on disk so an identical file is not rewritten.
            if destination.exists():
                try:
                    unchanged = destination.read_bytes() == packaged_bytes
                except Exception as read_error:
                    logger.debug(f"Failed to read existing compose file {destination}: {read_error}")
                    unchanged = False
                if unchanged:
                    continue

            destination.write_bytes(packaged_bytes)
            logger.info(f"Copied docker-compose template: {filename}")
        except Exception as error:
            logger.debug(f"Could not copy compose file {filename}: {error}")
def run_tui(): def run_tui():
"""Run the OpenRAG TUI application.""" """Run the OpenRAG TUI application."""
app = None app = None
try: try:
# Copy sample documents on first run # Keep bundled assets aligned with the packaged versions
copy_sample_documents() copy_sample_documents(force=True)
copy_sample_flows(force=True)
copy_compose_files(force=True)
app = OpenRAGTUI() app = OpenRAGTUI()
app.run() app.run()

View file

@ -157,10 +157,22 @@ def guess_host_ip_for_containers(logger=None) -> str:
import logging import logging
import re import re
import shutil import shutil
import socket
import subprocess import subprocess
log = logger or logging.getLogger(__name__) log = logger or logging.getLogger(__name__)
def can_bind_to_address(ip_addr: str) -> bool:
    """Return True when a TCP/IPv4 socket can be bound to ``ip_addr``.

    The probe binds to port 0 so the OS picks any free port, and the socket
    is closed again before returning. A failure is logged at debug level and
    reported as False.
    """
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
            probe.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            probe.bind((ip_addr, 0))  # Port 0 = let OS choose a free port
    except OSError as e:
        # socket.error is an alias of OSError, so this covers both.
        log.debug("Cannot bind to %s: %s", ip_addr, e)
        return False
    return True
def run(cmd, timeout=2, text=True): def run(cmd, timeout=2, text=True):
return subprocess.run(cmd, capture_output=True, text=text, timeout=timeout) return subprocess.run(cmd, capture_output=True, text=text, timeout=timeout)
@ -261,10 +273,23 @@ def guess_host_ip_for_containers(logger=None) -> str:
"Container-reachable host IP candidates: %s", "Container-reachable host IP candidates: %s",
", ".join(ordered_candidates), ", ".join(ordered_candidates),
) )
else:
log.info("Container-reachable host IP: %s", ordered_candidates[0])
return ordered_candidates[0] # Try each candidate and return the first one we can bind to
for ip_addr in ordered_candidates:
if can_bind_to_address(ip_addr):
if len(ordered_candidates) > 1:
log.info("Selected bindable host IP: %s", ip_addr)
else:
log.info("Container-reachable host IP: %s", ip_addr)
return ip_addr
log.debug("Skipping %s (cannot bind)", ip_addr)
# None of the candidates were bindable, fall back to 127.0.0.1
log.warning(
"None of the discovered IPs (%s) can be bound; falling back to 127.0.0.1",
", ".join(ordered_candidates),
)
return "127.0.0.1"
log.warning( log.warning(
"No container bridge IP found. For rootless Podman (slirp4netns) there may be no host bridge; publish ports or use 10.0.2.2 from the container." "No container bridge IP found. For rootless Podman (slirp4netns) there may be no host bridge; publish ports or use 10.0.2.2 from the container."

2
uv.lock generated
View file

@ -2352,7 +2352,7 @@ wheels = [
[[package]] [[package]]
name = "openrag" name = "openrag"
version = "0.1.15" version = "0.1.18"
source = { editable = "." } source = { editable = "." }
dependencies = [ dependencies = [
{ name = "agentd" }, { name = "agentd" },