Merge pull request #637 from langflow-ai/feat-centralized-storage

feat: Centralized storage location for OpenRAG
Sebastián Estévez 2025-12-16 15:04:27 -05:00 committed by GitHub
commit 30b8c9a3ca
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
24 changed files with 625 additions and 162 deletions


@ -14,6 +14,7 @@ jobs:
outputs:
skip_release: ${{ steps.version.outputs.skip_release }}
version: ${{ steps.version.outputs.version }}
docker_version: ${{ steps.version.outputs.docker_version }}
is_prerelease: ${{ steps.version.outputs.is_prerelease }}
steps:
- name: Checkout
@ -26,6 +27,12 @@ jobs:
echo "version=$VERSION" >> $GITHUB_OUTPUT
echo "Version: $VERSION"
# Normalize version per PEP 440 for Docker tags
# e.g., "0.1.53-rc2" -> "0.1.53rc2" to match Python's importlib.metadata
DOCKER_VERSION=$(echo "$VERSION" | sed -E 's/-?(rc|alpha|beta|dev|post)/\1/g')
echo "docker_version=$DOCKER_VERSION" >> $GITHUB_OUTPUT
echo "Docker Version: $DOCKER_VERSION"
# Check if tag already exists
if git rev-parse "v$VERSION" >/dev/null 2>&1; then
echo "Tag v$VERSION already exists, skipping release"
@ -117,13 +124,6 @@ jobs:
- name: Checkout
uses: actions/checkout@v4
- name: Extract version from pyproject.toml
id: version
run: |
VERSION=$(grep '^version = ' pyproject.toml | cut -d '"' -f 2)
echo "version=$VERSION" >> $GITHUB_OUTPUT
echo "Version: $VERSION"
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@ -141,7 +141,7 @@ jobs:
file: ${{ matrix.file }}
platforms: ${{ matrix.platform }}
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ matrix.tag }}:${{ steps.version.outputs.version }}-${{ matrix.arch }}
tags: ${{ matrix.tag }}:${{ needs.check-version.outputs.docker_version }}-${{ matrix.arch }}
cache-from: type=gha,scope=${{ matrix.image }}-${{ matrix.arch }}
cache-to: type=gha,mode=max,scope=${{ matrix.image }}-${{ matrix.arch }}
@ -153,12 +153,6 @@ jobs:
- name: Checkout
uses: actions/checkout@v4
- name: Extract version from pyproject.toml
id: version
run: |
VERSION=$(grep '^version = ' pyproject.toml | cut -d '"' -f 2)
echo "version=$VERSION" >> $GITHUB_OUTPUT
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
@ -167,7 +161,7 @@ jobs:
- name: Create and push multi-arch manifests
run: |
VERSION=${{ steps.version.outputs.version }}
VERSION=${{ needs.check-version.outputs.docker_version }}
# Create versioned tags
docker buildx imagetools create -t langflowai/openrag-backend:$VERSION \
@ -224,13 +218,6 @@ jobs:
- name: Install uv
uses: astral-sh/setup-uv@v3
- name: Extract version from pyproject.toml
id: version
run: |
VERSION=$(grep '^version = ' pyproject.toml | cut -d '"' -f 2)
echo "version=$VERSION" >> $GITHUB_OUTPUT
echo "Version: $VERSION"
- name: Build wheel and source distribution
run: |
uv build
@ -253,8 +240,8 @@ jobs:
- name: Create Release
uses: softprops/action-gh-release@v2
with:
tag_name: v${{ steps.version.outputs.version }}
name: Release ${{ steps.version.outputs.version }}
tag_name: v${{ needs.check-version.outputs.version }}
name: Release ${{ needs.check-version.outputs.version }}
draft: false
prerelease: ${{ needs.check-version.outputs.is_prerelease }}
generate_release_notes: true


@ -38,8 +38,12 @@ jobs:
docker builder prune -af || true
docker-compose -f docker-compose.yml down -v --remove-orphans || true
- name: Cleanup OpenSearch data (root-owned files)
run: |
docker run --rm -v $(pwd):/work alpine rm -rf /work/opensearch-data || true
- run: df -h
- name: Checkout
uses: actions/checkout@v4
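The cleanup step delegates deletion to a root container because files written by the OpenSearch container are root-owned on the host. A minimal host-side sketch of the same trick (path and image mirror the step above):

```python
# Sketch: delete container-owned (root-owned) files on the host by running
# the removal inside a throwaway root container, as the workflow step does.
import subprocess
from pathlib import Path

def clear_root_owned(workdir: Path) -> None:
    subprocess.run(
        ["docker", "run", "--rm",
         "-v", f"{workdir.absolute()}:/work",
         "alpine", "rm", "-rf", "/work/opensearch-data"],
        check=False,  # mirrors the step's "|| true": never fail the job
    )

clear_root_owned(Path.cwd())
```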


@ -80,10 +80,11 @@ services:
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
volumes:
- ./openrag-documents:/app/openrag-documents:Z
- ./keys:/app/keys:Z
- ./flows:/app/flows:U,z
- ./config:/app/config:Z
- ${OPENRAG_DOCUMENTS_PATH:-./openrag-documents}:/app/openrag-documents:Z
- ${OPENRAG_KEYS_PATH:-./keys}:/app/keys:Z
- ${OPENRAG_FLOWS_PATH:-./flows}:/app/flows:U,z
- ${OPENRAG_CONFIG_PATH:-./config}:/app/config:Z
- ${OPENRAG_DATA_PATH:-./data}:/app/data:Z
openrag-frontend:
image: langflowai/openrag-frontend:${OPENRAG_VERSION:-latest}
@ -100,7 +101,7 @@ services:
langflow:
volumes:
- ./flows:/app/flows:U,z
- ${OPENRAG_FLOWS_PATH:-./flows}:/app/flows:U,z
image: langflowai/openrag-langflow:${OPENRAG_VERSION:-latest}
build:
context: .
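Each mount now resolves through compose's `${VAR:-default}` interpolation: an environment variable (typically written by the TUI into .env) wins, otherwise the old CWD-relative directory is used. A sketch of that fallback semantics in Python, for illustration only:

```python
# Sketch: emulate compose's ${VAR:-default} -- the default applies when the
# variable is unset *or* set to an empty string.
import os

def resolve(var: str, default: str) -> str:
    value = os.environ.get(var, "")
    return value if value else default

print(resolve("OPENRAG_DOCUMENTS_PATH", "./openrag-documents"))
print(resolve("OPENRAG_DATA_PATH", "./data"))
```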


@ -34,7 +34,7 @@ dependencies = [
"structlog>=25.4.0",
"docling-serve==1.5.0",
"docling-core==2.48.1",
"easyocr>=1.7.1"
"easyocr>=1.7.1; sys_platform != 'darwin'"
]
[dependency-groups]
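The new PEP 508 environment marker makes easyocr a non-macOS dependency. A quick sketch (again assuming the packaging library) of how such a marker evaluates:

```python
# Sketch: evaluate the environment marker used in pyproject.toml above.
from packaging.markers import Marker

marker = Marker("sys_platform != 'darwin'")
print(marker.evaluate())  # False on macOS (easyocr skipped), True elsewhere
```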


@ -14,10 +14,17 @@ from src.tui.managers.container_manager import ContainerManager
async def main():
"""Clear OpenSearch data directory."""
cm = ContainerManager()
opensearch_data_path = Path("opensearch-data")
# Get opensearch data path from env config (same as container_manager uses)
from src.tui.managers.env_manager import EnvManager
env_manager = EnvManager()
env_manager.load_existing_env()
opensearch_data_path = Path(
env_manager.config.opensearch_data_path.replace("$HOME", str(Path.home()))
).expanduser()
if not opensearch_data_path.exists():
print("opensearch-data directory does not exist")
print(f"opensearch-data directory does not exist at {opensearch_data_path}")
return 0
print("Clearing OpenSearch data directory...")


@ -36,8 +36,10 @@ class ConnectionConfig:
class ConnectionManager:
"""Manages multiple connector connections with persistence"""
def __init__(self, connections_file: str = "connections.json"):
def __init__(self, connections_file: str = "data/connections.json"):
self.connections_file = Path(connections_file)
# Ensure data directory exists
self.connections_file.parent.mkdir(parents=True, exist_ok=True)
self.connections: Dict[str, ConnectionConfig] = {}
self.active_connectors: Dict[str, BaseConnector] = {}


@ -96,11 +96,8 @@ class GoogleDriveConnector(BaseConnector):
client_id = config.get("client_id") or env_client_id
client_secret = config.get("client_secret") or env_client_secret
# Token file default (so callback & workers don't need to pass it)
project_root = Path(__file__).resolve().parent.parent.parent.parent
token_file = config.get("token_file") or str(
project_root / "google_drive_token.json"
)
# Token file default - use data/ directory for persistence
token_file = config.get("token_file") or "data/google_drive_token.json"
Path(token_file).parent.mkdir(parents=True, exist_ok=True)
if not isinstance(client_id, str) or not client_id.strip():


@ -58,9 +58,8 @@ class OneDriveConnector(BaseConnector):
except Exception as e:
logger.debug(f"Failed to get client_secret: {e}")
# Token file setup
project_root = Path(__file__).resolve().parent.parent.parent.parent
token_file = config.get("token_file") or str(project_root / "onedrive_token.json")
# Token file setup - use data/ directory for persistence
token_file = config.get("token_file") or "data/onedrive_token.json"
Path(token_file).parent.mkdir(parents=True, exist_ok=True)
# Only initialize OAuth if we have credentials
@ -72,7 +71,7 @@ class OneDriveConnector(BaseConnector):
oauth_token_file = config["token_file"]
else:
# Use a per-connection cache file to avoid collisions with other connectors
oauth_token_file = f"onedrive_token_{connection_id}.json"
oauth_token_file = f"data/onedrive_token_{connection_id}.json"
# MSA & org both work via /common for OneDrive personal testing
authority = "https://login.microsoftonline.com/common"


@ -66,20 +66,19 @@ class SharePointConnector(BaseConnector):
logger.debug(f"Failed to get client_secret: {e}")
pass # Credentials not available, that's OK for listing
# Token file setup
project_root = Path(__file__).resolve().parent.parent.parent.parent
token_file = config.get("token_file") or str(project_root / "sharepoint_token.json")
# Token file setup - use data/ directory for persistence
token_file = config.get("token_file") or "data/sharepoint_token.json"
Path(token_file).parent.mkdir(parents=True, exist_ok=True)
# Only initialize OAuth if we have credentials
if self.client_id and self.client_secret:
connection_id = config.get("connection_id", "default")
# Use token_file from config if provided, otherwise generate one
if config.get("token_file"):
oauth_token_file = config["token_file"]
else:
oauth_token_file = f"sharepoint_token_{connection_id}.json"
oauth_token_file = f"data/sharepoint_token_{connection_id}.json"
authority = f"https://login.microsoftonline.com/{self.tenant_id}" if self.tenant_id != "common" else "https://login.microsoftonline.com/common"


@ -63,8 +63,8 @@ class AuthService:
# We'll validate client credentials when creating the connector
# Create connection configuration
token_file = f"{connector_type}_{purpose}_{uuid.uuid4().hex[:8]}.json"
# Create connection configuration - use data/ directory for persistence
token_file = f"data/{connector_type}_{purpose}_{uuid.uuid4().hex[:8]}.json"
config = {
"token_file": token_file,
"connector_type": connector_type,


@ -15,9 +15,11 @@ logger = get_logger(__name__)
class ConversationPersistenceService:
"""Simple service to persist conversations to disk"""
def __init__(self, storage_file: str = "conversations.json"):
def __init__(self, storage_file: str = "data/conversations.json"):
self.storage_file = storage_file
# Ensure data directory exists
os.makedirs(os.path.dirname(self.storage_file), exist_ok=True)
self.lock = threading.Lock()
self._conversations = self._load_conversations()
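One caveat worth noting: os.makedirs(os.path.dirname(p)) raises if the path has no directory component, since os.path.dirname("conversations.json") is the empty string. The new default always carries a data/ prefix, so the code above is safe; a slightly more defensive variant (a sketch, not the service's code) would guard the empty case:

```python
# Sketch: guard os.makedirs against bare filenames with no directory part.
import os

def ensure_parent(storage_file: str) -> None:
    parent = os.path.dirname(storage_file)
    if parent:  # "" for a bare filename such as "conversations.json"
        os.makedirs(parent, exist_ok=True)

ensure_parent("data/conversations.json")  # creates data/ if needed
ensure_parent("conversations.json")       # no-op instead of FileNotFoundError
```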


@ -13,9 +13,11 @@ logger = get_logger(__name__)
class SessionOwnershipService:
"""Simple service to track which user owns which session"""
def __init__(self):
self.ownership_file = "session_ownership.json"
self.ownership_file = "data/session_ownership.json"
# Ensure data directory exists
os.makedirs(os.path.dirname(self.ownership_file), exist_ok=True)
self.ownership_data = self._load_ownership_data()
def _load_ownership_data(self) -> Dict[str, Dict[str, any]]:


@ -454,8 +454,30 @@ def _copy_assets(resource_tree, destination: Path, allowed_suffixes: Optional[It
def copy_sample_documents(*, force: bool = False) -> None:
"""Copy sample documents from package to current directory if they don't exist."""
documents_dir = Path("openrag-documents")
"""Copy sample documents from package to host directory.
Uses the first path from OPENRAG_DOCUMENTS_PATHS env var.
Defaults to ~/.openrag/documents if not configured.
"""
from .managers.env_manager import EnvManager
from pathlib import Path
# Get the configured documents path from env
env_manager = EnvManager()
env_manager.load_existing_env()
# Parse the first path from the documents paths config
documents_path_str = env_manager.config.openrag_documents_paths
if documents_path_str:
first_path = documents_path_str.split(',')[0].strip()
# Expand $HOME and ~
first_path = first_path.replace("$HOME", str(Path.home()))
documents_dir = Path(first_path).expanduser()
else:
# Default fallback
documents_dir = Path.home() / ".openrag" / "documents"
documents_dir.mkdir(parents=True, exist_ok=True)
try:
assets_files = files("tui._assets.openrag-documents")
@ -466,8 +488,15 @@ def copy_sample_documents(*, force: bool = False) -> None:
def copy_sample_flows(*, force: bool = False) -> None:
"""Copy sample flows from package to current directory if they don't exist."""
flows_dir = Path("flows")
"""Copy sample flows from package to host directory.
Flows are placed in ~/.openrag/flows/ which will be volume-mounted to containers.
"""
from pathlib import Path
# Flows always go to ~/.openrag/flows/ - this will be volume-mounted
flows_dir = Path.home() / ".openrag" / "flows"
flows_dir.mkdir(parents=True, exist_ok=True)
try:
assets_files = files("tui._assets.flows")
@ -478,7 +507,9 @@ def copy_sample_flows(*, force: bool = False) -> None:
def copy_compose_files(*, force: bool = False) -> None:
"""Copy docker-compose templates into the workspace if they are missing."""
"""Copy docker-compose templates into the TUI workspace if they are missing."""
from utils.paths import get_tui_compose_file
try:
assets_root = files("tui._assets")
except Exception as e:
@ -486,7 +517,9 @@ def copy_compose_files(*, force: bool = False) -> None:
return
for filename in ("docker-compose.yml", "docker-compose.gpu.yml"):
destination = Path(filename)
is_gpu = "gpu" in filename
destination = get_tui_compose_file(gpu=is_gpu)
if destination.exists() and not force:
continue
@ -505,11 +538,177 @@ def copy_compose_files(*, force: bool = False) -> None:
logger.debug(f"Failed to read existing compose file {destination}: {read_error}")
destination.write_bytes(resource_bytes)
logger.info(f"Copied docker-compose template: {filename}")
logger.info(f"Copied docker-compose template to {destination}")
except Exception as error:
logger.debug(f"Could not copy compose file {filename}: {error}")
def migrate_legacy_data_directories():
"""Migrate data from CWD-based directories to ~/.openrag/.
This is a one-time migration for users upgrading from the old layout.
Migrates: documents, flows, keys, config, opensearch-data
Prompts user for confirmation before migrating. If user declines,
exits with a message to downgrade to v1.52 or earlier.
"""
import shutil
import sys
cwd = Path.cwd()
target_base = Path.home() / ".openrag"
marker = target_base / ".migrated"
# Check if migration already completed
if marker.exists():
return
# Define migration mappings: (source_path, target_path, description)
migrations = [
(cwd / "openrag-documents", target_base / "documents", "documents"),
(cwd / "flows", target_base / "flows", "flows"),
(cwd / "keys", target_base / "keys", "keys"),
(cwd / "config", target_base / "config", "config"),
(cwd / "opensearch-data", target_base / "data" / "opensearch-data", "OpenSearch data"),
]
# Check which sources exist and need migration
sources_to_migrate = [(s, t, d) for s, t, d in migrations if s.exists()]
if not sources_to_migrate:
# No legacy data to migrate, just mark as done and update .env paths
marker.parent.mkdir(parents=True, exist_ok=True)
marker.touch()
# Still need to update .env with centralized paths
try:
from managers.env_manager import EnvManager
env_manager = EnvManager()
env_manager.load_existing_env()
# Explicitly set centralized paths (overrides any old CWD-relative paths)
home = str(Path.home())
env_manager.config.openrag_documents_paths = f"{home}/.openrag/documents"
env_manager.config.openrag_documents_path = f"{home}/.openrag/documents"
env_manager.config.openrag_keys_path = f"{home}/.openrag/keys"
env_manager.config.openrag_flows_path = f"{home}/.openrag/flows"
env_manager.config.openrag_config_path = f"{home}/.openrag/config"
env_manager.config.openrag_data_path = f"{home}/.openrag/data"
env_manager.config.opensearch_data_path = f"{home}/.openrag/data/opensearch-data"
env_manager.save_env()
logger.info("Updated .env file with centralized paths")
except Exception as e:
logger.warning(f"Failed to update .env paths: {e}")
return
# Prompt user for confirmation
print("\n" + "=" * 60)
print(" OpenRAG Data Migration Required")
print("=" * 60)
print(f"\nStarting with this version, OpenRAG stores data in:")
print(f" {target_base}")
print("\nThe following will be copied from your current directory:")
for source, target, desc in sources_to_migrate:
print(f" - {desc}: {source.name}/ -> {target}")
print("\nThis is a one-time migration.")
print("\nIf you don't want to migrate, exit and downgrade to v1.52 or earlier.")
try:
response = input("\nProceed with migration? [y/N]: ").strip().lower()
except (EOFError, KeyboardInterrupt):
response = ""
if response != "y":
print("\nMigration cancelled. Exiting.")
sys.exit(0)
print("\nMigrating...")
# Perform migration (always copy, never delete originals)
for source, target, description in sources_to_migrate:
try:
target.parent.mkdir(parents=True, exist_ok=True)
if target.exists():
# Target exists - merge contents (copy only new items)
logger.info(f"Merging {description} from {source} to {target}")
if source.is_dir():
for item in source.iterdir():
src_item = source / item.name
dst_item = target / item.name
if not dst_item.exists():
if src_item.is_dir():
shutil.copytree(src_item, dst_item)
else:
shutil.copy2(src_item, dst_item)
logger.debug(f"Copied {src_item} to {dst_item}")
else:
# Target doesn't exist - copy entire directory
logger.info(f"Copying {description} from {source} to {target}")
if source.is_dir():
shutil.copytree(source, target)
else:
shutil.copy2(source, target)
print(f" Migrated {description}")
except Exception as e:
logger.warning(f"Failed to migrate {description}: {e}")
print(f" Warning: Failed to migrate {description}: {e}")
# Create marker to prevent future migration prompts
marker.parent.mkdir(parents=True, exist_ok=True)
marker.touch()
# Update .env file with centralized paths
try:
from managers.env_manager import EnvManager
env_manager = EnvManager()
env_manager.load_existing_env()
# Explicitly set centralized paths (overrides any old CWD-relative paths)
home = str(Path.home())
env_manager.config.openrag_documents_paths = f"{home}/.openrag/documents"
env_manager.config.openrag_documents_path = f"{home}/.openrag/documents"
env_manager.config.openrag_keys_path = f"{home}/.openrag/keys"
env_manager.config.openrag_flows_path = f"{home}/.openrag/flows"
env_manager.config.openrag_config_path = f"{home}/.openrag/config"
env_manager.config.openrag_data_path = f"{home}/.openrag/data"
env_manager.config.opensearch_data_path = f"{home}/.openrag/data/opensearch-data"
env_manager.save_env()
print(" Updated .env with centralized paths")
logger.info("Updated .env file with centralized paths")
except Exception as e:
logger.warning(f"Failed to update .env paths: {e}")
print(f" Warning: Failed to update .env paths: {e}")
print("\nMigration complete!\n")
logger.info("Data migration completed successfully")
def setup_host_directories():
"""Initialize OpenRAG directory structure on the host.
Creates directories that will be volume-mounted into containers:
- ~/.openrag/documents/ (for document ingestion)
- ~/.openrag/flows/ (for Langflow flows)
- ~/.openrag/keys/ (for JWT keys)
- ~/.openrag/config/ (for configuration)
- ~/.openrag/data/ (for backend data: conversations, OAuth tokens, etc.)
- ~/.openrag/data/opensearch-data/ (for OpenSearch index)
"""
base_dir = Path.home() / ".openrag"
directories = [
base_dir / "documents",
base_dir / "flows",
base_dir / "keys",
base_dir / "config",
base_dir / "data",
base_dir / "data" / "opensearch-data",
]
for directory in directories:
directory.mkdir(parents=True, exist_ok=True)
logger.debug(f"Ensured directory exists: {directory}")
def run_tui():
"""Run the OpenRAG TUI application."""
# Check for native Windows before launching TUI
@ -526,6 +725,12 @@ def run_tui():
app = None
try:
# Migrate legacy data directories from CWD to ~/.openrag/
migrate_legacy_data_directories()
# Initialize host directory structure
setup_host_directories()
# Keep bundled assets aligned with the packaged versions
copy_sample_documents(force=True)
copy_sample_flows(force=True)
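The startup sequence above hinges on the marker-file pattern: ~/.openrag/.migrated makes the copy-based migration idempotent across launches. The pattern reduced to a minimal sketch (paths illustrative):

```python
# Minimal sketch of the one-time, copy-only migration guarded by a marker file.
import shutil
from pathlib import Path

def migrate_once(source: Path, target_base: Path) -> None:
    marker = target_base / ".migrated"
    if marker.exists():              # migration already ran on a previous launch
        return
    target = target_base / source.name
    if source.exists() and not target.exists():
        shutil.copytree(source, target)  # copy, never delete the original
    marker.parent.mkdir(parents=True, exist_ok=True)
    marker.touch()                   # suppress future prompts

migrate_once(Path.cwd() / "flows", Path.home() / ".openrag")
```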


@ -87,11 +87,25 @@ class ContainerManager:
}
def _find_compose_file(self, filename: str) -> Path:
"""Find compose file in current directory or package resources."""
# First check current working directory
cwd_path = Path(filename)
"""Find compose file in centralized TUI directory, current directory, or package resources."""
from utils.paths import get_tui_compose_file
self._compose_search_log = f"Searching for {filename}:\n"
self._compose_search_log += f" 1. Current directory: {cwd_path.absolute()}"
# First check centralized TUI directory (~/.openrag/tui/)
is_gpu = "gpu" in filename
tui_path = get_tui_compose_file(gpu=is_gpu)
self._compose_search_log += f" 1. TUI directory: {tui_path.absolute()}"
if tui_path.exists():
self._compose_search_log += " ✓ FOUND"
return tui_path
else:
self._compose_search_log += " ✗ NOT FOUND"
# Then check current working directory (for backward compatibility)
cwd_path = Path(filename)
self._compose_search_log += f"\n 2. Current directory: {cwd_path.absolute()}"
if cwd_path.exists():
self._compose_search_log += " ✓ FOUND"
@ -99,28 +113,29 @@ class ContainerManager:
else:
self._compose_search_log += " ✗ NOT FOUND"
# Then check package resources
self._compose_search_log += f"\n 2. Package resources: "
# Finally check package resources
self._compose_search_log += f"\n 3. Package resources: "
try:
pkg_files = files("tui._assets")
self._compose_search_log += f"{pkg_files}"
compose_resource = pkg_files / filename
if compose_resource.is_file():
self._compose_search_log += f" ✓ FOUND, copying to current directory"
# Copy to cwd for compose command to work
self._compose_search_log += f" ✓ FOUND, copying to TUI directory"
# Copy to TUI directory
tui_path.parent.mkdir(parents=True, exist_ok=True)
content = compose_resource.read_text()
cwd_path.write_text(content)
return cwd_path
tui_path.write_text(content)
return tui_path
else:
self._compose_search_log += f" ✗ NOT FOUND"
except Exception as e:
self._compose_search_log += f" ✗ SKIPPED ({e})"
# Don't log this as an error since it's expected when running from source
# Fall back to original path (will fail later if not found)
self._compose_search_log += f"\n 3. Falling back to: {cwd_path.absolute()}"
return Path(filename)
# Fall back to TUI path (will fail later if not found)
self._compose_search_log += f"\n 4. Falling back to: {tui_path.absolute()}"
return tui_path
def _get_env_from_file(self) -> Dict[str, str]:
"""Read environment variables from .env file, prioritizing file values over os.environ.
@ -136,9 +151,17 @@ class ContainerManager:
even if os.environ has stale values.
"""
from dotenv import load_dotenv
from utils.paths import get_tui_env_file
env = dict(os.environ) # Start with current environment
env_file = Path(".env")
# Check centralized TUI .env location first
tui_env_file = get_tui_env_file()
if tui_env_file.exists():
env_file = tui_env_file
else:
# Fall back to CWD .env for backward compatibility
env_file = Path(".env")
if env_file.exists():
try:
@ -147,6 +170,7 @@ class ContainerManager:
load_dotenv(dotenv_path=env_file, override=True)
# Update our dict with all environment variables (including those from .env)
env.update(os.environ)
logger.debug(f"Loaded environment from {env_file}")
except Exception as e:
logger.debug(f"Error reading .env file for Docker Compose: {e}")
@ -269,7 +293,17 @@ class ContainerManager:
use_gpu = not cpu_mode
# Build compose command with override pattern
cmd = self.runtime_info.compose_command + ["-f", str(self.compose_file)]
cmd = self.runtime_info.compose_command.copy()
# Add --env-file to explicitly specify the .env location
from utils.paths import get_tui_env_file
tui_env_file = get_tui_env_file()
if tui_env_file.exists():
cmd.extend(["--env-file", str(tui_env_file)])
elif Path(".env").exists():
cmd.extend(["--env-file", ".env"])
cmd.extend(["-f", str(self.compose_file)])
if use_gpu and self.gpu_compose_file.exists():
cmd.extend(["-f", str(self.gpu_compose_file)])
cmd.extend(args)
@ -315,7 +349,17 @@ class ContainerManager:
use_gpu = not cpu_mode
# Build compose command with override pattern
cmd = self.runtime_info.compose_command + ["-f", str(self.compose_file)]
cmd = self.runtime_info.compose_command.copy()
# Add --env-file to explicitly specify the .env location
from utils.paths import get_tui_env_file
tui_env_file = get_tui_env_file()
if tui_env_file.exists():
cmd.extend(["--env-file", str(tui_env_file)])
elif Path(".env").exists():
cmd.extend(["--env-file", ".env"])
cmd.extend(["-f", str(self.compose_file)])
if use_gpu and self.gpu_compose_file.exists():
cmd.extend(["-f", str(self.gpu_compose_file)])
cmd.extend(args)
@ -388,7 +432,17 @@ class ContainerManager:
use_gpu = not cpu_mode
# Build compose command with override pattern
cmd = self.runtime_info.compose_command + ["-f", str(self.compose_file)]
cmd = self.runtime_info.compose_command.copy()
# Add --env-file to explicitly specify the .env location
from utils.paths import get_tui_env_file
tui_env_file = get_tui_env_file()
if tui_env_file.exists():
cmd.extend(["--env-file", str(tui_env_file)])
elif Path(".env").exists():
cmd.extend(["--env-file", ".env"])
cmd.extend(["-f", str(self.compose_file)])
if use_gpu and self.gpu_compose_file.exists():
cmd.extend(["-f", str(self.gpu_compose_file)])
cmd.extend(args)
@ -794,13 +848,24 @@ class ContainerManager:
async def _parse_compose_images(self) -> list[str]:
"""Get resolved image names from compose files using docker/podman compose, with robust fallbacks."""
from utils.paths import get_tui_env_file
images: set[str] = set()
# Try both GPU and CPU modes to get all images
for use_gpu in [True, False]:
try:
# Build compose command with override pattern
cmd = self.runtime_info.compose_command + ["-f", str(self.compose_file)]
cmd = self.runtime_info.compose_command.copy()
# Add --env-file to explicitly specify the .env location
tui_env_file = get_tui_env_file()
if tui_env_file.exists():
cmd.extend(["--env-file", str(tui_env_file)])
elif Path(".env").exists():
cmd.extend(["--env-file", ".env"])
cmd.extend(["-f", str(self.compose_file)])
if use_gpu and self.gpu_compose_file.exists():
cmd.extend(["-f", str(self.gpu_compose_file)])
cmd.extend(["config", "--format", "json"])
@ -821,7 +886,16 @@ class ContainerManager:
continue
# Fallback to YAML output (for older compose versions)
cmd = self.runtime_info.compose_command + ["-f", str(self.compose_file)]
cmd = self.runtime_info.compose_command.copy()
# Add --env-file to explicitly specify the .env location
tui_env_file = get_tui_env_file()
if tui_env_file.exists():
cmd.extend(["--env-file", str(tui_env_file)])
elif Path(".env").exists():
cmd.extend(["--env-file", ".env"])
cmd.extend(["-f", str(self.compose_file)])
if use_gpu and self.gpu_compose_file.exists():
cmd.extend(["-f", str(self.gpu_compose_file)])
cmd.append("config")
@ -966,7 +1040,7 @@ class ContainerManager:
up_success = {"value": True}
error_messages = []
async for message, replace_last in self._stream_compose_command(["up", "-d"], up_success, cpu_mode):
async for message, replace_last in self._stream_compose_command(["up", "-d", "--no-build"], up_success, cpu_mode):
# Detect error patterns in the output
lower_msg = message.lower()
@ -1041,7 +1115,7 @@ class ContainerManager:
# Restart with new images using streaming output
restart_success = True
async for message, replace_last in self._run_compose_command_streaming(
["up", "-d", "--force-recreate"], cpu_mode
["up", "-d", "--force-recreate", "--no-build"], cpu_mode
):
yield False, message, replace_last
# Check for error patterns in the output
@ -1053,6 +1127,39 @@ class ContainerManager:
else:
yield False, "Some errors occurred during service restart", False
async def clear_directory_with_container(self, path: Path) -> tuple[bool, str]:
"""Clear a directory using a container to handle container-owned files.
Args:
path: The directory to clear (contents will be deleted, directory recreated)
Returns:
Tuple of (success, message)
"""
if not self.is_available():
return False, "No container runtime available"
if not path.exists():
return True, "Directory does not exist, nothing to clear"
path = path.absolute()
# Use alpine container to delete files owned by container user
cmd = [
"run", "--rm",
"-v", f"{path}:/work:Z",
"alpine",
"sh", "-c",
"rm -rf /work/* /work/.[!.]* 2>/dev/null; echo done"
]
success, stdout, stderr = await self._run_runtime_command(cmd)
if success and "done" in stdout:
return True, f"Cleared {path}"
else:
return False, f"Failed to clear {path}: {stderr or 'Unknown error'}"
async def clear_opensearch_data_volume(self) -> AsyncIterator[tuple[bool, str]]:
"""Clear opensearch data using a temporary container with proper permissions."""
if not self.is_available():
@ -1061,45 +1168,23 @@ class ContainerManager:
yield False, "Clearing OpenSearch data volume..."
# Get the absolute path to opensearch-data directory
opensearch_data_path = Path("opensearch-data").absolute()
# Get opensearch data path from env config
from .env_manager import EnvManager
env_manager = EnvManager()
env_manager.load_existing_env()
opensearch_data_path = Path(env_manager.config.opensearch_data_path.replace("$HOME", str(Path.home()))).expanduser().absolute()
if not opensearch_data_path.exists():
yield True, "OpenSearch data directory does not exist, skipping"
return
# Use the opensearch container with proper volume mount flags
# :Z flag ensures proper SELinux labeling and UID mapping for rootless containers
cmd = [
"run",
"--rm",
"-v", f"{opensearch_data_path}:/usr/share/opensearch/data:Z",
"langflowai/openrag-opensearch:latest",
"bash", "-c",
"rm -rf /usr/share/opensearch/data/* /usr/share/opensearch/data/.[!.]* && echo 'Cleared successfully'"
]
success, stdout, stderr = await self._run_runtime_command(cmd)
if success and "Cleared successfully" in stdout:
# Use alpine with root to clear container-owned files
success, msg = await self.clear_directory_with_container(opensearch_data_path)
if success:
yield True, "OpenSearch data cleared successfully"
else:
# If it fails, try with the base opensearch image
yield False, "Retrying with base OpenSearch image..."
cmd = [
"run",
"--rm",
"-v", f"{opensearch_data_path}:/usr/share/opensearch/data:Z",
"opensearchproject/opensearch:3.0.0",
"bash", "-c",
"rm -rf /usr/share/opensearch/data/* /usr/share/opensearch/data/.[!.]* && echo 'Cleared successfully'"
]
success, stdout, stderr = await self._run_runtime_command(cmd)
if success and "Cleared successfully" in stdout:
yield True, "OpenSearch data cleared successfully"
else:
yield False, f"Failed to clear OpenSearch data: {stderr if stderr else 'Unknown error'}"
yield False, f"Failed to clear OpenSearch data: {msg}"
async def reset_services(self) -> AsyncIterator[tuple[bool, str]]:
"""Reset all services (stop, remove containers/volumes, clear data) and yield progress updates."""
@ -1145,7 +1230,7 @@ class ContainerManager:
return
# Build compose command with override pattern
cmd = self.runtime_info.compose_command + ["-f", str(self.compose_file)]
cmd = self.runtime_info.compose_command.copy() + ["-f", str(self.compose_file)]
if self.use_gpu_compose and self.gpu_compose_file.exists():
cmd.extend(["-f", str(self.gpu_compose_file)])
cmd.extend(["logs", "-f", service_name])


@ -64,11 +64,16 @@ class EnvConfig:
disable_ingest_with_langflow: str = "False"
nudges_flow_id: str = "ebc01d31-1976-46ce-a385-b0240327226c"
# Document paths (comma-separated)
openrag_documents_paths: str = "./openrag-documents"
# Document paths (comma-separated) - use centralized location by default
openrag_documents_paths: str = "$HOME/.openrag/documents"
# OpenSearch data path
opensearch_data_path: str = "./opensearch-data"
# Volume mount paths - use centralized location by default
openrag_documents_path: str = "$HOME/.openrag/documents" # Primary documents path for compose
openrag_keys_path: str = "$HOME/.openrag/keys"
openrag_flows_path: str = "$HOME/.openrag/flows"
openrag_config_path: str = "$HOME/.openrag/config"
openrag_data_path: str = "$HOME/.openrag/data" # Backend data (conversations, tokens, etc.)
opensearch_data_path: str = "$HOME/.openrag/data/opensearch-data"
# Container version (linked to TUI version)
openrag_version: str = ""
@ -81,7 +86,26 @@ class EnvManager:
"""Manages environment configuration for OpenRAG."""
def __init__(self, env_file: Optional[Path] = None):
self.env_file = env_file or Path(".env")
if env_file:
self.env_file = env_file
else:
# Use centralized location for TUI .env file
from utils.paths import get_tui_env_file, get_legacy_paths
self.env_file = get_tui_env_file()
# Check for legacy .env in current directory and migrate if needed
legacy_env = get_legacy_paths()["tui_env"]
if not self.env_file.exists() and legacy_env.exists():
try:
import shutil
self.env_file.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(legacy_env, self.env_file)
logger.info(f"Migrated .env from {legacy_env} to {self.env_file}")
except Exception as e:
logger.warning(f"Failed to migrate .env file: {e}")
self.config = EnvConfig()
def generate_secure_password(self) -> str:
@ -155,6 +179,11 @@ class EnvManager:
"AWS_SECRET_ACCESS_KEY": "aws_secret_access_key", # pragma: allowlist secret
"LANGFLOW_PUBLIC_URL": "langflow_public_url",
"OPENRAG_DOCUMENTS_PATHS": "openrag_documents_paths",
"OPENRAG_DOCUMENTS_PATH": "openrag_documents_path",
"OPENRAG_KEYS_PATH": "openrag_keys_path",
"OPENRAG_FLOWS_PATH": "openrag_flows_path",
"OPENRAG_CONFIG_PATH": "openrag_config_path",
"OPENRAG_DATA_PATH": "openrag_data_path",
"OPENSEARCH_DATA_PATH": "opensearch_data_path",
"LANGFLOW_AUTO_LOGIN": "langflow_auto_login",
"LANGFLOW_NEW_USER_IS_ACTIVE": "langflow_new_user_is_active",
@ -348,11 +377,34 @@ class EnvManager:
f.write(f"LANGFLOW_URL_INGEST_FLOW_ID={self._quote_env_value(self.config.langflow_url_ingest_flow_id)}\n")
f.write(f"NUDGES_FLOW_ID={self._quote_env_value(self.config.nudges_flow_id)}\n")
f.write(f"OPENSEARCH_PASSWORD={self._quote_env_value(self.config.opensearch_password)}\n")
# Expand $HOME in paths before writing to .env
# This ensures paths work with all compose implementations (docker, podman)
from utils.paths import expand_path
f.write(
f"OPENRAG_DOCUMENTS_PATHS={self._quote_env_value(self.config.openrag_documents_paths)}\n"
f"OPENRAG_DOCUMENTS_PATHS={self._quote_env_value(expand_path(self.config.openrag_documents_paths))}\n"
)
f.write("\n")
# Volume mount paths for Docker Compose
f.write("# Volume mount paths for Docker Compose\n")
f.write(
f"OPENRAG_DOCUMENTS_PATH={self._quote_env_value(expand_path(self.config.openrag_documents_path))}\n"
)
f.write(
f"OPENSEARCH_DATA_PATH={self._quote_env_value(self.config.opensearch_data_path)}\n"
f"OPENRAG_KEYS_PATH={self._quote_env_value(expand_path(self.config.openrag_keys_path))}\n"
)
f.write(
f"OPENRAG_FLOWS_PATH={self._quote_env_value(expand_path(self.config.openrag_flows_path))}\n"
)
f.write(
f"OPENRAG_CONFIG_PATH={self._quote_env_value(expand_path(self.config.openrag_config_path))}\n"
)
f.write(
f"OPENRAG_DATA_PATH={self._quote_env_value(expand_path(self.config.openrag_data_path))}\n"
)
f.write(
f"OPENSEARCH_DATA_PATH={self._quote_env_value(expand_path(self.config.opensearch_data_path))}\n"
)
# Set OPENRAG_VERSION to TUI version
if self.config.openrag_version:
@ -476,7 +528,7 @@ class EnvManager:
(
"openrag_documents_paths",
"Documents Paths",
"./openrag-documents,/path/to/more/docs",
"~/.openrag/documents",
False,
),
]
@ -601,12 +653,13 @@ class EnvManager:
def generate_compose_volume_mounts(self) -> List[str]:
"""Generate Docker Compose volume mount strings from documents paths."""
is_valid, _, validated_paths = validate_documents_paths(
self.config.openrag_documents_paths
)
# Expand $HOME before validation
paths_str = self.config.openrag_documents_paths.replace("$HOME", str(Path.home()))
is_valid, error_msg, validated_paths = validate_documents_paths(paths_str)
if not is_valid:
return ["./openrag-documents:/app/openrag-documents:Z"] # fallback
logger.warning(f"Invalid documents paths: {error_msg}")
return []
volume_mounts = []
for i, path in enumerate(validated_paths):


@ -523,7 +523,7 @@ class ConfigScreen(Screen):
yield Label("Documents Paths")
current_value = getattr(self.env_manager.config, "openrag_documents_paths", "")
input_widget = Input(
placeholder="./openrag-documents,/path/to/more/docs",
placeholder="~/.openrag/documents",
value=current_value,
validators=[DocumentsPathValidator()],
id="input-openrag_documents_paths",
@ -544,9 +544,9 @@ class ConfigScreen(Screen):
"Directory to persist OpenSearch indices across upgrades",
classes="helper-text",
)
current_value = getattr(self.env_manager.config, "opensearch_data_path", "./opensearch-data")
current_value = getattr(self.env_manager.config, "opensearch_data_path", "$HOME/.openrag/data/opensearch-data")
input_widget = Input(
placeholder="./opensearch-data",
placeholder="~/.openrag/data/opensearch-data",
value=current_value,
id="input-opensearch_data_path",
)


@ -167,8 +167,8 @@ class DiagnosticsScreen(Screen):
status = self.query_one("#copy-status", Static)
# Create logs directory if it doesn't exist
logs_dir = Path("logs")
logs_dir.mkdir(exist_ok=True)
logs_dir = Path.home() / ".openrag" / "logs"
logs_dir.mkdir(parents=True, exist_ok=True)
# Create a timestamped filename
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
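For illustration, the resulting destination looks like this (the log filename itself is not shown in this hunk, so the name below is hypothetical):

```python
# Sketch: where diagnostics logs now land, given the strftime format above.
import datetime
from pathlib import Path

logs_dir = Path.home() / ".openrag" / "logs"
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
print(logs_dir / f"diagnostics_{timestamp}.log")  # hypothetical filename
# e.g. /home/user/.openrag/logs/diagnostics_20251216_150427.log
```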


@ -481,27 +481,40 @@ class MonitorScreen(Screen):
# Clear config, conversations.json, and optionally flow backups (before stopping containers)
try:
config_path = Path("config")
conversations_file = Path("conversations.json")
flows_backup_path = Path("flows/backup")
# Get paths from env config
from ..managers.env_manager import EnvManager
env_manager = EnvManager()
env_manager.load_existing_env()
def expand_path(path_str: str) -> Path:
return Path(path_str.replace("$HOME", str(Path.home()))).expanduser()
config_path = expand_path(env_manager.config.openrag_config_path)
flows_path = expand_path(env_manager.config.openrag_flows_path)
flows_backup_path = flows_path / "backup"
if config_path.exists():
shutil.rmtree(config_path)
# Use container to handle files owned by container user
success, msg = await self.container_manager.clear_directory_with_container(config_path)
if not success:
# Fallback to regular rmtree if container method fails
shutil.rmtree(config_path)
# Recreate empty config directory
config_path.mkdir(parents=True, exist_ok=True)
if conversations_file.exists():
conversations_file.unlink()
# Delete flow backups only if user chose to (and they actually exist)
if self._check_flow_backups():
if delete_backups:
shutil.rmtree(flows_backup_path)
# Use container to handle files owned by container user
success, msg = await self.container_manager.clear_directory_with_container(flows_backup_path)
if not success:
# Fallback to regular rmtree if container method fails
shutil.rmtree(flows_backup_path)
# Recreate empty backup directory
flows_backup_path.mkdir(parents=True, exist_ok=True)
self.notify("Flow backups deleted", severity="information")
else:
self.notify("Flow backups preserved in ./flows/backup", severity="information")
self.notify(f"Flow backups preserved in {flows_backup_path}", severity="information")
except Exception as e:
self.notify(
@ -531,7 +544,11 @@ class MonitorScreen(Screen):
# Now clear opensearch-data using container
yield False, "Clearing OpenSearch data..."
opensearch_data_path = Path("opensearch-data")
# Get opensearch data path from env config
from ..managers.env_manager import EnvManager
env_manager = EnvManager()
env_manager.load_existing_env()
opensearch_data_path = Path(env_manager.config.opensearch_data_path.replace("$HOME", str(Path.home()))).expanduser()
if opensearch_data_path.exists():
async for success, message in self.container_manager.clear_opensearch_data_volume():
yield success, message
@ -549,10 +566,15 @@ class MonitorScreen(Screen):
yield True, "Factory reset completed successfully"
def _check_flow_backups(self) -> bool:
"""Check if there are any flow backups in ./flows/backup directory."""
"""Check if there are any flow backups in flows/backup directory."""
from pathlib import Path
from ..managers.env_manager import EnvManager
backup_dir = Path("flows/backup")
# Get flows path from env config
env_manager = EnvManager()
env_manager.load_existing_env()
flows_path = Path(env_manager.config.openrag_flows_path.replace("$HOME", str(Path.home()))).expanduser()
backup_dir = flows_path / "backup"
if not backup_dir.exists():
return False


@ -68,11 +68,17 @@ class WelcomeScreen(Screen):
yield Footer()
def _check_flow_backups(self) -> bool:
"""Check if there are any flow backups in ./flows/backup directory."""
backup_dir = Path("flows/backup")
"""Check if there are any flow backups in flows/backup directory."""
from ..managers.env_manager import EnvManager
# Get flows path from env config
env_manager = EnvManager()
env_manager.load_existing_env()
flows_path = Path(env_manager.config.openrag_flows_path.replace("$HOME", str(Path.home()))).expanduser()
backup_dir = flows_path / "backup"
if not backup_dir.exists():
return False
try:
# Check if there are any .json files in the backup directory
backup_files = list(backup_dir.glob("*.json"))


@ -315,15 +315,22 @@ class CommandOutputModal(ModalScreen):
asyncio.create_task(callback_result)
self.call_after_refresh(_invoke_callback)
except asyncio.CancelledError:
# Modal was dismissed while command was running - this is fine
pass
except Exception as e:
self._update_output(f"Error: {e}", False)
output.text = "\n".join(self._output_lines)
output.move_cursor((len(self._output_lines), 0))
finally:
# Enable the close button and focus it
close_btn = self.query_one("#close-btn", Button)
close_btn.disabled = False
close_btn.focus()
# Enable the close button and focus it (if modal still exists)
try:
close_btn = self.query_one("#close-btn", Button)
close_btn.disabled = False
close_btn.focus()
except Exception:
# Modal was already dismissed
pass
def _update_output(self, message: str, replace_last: bool = False) -> None:
"""Update the output buffer by appending or replacing the last line.


@ -100,9 +100,9 @@ class FlowBackupWarningModal(ModalScreen[tuple[bool, bool]]):
with Container(id="dialog"):
yield Label("⚠ Flow Backups Detected", id="title")
yield Static(
f"Flow backups found in ./flows/backup\n\n"
f"Flow backups found in your flows/backup directory.\n\n"
f"Proceeding with {self.operation} will reset custom flows to defaults.\n"
f"Your customizations are backed up in ./flows/backup/\n\n"
f"Your customizations are backed up in the flows/backup/ directory.\n\n"
f"Choose whether to keep or delete the backup files:",
id="message"
)


@ -92,8 +92,8 @@ class VersionMismatchWarningModal(ModalScreen[bool]):
f"Current TUI version is {self.tui_version}\n\n"
f"Starting services will update containers to version {self.tui_version}.\n"
f"This may cause compatibility issues with your flows.\n\n"
f"⚠️ Please backup your flows before continuing:\n"
f" Your flows are in ./flows/ directory\n\n"
f"⚠️ Please backup your flows before continuing.\n"
f" Your flows are in ~/.openrag/flows/\n\n"
f"Do you want to continue?",
id="message"
)

src/utils/paths.py (new file, 85 lines)

@ -0,0 +1,85 @@
"""Host-side path management for OpenRAG TUI.
This module provides functions for TUI to get standardized paths on the host machine.
All TUI files are centralized under ~/.openrag/ to avoid cluttering the user's CWD.
Note: This module is for HOST-SIDE (TUI) use only. Container code should not use these paths.
"""
from pathlib import Path
def get_openrag_home() -> Path:
"""Get the OpenRAG home directory on the host.
Returns:
Path to ~/.openrag/ directory
"""
home_dir = Path.home() / ".openrag"
home_dir.mkdir(parents=True, exist_ok=True)
return home_dir
def get_tui_dir() -> Path:
"""Get the TUI directory for TUI-specific files.
Returns:
Path to ~/.openrag/tui/ directory
"""
tui_dir = get_openrag_home() / "tui"
tui_dir.mkdir(parents=True, exist_ok=True)
return tui_dir
def get_tui_env_file() -> Path:
"""Get the TUI .env file path.
Returns:
Path to ~/.openrag/tui/.env file
"""
return get_tui_dir() / ".env"
def get_tui_compose_file(gpu: bool = False) -> Path:
"""Get the TUI docker-compose file path.
Args:
gpu: If True, returns path to docker-compose.gpu.yml
Returns:
Path to docker-compose file in ~/.openrag/tui/
"""
filename = "docker-compose.gpu.yml" if gpu else "docker-compose.yml"
return get_tui_dir() / filename
def get_legacy_paths() -> dict:
"""Get legacy (CWD-based) paths for migration purposes.
Returns:
Dictionary mapping resource names to their old CWD-based paths
"""
cwd = Path.cwd()
return {
"tui_env": cwd / ".env",
"tui_compose": cwd / "docker-compose.yml",
"tui_compose_gpu": cwd / "docker-compose.gpu.yml",
}
def expand_path(path: str) -> str:
"""Expand $HOME and ~ in a path string to the actual home directory.
Args:
path: Path string that may contain $HOME or ~
Returns:
Path string with $HOME and ~ expanded to actual home directory
"""
if not path:
return path
expanded = path.replace("$HOME", str(Path.home()))
# Also handle ~ at start of path
if expanded.startswith("~"):
expanded = str(Path.home()) + expanded[1:]
return expanded
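A short usage sketch for the new module, based only on the functions shown above:

```python
# Usage sketch for src/utils/paths.py (host-side only).
from utils.paths import (
    expand_path, get_openrag_home, get_tui_compose_file, get_tui_env_file,
)

print(get_openrag_home())                  # ~/.openrag (created if missing)
print(get_tui_env_file())                  # ~/.openrag/tui/.env
print(get_tui_compose_file(gpu=True))      # ~/.openrag/tui/docker-compose.gpu.yml
print(expand_path("$HOME/.openrag/data"))  # /home/<user>/.openrag/data
```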

uv.lock (generated, 2 lines changed)

@ -4429,4 +4429,4 @@ source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/e3/02/0f2892c661036d50ede074e376733dca2ae7c6eb617489437771209d4180/zipp-3.23.0.tar.gz", hash = "sha256:a07157588a12518c9d4034df3fbbee09c814741a33ff63c05fa29d26a2404166", size = 25547 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/2e/54/647ade08bf0db230bfea292f893923872fd20be6ac6f53b2b936ba839d75/zipp-3.23.0-py3-none-any.whl", hash = "sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e", size = 10276 },
]
]