fix: clean up openrag files on host only
This commit is contained in:
parent
aa214624c9
commit
3ffce7bd8d
7 changed files with 107 additions and 537 deletions
|
|
@ -1,13 +1,11 @@
|
||||||
"""Configuration management for OpenRAG."""
|
"""Configuration management for OpenRAG."""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import shutil
|
|
||||||
import yaml
|
import yaml
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Dict, Any, Optional
|
from typing import Dict, Any, Optional
|
||||||
from dataclasses import dataclass, asdict
|
from dataclasses import dataclass, asdict
|
||||||
from utils.logging_config import get_logger
|
from utils.logging_config import get_logger
|
||||||
from utils.paths import get_config_file, get_legacy_paths
|
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
|
@ -132,40 +130,10 @@ class ConfigManager:
|
||||||
"""Initialize configuration manager.
|
"""Initialize configuration manager.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
config_file: Path to configuration file. Defaults to centralized location.
|
config_file: Path to configuration file. Defaults to 'config.yaml' in project root.
|
||||||
"""
|
"""
|
||||||
if config_file:
|
self.config_file = Path(config_file) if config_file else Path("config/config.yaml")
|
||||||
self.config_file = Path(config_file)
|
|
||||||
else:
|
|
||||||
# Use centralized location
|
|
||||||
self.config_file = get_config_file()
|
|
||||||
|
|
||||||
# Check for legacy location and migrate if needed
|
|
||||||
legacy_config = get_legacy_paths()["config"]
|
|
||||||
if not self.config_file.exists() and legacy_config.exists():
|
|
||||||
self._migrate_from_legacy(legacy_config)
|
|
||||||
|
|
||||||
self._config: Optional[OpenRAGConfig] = None
|
self._config: Optional[OpenRAGConfig] = None
|
||||||
|
|
||||||
def _migrate_from_legacy(self, legacy_config_path: Path) -> None:
|
|
||||||
"""Migrate configuration from legacy location to centralized location.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
legacy_config_path: Path to legacy config file
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
logger.info(
|
|
||||||
f"Migrating configuration from {legacy_config_path} to {self.config_file}"
|
|
||||||
)
|
|
||||||
# Ensure parent directory exists
|
|
||||||
self.config_file.parent.mkdir(parents=True, exist_ok=True)
|
|
||||||
# Copy the config file
|
|
||||||
shutil.copy2(legacy_config_path, self.config_file)
|
|
||||||
logger.info("Configuration migration completed successfully")
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(
|
|
||||||
f"Failed to migrate configuration from {legacy_config_path}: {e}"
|
|
||||||
)
|
|
||||||
|
|
||||||
def load_config(self) -> OpenRAGConfig:
|
def load_config(self) -> OpenRAGConfig:
|
||||||
"""Load configuration from environment variables and config file.
|
"""Load configuration from environment variables and config file.
|
||||||
|
|
|
||||||
67
src/main.py
67
src/main.py
|
|
@ -246,39 +246,25 @@ async def init_index():
|
||||||
|
|
||||||
def generate_jwt_keys():
|
def generate_jwt_keys():
|
||||||
"""Generate RSA keys for JWT signing if they don't exist"""
|
"""Generate RSA keys for JWT signing if they don't exist"""
|
||||||
from utils.paths import get_keys_dir, get_private_key_path, get_public_key_path, get_legacy_paths
|
keys_dir = "keys"
|
||||||
|
private_key_path = os.path.join(keys_dir, "private_key.pem")
|
||||||
# Use centralized keys directory
|
public_key_path = os.path.join(keys_dir, "public_key.pem")
|
||||||
keys_dir = get_keys_dir()
|
|
||||||
private_key_path = get_private_key_path()
|
# Create keys directory if it doesn't exist
|
||||||
public_key_path = get_public_key_path()
|
os.makedirs(keys_dir, exist_ok=True)
|
||||||
|
|
||||||
# Check for legacy keys and migrate if needed
|
|
||||||
legacy_paths = get_legacy_paths()
|
|
||||||
if not private_key_path.exists() and legacy_paths["private_key"].exists():
|
|
||||||
logger.info(f"Migrating JWT keys from {legacy_paths['keys_dir']} to {keys_dir}")
|
|
||||||
try:
|
|
||||||
shutil.copy2(legacy_paths["private_key"], private_key_path)
|
|
||||||
if legacy_paths["public_key"].exists():
|
|
||||||
shutil.copy2(legacy_paths["public_key"], public_key_path)
|
|
||||||
logger.info("JWT keys migration completed successfully")
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Failed to migrate JWT keys: {e}")
|
|
||||||
|
|
||||||
# Ensure keys directory exists (already done by get_keys_dir)
|
|
||||||
|
|
||||||
# Generate keys if they don't exist
|
# Generate keys if they don't exist
|
||||||
if not private_key_path.exists():
|
if not os.path.exists(private_key_path):
|
||||||
try:
|
try:
|
||||||
# Generate private key
|
# Generate private key
|
||||||
subprocess.run(
|
subprocess.run(
|
||||||
["openssl", "genrsa", "-out", str(private_key_path), "2048"],
|
["openssl", "genrsa", "-out", private_key_path, "2048"],
|
||||||
check=True,
|
check=True,
|
||||||
capture_output=True,
|
capture_output=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Set restrictive permissions on private key (readable by owner only)
|
# Set restrictive permissions on private key (readable by owner only)
|
||||||
os.chmod(str(private_key_path), 0o600)
|
os.chmod(private_key_path, 0o600)
|
||||||
|
|
||||||
# Generate public key
|
# Generate public key
|
||||||
subprocess.run(
|
subprocess.run(
|
||||||
|
|
@ -286,17 +272,17 @@ def generate_jwt_keys():
|
||||||
"openssl",
|
"openssl",
|
||||||
"rsa",
|
"rsa",
|
||||||
"-in",
|
"-in",
|
||||||
str(private_key_path),
|
private_key_path,
|
||||||
"-pubout",
|
"-pubout",
|
||||||
"-out",
|
"-out",
|
||||||
str(public_key_path),
|
public_key_path,
|
||||||
],
|
],
|
||||||
check=True,
|
check=True,
|
||||||
capture_output=True,
|
capture_output=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Set permissions on public key (readable by all)
|
# Set permissions on public key (readable by all)
|
||||||
os.chmod(str(public_key_path), 0o644)
|
os.chmod(public_key_path, 0o644)
|
||||||
|
|
||||||
logger.info("Generated RSA keys for JWT signing")
|
logger.info("Generated RSA keys for JWT signing")
|
||||||
except subprocess.CalledProcessError as e:
|
except subprocess.CalledProcessError as e:
|
||||||
|
|
@ -306,8 +292,8 @@ def generate_jwt_keys():
|
||||||
else:
|
else:
|
||||||
# Ensure correct permissions on existing keys
|
# Ensure correct permissions on existing keys
|
||||||
try:
|
try:
|
||||||
os.chmod(str(private_key_path), 0o600)
|
os.chmod(private_key_path, 0o600)
|
||||||
os.chmod(str(public_key_path), 0o644)
|
os.chmod(public_key_path, 0o644)
|
||||||
logger.info("RSA keys already exist, ensured correct permissions")
|
logger.info("RSA keys already exist, ensured correct permissions")
|
||||||
except OSError as e:
|
except OSError as e:
|
||||||
logger.warning("Failed to set permissions on existing keys", error=str(e))
|
logger.warning("Failed to set permissions on existing keys", error=str(e))
|
||||||
|
|
@ -328,18 +314,17 @@ async def init_index_when_ready():
|
||||||
|
|
||||||
def _get_documents_dir():
|
def _get_documents_dir():
|
||||||
"""Get the documents directory path, handling both Docker and local environments."""
|
"""Get the documents directory path, handling both Docker and local environments."""
|
||||||
from utils.paths import get_documents_dir
|
# In Docker, the volume is mounted at /app/openrag-documents
|
||||||
|
# Locally, we use openrag-documents
|
||||||
# Use centralized path utility which handles both container and local environments
|
|
||||||
path = get_documents_dir()
|
|
||||||
container_env = detect_container_environment()
|
container_env = detect_container_environment()
|
||||||
|
|
||||||
if container_env:
|
if container_env:
|
||||||
|
path = os.path.abspath("/app/openrag-documents")
|
||||||
logger.debug(f"Running in {container_env}, using container path: {path}")
|
logger.debug(f"Running in {container_env}, using container path: {path}")
|
||||||
|
return path
|
||||||
else:
|
else:
|
||||||
logger.debug(f"Running locally, using centralized path: {path}")
|
path = os.path.abspath(os.path.join(os.getcwd(), "openrag-documents"))
|
||||||
|
logger.debug(f"Running locally, using local path: {path}")
|
||||||
return str(path)
|
return path
|
||||||
|
|
||||||
|
|
||||||
async def ingest_default_documents_when_ready(services):
|
async def ingest_default_documents_when_ready(services):
|
||||||
|
|
@ -575,16 +560,6 @@ async def startup_tasks(services):
|
||||||
async def initialize_services():
|
async def initialize_services():
|
||||||
"""Initialize all services and their dependencies"""
|
"""Initialize all services and their dependencies"""
|
||||||
await TelemetryClient.send_event(Category.SERVICE_INITIALIZATION, MessageId.ORB_SVC_INIT_START)
|
await TelemetryClient.send_event(Category.SERVICE_INITIALIZATION, MessageId.ORB_SVC_INIT_START)
|
||||||
|
|
||||||
# Perform migration if needed (move files from old locations to ~/.openrag)
|
|
||||||
from utils.migration import perform_migration
|
|
||||||
try:
|
|
||||||
migration_results = perform_migration()
|
|
||||||
if migration_results:
|
|
||||||
logger.info("File migration completed", results=migration_results)
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Migration failed, some files may still be in legacy locations. Consider manual migration. Error: {e}")
|
|
||||||
|
|
||||||
# Generate JWT keys if they don't exist
|
# Generate JWT keys if they don't exist
|
||||||
generate_jwt_keys()
|
generate_jwt_keys()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -40,13 +40,17 @@ class FlowsService:
|
||||||
|
|
||||||
def _get_flows_directory(self):
|
def _get_flows_directory(self):
|
||||||
"""Get the flows directory path"""
|
"""Get the flows directory path"""
|
||||||
from utils.paths import get_flows_dir
|
current_file_dir = os.path.dirname(os.path.abspath(__file__)) # src/services/
|
||||||
return str(get_flows_dir())
|
src_dir = os.path.dirname(current_file_dir) # src/
|
||||||
|
project_root = os.path.dirname(src_dir) # project root
|
||||||
|
return os.path.join(project_root, "flows")
|
||||||
|
|
||||||
def _get_backup_directory(self):
|
def _get_backup_directory(self):
|
||||||
"""Get the backup directory path"""
|
"""Get the backup directory path"""
|
||||||
from utils.paths import get_flows_backup_dir
|
flows_dir = self._get_flows_directory()
|
||||||
return str(get_flows_backup_dir())
|
backup_dir = os.path.join(flows_dir, "backup")
|
||||||
|
os.makedirs(backup_dir, exist_ok=True)
|
||||||
|
return backup_dir
|
||||||
|
|
||||||
def _get_latest_backup_path(self, flow_id: str, flow_type: str):
|
def _get_latest_backup_path(self, flow_id: str, flow_type: str):
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -51,29 +51,18 @@ class SessionManager:
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
secret_key: str = None,
|
secret_key: str = None,
|
||||||
private_key_path: str = None,
|
private_key_path: str = "keys/private_key.pem",
|
||||||
public_key_path: str = None,
|
public_key_path: str = "keys/public_key.pem",
|
||||||
):
|
):
|
||||||
from utils.paths import get_private_key_path, get_public_key_path
|
|
||||||
|
|
||||||
self.secret_key = secret_key # Keep for backward compatibility
|
self.secret_key = secret_key # Keep for backward compatibility
|
||||||
self.users: Dict[str, User] = {} # user_id -> User
|
self.users: Dict[str, User] = {} # user_id -> User
|
||||||
self.user_opensearch_clients: Dict[
|
self.user_opensearch_clients: Dict[
|
||||||
str, Any
|
str, Any
|
||||||
] = {} # user_id -> OpenSearch client
|
] = {} # user_id -> OpenSearch client
|
||||||
|
|
||||||
# Use centralized key paths if not explicitly provided
|
|
||||||
if private_key_path is None:
|
|
||||||
self.private_key_path = str(get_private_key_path())
|
|
||||||
else:
|
|
||||||
self.private_key_path = private_key_path
|
|
||||||
|
|
||||||
if public_key_path is None:
|
|
||||||
self.public_key_path = str(get_public_key_path())
|
|
||||||
else:
|
|
||||||
self.public_key_path = public_key_path
|
|
||||||
|
|
||||||
# Load RSA keys
|
# Load RSA keys
|
||||||
|
self.private_key_path = private_key_path
|
||||||
|
self.public_key_path = public_key_path
|
||||||
self._load_rsa_keys()
|
self._load_rsa_keys()
|
||||||
|
|
||||||
def _load_rsa_keys(self):
|
def _load_rsa_keys(self):
|
||||||
|
|
|
||||||
|
|
@ -454,9 +454,28 @@ def _copy_assets(resource_tree, destination: Path, allowed_suffixes: Optional[It
|
||||||
|
|
||||||
|
|
||||||
def copy_sample_documents(*, force: bool = False) -> None:
|
def copy_sample_documents(*, force: bool = False) -> None:
|
||||||
"""Copy sample documents from package to centralized directory if they don't exist."""
|
"""Copy sample documents from package to host directory.
|
||||||
from utils.paths import get_documents_dir
|
|
||||||
documents_dir = get_documents_dir()
|
Uses the first path from OPENRAG_DOCUMENTS_PATHS env var.
|
||||||
|
Defaults to ~/.openrag/documents/openrag-documents if not configured.
|
||||||
|
"""
|
||||||
|
from .managers.env_manager import EnvManager
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Get the configured documents path from env
|
||||||
|
env_manager = EnvManager()
|
||||||
|
env_manager.load_existing_env()
|
||||||
|
|
||||||
|
# Parse the first path from the documents paths config
|
||||||
|
documents_path_str = env_manager.config.openrag_documents_paths
|
||||||
|
if documents_path_str:
|
||||||
|
first_path = documents_path_str.split(',')[0].strip()
|
||||||
|
documents_dir = Path(first_path).expanduser()
|
||||||
|
else:
|
||||||
|
# Default fallback
|
||||||
|
documents_dir = Path.home() / ".openrag" / "documents" / "openrag-documents"
|
||||||
|
|
||||||
|
documents_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
assets_files = files("tui._assets.openrag-documents")
|
assets_files = files("tui._assets.openrag-documents")
|
||||||
|
|
@ -467,9 +486,15 @@ def copy_sample_documents(*, force: bool = False) -> None:
|
||||||
|
|
||||||
|
|
||||||
def copy_sample_flows(*, force: bool = False) -> None:
|
def copy_sample_flows(*, force: bool = False) -> None:
|
||||||
"""Copy sample flows from package to centralized directory if they don't exist."""
|
"""Copy sample flows from package to host directory.
|
||||||
from utils.paths import get_flows_dir
|
|
||||||
flows_dir = get_flows_dir()
|
Flows are placed in ~/.openrag/flows/ which will be volume-mounted to containers.
|
||||||
|
"""
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Flows always go to ~/.openrag/flows/ - this will be volume-mounted
|
||||||
|
flows_dir = Path.home() / ".openrag" / "flows"
|
||||||
|
flows_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
assets_files = files("tui._assets.flows")
|
assets_files = files("tui._assets.flows")
|
||||||
|
|
@ -516,6 +541,32 @@ def copy_compose_files(*, force: bool = False) -> None:
|
||||||
logger.debug(f"Could not copy compose file {filename}: {error}")
|
logger.debug(f"Could not copy compose file {filename}: {error}")
|
||||||
|
|
||||||
|
|
||||||
|
def setup_host_directories():
|
||||||
|
"""Initialize OpenRAG directory structure on the host.
|
||||||
|
|
||||||
|
Creates directories that will be volume-mounted into containers:
|
||||||
|
- ~/.openrag/documents/openrag-documents/ (for document ingestion)
|
||||||
|
- ~/.openrag/flows/ (for Langflow flows)
|
||||||
|
- ~/.openrag/keys/ (for JWT keys)
|
||||||
|
- ~/.openrag/config/ (for configuration)
|
||||||
|
- ~/.openrag/data/opensearch-data/ (for OpenSearch data)
|
||||||
|
"""
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
base_dir = Path.home() / ".openrag"
|
||||||
|
directories = [
|
||||||
|
base_dir / "documents" / "openrag-documents",
|
||||||
|
base_dir / "flows",
|
||||||
|
base_dir / "keys",
|
||||||
|
base_dir / "config",
|
||||||
|
base_dir / "data" / "opensearch-data",
|
||||||
|
]
|
||||||
|
|
||||||
|
for directory in directories:
|
||||||
|
directory.mkdir(parents=True, exist_ok=True)
|
||||||
|
logger.debug(f"Ensured directory exists: {directory}")
|
||||||
|
|
||||||
|
|
||||||
def run_tui():
|
def run_tui():
|
||||||
"""Run the OpenRAG TUI application."""
|
"""Run the OpenRAG TUI application."""
|
||||||
# Check for native Windows before launching TUI
|
# Check for native Windows before launching TUI
|
||||||
|
|
@ -532,6 +583,9 @@ def run_tui():
|
||||||
|
|
||||||
app = None
|
app = None
|
||||||
try:
|
try:
|
||||||
|
# Initialize host directory structure
|
||||||
|
setup_host_directories()
|
||||||
|
|
||||||
# Keep bundled assets aligned with the packaged versions
|
# Keep bundled assets aligned with the packaged versions
|
||||||
copy_sample_documents(force=True)
|
copy_sample_documents(force=True)
|
||||||
copy_sample_flows(force=True)
|
copy_sample_flows(force=True)
|
||||||
|
|
|
||||||
|
|
@ -1,285 +0,0 @@
|
||||||
"""Migration utilities for moving OpenRAG files to centralized location.
|
|
||||||
|
|
||||||
This module handles migration of files from legacy locations (current working directory)
|
|
||||||
to the new centralized location (~/.openrag/).
|
|
||||||
"""
|
|
||||||
|
|
||||||
import shutil
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Optional, List, Dict
|
|
||||||
from utils.logging_config import get_logger
|
|
||||||
from utils.container_utils import detect_container_environment
|
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def get_migration_marker_file() -> Path:
|
|
||||||
"""Get the path to the migration marker file.
|
|
||||||
|
|
||||||
This file is created after a successful migration to prevent repeated migrations.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Path to migration marker file
|
|
||||||
"""
|
|
||||||
from utils.paths import get_openrag_home
|
|
||||||
return get_openrag_home() / ".migrated"
|
|
||||||
|
|
||||||
|
|
||||||
def is_migration_needed() -> bool:
|
|
||||||
"""Check if migration is needed.
|
|
||||||
|
|
||||||
Migration is not needed if:
|
|
||||||
- We're in a container environment
|
|
||||||
- Migration has already been completed (marker file exists)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
True if migration should be performed, False otherwise
|
|
||||||
"""
|
|
||||||
# Don't migrate in container environments
|
|
||||||
if detect_container_environment():
|
|
||||||
return False
|
|
||||||
|
|
||||||
# Check if migration has already been completed
|
|
||||||
marker_file = get_migration_marker_file()
|
|
||||||
if marker_file.exists():
|
|
||||||
return False
|
|
||||||
|
|
||||||
# Check if any legacy files exist
|
|
||||||
from utils.paths import get_legacy_paths
|
|
||||||
legacy_paths = get_legacy_paths()
|
|
||||||
|
|
||||||
for name, path in legacy_paths.items():
|
|
||||||
if path.exists():
|
|
||||||
logger.info(f"Found legacy file/directory: {path}")
|
|
||||||
return True
|
|
||||||
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
def migrate_directory(src: Path, dst: Path, description: str) -> bool:
|
|
||||||
"""Migrate a directory from source to destination.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
src: Source directory path
|
|
||||||
dst: Destination directory path
|
|
||||||
description: Human-readable description for logging
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
True if migration was successful or not needed, False otherwise
|
|
||||||
"""
|
|
||||||
if not src.exists():
|
|
||||||
logger.debug(f"Source directory does not exist, skipping: {src}")
|
|
||||||
return True
|
|
||||||
|
|
||||||
if not src.is_dir():
|
|
||||||
logger.warning(f"Source is not a directory: {src}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Ensure parent directory exists
|
|
||||||
dst.parent.mkdir(parents=True, exist_ok=True)
|
|
||||||
|
|
||||||
# If destination already exists, merge contents
|
|
||||||
if dst.exists():
|
|
||||||
logger.info(f"Destination already exists, merging: {dst}")
|
|
||||||
# Copy contents recursively
|
|
||||||
for item in src.iterdir():
|
|
||||||
src_item = src / item.name
|
|
||||||
dst_item = dst / item.name
|
|
||||||
|
|
||||||
if src_item.is_dir():
|
|
||||||
if not dst_item.exists():
|
|
||||||
shutil.copytree(src_item, dst_item)
|
|
||||||
logger.debug(f"Copied directory: {src_item} -> {dst_item}")
|
|
||||||
else:
|
|
||||||
if not dst_item.exists():
|
|
||||||
shutil.copy2(src_item, dst_item)
|
|
||||||
logger.debug(f"Copied file: {src_item} -> {dst_item}")
|
|
||||||
else:
|
|
||||||
# Move entire directory
|
|
||||||
shutil.move(str(src), str(dst))
|
|
||||||
logger.info(f"Migrated {description}: {src} -> {dst}")
|
|
||||||
|
|
||||||
return True
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Failed to migrate {description} from {src} to {dst}: {e}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
def migrate_file(src: Path, dst: Path, description: str) -> bool:
|
|
||||||
"""Migrate a file from source to destination.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
src: Source file path
|
|
||||||
dst: Destination file path
|
|
||||||
description: Human-readable description for logging
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
True if migration was successful or not needed, False otherwise
|
|
||||||
"""
|
|
||||||
if not src.exists():
|
|
||||||
logger.debug(f"Source file does not exist, skipping: {src}")
|
|
||||||
return True
|
|
||||||
|
|
||||||
if not src.is_file():
|
|
||||||
logger.warning(f"Source is not a file: {src}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Ensure parent directory exists
|
|
||||||
dst.parent.mkdir(parents=True, exist_ok=True)
|
|
||||||
|
|
||||||
# Only copy if destination doesn't exist
|
|
||||||
if dst.exists():
|
|
||||||
logger.debug(f"Destination already exists, skipping: {dst}")
|
|
||||||
return True
|
|
||||||
|
|
||||||
# Copy the file
|
|
||||||
shutil.copy2(src, dst)
|
|
||||||
logger.info(f"Migrated {description}: {src} -> {dst}")
|
|
||||||
return True
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Failed to migrate {description} from {src} to {dst}: {e}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
def perform_migration() -> Dict[str, bool]:
|
|
||||||
"""Perform migration of all OpenRAG files to centralized location.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Dictionary mapping resource names to migration success status
|
|
||||||
"""
|
|
||||||
if not is_migration_needed():
|
|
||||||
logger.debug("Migration not needed or already completed")
|
|
||||||
return {}
|
|
||||||
|
|
||||||
logger.info("Starting migration of OpenRAG files to centralized location")
|
|
||||||
|
|
||||||
from utils.paths import (
|
|
||||||
get_config_file,
|
|
||||||
get_keys_dir,
|
|
||||||
get_documents_dir,
|
|
||||||
get_flows_dir,
|
|
||||||
get_tui_env_file,
|
|
||||||
get_tui_compose_file,
|
|
||||||
get_opensearch_data_dir,
|
|
||||||
get_legacy_paths,
|
|
||||||
)
|
|
||||||
|
|
||||||
legacy_paths = get_legacy_paths()
|
|
||||||
results = {}
|
|
||||||
|
|
||||||
# Migrate configuration file
|
|
||||||
if legacy_paths["config"].exists():
|
|
||||||
results["config"] = migrate_file(
|
|
||||||
legacy_paths["config"],
|
|
||||||
get_config_file(),
|
|
||||||
"configuration file"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Migrate JWT keys directory
|
|
||||||
if legacy_paths["keys_dir"].exists():
|
|
||||||
results["keys"] = migrate_directory(
|
|
||||||
legacy_paths["keys_dir"],
|
|
||||||
get_keys_dir(),
|
|
||||||
"JWT keys directory"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Migrate documents directory
|
|
||||||
if legacy_paths["documents"].exists():
|
|
||||||
results["documents"] = migrate_directory(
|
|
||||||
legacy_paths["documents"],
|
|
||||||
get_documents_dir(),
|
|
||||||
"documents directory"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Migrate flows directory
|
|
||||||
if legacy_paths["flows"].exists():
|
|
||||||
results["flows"] = migrate_directory(
|
|
||||||
legacy_paths["flows"],
|
|
||||||
get_flows_dir(),
|
|
||||||
"flows directory"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Migrate TUI .env file
|
|
||||||
if legacy_paths["tui_env"].exists():
|
|
||||||
results["tui_env"] = migrate_file(
|
|
||||||
legacy_paths["tui_env"],
|
|
||||||
get_tui_env_file(),
|
|
||||||
"TUI .env file"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Migrate docker-compose files
|
|
||||||
if legacy_paths["tui_compose"].exists():
|
|
||||||
results["tui_compose"] = migrate_file(
|
|
||||||
legacy_paths["tui_compose"],
|
|
||||||
get_tui_compose_file(gpu=False),
|
|
||||||
"docker-compose.yml"
|
|
||||||
)
|
|
||||||
|
|
||||||
if legacy_paths["tui_compose_gpu"].exists():
|
|
||||||
results["tui_compose_gpu"] = migrate_file(
|
|
||||||
legacy_paths["tui_compose_gpu"],
|
|
||||||
get_tui_compose_file(gpu=True),
|
|
||||||
"docker-compose.gpu.yml"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Note: We don't migrate opensearch-data as it's typically large and managed by Docker
|
|
||||||
# Users can manually move it if needed, or specify a custom path via env var
|
|
||||||
|
|
||||||
# Create migration marker file
|
|
||||||
marker_file = get_migration_marker_file()
|
|
||||||
try:
|
|
||||||
marker_file.parent.mkdir(parents=True, exist_ok=True)
|
|
||||||
marker_file.write_text("Migration completed successfully\n")
|
|
||||||
logger.info("Migration marker file created")
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Failed to create migration marker file: {e}")
|
|
||||||
|
|
||||||
# Log summary
|
|
||||||
successful = sum(1 for success in results.values() if success)
|
|
||||||
total = len(results)
|
|
||||||
logger.info(f"Migration completed: {successful}/{total} items migrated successfully")
|
|
||||||
|
|
||||||
if successful < total:
|
|
||||||
logger.warning("Some migrations failed. Check logs for details.")
|
|
||||||
|
|
||||||
return results
|
|
||||||
|
|
||||||
|
|
||||||
def cleanup_legacy_files(dry_run: bool = True) -> List[str]:
|
|
||||||
"""Clean up legacy files after successful migration.
|
|
||||||
|
|
||||||
This function removes the old files from the current working directory after
|
|
||||||
confirming they have been successfully migrated.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
dry_run: If True, only list files that would be removed without actually removing them
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
List of file paths that were (or would be) removed
|
|
||||||
"""
|
|
||||||
from utils.paths import get_legacy_paths
|
|
||||||
|
|
||||||
legacy_paths = get_legacy_paths()
|
|
||||||
removed_files = []
|
|
||||||
|
|
||||||
for name, path in legacy_paths.items():
|
|
||||||
if not path.exists():
|
|
||||||
continue
|
|
||||||
|
|
||||||
if dry_run:
|
|
||||||
logger.info(f"Would remove: {path}")
|
|
||||||
removed_files.append(str(path))
|
|
||||||
else:
|
|
||||||
try:
|
|
||||||
if path.is_dir():
|
|
||||||
shutil.rmtree(path)
|
|
||||||
else:
|
|
||||||
path.unlink()
|
|
||||||
logger.info(f"Removed legacy file/directory: {path}")
|
|
||||||
removed_files.append(str(path))
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Failed to remove {path}: {e}")
|
|
||||||
|
|
||||||
return removed_files
|
|
||||||
|
|
@ -1,157 +1,30 @@
|
||||||
"""Centralized path management for OpenRAG.
|
"""Host-side path management for OpenRAG TUI.
|
||||||
|
|
||||||
This module provides functions to get standardized paths for OpenRAG files and directories.
|
This module provides functions for TUI to get standardized paths on the host machine.
|
||||||
All paths are centralized under ~/.openrag/ to avoid cluttering the user's current working directory.
|
All TUI files are centralized under ~/.openrag/ to avoid cluttering the user's CWD.
|
||||||
|
|
||||||
|
Note: This module is for HOST-SIDE (TUI) use only. Container code should not use these paths.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from utils.logging_config import get_logger
|
|
||||||
from utils.container_utils import detect_container_environment
|
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def get_openrag_home() -> Path:
|
def get_openrag_home() -> Path:
|
||||||
"""Get the OpenRAG home directory.
|
"""Get the OpenRAG home directory on the host.
|
||||||
|
|
||||||
In containers: Uses current working directory (for backward compatibility)
|
|
||||||
In local environments: Uses ~/.openrag/
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Path to OpenRAG home directory
|
Path to ~/.openrag/ directory
|
||||||
"""
|
"""
|
||||||
# In container environments, use the container's working directory
|
|
||||||
# This maintains backward compatibility with existing Docker setups
|
|
||||||
container_env = detect_container_environment()
|
|
||||||
if container_env:
|
|
||||||
# In containers, return the container's working directory.
|
|
||||||
# This ensures compatibility with existing Docker volume mounts,
|
|
||||||
# as Docker typically mounts volumes into the working directory.
|
|
||||||
return Path.cwd()
|
|
||||||
|
|
||||||
# In local environments, use centralized location
|
|
||||||
home_dir = Path.home() / ".openrag"
|
home_dir = Path.home() / ".openrag"
|
||||||
home_dir.mkdir(parents=True, exist_ok=True)
|
home_dir.mkdir(parents=True, exist_ok=True)
|
||||||
return home_dir
|
return home_dir
|
||||||
|
|
||||||
|
|
||||||
def get_config_dir() -> Path:
|
|
||||||
"""Get the configuration directory.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Path to config directory (~/.openrag/config/ or ./config/ in containers)
|
|
||||||
"""
|
|
||||||
config_dir = get_openrag_home() / "config"
|
|
||||||
config_dir.mkdir(parents=True, exist_ok=True)
|
|
||||||
return config_dir
|
|
||||||
|
|
||||||
|
|
||||||
def get_config_file() -> Path:
|
|
||||||
"""Get the configuration file path.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Path to config.yaml file
|
|
||||||
"""
|
|
||||||
return get_config_dir() / "config.yaml"
|
|
||||||
|
|
||||||
|
|
||||||
def get_keys_dir() -> Path:
|
|
||||||
"""Get the JWT keys directory.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Path to keys directory (~/.openrag/keys/ or ./keys/ in containers)
|
|
||||||
"""
|
|
||||||
keys_dir = get_openrag_home() / "keys"
|
|
||||||
keys_dir.mkdir(parents=True, exist_ok=True)
|
|
||||||
return keys_dir
|
|
||||||
|
|
||||||
|
|
||||||
def get_private_key_path() -> Path:
|
|
||||||
"""Get the JWT private key path.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Path to private_key.pem
|
|
||||||
"""
|
|
||||||
return get_keys_dir() / "private_key.pem"
|
|
||||||
|
|
||||||
|
|
||||||
def get_public_key_path() -> Path:
|
|
||||||
"""Get the JWT public key path.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Path to public_key.pem
|
|
||||||
"""
|
|
||||||
return get_keys_dir() / "public_key.pem"
|
|
||||||
|
|
||||||
|
|
||||||
def get_documents_dir() -> Path:
|
|
||||||
"""Get the documents directory for default document ingestion.
|
|
||||||
|
|
||||||
In containers: Uses /app/openrag-documents (Docker volume mount)
|
|
||||||
In local environments: Uses ~/.openrag/documents/openrag-documents
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Path to documents directory
|
|
||||||
"""
|
|
||||||
container_env = detect_container_environment()
|
|
||||||
if container_env:
|
|
||||||
# In containers, use the Docker volume mount path
|
|
||||||
return Path("/app/openrag-documents")
|
|
||||||
|
|
||||||
# In local environments, use centralized location
|
|
||||||
documents_dir = get_openrag_home() / "documents" / "openrag-documents"
|
|
||||||
documents_dir.mkdir(parents=True, exist_ok=True)
|
|
||||||
return documents_dir
|
|
||||||
|
|
||||||
|
|
||||||
def get_flows_dir() -> Path:
|
|
||||||
"""Get the flows directory.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Path to flows directory (~/.openrag/flows/ or ./flows/ in containers)
|
|
||||||
"""
|
|
||||||
flows_dir = get_openrag_home() / "flows"
|
|
||||||
flows_dir.mkdir(parents=True, exist_ok=True)
|
|
||||||
return flows_dir
|
|
||||||
|
|
||||||
|
|
||||||
def get_flows_backup_dir() -> Path:
|
|
||||||
"""Get the flows backup directory.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Path to flows/backup directory
|
|
||||||
"""
|
|
||||||
backup_dir = get_flows_dir() / "backup"
|
|
||||||
backup_dir.mkdir(parents=True, exist_ok=True)
|
|
||||||
return backup_dir
|
|
||||||
|
|
||||||
|
|
||||||
def get_data_dir() -> Path:
|
|
||||||
"""Get the data directory.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Path to data directory (~/.openrag/data/ or ./data/ in containers)
|
|
||||||
"""
|
|
||||||
data_dir = get_openrag_home() / "data"
|
|
||||||
data_dir.mkdir(parents=True, exist_ok=True)
|
|
||||||
return data_dir
|
|
||||||
|
|
||||||
|
|
||||||
def get_opensearch_data_dir() -> Path:
|
|
||||||
"""Get the OpenSearch data directory.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Path to OpenSearch data directory
|
|
||||||
"""
|
|
||||||
return get_data_dir() / "opensearch-data"
|
|
||||||
|
|
||||||
|
|
||||||
def get_tui_dir() -> Path:
|
def get_tui_dir() -> Path:
|
||||||
"""Get the TUI directory for TUI-specific files.
|
"""Get the TUI directory for TUI-specific files.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Path to tui directory (~/.openrag/tui/ or ./tui/ in containers)
|
Path to ~/.openrag/tui/ directory
|
||||||
"""
|
"""
|
||||||
tui_dir = get_openrag_home() / "tui"
|
tui_dir = get_openrag_home() / "tui"
|
||||||
tui_dir.mkdir(parents=True, exist_ok=True)
|
tui_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
@ -162,7 +35,7 @@ def get_tui_env_file() -> Path:
|
||||||
"""Get the TUI .env file path.
|
"""Get the TUI .env file path.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Path to .env file
|
Path to ~/.openrag/tui/.env file
|
||||||
"""
|
"""
|
||||||
return get_tui_dir() / ".env"
|
return get_tui_dir() / ".env"
|
||||||
|
|
||||||
|
|
@ -174,29 +47,21 @@ def get_tui_compose_file(gpu: bool = False) -> Path:
|
||||||
gpu: If True, returns path to docker-compose.gpu.yml
|
gpu: If True, returns path to docker-compose.gpu.yml
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Path to docker-compose file
|
Path to docker-compose file in ~/.openrag/tui/
|
||||||
"""
|
"""
|
||||||
filename = "docker-compose.gpu.yml" if gpu else "docker-compose.yml"
|
filename = "docker-compose.gpu.yml" if gpu else "docker-compose.yml"
|
||||||
return get_tui_dir() / filename
|
return get_tui_dir() / filename
|
||||||
|
|
||||||
|
|
||||||
# Backward compatibility functions for migration
|
|
||||||
def get_legacy_paths() -> dict:
|
def get_legacy_paths() -> dict:
|
||||||
"""Get legacy (old) paths for migration purposes.
|
"""Get legacy (CWD-based) paths for migration purposes.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dictionary mapping resource names to their old paths
|
Dictionary mapping resource names to their old CWD-based paths
|
||||||
"""
|
"""
|
||||||
cwd = Path.cwd()
|
cwd = Path.cwd()
|
||||||
return {
|
return {
|
||||||
"config": cwd / "config" / "config.yaml",
|
|
||||||
"keys_dir": cwd / "keys",
|
|
||||||
"private_key": cwd / "keys" / "private_key.pem",
|
|
||||||
"public_key": cwd / "keys" / "public_key.pem",
|
|
||||||
"documents": cwd / "openrag-documents",
|
|
||||||
"flows": cwd / "flows",
|
|
||||||
"tui_env": cwd / ".env",
|
"tui_env": cwd / ".env",
|
||||||
"tui_compose": cwd / "docker-compose.yml",
|
"tui_compose": cwd / "docker-compose.yml",
|
||||||
"tui_compose_gpu": cwd / "docker-compose.gpu.yml",
|
"tui_compose_gpu": cwd / "docker-compose.gpu.yml",
|
||||||
"opensearch_data": cwd / "opensearch-data",
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue