diff --git a/src/config/config_manager.py b/src/config/config_manager.py index a8434554..e5231889 100644 --- a/src/config/config_manager.py +++ b/src/config/config_manager.py @@ -1,13 +1,11 @@ """Configuration management for OpenRAG.""" import os -import shutil import yaml from pathlib import Path from typing import Dict, Any, Optional from dataclasses import dataclass, asdict from utils.logging_config import get_logger -from utils.paths import get_config_file, get_legacy_paths logger = get_logger(__name__) @@ -132,40 +130,10 @@ class ConfigManager: """Initialize configuration manager. Args: - config_file: Path to configuration file. Defaults to centralized location. + config_file: Path to configuration file. Defaults to 'config/config.yaml', resolved relative to the current working directory. """ - if config_file: - self.config_file = Path(config_file) - else: - # Use centralized location - self.config_file = get_config_file() - - # Check for legacy location and migrate if needed - legacy_config = get_legacy_paths()["config"] - if not self.config_file.exists() and legacy_config.exists(): - self._migrate_from_legacy(legacy_config) - + self.config_file = Path(config_file) if config_file else Path("config/config.yaml") self._config: Optional[OpenRAGConfig] = None - - def _migrate_from_legacy(self, legacy_config_path: Path) -> None: - """Migrate configuration from legacy location to centralized location. 
- - Args: - legacy_config_path: Path to legacy config file - """ - try: - logger.info( - f"Migrating configuration from {legacy_config_path} to {self.config_file}" - ) - # Ensure parent directory exists - self.config_file.parent.mkdir(parents=True, exist_ok=True) - # Copy the config file - shutil.copy2(legacy_config_path, self.config_file) - logger.info("Configuration migration completed successfully") - except Exception as e: - logger.warning( - f"Failed to migrate configuration from {legacy_config_path}: {e}" - ) def load_config(self) -> OpenRAGConfig: """Load configuration from environment variables and config file. diff --git a/src/main.py b/src/main.py index 89723e22..1c3d065e 100644 --- a/src/main.py +++ b/src/main.py @@ -246,39 +246,25 @@ async def init_index(): def generate_jwt_keys(): """Generate RSA keys for JWT signing if they don't exist""" - from utils.paths import get_keys_dir, get_private_key_path, get_public_key_path, get_legacy_paths - - # Use centralized keys directory - keys_dir = get_keys_dir() - private_key_path = get_private_key_path() - public_key_path = get_public_key_path() - - # Check for legacy keys and migrate if needed - legacy_paths = get_legacy_paths() - if not private_key_path.exists() and legacy_paths["private_key"].exists(): - logger.info(f"Migrating JWT keys from {legacy_paths['keys_dir']} to {keys_dir}") - try: - shutil.copy2(legacy_paths["private_key"], private_key_path) - if legacy_paths["public_key"].exists(): - shutil.copy2(legacy_paths["public_key"], public_key_path) - logger.info("JWT keys migration completed successfully") - except Exception as e: - logger.warning(f"Failed to migrate JWT keys: {e}") - - # Ensure keys directory exists (already done by get_keys_dir) + keys_dir = "keys" + private_key_path = os.path.join(keys_dir, "private_key.pem") + public_key_path = os.path.join(keys_dir, "public_key.pem") + + # Create keys directory if it doesn't exist + os.makedirs(keys_dir, exist_ok=True) # Generate keys if they don't 
exist - if not private_key_path.exists(): + if not os.path.exists(private_key_path): try: # Generate private key subprocess.run( - ["openssl", "genrsa", "-out", str(private_key_path), "2048"], + ["openssl", "genrsa", "-out", private_key_path, "2048"], check=True, capture_output=True, ) # Set restrictive permissions on private key (readable by owner only) - os.chmod(str(private_key_path), 0o600) + os.chmod(private_key_path, 0o600) # Generate public key subprocess.run( @@ -286,17 +272,17 @@ def generate_jwt_keys(): "openssl", "rsa", "-in", - str(private_key_path), + private_key_path, "-pubout", "-out", - str(public_key_path), + public_key_path, ], check=True, capture_output=True, ) # Set permissions on public key (readable by all) - os.chmod(str(public_key_path), 0o644) + os.chmod(public_key_path, 0o644) logger.info("Generated RSA keys for JWT signing") except subprocess.CalledProcessError as e: @@ -306,8 +292,8 @@ def generate_jwt_keys(): else: # Ensure correct permissions on existing keys try: - os.chmod(str(private_key_path), 0o600) - os.chmod(str(public_key_path), 0o644) + os.chmod(private_key_path, 0o600) + os.chmod(public_key_path, 0o644) logger.info("RSA keys already exist, ensured correct permissions") except OSError as e: logger.warning("Failed to set permissions on existing keys", error=str(e)) @@ -328,18 +314,17 @@ async def init_index_when_ready(): def _get_documents_dir(): """Get the documents directory path, handling both Docker and local environments.""" - from utils.paths import get_documents_dir - - # Use centralized path utility which handles both container and local environments - path = get_documents_dir() + # In Docker, the volume is mounted at /app/openrag-documents + # Locally, we use openrag-documents container_env = detect_container_environment() - if container_env: + path = os.path.abspath("/app/openrag-documents") logger.debug(f"Running in {container_env}, using container path: {path}") + return path else: - logger.debug(f"Running locally, 
using centralized path: {path}") - - return str(path) + path = os.path.abspath(os.path.join(os.getcwd(), "openrag-documents")) + logger.debug(f"Running locally, using local path: {path}") + return path async def ingest_default_documents_when_ready(services): @@ -575,16 +560,6 @@ async def startup_tasks(services): async def initialize_services(): """Initialize all services and their dependencies""" await TelemetryClient.send_event(Category.SERVICE_INITIALIZATION, MessageId.ORB_SVC_INIT_START) - - # Perform migration if needed (move files from old locations to ~/.openrag) - from utils.migration import perform_migration - try: - migration_results = perform_migration() - if migration_results: - logger.info("File migration completed", results=migration_results) - except Exception as e: - logger.warning(f"Migration failed, some files may still be in legacy locations. Consider manual migration. Error: {e}") - # Generate JWT keys if they don't exist generate_jwt_keys() diff --git a/src/services/flows_service.py b/src/services/flows_service.py index ebf147b4..e97ac2d3 100644 --- a/src/services/flows_service.py +++ b/src/services/flows_service.py @@ -40,13 +40,17 @@ class FlowsService: def _get_flows_directory(self): """Get the flows directory path""" - from utils.paths import get_flows_dir - return str(get_flows_dir()) + current_file_dir = os.path.dirname(os.path.abspath(__file__)) # src/services/ + src_dir = os.path.dirname(current_file_dir) # src/ + project_root = os.path.dirname(src_dir) # project root + return os.path.join(project_root, "flows") def _get_backup_directory(self): """Get the backup directory path""" - from utils.paths import get_flows_backup_dir - return str(get_flows_backup_dir()) + flows_dir = self._get_flows_directory() + backup_dir = os.path.join(flows_dir, "backup") + os.makedirs(backup_dir, exist_ok=True) + return backup_dir def _get_latest_backup_path(self, flow_id: str, flow_type: str): """ diff --git a/src/session_manager.py 
b/src/session_manager.py index 1cf343e6..6b2023d5 100644 --- a/src/session_manager.py +++ b/src/session_manager.py @@ -51,29 +51,18 @@ class SessionManager: def __init__( self, secret_key: str = None, - private_key_path: str = None, - public_key_path: str = None, + private_key_path: str = "keys/private_key.pem", + public_key_path: str = "keys/public_key.pem", ): - from utils.paths import get_private_key_path, get_public_key_path - self.secret_key = secret_key # Keep for backward compatibility self.users: Dict[str, User] = {} # user_id -> User self.user_opensearch_clients: Dict[ str, Any ] = {} # user_id -> OpenSearch client - # Use centralized key paths if not explicitly provided - if private_key_path is None: - self.private_key_path = str(get_private_key_path()) - else: - self.private_key_path = private_key_path - - if public_key_path is None: - self.public_key_path = str(get_public_key_path()) - else: - self.public_key_path = public_key_path - # Load RSA keys + self.private_key_path = private_key_path + self.public_key_path = public_key_path self._load_rsa_keys() def _load_rsa_keys(self): diff --git a/src/tui/main.py b/src/tui/main.py index 8dec2a2c..9623f50e 100644 --- a/src/tui/main.py +++ b/src/tui/main.py @@ -454,9 +454,28 @@ def _copy_assets(resource_tree, destination: Path, allowed_suffixes: Optional[It def copy_sample_documents(*, force: bool = False) -> None: - """Copy sample documents from package to centralized directory if they don't exist.""" - from utils.paths import get_documents_dir - documents_dir = get_documents_dir() + """Copy sample documents from package to host directory. + + Uses the first path from OPENRAG_DOCUMENTS_PATHS env var. + Defaults to ~/.openrag/documents/openrag-documents if not configured. 
+ """ + from .managers.env_manager import EnvManager + from pathlib import Path + + # Get the configured documents path from env + env_manager = EnvManager() + env_manager.load_existing_env() + + # Parse the first path from the documents paths config + documents_path_str = env_manager.config.openrag_documents_paths + if documents_path_str: + first_path = documents_path_str.split(',')[0].strip() + documents_dir = Path(first_path).expanduser() + else: + # Default fallback + documents_dir = Path.home() / ".openrag" / "documents" / "openrag-documents" + + documents_dir.mkdir(parents=True, exist_ok=True) try: assets_files = files("tui._assets.openrag-documents") @@ -467,9 +486,15 @@ def copy_sample_documents(*, force: bool = False) -> None: def copy_sample_flows(*, force: bool = False) -> None: - """Copy sample flows from package to centralized directory if they don't exist.""" - from utils.paths import get_flows_dir - flows_dir = get_flows_dir() + """Copy sample flows from package to host directory. + + Flows are placed in ~/.openrag/flows/ which will be volume-mounted to containers. + """ + from pathlib import Path + + # Flows always go to ~/.openrag/flows/ - this will be volume-mounted + flows_dir = Path.home() / ".openrag" / "flows" + flows_dir.mkdir(parents=True, exist_ok=True) try: assets_files = files("tui._assets.flows") @@ -516,6 +541,32 @@ def copy_compose_files(*, force: bool = False) -> None: logger.debug(f"Could not copy compose file {filename}: {error}") +def setup_host_directories(): + """Initialize OpenRAG directory structure on the host. 
+ + Creates directories that will be volume-mounted into containers: + - ~/.openrag/documents/openrag-documents/ (for document ingestion) + - ~/.openrag/flows/ (for Langflow flows) + - ~/.openrag/keys/ (for JWT keys) + - ~/.openrag/config/ (for configuration) + - ~/.openrag/data/opensearch-data/ (for OpenSearch data) + """ + from pathlib import Path + + base_dir = Path.home() / ".openrag" + directories = [ + base_dir / "documents" / "openrag-documents", + base_dir / "flows", + base_dir / "keys", + base_dir / "config", + base_dir / "data" / "opensearch-data", + ] + + for directory in directories: + directory.mkdir(parents=True, exist_ok=True) + logger.debug(f"Ensured directory exists: {directory}") + + def run_tui(): """Run the OpenRAG TUI application.""" # Check for native Windows before launching TUI @@ -532,6 +583,9 @@ def run_tui(): app = None try: + # Initialize host directory structure + setup_host_directories() + # Keep bundled assets aligned with the packaged versions copy_sample_documents(force=True) copy_sample_flows(force=True) diff --git a/src/utils/migration.py b/src/utils/migration.py deleted file mode 100644 index 6f00daa3..00000000 --- a/src/utils/migration.py +++ /dev/null @@ -1,285 +0,0 @@ -"""Migration utilities for moving OpenRAG files to centralized location. - -This module handles migration of files from legacy locations (current working directory) -to the new centralized location (~/.openrag/). -""" - -import shutil -from pathlib import Path -from typing import Optional, List, Dict -from utils.logging_config import get_logger -from utils.container_utils import detect_container_environment - -logger = get_logger(__name__) - - -def get_migration_marker_file() -> Path: - """Get the path to the migration marker file. - - This file is created after a successful migration to prevent repeated migrations. 
- - Returns: - Path to migration marker file - """ - from utils.paths import get_openrag_home - return get_openrag_home() / ".migrated" - - -def is_migration_needed() -> bool: - """Check if migration is needed. - - Migration is not needed if: - - We're in a container environment - - Migration has already been completed (marker file exists) - - Returns: - True if migration should be performed, False otherwise - """ - # Don't migrate in container environments - if detect_container_environment(): - return False - - # Check if migration has already been completed - marker_file = get_migration_marker_file() - if marker_file.exists(): - return False - - # Check if any legacy files exist - from utils.paths import get_legacy_paths - legacy_paths = get_legacy_paths() - - for name, path in legacy_paths.items(): - if path.exists(): - logger.info(f"Found legacy file/directory: {path}") - return True - - return False - - -def migrate_directory(src: Path, dst: Path, description: str) -> bool: - """Migrate a directory from source to destination. 
- - Args: - src: Source directory path - dst: Destination directory path - description: Human-readable description for logging - - Returns: - True if migration was successful or not needed, False otherwise - """ - if not src.exists(): - logger.debug(f"Source directory does not exist, skipping: {src}") - return True - - if not src.is_dir(): - logger.warning(f"Source is not a directory: {src}") - return False - - try: - # Ensure parent directory exists - dst.parent.mkdir(parents=True, exist_ok=True) - - # If destination already exists, merge contents - if dst.exists(): - logger.info(f"Destination already exists, merging: {dst}") - # Copy contents recursively - for item in src.iterdir(): - src_item = src / item.name - dst_item = dst / item.name - - if src_item.is_dir(): - if not dst_item.exists(): - shutil.copytree(src_item, dst_item) - logger.debug(f"Copied directory: {src_item} -> {dst_item}") - else: - if not dst_item.exists(): - shutil.copy2(src_item, dst_item) - logger.debug(f"Copied file: {src_item} -> {dst_item}") - else: - # Move entire directory - shutil.move(str(src), str(dst)) - logger.info(f"Migrated {description}: {src} -> {dst}") - - return True - except Exception as e: - logger.error(f"Failed to migrate {description} from {src} to {dst}: {e}") - return False - - -def migrate_file(src: Path, dst: Path, description: str) -> bool: - """Migrate a file from source to destination. 
- - Args: - src: Source file path - dst: Destination file path - description: Human-readable description for logging - - Returns: - True if migration was successful or not needed, False otherwise - """ - if not src.exists(): - logger.debug(f"Source file does not exist, skipping: {src}") - return True - - if not src.is_file(): - logger.warning(f"Source is not a file: {src}") - return False - - try: - # Ensure parent directory exists - dst.parent.mkdir(parents=True, exist_ok=True) - - # Only copy if destination doesn't exist - if dst.exists(): - logger.debug(f"Destination already exists, skipping: {dst}") - return True - - # Copy the file - shutil.copy2(src, dst) - logger.info(f"Migrated {description}: {src} -> {dst}") - return True - except Exception as e: - logger.error(f"Failed to migrate {description} from {src} to {dst}: {e}") - return False - - -def perform_migration() -> Dict[str, bool]: - """Perform migration of all OpenRAG files to centralized location. - - Returns: - Dictionary mapping resource names to migration success status - """ - if not is_migration_needed(): - logger.debug("Migration not needed or already completed") - return {} - - logger.info("Starting migration of OpenRAG files to centralized location") - - from utils.paths import ( - get_config_file, - get_keys_dir, - get_documents_dir, - get_flows_dir, - get_tui_env_file, - get_tui_compose_file, - get_opensearch_data_dir, - get_legacy_paths, - ) - - legacy_paths = get_legacy_paths() - results = {} - - # Migrate configuration file - if legacy_paths["config"].exists(): - results["config"] = migrate_file( - legacy_paths["config"], - get_config_file(), - "configuration file" - ) - - # Migrate JWT keys directory - if legacy_paths["keys_dir"].exists(): - results["keys"] = migrate_directory( - legacy_paths["keys_dir"], - get_keys_dir(), - "JWT keys directory" - ) - - # Migrate documents directory - if legacy_paths["documents"].exists(): - results["documents"] = migrate_directory( - 
legacy_paths["documents"], - get_documents_dir(), - "documents directory" - ) - - # Migrate flows directory - if legacy_paths["flows"].exists(): - results["flows"] = migrate_directory( - legacy_paths["flows"], - get_flows_dir(), - "flows directory" - ) - - # Migrate TUI .env file - if legacy_paths["tui_env"].exists(): - results["tui_env"] = migrate_file( - legacy_paths["tui_env"], - get_tui_env_file(), - "TUI .env file" - ) - - # Migrate docker-compose files - if legacy_paths["tui_compose"].exists(): - results["tui_compose"] = migrate_file( - legacy_paths["tui_compose"], - get_tui_compose_file(gpu=False), - "docker-compose.yml" - ) - - if legacy_paths["tui_compose_gpu"].exists(): - results["tui_compose_gpu"] = migrate_file( - legacy_paths["tui_compose_gpu"], - get_tui_compose_file(gpu=True), - "docker-compose.gpu.yml" - ) - - # Note: We don't migrate opensearch-data as it's typically large and managed by Docker - # Users can manually move it if needed, or specify a custom path via env var - - # Create migration marker file - marker_file = get_migration_marker_file() - try: - marker_file.parent.mkdir(parents=True, exist_ok=True) - marker_file.write_text("Migration completed successfully\n") - logger.info("Migration marker file created") - except Exception as e: - logger.warning(f"Failed to create migration marker file: {e}") - - # Log summary - successful = sum(1 for success in results.values() if success) - total = len(results) - logger.info(f"Migration completed: {successful}/{total} items migrated successfully") - - if successful < total: - logger.warning("Some migrations failed. Check logs for details.") - - return results - - -def cleanup_legacy_files(dry_run: bool = True) -> List[str]: - """Clean up legacy files after successful migration. - - This function removes the old files from the current working directory after - confirming they have been successfully migrated. 
- - Args: - dry_run: If True, only list files that would be removed without actually removing them - - Returns: - List of file paths that were (or would be) removed - """ - from utils.paths import get_legacy_paths - - legacy_paths = get_legacy_paths() - removed_files = [] - - for name, path in legacy_paths.items(): - if not path.exists(): - continue - - if dry_run: - logger.info(f"Would remove: {path}") - removed_files.append(str(path)) - else: - try: - if path.is_dir(): - shutil.rmtree(path) - else: - path.unlink() - logger.info(f"Removed legacy file/directory: {path}") - removed_files.append(str(path)) - except Exception as e: - logger.error(f"Failed to remove {path}: {e}") - - return removed_files diff --git a/src/utils/paths.py b/src/utils/paths.py index 6a3e3eca..c621c8f8 100644 --- a/src/utils/paths.py +++ b/src/utils/paths.py @@ -1,157 +1,30 @@ -"""Centralized path management for OpenRAG. +"""Host-side path management for OpenRAG TUI. -This module provides functions to get standardized paths for OpenRAG files and directories. -All paths are centralized under ~/.openrag/ to avoid cluttering the user's current working directory. +This module provides functions for TUI to get standardized paths on the host machine. +All TUI files are centralized under ~/.openrag/ to avoid cluttering the user's CWD. + +Note: This module is for HOST-SIDE (TUI) use only. Container code should not use these paths. """ -import os from pathlib import Path -from utils.logging_config import get_logger -from utils.container_utils import detect_container_environment - -logger = get_logger(__name__) def get_openrag_home() -> Path: - """Get the OpenRAG home directory. - - In containers: Uses current working directory (for backward compatibility) - In local environments: Uses ~/.openrag/ + """Get the OpenRAG home directory on the host. 
Returns: - Path to OpenRAG home directory + Path to ~/.openrag/ directory """ - # In container environments, use the container's working directory - # This maintains backward compatibility with existing Docker setups - container_env = detect_container_environment() - if container_env: - # In containers, return the container's working directory. - # This ensures compatibility with existing Docker volume mounts, - # as Docker typically mounts volumes into the working directory. - return Path.cwd() - - # In local environments, use centralized location home_dir = Path.home() / ".openrag" home_dir.mkdir(parents=True, exist_ok=True) return home_dir -def get_config_dir() -> Path: - """Get the configuration directory. - - Returns: - Path to config directory (~/.openrag/config/ or ./config/ in containers) - """ - config_dir = get_openrag_home() / "config" - config_dir.mkdir(parents=True, exist_ok=True) - return config_dir - - -def get_config_file() -> Path: - """Get the configuration file path. - - Returns: - Path to config.yaml file - """ - return get_config_dir() / "config.yaml" - - -def get_keys_dir() -> Path: - """Get the JWT keys directory. - - Returns: - Path to keys directory (~/.openrag/keys/ or ./keys/ in containers) - """ - keys_dir = get_openrag_home() / "keys" - keys_dir.mkdir(parents=True, exist_ok=True) - return keys_dir - - -def get_private_key_path() -> Path: - """Get the JWT private key path. - - Returns: - Path to private_key.pem - """ - return get_keys_dir() / "private_key.pem" - - -def get_public_key_path() -> Path: - """Get the JWT public key path. - - Returns: - Path to public_key.pem - """ - return get_keys_dir() / "public_key.pem" - - -def get_documents_dir() -> Path: - """Get the documents directory for default document ingestion. 
- - In containers: Uses /app/openrag-documents (Docker volume mount) - In local environments: Uses ~/.openrag/documents/openrag-documents - - Returns: - Path to documents directory - """ - container_env = detect_container_environment() - if container_env: - # In containers, use the Docker volume mount path - return Path("/app/openrag-documents") - - # In local environments, use centralized location - documents_dir = get_openrag_home() / "documents" / "openrag-documents" - documents_dir.mkdir(parents=True, exist_ok=True) - return documents_dir - - -def get_flows_dir() -> Path: - """Get the flows directory. - - Returns: - Path to flows directory (~/.openrag/flows/ or ./flows/ in containers) - """ - flows_dir = get_openrag_home() / "flows" - flows_dir.mkdir(parents=True, exist_ok=True) - return flows_dir - - -def get_flows_backup_dir() -> Path: - """Get the flows backup directory. - - Returns: - Path to flows/backup directory - """ - backup_dir = get_flows_dir() / "backup" - backup_dir.mkdir(parents=True, exist_ok=True) - return backup_dir - - -def get_data_dir() -> Path: - """Get the data directory. - - Returns: - Path to data directory (~/.openrag/data/ or ./data/ in containers) - """ - data_dir = get_openrag_home() / "data" - data_dir.mkdir(parents=True, exist_ok=True) - return data_dir - - -def get_opensearch_data_dir() -> Path: - """Get the OpenSearch data directory. - - Returns: - Path to OpenSearch data directory - """ - return get_data_dir() / "opensearch-data" - - def get_tui_dir() -> Path: """Get the TUI directory for TUI-specific files. Returns: - Path to tui directory (~/.openrag/tui/ or ./tui/ in containers) + Path to ~/.openrag/tui/ directory """ tui_dir = get_openrag_home() / "tui" tui_dir.mkdir(parents=True, exist_ok=True) @@ -162,7 +35,7 @@ def get_tui_env_file() -> Path: """Get the TUI .env file path. 
Returns: - Path to .env file + Path to ~/.openrag/tui/.env file """ return get_tui_dir() / ".env" @@ -174,29 +47,21 @@ def get_tui_compose_file(gpu: bool = False) -> Path: gpu: If True, returns path to docker-compose.gpu.yml Returns: - Path to docker-compose file + Path to docker-compose file in ~/.openrag/tui/ """ filename = "docker-compose.gpu.yml" if gpu else "docker-compose.yml" return get_tui_dir() / filename -# Backward compatibility functions for migration def get_legacy_paths() -> dict: - """Get legacy (old) paths for migration purposes. + """Get legacy (CWD-based) paths for migration purposes. Returns: - Dictionary mapping resource names to their old paths + Dictionary mapping resource names to their old CWD-based paths """ cwd = Path.cwd() return { - "config": cwd / "config" / "config.yaml", - "keys_dir": cwd / "keys", - "private_key": cwd / "keys" / "private_key.pem", - "public_key": cwd / "keys" / "public_key.pem", - "documents": cwd / "openrag-documents", - "flows": cwd / "flows", "tui_env": cwd / ".env", "tui_compose": cwd / "docker-compose.yml", "tui_compose_gpu": cwd / "docker-compose.gpu.yml", - "opensearch_data": cwd / "opensearch-data", }