diff --git a/docker-compose.yml b/docker-compose.yml index e33b0d83..2a73da89 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -84,6 +84,7 @@ services: - ${OPENRAG_KEYS_PATH:-./keys}:/app/keys:Z - ${OPENRAG_FLOWS_PATH:-./flows}:/app/flows:U,z - ${OPENRAG_CONFIG_PATH:-./config}:/app/config:Z + - ${OPENRAG_DATA_PATH:-./data}:/app/data:Z openrag-frontend: image: langflowai/openrag-frontend:${OPENRAG_VERSION:-latest} diff --git a/src/connectors/connection_manager.py b/src/connectors/connection_manager.py index 07ebd5ee..fd207be6 100644 --- a/src/connectors/connection_manager.py +++ b/src/connectors/connection_manager.py @@ -36,8 +36,10 @@ class ConnectionConfig: class ConnectionManager: """Manages multiple connector connections with persistence""" - def __init__(self, connections_file: str = "connections.json"): + def __init__(self, connections_file: str = "data/connections.json"): self.connections_file = Path(connections_file) + # Ensure data directory exists + self.connections_file.parent.mkdir(parents=True, exist_ok=True) self.connections: Dict[str, ConnectionConfig] = {} self.active_connectors: Dict[str, BaseConnector] = {} diff --git a/src/connectors/google_drive/connector.py b/src/connectors/google_drive/connector.py index afd8b8c2..28389cfd 100644 --- a/src/connectors/google_drive/connector.py +++ b/src/connectors/google_drive/connector.py @@ -96,11 +96,8 @@ class GoogleDriveConnector(BaseConnector): client_id = config.get("client_id") or env_client_id client_secret = config.get("client_secret") or env_client_secret - # Token file default (so callback & workers don’t need to pass it) - project_root = Path(__file__).resolve().parent.parent.parent.parent - token_file = config.get("token_file") or str( - project_root / "google_drive_token.json" - ) + # Token file default - use data/ directory for persistence + token_file = config.get("token_file") or "data/google_drive_token.json" Path(token_file).parent.mkdir(parents=True, exist_ok=True) if not 
isinstance(client_id, str) or not client_id.strip(): diff --git a/src/connectors/onedrive/connector.py b/src/connectors/onedrive/connector.py index 796e4310..dcca775d 100644 --- a/src/connectors/onedrive/connector.py +++ b/src/connectors/onedrive/connector.py @@ -58,9 +58,8 @@ class OneDriveConnector(BaseConnector): except Exception as e: logger.debug(f"Failed to get client_secret: {e}") - # Token file setup - project_root = Path(__file__).resolve().parent.parent.parent.parent - token_file = config.get("token_file") or str(project_root / "onedrive_token.json") + # Token file setup - use data/ directory for persistence + token_file = config.get("token_file") or "data/onedrive_token.json" Path(token_file).parent.mkdir(parents=True, exist_ok=True) # Only initialize OAuth if we have credentials @@ -72,7 +71,7 @@ class OneDriveConnector(BaseConnector): oauth_token_file = config["token_file"] else: # Use a per-connection cache file to avoid collisions with other connectors - oauth_token_file = f"onedrive_token_{connection_id}.json" + oauth_token_file = f"data/onedrive_token_{connection_id}.json" # MSA & org both work via /common for OneDrive personal testing authority = "https://login.microsoftonline.com/common" diff --git a/src/connectors/sharepoint/connector.py b/src/connectors/sharepoint/connector.py index df6dc102..f9482d9c 100644 --- a/src/connectors/sharepoint/connector.py +++ b/src/connectors/sharepoint/connector.py @@ -66,20 +66,19 @@ class SharePointConnector(BaseConnector): logger.debug(f"Failed to get client_secret: {e}") pass # Credentials not available, that's OK for listing - # Token file setup - project_root = Path(__file__).resolve().parent.parent.parent.parent - token_file = config.get("token_file") or str(project_root / "sharepoint_token.json") + # Token file setup - use data/ directory for persistence + token_file = config.get("token_file") or "data/sharepoint_token.json" Path(token_file).parent.mkdir(parents=True, exist_ok=True) - + # Only initialize 
OAuth if we have credentials if self.client_id and self.client_secret: connection_id = config.get("connection_id", "default") - + # Use token_file from config if provided, otherwise generate one if config.get("token_file"): oauth_token_file = config["token_file"] else: - oauth_token_file = f"sharepoint_token_{connection_id}.json" + oauth_token_file = f"data/sharepoint_token_{connection_id}.json" authority = f"https://login.microsoftonline.com/{self.tenant_id}" if self.tenant_id != "common" else "https://login.microsoftonline.com/common" diff --git a/src/services/auth_service.py b/src/services/auth_service.py index d27d1eb6..daed90e6 100644 --- a/src/services/auth_service.py +++ b/src/services/auth_service.py @@ -63,8 +63,8 @@ class AuthService: # We'll validate client credentials when creating the connector - # Create connection configuration - token_file = f"{connector_type}_{purpose}_{uuid.uuid4().hex[:8]}.json" + # Create connection configuration - use data/ directory for persistence + token_file = f"data/{connector_type}_{purpose}_{uuid.uuid4().hex[:8]}.json" config = { "token_file": token_file, "connector_type": connector_type, diff --git a/src/services/conversation_persistence_service.py b/src/services/conversation_persistence_service.py index 0c7edc84..8af36eff 100644 --- a/src/services/conversation_persistence_service.py +++ b/src/services/conversation_persistence_service.py @@ -15,9 +15,11 @@ logger = get_logger(__name__) class ConversationPersistenceService: """Simple service to persist conversations to disk""" - - def __init__(self, storage_file: str = "conversations.json"): + + def __init__(self, storage_file: str = "data/conversations.json"): self.storage_file = storage_file + # Ensure data directory exists (fall back to "." for bare filenames, where dirname() is "") + os.makedirs(os.path.dirname(self.storage_file) or ".", exist_ok=True) self.lock = threading.Lock() self._conversations = self._load_conversations() diff --git a/src/services/session_ownership_service.py b/src/services/session_ownership_service.py index
d700c5c3..8fcd8308 100644 --- a/src/services/session_ownership_service.py +++ b/src/services/session_ownership_service.py @@ -13,9 +13,11 @@ logger = get_logger(__name__) class SessionOwnershipService: """Simple service to track which user owns which session""" - + def __init__(self): - self.ownership_file = "session_ownership.json" + self.ownership_file = "data/session_ownership.json" + # Ensure data directory exists + os.makedirs(os.path.dirname(self.ownership_file), exist_ok=True) self.ownership_data = self._load_ownership_data() def _load_ownership_data(self) -> Dict[str, Dict[str, any]]: diff --git a/src/tui/main.py b/src/tui/main.py index 02c7a294..5e888f36 100644 --- a/src/tui/main.py +++ b/src/tui/main.py @@ -644,22 +644,22 @@ def migrate_legacy_data_directories(): def setup_host_directories(): """Initialize OpenRAG directory structure on the host. - + Creates directories that will be volume-mounted into containers: - ~/.openrag/documents/openrag-documents/ (for document ingestion) - ~/.openrag/flows/ (for Langflow flows) - ~/.openrag/keys/ (for JWT keys) - ~/.openrag/config/ (for configuration) + - ~/.openrag/data/ (for backend data: conversations, OAuth tokens, etc.) - ~/.openrag/data/opensearch-data/ (for OpenSearch data) """ - from pathlib import Path - base_dir = Path.home() / ".openrag" directories = [ base_dir / "documents" / "openrag-documents", base_dir / "flows", base_dir / "keys", base_dir / "config", + base_dir / "data", base_dir / "data" / "opensearch-data", ] diff --git a/src/tui/managers/container_manager.py b/src/tui/managers/container_manager.py index 819b1076..7ec88119 100644 --- a/src/tui/managers/container_manager.py +++ b/src/tui/managers/container_manager.py @@ -1135,8 +1135,11 @@ class ContainerManager: yield False, "Clearing OpenSearch data volume..." 
- # Get the absolute path to opensearch-data directory - opensearch_data_path = Path("opensearch-data").absolute() + # Get opensearch data path from env config + from .env_manager import EnvManager + env_manager = EnvManager() + env_manager.load_existing_env() + opensearch_data_path = Path(env_manager.config.opensearch_data_path.replace("$HOME", str(Path.home()))).expanduser().absolute() if not opensearch_data_path.exists(): yield True, "OpenSearch data directory does not exist, skipping" diff --git a/src/tui/managers/env_manager.py b/src/tui/managers/env_manager.py index b9124d8c..f8186fac 100644 --- a/src/tui/managers/env_manager.py +++ b/src/tui/managers/env_manager.py @@ -72,6 +72,7 @@ class EnvConfig: openrag_keys_path: str = "$HOME/.openrag/keys" openrag_flows_path: str = "$HOME/.openrag/flows" openrag_config_path: str = "$HOME/.openrag/config" + openrag_data_path: str = "$HOME/.openrag/data" # Backend data (conversations, tokens, etc.) opensearch_data_path: str = "$HOME/.openrag/data/opensearch-data" # Container version (linked to TUI version) @@ -182,6 +183,7 @@ class EnvManager: "OPENRAG_KEYS_PATH": "openrag_keys_path", "OPENRAG_FLOWS_PATH": "openrag_flows_path", "OPENRAG_CONFIG_PATH": "openrag_config_path", + "OPENRAG_DATA_PATH": "openrag_data_path", "OPENSEARCH_DATA_PATH": "opensearch_data_path", "LANGFLOW_AUTO_LOGIN": "langflow_auto_login", "LANGFLOW_NEW_USER_IS_ACTIVE": "langflow_new_user_is_active", @@ -394,6 +396,9 @@ class EnvManager: f.write( f"OPENRAG_CONFIG_PATH={self._quote_env_value(self.config.openrag_config_path)}\n" ) + f.write( + f"OPENRAG_DATA_PATH={self._quote_env_value(self.config.openrag_data_path)}\n" + ) f.write( f"OPENSEARCH_DATA_PATH={self._quote_env_value(self.config.opensearch_data_path)}\n" ) diff --git a/src/tui/screens/monitor.py b/src/tui/screens/monitor.py index 7c5e0203..d21bbd13 100644 --- a/src/tui/screens/monitor.py +++ b/src/tui/screens/monitor.py @@ -481,18 +481,23 @@ class MonitorScreen(Screen): # Clear config, 
conversations.json, and optionally flow backups (before stopping containers) try: - config_path = Path("config") - conversations_file = Path("conversations.json") - flows_backup_path = Path("flows/backup") - + # Get paths from env config + from ..managers.env_manager import EnvManager + env_manager = EnvManager() + env_manager.load_existing_env() + + def expand_path(path_str: str) -> Path: + return Path(path_str.replace("$HOME", str(Path.home()))).expanduser() + + config_path = expand_path(env_manager.config.openrag_config_path) + flows_path = expand_path(env_manager.config.openrag_flows_path) + flows_backup_path = flows_path / "backup" + if config_path.exists(): shutil.rmtree(config_path) # Recreate empty config directory config_path.mkdir(parents=True, exist_ok=True) - - if conversations_file.exists(): - conversations_file.unlink() - + # Delete flow backups only if user chose to (and they actually exist) if self._check_flow_backups(): if delete_backups: @@ -501,7 +506,7 @@ class MonitorScreen(Screen): flows_backup_path.mkdir(parents=True, exist_ok=True) self.notify("Flow backups deleted", severity="information") else: - self.notify("Flow backups preserved in ./flows/backup", severity="information") + self.notify(f"Flow backups preserved in {flows_backup_path}", severity="information") except Exception as e: self.notify( @@ -531,7 +536,11 @@ class MonitorScreen(Screen): # Now clear opensearch-data using container yield False, "Clearing OpenSearch data..." 
- opensearch_data_path = Path("opensearch-data") + # Get opensearch data path from env config + from ..managers.env_manager import EnvManager + env_manager = EnvManager() + env_manager.load_existing_env() + opensearch_data_path = Path(env_manager.config.opensearch_data_path.replace("$HOME", str(Path.home()))).expanduser() if opensearch_data_path.exists(): async for success, message in self.container_manager.clear_opensearch_data_volume(): yield success, message @@ -549,10 +558,15 @@ class MonitorScreen(Screen): yield True, "Factory reset completed successfully" def _check_flow_backups(self) -> bool: - """Check if there are any flow backups in ./flows/backup directory.""" + """Check if there are any flow backups in flows/backup directory.""" from pathlib import Path + from ..managers.env_manager import EnvManager - backup_dir = Path("flows/backup") + # Get flows path from env config + env_manager = EnvManager() + env_manager.load_existing_env() + flows_path = Path(env_manager.config.openrag_flows_path.replace("$HOME", str(Path.home()))).expanduser() + backup_dir = flows_path / "backup" if not backup_dir.exists(): return False diff --git a/src/tui/screens/welcome.py b/src/tui/screens/welcome.py index 146b437f..84b76520 100644 --- a/src/tui/screens/welcome.py +++ b/src/tui/screens/welcome.py @@ -68,11 +68,17 @@ class WelcomeScreen(Screen): yield Footer() def _check_flow_backups(self) -> bool: - """Check if there are any flow backups in ./flows/backup directory.""" - backup_dir = Path("flows/backup") + """Check if there are any flow backups in flows/backup directory.""" + from ..managers.env_manager import EnvManager + + # Get flows path from env config + env_manager = EnvManager() + env_manager.load_existing_env() + flows_path = Path(env_manager.config.openrag_flows_path.replace("$HOME", str(Path.home()))).expanduser() + backup_dir = flows_path / "backup" if not backup_dir.exists(): return False - + try: # Check if there are any .json files in the backup directory 
backup_files = list(backup_dir.glob("*.json"))