persist json state

This commit is contained in:
phact 2025-12-12 15:13:11 -05:00
parent 41f7f701c9
commit 55b8f72b2b
13 changed files with 72 additions and 42 deletions

View file

@ -84,6 +84,7 @@ services:
- ${OPENRAG_KEYS_PATH:-./keys}:/app/keys:Z
- ${OPENRAG_FLOWS_PATH:-./flows}:/app/flows:U,z
- ${OPENRAG_CONFIG_PATH:-./config}:/app/config:Z
- ${OPENRAG_DATA_PATH:-./data}:/app/data:Z
openrag-frontend:
image: langflowai/openrag-frontend:${OPENRAG_VERSION:-latest}

View file

@ -36,8 +36,10 @@ class ConnectionConfig:
class ConnectionManager:
"""Manages multiple connector connections with persistence"""
def __init__(self, connections_file: str = "connections.json"):
def __init__(self, connections_file: str = "data/connections.json"):
self.connections_file = Path(connections_file)
# Ensure data directory exists
self.connections_file.parent.mkdir(parents=True, exist_ok=True)
self.connections: Dict[str, ConnectionConfig] = {}
self.active_connectors: Dict[str, BaseConnector] = {}

View file

@ -96,11 +96,8 @@ class GoogleDriveConnector(BaseConnector):
client_id = config.get("client_id") or env_client_id
client_secret = config.get("client_secret") or env_client_secret
# Token file default (so callback & workers don't need to pass it)
project_root = Path(__file__).resolve().parent.parent.parent.parent
token_file = config.get("token_file") or str(
project_root / "google_drive_token.json"
)
# Token file default - use data/ directory for persistence
token_file = config.get("token_file") or "data/google_drive_token.json"
Path(token_file).parent.mkdir(parents=True, exist_ok=True)
if not isinstance(client_id, str) or not client_id.strip():

View file

@ -58,9 +58,8 @@ class OneDriveConnector(BaseConnector):
except Exception as e:
logger.debug(f"Failed to get client_secret: {e}")
# Token file setup
project_root = Path(__file__).resolve().parent.parent.parent.parent
token_file = config.get("token_file") or str(project_root / "onedrive_token.json")
# Token file setup - use data/ directory for persistence
token_file = config.get("token_file") or "data/onedrive_token.json"
Path(token_file).parent.mkdir(parents=True, exist_ok=True)
# Only initialize OAuth if we have credentials
@ -72,7 +71,7 @@ class OneDriveConnector(BaseConnector):
oauth_token_file = config["token_file"]
else:
# Use a per-connection cache file to avoid collisions with other connectors
oauth_token_file = f"onedrive_token_{connection_id}.json"
oauth_token_file = f"data/onedrive_token_{connection_id}.json"
# MSA & org both work via /common for OneDrive personal testing
authority = "https://login.microsoftonline.com/common"

View file

@ -66,20 +66,19 @@ class SharePointConnector(BaseConnector):
logger.debug(f"Failed to get client_secret: {e}")
pass # Credentials not available, that's OK for listing
# Token file setup
project_root = Path(__file__).resolve().parent.parent.parent.parent
token_file = config.get("token_file") or str(project_root / "sharepoint_token.json")
# Token file setup - use data/ directory for persistence
token_file = config.get("token_file") or "data/sharepoint_token.json"
Path(token_file).parent.mkdir(parents=True, exist_ok=True)
# Only initialize OAuth if we have credentials
if self.client_id and self.client_secret:
connection_id = config.get("connection_id", "default")
# Use token_file from config if provided, otherwise generate one
if config.get("token_file"):
oauth_token_file = config["token_file"]
else:
oauth_token_file = f"sharepoint_token_{connection_id}.json"
oauth_token_file = f"data/sharepoint_token_{connection_id}.json"
authority = f"https://login.microsoftonline.com/{self.tenant_id}" if self.tenant_id != "common" else "https://login.microsoftonline.com/common"

View file

@ -63,8 +63,8 @@ class AuthService:
# We'll validate client credentials when creating the connector
# Create connection configuration
token_file = f"{connector_type}_{purpose}_{uuid.uuid4().hex[:8]}.json"
# Create connection configuration - use data/ directory for persistence
token_file = f"data/{connector_type}_{purpose}_{uuid.uuid4().hex[:8]}.json"
config = {
"token_file": token_file,
"connector_type": connector_type,

View file

@ -15,9 +15,11 @@ logger = get_logger(__name__)
class ConversationPersistenceService:
"""Simple service to persist conversations to disk"""
def __init__(self, storage_file: str = "conversations.json"):
def __init__(self, storage_file: str = "data/conversations.json"):
self.storage_file = storage_file
# Ensure data directory exists
os.makedirs(os.path.dirname(self.storage_file), exist_ok=True)
self.lock = threading.Lock()
self._conversations = self._load_conversations()

View file

@ -13,9 +13,11 @@ logger = get_logger(__name__)
class SessionOwnershipService:
"""Simple service to track which user owns which session"""
def __init__(self):
self.ownership_file = "session_ownership.json"
self.ownership_file = "data/session_ownership.json"
# Ensure data directory exists
os.makedirs(os.path.dirname(self.ownership_file), exist_ok=True)
self.ownership_data = self._load_ownership_data()
def _load_ownership_data(self) -> Dict[str, Dict[str, any]]:

View file

@ -644,22 +644,22 @@ def migrate_legacy_data_directories():
def setup_host_directories():
"""Initialize OpenRAG directory structure on the host.
Creates directories that will be volume-mounted into containers:
- ~/.openrag/documents/openrag-documents/ (for document ingestion)
- ~/.openrag/flows/ (for Langflow flows)
- ~/.openrag/keys/ (for JWT keys)
- ~/.openrag/config/ (for configuration)
- ~/.openrag/data/ (for backend data: conversations, OAuth tokens, etc.)
- ~/.openrag/data/opensearch-data/ (for OpenSearch data)
"""
from pathlib import Path
base_dir = Path.home() / ".openrag"
directories = [
base_dir / "documents" / "openrag-documents",
base_dir / "flows",
base_dir / "keys",
base_dir / "config",
base_dir / "data",
base_dir / "data" / "opensearch-data",
]

View file

@ -1135,8 +1135,11 @@ class ContainerManager:
yield False, "Clearing OpenSearch data volume..."
# Get the absolute path to opensearch-data directory
opensearch_data_path = Path("opensearch-data").absolute()
# Get opensearch data path from env config
from .env_manager import EnvManager
env_manager = EnvManager()
env_manager.load_existing_env()
opensearch_data_path = Path(env_manager.config.opensearch_data_path.replace("$HOME", str(Path.home()))).expanduser().absolute()
if not opensearch_data_path.exists():
yield True, "OpenSearch data directory does not exist, skipping"

View file

@ -72,6 +72,7 @@ class EnvConfig:
openrag_keys_path: str = "$HOME/.openrag/keys"
openrag_flows_path: str = "$HOME/.openrag/flows"
openrag_config_path: str = "$HOME/.openrag/config"
openrag_data_path: str = "$HOME/.openrag/data" # Backend data (conversations, tokens, etc.)
opensearch_data_path: str = "$HOME/.openrag/data/opensearch-data"
# Container version (linked to TUI version)
@ -182,6 +183,7 @@ class EnvManager:
"OPENRAG_KEYS_PATH": "openrag_keys_path",
"OPENRAG_FLOWS_PATH": "openrag_flows_path",
"OPENRAG_CONFIG_PATH": "openrag_config_path",
"OPENRAG_DATA_PATH": "openrag_data_path",
"OPENSEARCH_DATA_PATH": "opensearch_data_path",
"LANGFLOW_AUTO_LOGIN": "langflow_auto_login",
"LANGFLOW_NEW_USER_IS_ACTIVE": "langflow_new_user_is_active",
@ -394,6 +396,9 @@ class EnvManager:
f.write(
f"OPENRAG_CONFIG_PATH={self._quote_env_value(self.config.openrag_config_path)}\n"
)
f.write(
f"OPENRAG_DATA_PATH={self._quote_env_value(self.config.openrag_data_path)}\n"
)
f.write(
f"OPENSEARCH_DATA_PATH={self._quote_env_value(self.config.opensearch_data_path)}\n"
)

View file

@ -481,18 +481,23 @@ class MonitorScreen(Screen):
# Clear config, conversations.json, and optionally flow backups (before stopping containers)
try:
config_path = Path("config")
conversations_file = Path("conversations.json")
flows_backup_path = Path("flows/backup")
# Get paths from env config
from ..managers.env_manager import EnvManager
env_manager = EnvManager()
env_manager.load_existing_env()
def expand_path(path_str: str) -> Path:
return Path(path_str.replace("$HOME", str(Path.home()))).expanduser()
config_path = expand_path(env_manager.config.openrag_config_path)
flows_path = expand_path(env_manager.config.openrag_flows_path)
flows_backup_path = flows_path / "backup"
if config_path.exists():
shutil.rmtree(config_path)
# Recreate empty config directory
config_path.mkdir(parents=True, exist_ok=True)
if conversations_file.exists():
conversations_file.unlink()
# Delete flow backups only if user chose to (and they actually exist)
if self._check_flow_backups():
if delete_backups:
@ -501,7 +506,7 @@ class MonitorScreen(Screen):
flows_backup_path.mkdir(parents=True, exist_ok=True)
self.notify("Flow backups deleted", severity="information")
else:
self.notify("Flow backups preserved in ./flows/backup", severity="information")
self.notify(f"Flow backups preserved in {flows_backup_path}", severity="information")
except Exception as e:
self.notify(
@ -531,7 +536,11 @@ class MonitorScreen(Screen):
# Now clear opensearch-data using container
yield False, "Clearing OpenSearch data..."
opensearch_data_path = Path("opensearch-data")
# Get opensearch data path from env config
from ..managers.env_manager import EnvManager
env_manager = EnvManager()
env_manager.load_existing_env()
opensearch_data_path = Path(env_manager.config.opensearch_data_path.replace("$HOME", str(Path.home()))).expanduser()
if opensearch_data_path.exists():
async for success, message in self.container_manager.clear_opensearch_data_volume():
yield success, message
@ -549,10 +558,15 @@ class MonitorScreen(Screen):
yield True, "Factory reset completed successfully"
def _check_flow_backups(self) -> bool:
"""Check if there are any flow backups in ./flows/backup directory."""
"""Check if there are any flow backups in flows/backup directory."""
from pathlib import Path
from ..managers.env_manager import EnvManager
backup_dir = Path("flows/backup")
# Get flows path from env config
env_manager = EnvManager()
env_manager.load_existing_env()
flows_path = Path(env_manager.config.openrag_flows_path.replace("$HOME", str(Path.home()))).expanduser()
backup_dir = flows_path / "backup"
if not backup_dir.exists():
return False

View file

@ -68,11 +68,17 @@ class WelcomeScreen(Screen):
yield Footer()
def _check_flow_backups(self) -> bool:
"""Check if there are any flow backups in ./flows/backup directory."""
backup_dir = Path("flows/backup")
"""Check if there are any flow backups in flows/backup directory."""
from ..managers.env_manager import EnvManager
# Get flows path from env config
env_manager = EnvManager()
env_manager.load_existing_env()
flows_path = Path(env_manager.config.openrag_flows_path.replace("$HOME", str(Path.home()))).expanduser()
backup_dir = flows_path / "backup"
if not backup_dir.exists():
return False
try:
# Check if there are any .json files in the backup directory
backup_files = list(backup_dir.glob("*.json"))