openrag/src/config/config_manager.py
Lucas Oliveira e0015f35db
fix: update onboarding design, make opensearch index be initialized after onboarding, make flow reset change the models to the provider chosen (#100)
* changed tooltip style

* added start on label wrapper

* changed switch to checkbox on openai onboarding and changed copies

* made border be red when api key is invalid

* Added embedding configuration after onboarding

* changed openrag ingest docling to have same embedding model component as other flows

* changed flows service to get flow by id, not by path

* modify reset_langflow to also put right embedding model

* added endpoint and project id to provider config

* added replacing the model with the provider model when resetting

* Moved consts to settings.py

* raise when flow_id is not found
2025-09-26 12:04:17 -03:00

212 lines
7.5 KiB
Python

"""Configuration management for OpenRAG."""
import os
from dataclasses import asdict, dataclass, field, fields
from pathlib import Path
from typing import Any, Dict, Optional

import yaml

from utils.logging_config import get_logger
logger = get_logger(__name__)
@dataclass
class ProviderConfig:
    """Model provider configuration.

    Holds the provider name and the credentials/connection details used to
    reach it. Every field defaults to a plain string, so the class can be
    built from a partial mapping.
    """

    model_provider: str = "openai"  # openai, anthropic, etc.
    # Excluded from the generated __repr__ so the secret is not written out
    # when a config object is logged or shown in a traceback. The value is
    # still a regular field: it takes part in equality and in asdict().
    api_key: str = field(default="", repr=False)
    endpoint: str = ""  # For providers like Watson/IBM that need custom endpoints
    project_id: str = ""  # For providers like Watson/IBM that need project IDs
@dataclass
class KnowledgeConfig:
    """Settings for the knowledge/ingestion side of OpenRAG.

    All fields have defaults, so an instance can be created with no
    arguments or from a partial mapping of overrides.
    """

    # Name of the embedding model.
    embedding_model: str = "text-embedding-3-small"
    # Chunking parameters (units are not defined in this module -- TODO confirm).
    chunk_size: int = 1_000
    chunk_overlap: int = 200
    # Preset name; the camelCase spelling is kept because it is the key used
    # when the config is converted to and from a dictionary.
    doclingPresets: str = "standard"
@dataclass
class AgentConfig:
    """Settings for the agent: which LLM to use and its system prompt."""

    # Name of the language model.
    llm_model: str = "gpt-4o-mini"
    # Default system prompt, split across lines for readability only; the
    # adjacent literals are joined into a single string.
    system_prompt: str = (
        "You are a helpful AI assistant with access to a knowledge base. "
        "Answer questions based on the provided context."
    )
@dataclass
class OpenRAGConfig:
    """Complete OpenRAG configuration.

    Groups the provider, knowledge and agent sections together with a flag
    recording whether the configuration was manually edited.
    """

    provider: ProviderConfig
    knowledge: KnowledgeConfig
    agent: AgentConfig
    edited: bool = False  # Track if manually edited

    @staticmethod
    def _build_section(section_cls, raw: Any):
        """Instantiate ``section_cls`` from ``raw``, tolerating bad input.

        Args:
            section_cls: One of the section dataclasses.
            raw: The section's mapping as read from a file or assembled by
                the caller. Anything that is not a dict (for example ``None``
                from an empty YAML section) is treated as an empty mapping.

        Returns:
            An instance of ``section_cls`` built from the keys of ``raw``
            that match its declared fields; other keys are dropped with a
            warning instead of raising ``TypeError``.
        """
        if not isinstance(raw, dict):
            raw = {}
        known = {f.name for f in fields(section_cls)}
        unknown = sorted(str(key) for key in raw if key not in known)
        if unknown:
            logger.warning(
                f"Ignoring unknown {section_cls.__name__} keys: {', '.join(unknown)}"
            )
        return section_cls(**{k: v for k, v in raw.items() if k in known})

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "OpenRAGConfig":
        """Create config from dictionary.

        Args:
            data: Mapping with optional ``provider``, ``knowledge``,
                ``agent`` and ``edited`` entries. Missing sections fall back
                to the section defaults.

        Returns:
            A new ``OpenRAGConfig``. Keys a section does not declare are
            ignored so that stale or unsupported settings do not prevent the
            configuration from loading.
        """
        return cls(
            provider=cls._build_section(ProviderConfig, data.get("provider", {})),
            knowledge=cls._build_section(KnowledgeConfig, data.get("knowledge", {})),
            agent=cls._build_section(AgentConfig, data.get("agent", {})),
            edited=data.get("edited", False),
        )

    def to_dict(self) -> Dict[str, Any]:
        """Convert config to a plain dictionary (nested sections included)."""
        return asdict(self)
class ConfigManager:
    """Manages OpenRAG configuration from multiple sources.

    Values come from the dataclass defaults, an optional YAML file and
    environment variables. The assembled ``OpenRAGConfig`` is cached on the
    instance until ``reload_config`` is called.
    """

    # Provider environment variables copied as-is into the provider section,
    # listed in the order they are applied.
    _PROVIDER_ENV_VARS = (
        ("MODEL_PROVIDER", "model_provider"),
        ("PROVIDER_API_KEY", "api_key"),
        ("PROVIDER_ENDPOINT", "endpoint"),
        ("PROVIDER_PROJECT_ID", "project_id"),
    )

    # Boolean environment variables for the knowledge section. They are only
    # applied when KnowledgeConfig declares the matching field.
    _KNOWLEDGE_FLAG_ENV_VARS = (
        ("OCR_ENABLED", "ocr"),
        ("PICTURE_DESCRIPTIONS_ENABLED", "picture_descriptions"),
    )

    # Lower-cased values interpreted as "true" for boolean variables.
    _TRUE_VALUES = ("true", "1", "yes")

    def __init__(self, config_file: Optional[str] = None):
        """Initialize configuration manager.

        Args:
            config_file: Path to configuration file. Defaults to the relative
                path 'config.yaml', which is resolved against the current
                working directory when the file is accessed.
        """
        self.config_file = Path(config_file) if config_file else Path("config.yaml")
        self._config: Optional[OpenRAGConfig] = None

    def load_config(self) -> OpenRAGConfig:
        """Load configuration from environment variables and config file.

        Priority order:
        1. Environment variables (highest), unless the file is marked edited
        2. Configuration file
        3. Defaults (lowest)

        Returns:
            The cached configuration if one exists, otherwise a newly
            assembled one (which is then cached).
        """
        if self._config is not None:
            return self._config

        # Start with empty sections; dataclass defaults fill in the rest.
        config_data: Dict[str, Any] = {"provider": {}, "knowledge": {}, "agent": {}}

        # Load from config file if it exists. A broken file is reported and
        # otherwise ignored so the application can still start.
        if self.config_file.exists():
            try:
                with open(self.config_file, "r", encoding="utf-8") as f:
                    file_config = yaml.safe_load(f) or {}
                for section in ["provider", "knowledge", "agent"]:
                    if section in file_config:
                        config_data[section].update(file_config[section])
                config_data["edited"] = file_config.get("edited", False)
                logger.info(f"Loaded configuration from {self.config_file}")
            except Exception as e:
                logger.warning(f"Failed to load config file {self.config_file}: {e}")

        # Build a config object first so the edited flag can be checked.
        temp_config = OpenRAGConfig.from_dict(config_data)

        # Override with environment variables (skipped when edited).
        self._load_env_overrides(config_data, temp_config)

        self._config = OpenRAGConfig.from_dict(config_data)

        # Never write the provider API key to the log.
        loggable = self._config.to_dict()
        if loggable["provider"].get("api_key"):
            loggable["provider"]["api_key"] = "***"
        logger.debug("Configuration loaded", config=loggable)
        return self._config

    @staticmethod
    def _env_int(name: str) -> Optional[int]:
        """Return environment variable ``name`` as an int.

        Returns ``None`` when the variable is unset or empty, and also when
        it is not a valid integer (a warning is logged in that case).
        """
        raw = os.getenv(name)
        if not raw:
            return None
        try:
            return int(raw)
        except ValueError:
            logger.warning(f"Ignoring {name}: {raw!r} is not a valid integer")
            return None

    def _load_env_overrides(
        self, config_data: Dict[str, Any], temp_config: Optional["OpenRAGConfig"] = None
    ) -> None:
        """Load environment variable overrides, respecting edited flag.

        Args:
            config_data: Section dictionaries to update in place.
            temp_config: Config built from the file/default values; when its
                ``edited`` flag is set no environment variable is applied.
        """
        # Skip all environment overrides if config has been manually edited.
        if temp_config and temp_config.edited:
            logger.debug("Skipping all env overrides - config marked as edited")
            return

        # Provider settings
        provider = config_data["provider"]
        for env_name, key in self._PROVIDER_ENV_VARS:
            value = os.getenv(env_name)
            if value:
                provider[key] = value

        # Backward compatibility for OpenAI.
        # NOTE(review): this is applied after PROVIDER_API_KEY, so it replaces
        # that value when both variables are set -- confirm this is intended.
        openai_key = os.getenv("OPENAI_API_KEY")
        if openai_key:
            provider["api_key"] = openai_key
            if not provider.get("model_provider"):
                provider["model_provider"] = "openai"

        # Knowledge settings
        knowledge = config_data["knowledge"]
        embedding_model = os.getenv("EMBEDDING_MODEL")
        if embedding_model:
            knowledge["embedding_model"] = embedding_model
        for env_name, key in (
            ("CHUNK_SIZE", "chunk_size"),
            ("CHUNK_OVERLAP", "chunk_overlap"),
        ):
            number = self._env_int(env_name)
            if number is not None:
                knowledge[key] = number

        # Only forward flags that KnowledgeConfig can accept; passing a key
        # the dataclass does not declare would make its constructor raise.
        supported = {f.name for f in fields(KnowledgeConfig)}
        for env_name, key in self._KNOWLEDGE_FLAG_ENV_VARS:
            raw = os.getenv(env_name)
            if not raw:
                continue
            if key not in supported:
                logger.warning(
                    f"Ignoring {env_name}: KnowledgeConfig has no '{key}' setting"
                )
                continue
            knowledge[key] = raw.lower() in self._TRUE_VALUES

        # Agent settings
        agent = config_data["agent"]
        llm_model = os.getenv("LLM_MODEL")
        if llm_model:
            agent["llm_model"] = llm_model
        system_prompt = os.getenv("SYSTEM_PROMPT")
        if system_prompt:
            agent["system_prompt"] = system_prompt

    def get_config(self) -> OpenRAGConfig:
        """Get current configuration, loading if necessary."""
        if self._config is None:
            return self.load_config()
        return self._config

    def reload_config(self) -> OpenRAGConfig:
        """Force reload configuration from sources."""
        self._config = None
        return self.load_config()

    def save_config_file(self, config: Optional[OpenRAGConfig] = None) -> bool:
        """Save configuration to file.

        The configuration is marked as edited before it is written. On
        success it becomes the cached configuration; on failure the edited
        flag is restored to its previous value.

        Args:
            config: Configuration to save. If None, uses current config.

        Returns:
            True if saved successfully, False otherwise.
        """
        if config is None:
            config = self.get_config()

        # Mark config as edited when saving, remembering the old value so a
        # failed write does not leave the in-memory object claiming an edit
        # that was never persisted.
        previous_edited = config.edited
        config.edited = True

        try:
            # Ensure directory exists
            self.config_file.parent.mkdir(parents=True, exist_ok=True)
            with open(self.config_file, "w", encoding="utf-8") as f:
                yaml.dump(config.to_dict(), f, default_flow_style=False, indent=2)
            # Update cached config to reflect the edited flag
            self._config = config
            logger.info(f"Configuration saved to {self.config_file} - marked as edited")
            return True
        except Exception as e:
            config.edited = previous_edited
            logger.error(f"Failed to save configuration to {self.config_file}: {e}")
            return False
# Global config manager instance, created when this module is imported.
# It is constructed without arguments, so it uses the default "config.yaml"
# path; the file is not read until the configuration is first requested.
config_manager = ConfigManager()