feat: add validation to llm env variables (#558)
… needed <!-- .github/pull_request_template.md --> ## Description <!-- Provide a clear description of the changes in this PR --> ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit - **New Features** - Implemented enhanced configuration validation for environment-based settings. Now, if any configuration parameter is provided via the environment, all required settings must be present. This improvement helps catch misconfigurations early, reducing potential errors and ensuring a smoother, more reliable user experience. These proactive measures significantly enhance overall system stability and performance. <!-- end of auto-generated comment: release notes by coderabbit.ai --> --------- Co-authored-by: Boris <boris@topoteretes.com>
This commit is contained in:
parent
1cb83312fe
commit
4b777cf214
1 changed file with 56 additions and 0 deletions
|
|
@ -1,6 +1,8 @@
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||||
|
from pydantic import model_validator, Field
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
class LLMConfig(BaseSettings):
|
class LLMConfig(BaseSettings):
|
||||||
|
|
@ -16,6 +18,60 @@ class LLMConfig(BaseSettings):
|
||||||
|
|
||||||
model_config = SettingsConfigDict(env_file=".env", extra="allow")
|
model_config = SettingsConfigDict(env_file=".env", extra="allow")
|
||||||
|
|
||||||
|
@model_validator(mode="after")
def ensure_env_vars_for_ollama(self) -> "LLMConfig":
    """
    Reject half-configured environments when the provider is 'ollama'.

    For any other ``llm_provider`` the instance is returned without checks.
    For 'ollama', two groups of environment variables are inspected in order:

    - LLM group: LLM_MODEL, LLM_ENDPOINT, LLM_API_KEY
    - Embedding group: EMBEDDING_PROVIDER, EMBEDDING_MODEL,
      EMBEDDING_DIMENSIONS, HUGGINGFACE_TOKENIZER

    A group passes when either none or all of its variables are set. A
    variable counts as set when it is present in ``os.environ`` and is not
    blank after stripping whitespace.

    Returns:
        The validated instance itself.

    Raises:
        ValueError: If a group is only partially set; the message lists the
            names that are missing.
    """
    # NOTE(review): only os.environ is consulted here. Values that exist
    # solely in the ".env" file are seen by this check only if something
    # else has loaded them into the process environment -- confirm.
    if self.llm_provider != "ollama":
        return self

    def names_not_set(names: tuple) -> list:
        """Return the entries of *names* that are absent or blank in os.environ."""
        return [name for name in names if not os.environ.get(name, "").strip()]

    # Group 1: LLM settings.
    llm_names = ("LLM_MODEL", "LLM_ENDPOINT", "LLM_API_KEY")
    absent_llm = names_not_set(llm_names)
    # "Some but not all": at least one name is missing and at least one is set.
    if 0 < len(absent_llm) < len(llm_names):
        raise ValueError(
            "You have set some but not all of the required environment variables "
            f"for LLM usage (LLM_MODEL, LLM_ENDPOINT, LLM_API_KEY). Missing: {absent_llm}"
        )

    # Group 2: embedding settings.
    embedding_names = (
        "EMBEDDING_PROVIDER",
        "EMBEDDING_MODEL",
        "EMBEDDING_DIMENSIONS",
        "HUGGINGFACE_TOKENIZER",
    )
    absent_embedding = names_not_set(embedding_names)
    if 0 < len(absent_embedding) < len(embedding_names):
        raise ValueError(
            "You have set some but not all of the required environment variables "
            "for embeddings (EMBEDDING_PROVIDER, EMBEDDING_MODEL, "
            "EMBEDDING_DIMENSIONS, HUGGINGFACE_TOKENIZER). Missing: "
            f"{absent_embedding}"
        )

    return self
|
||||||
|
|
||||||
def to_dict(self) -> dict:
|
def to_dict(self) -> dict:
|
||||||
return {
|
return {
|
||||||
"provider": self.llm_provider,
|
"provider": self.llm_provider,
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue