LightRAG/scripts/lib/generate_from_schema.py
Claude 0a48c633cd
Add Schema-Driven Configuration Pattern
Implement comprehensive configuration management system with:

**Core Components:**
- config/config.schema.yaml: Configuration metadata (single source of truth)
- scripts/lib/generate_from_schema.py: Schema → local.yaml generator
- scripts/lib/generate_env.py: local.yaml → .env converter
- scripts/setup.sh: One-click configuration initialization

**Key Features:**
- Deep merge logic preserves existing values
- Auto-generation of secrets (32-char random strings)
- Type inference for configuration values
- Nested YAML → flat environment variables
- Git-safe: local.yaml and .env excluded from version control

**Configuration Coverage:**
- Trilingual entity extractor (Chinese/English/Swedish)
- LightRAG API, database, vector DB settings
- LLM provider configuration
- Entity/relation extraction settings
- Security and performance tuning

**Documentation:**
- docs/ConfigurationGuide-zh.md: Complete usage guide with examples

**Usage:**
```bash
./scripts/setup.sh  # Generate config/local.yaml and .env
```

This enables centralized configuration management with automatic
secret generation and safe handling of sensitive data.
2025-11-19 19:33:13 +00:00

294 lines
7.3 KiB
Python
Executable file
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
配置生成器 - 从 Schema 生成本地配置
从 config/config.schema.yaml 读取配置元数据,生成 config/local.yaml。
支持深度合并、自动生成密钥、保留现有值。
"""
import sys
import secrets
import string
from pathlib import Path
from typing import Any, Dict, List, Optional
import yaml
def generate_secret(length: int = 32) -> str:
    """Return a random alphanumeric secret.

    Each character is drawn with ``secrets.choice`` from the ASCII letters
    and digits.

    Args:
        length: Number of characters to generate (default 32).

    Returns:
        A string of ``length`` random characters.
    """
    pool = string.ascii_letters + string.digits
    picked = [secrets.choice(pool) for _ in range(length)]
    return ''.join(picked)
def set_nested_value(data: Dict, path: str, value: Any) -> None:
    """Set a value inside a nested dictionary, creating missing levels.

    Args:
        data: Dictionary to modify in place.
        path: Dot-separated key path, e.g. "trilingual.chinese.enabled".
        value: Value to store under the last key of ``path``.

    Raises:
        TypeError: If ``data`` or a value found along ``path`` is not a
            mapping (for example ``path`` is "llm.model" but ``data["llm"]``
            is already a string). The message names the conflicting key.
    """
    # Imported locally so the function does not rely on a new file-level import.
    from collections.abc import MutableMapping

    keys = path.split('.')
    last_index = len(keys) - 1
    current = data
    for depth, key in enumerate(keys):
        if not isinstance(current, MutableMapping):
            # Without this check the failure would be a bare
            # "'str' object does not support item assignment" with no hint
            # about which configuration key is in conflict.
            blocker = '.'.join(keys[:depth]) or '<root>'
            raise TypeError(
                f"无法设置 '{path}': '{blocker}' 的值不是字典 "
                f"(实际类型: {type(current).__name__})"
            )
        if depth == last_index:
            current[key] = value
        else:
            # Descend, creating an empty level when the key is absent.
            current = current.setdefault(key, {})
def get_nested_value(data: Dict, path: str, default: Any = None) -> Any:
    """Look up a value in a nested dictionary by dot-separated path.

    Args:
        data: Dictionary to read from.
        path: Dot-separated key path.
        default: Value returned when the path cannot be followed.

    Returns:
        The value stored at ``path``, or ``default`` if any step raises
        ``KeyError`` or ``TypeError`` (missing key, or a non-subscriptable /
        non-mapping value part-way along the path).
    """
    node = data
    for part in path.split('.'):
        try:
            node = node[part]
        except (KeyError, TypeError):
            return default
    return node
def deep_merge(base: Dict, overlay: Dict) -> Dict:
    """Recursively merge ``overlay`` onto ``base`` and return the result.

    Values from ``overlay`` win, except that when both sides hold a
    dictionary under the same key the two dictionaries are merged
    recursively. Keys present only in ``base`` are kept.

    Args:
        base: Dictionary providing the starting values.
        overlay: Dictionary whose values take precedence.

    Returns:
        A new top-level dictionary; ``base`` itself is not modified.
    """
    merged = base.copy()
    for name in overlay:
        incoming = overlay[name]
        both_dicts = (
            name in merged
            and isinstance(merged[name], dict)
            and isinstance(incoming, dict)
        )
        merged[name] = deep_merge(merged[name], incoming) if both_dicts else incoming
    return merged
def infer_type(value: Any) -> Any:
    """Convert a string to a number or boolean where it looks like one.

    Non-string values are returned unchanged. For strings, a numeric
    conversion is tried first (``float`` when the text contains a ".",
    otherwise ``int``); if that fails, the case-insensitive words
    true/yes/on and false/no/off map to booleans. Anything else is
    returned as the original string.

    Args:
        value: Raw value to convert.

    Returns:
        The converted value, or ``value`` itself when no conversion applies.
    """
    if not isinstance(value, str):
        return value

    caster = float if '.' in value else int
    try:
        return caster(value)
    except ValueError:
        pass

    lowered = value.lower()
    if lowered in ('true', 'yes', 'on'):
        return True
    if lowered in ('false', 'no', 'off'):
        return False
    return value
def load_schema(schema_path: Path) -> List[Dict]:
    """Read the configuration schema from a YAML file.

    Args:
        schema_path: Location of the schema file.

    Returns:
        The parsed schema, which must be a list at the top level.

    Raises:
        FileNotFoundError: If ``schema_path`` does not exist.
        ValueError: If the parsed document is not a list.
    """
    if not schema_path.exists():
        raise FileNotFoundError(f"Schema 文件不存在: {schema_path}")

    with schema_path.open('r', encoding='utf-8') as handle:
        parsed = yaml.safe_load(handle)

    if isinstance(parsed, list):
        return parsed
    raise ValueError("Schema 必须是列表格式")
def load_existing_config(config_path: Path) -> Dict:
    """Read an existing configuration file, if there is one.

    Args:
        config_path: Location of the configuration file.

    Returns:
        The parsed configuration dictionary. An empty dictionary is returned
        when the file does not exist or parses to an empty/falsy document.

    Raises:
        ValueError: If the file parses to a non-empty value that is not a
            dictionary (e.g. a top-level list or scalar).
    """
    if not config_path.exists():
        return {}

    with open(config_path, 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f)

    if not config:
        return {}
    if not isinstance(config, dict):
        # Fail here with a clear message instead of letting a list or scalar
        # reach the merge step, where it would raise an unrelated error.
        raise ValueError(
            f"配置文件必须是字典格式: {config_path} "
            f"(实际类型: {type(config).__name__})"
        )
    return config
def generate_config(schema: List[Dict], existing_config: Dict) -> Dict:
    """Build a configuration dictionary from the schema.

    For every schema field that has a non-empty ``section`` path:

    * a non-``None`` value already present in ``existing_config`` is kept;
    * otherwise a field with ``type == 'secret'`` and a truthy
      ``auto_generate`` gets a freshly generated 32-character secret;
    * otherwise a non-``None`` ``default`` is used after type inference;
    * otherwise the field is skipped.

    Args:
        schema: List of schema field dictionaries.
        existing_config: Previously saved configuration.

    Returns:
        The generated configuration dictionary.
    """
    generated = {}

    for entry in schema:
        target_path = entry.get('section')
        if not target_path:
            continue

        # An existing value always takes precedence over anything generated.
        kept = get_nested_value(existing_config, target_path)
        if kept is not None:
            set_nested_value(generated, target_path, kept)
            continue

        wants_secret = (
            entry.get('type', '') == 'secret'
            and entry.get('auto_generate', False)
        )
        if wants_secret:
            fresh = generate_secret(32)
        else:
            fallback = entry.get('default')
            if fallback is None:
                # Nothing to fill in for this field.
                continue
            fresh = infer_type(fallback)

        set_nested_value(generated, target_path, fresh)

    return generated
def save_config(config: Dict, config_path: Path) -> None:
    """Write the configuration to a YAML file.

    The parent directory is created if needed. The YAML is written in block
    style with a 2-space indent, unicode characters are emitted as-is, and
    keys keep their insertion order.

    Args:
        config: Configuration dictionary to serialize.
        config_path: Destination file path.
    """
    output_dir = config_path.parent
    output_dir.mkdir(parents=True, exist_ok=True)

    dump_options = dict(
        default_flow_style=False,
        allow_unicode=True,
        sort_keys=False,
        indent=2,
    )
    with open(config_path, 'w', encoding='utf-8') as stream:
        yaml.dump(config, stream, **dump_options)
def main():
    """Generate ``config/local.yaml`` from ``config/config.schema.yaml``.

    Steps: load the schema, load any existing local configuration, generate
    values from the schema (existing values are kept), merge the result over
    the existing configuration so keys not described by the schema survive,
    and write the file. Progress is printed to stdout.

    On any exception the error and traceback are printed to stderr and the
    process exits with status 1.
    """
    # Project root is three levels above this file. Resolve first: if
    # ``__file__`` is a relative path (possible for scripts on interpreters
    # older than 3.9), chained ``.parent`` calls collapse to "." and would
    # point at the current directory instead of the project root.
    project_root = Path(__file__).resolve().parent.parent.parent

    schema_path = project_root / 'config' / 'config.schema.yaml'
    config_path = project_root / 'config' / 'local.yaml'

    print("=" * 70)
    print(" 配置生成器")
    print("=" * 70)
    print()

    try:
        # Load the schema.
        print(f"📖 读取 Schema: {schema_path.relative_to(project_root)}")
        schema = load_schema(schema_path)
        print(f" 找到 {len(schema)} 个配置字段")

        # Load the existing configuration, if any.
        print(f"\n🔍 检查现有配置: {config_path.relative_to(project_root)}")
        existing_config = load_existing_config(config_path)
        if existing_config:
            print(" 找到现有配置,将保留已有值")
        else:
            print(" 未找到现有配置,将使用默认值")

        # Generate values from the schema.
        print("\n⚙️ 生成配置...")
        config = generate_config(schema, existing_config)

        # Merge over the existing configuration so that keys the schema does
        # not define are preserved.
        if existing_config:
            config = deep_merge(existing_config, config)

        # Write the result.
        print(f"\n💾 保存配置: {config_path.relative_to(project_root)}")
        save_config(config, config_path)

        print()
        print("=" * 70)
        print(" ✅ 配置生成成功")
        print("=" * 70)
        print()
        print(f"配置文件: {config_path}")
        print()
        print("提示:")
        print(" - 配置文件已添加到 .gitignore不会提交到 Git")
        print(" - 修改配置后重新运行此脚本可更新配置")
        print(" - 现有值会被保留,新字段会使用默认值")
        print()
    except Exception as e:
        # Top-level boundary: report the failure and exit non-zero.
        print(f"\n❌ 错误: {e}", file=sys.stderr)
        import traceback
        traceback.print_exc()
        sys.exit(1)


if __name__ == '__main__':
    main()