Add Schema-Driven Configuration Pattern

Implement comprehensive configuration management system with: **Core Components:** - config/config.schema.yaml: Configuration metadata (single source of truth) - scripts/lib/generate_from_schema.py: Schema → local.yaml generator - scripts/lib/generate_env.py: local.yaml → .env converter - scripts/setup.sh: One-click configuration initialization **Key Features:** - Deep merge logic preserves existing values - Auto-generation of secrets (32-char random strings) - Type inference for configuration values - Nested YAML → flat environment variables - Git-safe: local.yaml and .env excluded from version control **Configuration Coverage:** - Trilingual entity extractor (Chinese/English/Swedish) - LightRAG API, database, vector DB settings - LLM provider configuration - Entity/relation extraction settings - Security and performance tuning **Documentation:** - docs/ConfigurationGuide-zh.md: Complete usage guide with examples **Usage:** ```bash ./scripts/setup.sh # Generate config/local.yaml and .env ``` This enables centralized configuration management with automatic secret generation and safe handling of sensitive data.
2025-11-19 19:33:13 +00:00 · 2025-11-19 19:33:13 +00:00 · 0a48c633cd
commit 0a48c633cd
parent 12ab6ebb42
6 changed files with 1447 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -14,6 +14,9 @@ venv/
 # Enviroment Variable Files
 .env

+# Configuration Files (generated from schema)
+config/local.yaml
+
 # Build / Distribution
 dist/
 build/
--- a/config/config.schema.yaml
+++ b/config/config.schema.yaml
@ -0,0 +1,199 @@
+# LightRAG 三语言实体提取器配置 Schema
+#
+# 此文件定义配置字段的元数据，包括：
+# - 字段路径（section）
+# - 默认值（default）
+# - 类型（type: 留空为自动推断，secret 为密钥）
+# - 自动生成（auto_generate: 密钥自动生成）
+# - 描述（description）
+#
+# 运行 ./scripts/setup.sh 自动生成 config/local.yaml 和 .env
+
+# ============================================================
+# 三语言实体提取器配置
+# ============================================================
+
+# 通用配置
+- section: trilingual.enabled
+  default: true
+  description: "Enable trilingual entity extractor (Chinese/English/Swedish)"
+
+- section: trilingual.default_language
+  default: "en"
+  description: "Default language if not specified (zh/en/sv)"
+
+- section: trilingual.lazy_loading
+  default: true
+  description: "Enable lazy loading (load models on-demand to save memory)"
+
+# 中文配置（HanLP）
+- section: trilingual.chinese.enabled
+  default: true
+  description: "Enable Chinese entity extraction (HanLP)"
+
+- section: trilingual.chinese.model
+  default: "CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_BASE_ZH"
+  description: "HanLP model name for Chinese"
+
+- section: trilingual.chinese.cache_dir
+  default: ""
+  description: "HanLP model cache directory (empty = default ~/.hanlp)"
+
+# 英文配置（spaCy）
+- section: trilingual.english.enabled
+  default: true
+  description: "Enable English entity extraction (spaCy)"
+
+- section: trilingual.english.model
+  default: "en_core_web_trf"
+  description: "spaCy model name for English (en_core_web_trf/en_core_web_lg/en_core_web_sm)"
+
+- section: trilingual.english.batch_size
+  default: 32
+  description: "spaCy batch size for English processing"
+
+# 瑞典语配置（spaCy）
+- section: trilingual.swedish.enabled
+  default: true
+  description: "Enable Swedish entity extraction (spaCy)"
+
+- section: trilingual.swedish.model
+  default: "sv_core_news_lg"
+  description: "spaCy model name for Swedish (sv_core_news_lg/sv_core_news_md/sv_core_news_sm)"
+
+- section: trilingual.swedish.batch_size
+  default: 32
+  description: "spaCy batch size for Swedish processing"
+
+# 性能配置
+- section: trilingual.performance.max_text_length
+  default: 1000000
+  description: "Maximum text length to process (characters)"
+
+- section: trilingual.performance.enable_gpu
+  default: false
+  description: "Enable GPU acceleration if available"
+
+- section: trilingual.performance.num_threads
+  default: 4
+  description: "Number of threads for parallel processing"
+
+# 缓存配置
+- section: trilingual.cache.enabled
+  default: true
+  description: "Enable result caching"
+
+- section: trilingual.cache.ttl
+  default: 3600
+  description: "Cache TTL in seconds (0 = no expiry)"
+
+- section: trilingual.cache.max_size
+  default: 1000
+  description: "Maximum number of cached results"
+
+# 日志配置
+- section: trilingual.logging.level
+  default: "INFO"
+  description: "Logging level (DEBUG/INFO/WARNING/ERROR)"
+
+- section: trilingual.logging.format
+  default: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+  description: "Logging format string"
+
+# ============================================================
+# LightRAG 通用配置示例
+# ============================================================
+
+# API 密钥（自动生成）
+- section: lightrag.api.secret_key
+  type: secret
+  auto_generate: true
+  description: "API secret key (auto-generated, 32 characters)"
+
+# API 配置
+- section: lightrag.api.host
+  default: "0.0.0.0"
+  description: "API server host"
+
+- section: lightrag.api.port
+  default: 9621
+  description: "API server port"
+
+- section: lightrag.api.debug
+  default: false
+  description: "Enable debug mode"
+
+# 数据库配置
+- section: lightrag.database.type
+  default: "sqlite"
+  description: "Database type (sqlite/postgres/mysql)"
+
+- section: lightrag.database.path
+  default: "./data/lightrag.db"
+  description: "Database file path (for SQLite)"
+
+# 向量数据库配置
+- section: lightrag.vector_db.type
+  default: "nano"
+  description: "Vector database type (nano/milvus/qdrant/chroma)"
+
+- section: lightrag.vector_db.dimension
+  default: 1536
+  description: "Vector dimension"
+
+# LLM 配置
+- section: lightrag.llm.provider
+  default: "openai"
+  description: "LLM provider (openai/anthropic/ollama/custom)"
+
+- section: lightrag.llm.model
+  default: "gpt-4o-mini"
+  description: "LLM model name"
+
+- section: lightrag.llm.api_key
+  type: secret
+  auto_generate: false
+  description: "LLM API key (user-provided)"
+
+- section: lightrag.llm.base_url
+  default: ""
+  description: "Custom LLM base URL (optional)"
+
+- section: lightrag.llm.max_tokens
+  default: 4096
+  description: "Maximum tokens per request"
+
+- section: lightrag.llm.temperature
+  default: 0.0
+  description: "LLM temperature (0.0-1.0)"
+
+# 实体提取配置
+- section: lightrag.entity_extraction.max_gleaning
+  default: 1
+  description: "Entity extraction gleaning rounds (0=disabled, 1=enabled)"
+
+- section: lightrag.entity_extraction.use_trilingual
+  default: false
+  description: "Use trilingual extractor instead of LLM (requires setup)"
+
+# 关系提取配置
+- section: lightrag.relation_extraction.enabled
+  default: true
+  description: "Enable relation extraction"
+
+- section: lightrag.relation_extraction.method
+  default: "llm"
+  description: "Relation extraction method (llm/pattern/hybrid)"
+
+# 安全配置
+- section: lightrag.security.enable_api_key
+  default: false
+  description: "Require API key for requests"
+
+- section: lightrag.security.allowed_origins
+  default: "*"
+  description: "CORS allowed origins (comma-separated)"
+
+- section: lightrag.security.rate_limit
+  default: 100
+  description: "API rate limit (requests per minute per IP)"
--- a/docs/ConfigurationGuide-zh.md
+++ b/docs/ConfigurationGuide-zh.md
@ -0,0 +1,658 @@
+# LightRAG 配置指南
+
+## 概述
+
+LightRAG 采用 **Schema-Driven Configuration Pattern**（架构驱动配置模式），通过单一数据源管理所有配置，自动生成本地配置文件和环境变量。
+
+### 核心设计原则
+
+**单一数据源 (Single Source of Truth)**:
+- `config/config.schema.yaml` - 配置元数据（Git 追踪）
+- `config/local.yaml` - 本地配置（自动生成，Git 忽略）
+- `.env` - 环境变量（自动生成，Git 忽略）
+
+**自动化工作流**:
+```bash
+config.schema.yaml → config/local.yaml → .env
+```
+
+**关键特性**:
+- ✅ 深度合并 - 保留现有值
+- ✅ 自动生成密钥 - 无需手动管理
+- ✅ 类型推断 - 自动转换数据类型
+- ✅ 安全性 - 配置文件不会提交到 Git
+
+---
+
+## 快速开始
+
+### 1. 初始化配置
+
+```bash
+cd /path/to/LightRAG
+./scripts/setup.sh
+```
+
+这会自动：
+1. 读取 `config/config.schema.yaml`
+2. 生成 `config/local.yaml`（包含自动生成的密钥）
+3. 生成 `.env`（环境变量格式）
+
+### 2. 检查生成的配置
+
+```bash
+# 查看本地配置
+cat config/local.yaml
+
+# 查看环境变量
+cat .env
+```
+
+### 3. 修改配置（可选）
+
+```bash
+# 编辑本地配置
+nano config/local.yaml
+
+# 修改后重新生成 .env
+./scripts/setup.sh
+```
+
+---
+
+## 配置文件说明
+
+### config.schema.yaml（配置元数据）
+
+**位置**: `config/config.schema.yaml`
+
+**用途**: 定义所有配置字段的元数据
+
+**格式**:
+```yaml
+- section: trilingual.enabled
+  default: true
+  description: "Enable trilingual entity extractor (Chinese/English/Swedish)"
+
+- section: lightrag.api.secret_key
+  type: secret
+  auto_generate: true
+  description: "API secret key (auto-generated, 32 characters)"
+
+- section: lightrag.llm.api_key
+  type: secret
+  auto_generate: false
+  description: "LLM API key (user-provided)"
+```
+
+**字段说明**:
+- `section`: 配置路径（点分隔，如 `trilingual.chinese.enabled`）
+- `default`: 默认值（如果有）
+- `type`: 类型标记（`secret` = 密钥，留空 = 自动推断）
+- `auto_generate`: 是否自动生成密钥（仅适用于 `type: secret`）
+- `description`: 字段描述
+
+**重要**:
+- ✅ 此文件会提交到 Git
+- ✅ 修改此文件后运行 `./scripts/setup.sh` 更新配置
+- ✅ 新增字段会使用默认值，现有值会保留
+
+### config/local.yaml（本地配置）
+
+**位置**: `config/local.yaml`
+
+**用途**: 实际的配置文件（YAML 格式）
+
+**格式**:
+```yaml
+trilingual:
+  enabled: true
+  default_language: en
+  lazy_loading: true
+  chinese:
+    enabled: true
+    model: CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_BASE_ZH
+  english:
+    enabled: true
+    model: en_core_web_trf
+    batch_size: 32
+
+lightrag:
+  api:
+    secret_key: abc123...  # 自动生成
+    host: 0.0.0.0
+    port: 9621
+  llm:
+    provider: openai
+    model: gpt-4o-mini
+    api_key: sk-...  # 手动填写
+```
+
+**重要**:
+- ❌ 此文件不会提交到 Git（已添加到 `.gitignore`）
+- ✅ 可以直接编辑此文件修改配置
+- ✅ 修改后运行 `./scripts/setup.sh` 更新 `.env`
+- ✅ 包含自动生成的密钥，请妥善保管
+
+### .env（环境变量）
+
+**位置**: `.env`（项目根目录）
+
+**用途**: 环境变量格式的配置文件
+
+**格式**:
+```bash
+# TRILINGUAL
+TRILINGUAL_ENABLED=true
+TRILINGUAL_DEFAULT_LANGUAGE=en
+TRILINGUAL_LAZY_LOADING=true
+TRILINGUAL_CHINESE_ENABLED=true
+TRILINGUAL_CHINESE_MODEL=CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_BASE_ZH
+
+# LIGHTRAG
+LIGHTRAG_API_SECRET_KEY=abc123...
+LIGHTRAG_API_HOST=0.0.0.0
+LIGHTRAG_API_PORT=9621
+LIGHTRAG_LLM_PROVIDER=openai
+LIGHTRAG_LLM_MODEL=gpt-4o-mini
+```
+
+**命名规则**:
+- 嵌套路径 → 大写 + 下划线
+- 例如: `trilingual.chinese.enabled` → `TRILINGUAL_CHINESE_ENABLED`
+
+**重要**:
+- ❌ 此文件不会提交到 Git（已添加到 `.gitignore`）
+- ⚠️ 此文件由脚本自动生成，**不要手动编辑**
+- ✅ 修改 `config/local.yaml` 后重新运行 `./scripts/setup.sh`
+
+---
+
+## 配置项说明
+
+### 三语言实体提取器配置
+
+#### 通用配置
+
+| 配置项 | 默认值 | 说明 |
+|--------|--------|------|
+| `trilingual.enabled` | `true` | 启用三语言实体提取器 |
+| `trilingual.default_language` | `en` | 默认语言（zh/en/sv） |
+| `trilingual.lazy_loading` | `true` | 启用延迟加载（节省内存） |
+
+#### 中文配置（HanLP）
+
+| 配置项 | 默认值 | 说明 |
+|--------|--------|------|
+| `trilingual.chinese.enabled` | `true` | 启用中文提取 |
+| `trilingual.chinese.model` | `CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_BASE_ZH` | HanLP 模型名 |
+| `trilingual.chinese.cache_dir` | `""` | 模型缓存目录（空 = 默认 `~/.hanlp`） |
+
+#### 英文配置（spaCy）
+
+| 配置项 | 默认值 | 说明 |
+|--------|--------|------|
+| `trilingual.english.enabled` | `true` | 启用英文提取 |
+| `trilingual.english.model` | `en_core_web_trf` | spaCy 模型名 |
+| `trilingual.english.batch_size` | `32` | 批处理大小 |
+
+**可选模型**:
+- `en_core_web_trf`: Transformer 模型（最高质量，~440 MB）
+- `en_core_web_lg`: 大模型（高质量，~440 MB）
+- `en_core_web_sm`: 小模型（较低质量，~12 MB）
+
+#### 瑞典语配置（spaCy）
+
+| 配置项 | 默认值 | 说明 |
+|--------|--------|------|
+| `trilingual.swedish.enabled` | `true` | 启用瑞典语提取 |
+| `trilingual.swedish.model` | `sv_core_news_lg` | spaCy 模型名 |
+| `trilingual.swedish.batch_size` | `32` | 批处理大小 |
+
+**可选模型**:
+- `sv_core_news_lg`: 大模型（最高质量，~545 MB）
+- `sv_core_news_md`: 中等模型（~40 MB）
+- `sv_core_news_sm`: 小模型（~12 MB）
+
+#### 性能配置
+
+| 配置项 | 默认值 | 说明 |
+|--------|--------|------|
+| `trilingual.performance.max_text_length` | `1000000` | 最大文本长度（字符） |
+| `trilingual.performance.enable_gpu` | `false` | 启用 GPU 加速 |
+| `trilingual.performance.num_threads` | `4` | 并行处理线程数 |
+
+#### 缓存配置
+
+| 配置项 | 默认值 | 说明 |
+|--------|--------|------|
+| `trilingual.cache.enabled` | `true` | 启用结果缓存 |
+| `trilingual.cache.ttl` | `3600` | 缓存 TTL（秒，0 = 永不过期） |
+| `trilingual.cache.max_size` | `1000` | 最大缓存数量 |
+
+#### 日志配置
+
+| 配置项 | 默认值 | 说明 |
+|--------|--------|------|
+| `trilingual.logging.level` | `INFO` | 日志级别（DEBUG/INFO/WARNING/ERROR） |
+| `trilingual.logging.format` | `%(asctime)s - %(name)s - %(levelname)s - %(message)s` | 日志格式 |
+
+### LightRAG 通用配置
+
+#### API 配置
+
+| 配置项 | 默认值 | 说明 |
+|--------|--------|------|
+| `lightrag.api.secret_key` | *自动生成* | API 密钥（32 字符） |
+| `lightrag.api.host` | `0.0.0.0` | API 服务器地址 |
+| `lightrag.api.port` | `9621` | API 服务器端口 |
+| `lightrag.api.debug` | `false` | 调试模式 |
+
+#### 数据库配置
+
+| 配置项 | 默认值 | 说明 |
+|--------|--------|------|
+| `lightrag.database.type` | `sqlite` | 数据库类型（sqlite/postgres/mysql） |
+| `lightrag.database.path` | `./data/lightrag.db` | 数据库路径（SQLite） |
+
+#### 向量数据库配置
+
+| 配置项 | 默认值 | 说明 |
+|--------|--------|------|
+| `lightrag.vector_db.type` | `nano` | 向量数据库类型（nano/milvus/qdrant/chroma） |
+| `lightrag.vector_db.dimension` | `1536` | 向量维度 |
+
+#### LLM 配置
+
+| 配置项 | 默认值 | 说明 |
+|--------|--------|------|
+| `lightrag.llm.provider` | `openai` | LLM 提供商（openai/anthropic/ollama/custom） |
+| `lightrag.llm.model` | `gpt-4o-mini` | LLM 模型名 |
+| `lightrag.llm.api_key` | *需手动填写* | LLM API 密钥 |
+| `lightrag.llm.base_url` | `""` | 自定义 LLM 基础 URL（可选） |
+| `lightrag.llm.max_tokens` | `4096` | 最大 tokens 数 |
+| `lightrag.llm.temperature` | `0.0` | 温度参数（0.0-1.0） |
+
+#### 实体提取配置
+
+| 配置项 | 默认值 | 说明 |
+|--------|--------|------|
+| `lightrag.entity_extraction.max_gleaning` | `1` | Gleaning 轮数（0=禁用，1=启用） |
+| `lightrag.entity_extraction.use_trilingual` | `false` | 使用三语言提取器（而非 LLM） |
+
+#### 关系提取配置
+
+| 配置项 | 默认值 | 说明 |
+|--------|--------|------|
+| `lightrag.relation_extraction.enabled` | `true` | 启用关系提取 |
+| `lightrag.relation_extraction.method` | `llm` | 提取方法（llm/pattern/hybrid） |
+
+#### 安全配置
+
+| 配置项 | 默认值 | 说明 |
+|--------|--------|------|
+| `lightrag.security.enable_api_key` | `false` | 启用 API 密钥验证 |
+| `lightrag.security.allowed_origins` | `*` | CORS 允许的源（逗号分隔） |
+| `lightrag.security.rate_limit` | `100` | API 速率限制（请求/分钟/IP） |
+
+---
+
+## 高级用法
+
+### 1. 添加新配置字段
+
+**步骤**:
+1. 编辑 `config/config.schema.yaml`，添加新字段：
+```yaml
+- section: my_module.new_feature.enabled
+  default: true
+  description: "Enable my new feature"
+```
+
+2. 重新运行设置脚本：
+```bash
+./scripts/setup.sh
+```
+
+3. 检查 `config/local.yaml` 和 `.env`，新字段已自动添加。
+
+### 2. 自动生成密钥
+
+**用途**: API 密钥、JWT 密钥、加密密钥等
+
+**步骤**:
+1. 在 `config.schema.yaml` 中标记为 `type: secret` 和 `auto_generate: true`：
+```yaml
+- section: my_module.secret_token
+  type: secret
+  auto_generate: true
+  description: "Secret token for authentication"
+```
+
+2. 运行 `./scripts/setup.sh`，密钥会自动生成（32 字符）
+
+**注意**:
+- 密钥只生成一次，后续运行会保留现有值
+- 如需重新生成，删除 `config/local.yaml` 中对应字段后重新运行
+
+### 3. 用户提供的密钥
+
+**用途**: LLM API 密钥等需要用户提供的敏感信息
+
+**步骤**:
+1. 在 `config.schema.yaml` 中标记为 `type: secret` 和 `auto_generate: false`：
+```yaml
+- section: lightrag.llm.api_key
+  type: secret
+  auto_generate: false
+  description: "LLM API key (user-provided)"
+```
+
+2. 运行 `./scripts/setup.sh` 生成配置框架
+
+3. 手动编辑 `config/local.yaml`，填写 API 密钥：
+```yaml
+lightrag:
+  llm:
+    api_key: sk-your-actual-key-here
+```
+
+4. 重新运行 `./scripts/setup.sh` 更新 `.env`
+
+### 4. 深度合并逻辑
+
+**特性**: 修改配置时，现有值会被保留，新字段会使用默认值
+
+**示例**:
+
+**现有配置** (`config/local.yaml`):
+```yaml
+trilingual:
+  chinese:
+    enabled: false  # 用户修改过
+```
+
+**Schema 更新** (`config.schema.yaml`):
+```yaml
+- section: trilingual.chinese.enabled
+  default: true
+
+- section: trilingual.chinese.cache_dir  # 新增字段
+  default: "/custom/path"
+```
+
+**运行** `./scripts/setup.sh` **后** (`config/local.yaml`):
+```yaml
+trilingual:
+  chinese:
+    enabled: false      # 保留现有值 ✅
+    cache_dir: /custom/path  # 使用默认值 ✅
+```
+
+### 5. 类型推断
+
+**支持的类型**:
+- 布尔值: `true`, `false`
+- 整数: `123`, `-456`
+- 浮点数: `0.5`, `3.14`
+- 字符串: `hello`, `api_key`
+
+**自动转换**:
+```yaml
+# Schema 中定义
+- section: my_module.timeout
+  default: 30  # 整数
+
+- section: my_module.enabled
+  default: true  # 布尔值
+
+# 生成的 .env
+MY_MODULE_TIMEOUT=30  # 整数字符串
+MY_MODULE_ENABLED=true  # 布尔值字符串
+```
+
+---
+
+## 常见问题
+
+### Q1: 如何修改配置？
+
+**A**: 有两种方法：
+
+**方法 1: 编辑 local.yaml（推荐）**
+```bash
+# 1. 编辑配置
+nano config/local.yaml
+
+# 2. 更新 .env
+./scripts/setup.sh
+```
+
+**方法 2: 编辑 schema.yaml（添加新字段）**
+```bash
+# 1. 添加新字段到 schema
+nano config/config.schema.yaml
+
+# 2. 重新生成配置
+./scripts/setup.sh
+```
+
+### Q2: 配置文件丢失了怎么办？
+
+**A**: 重新运行设置脚本即可：
+```bash
+./scripts/setup.sh
+```
+
+**注意**:
+- ✅ 如果只是 `.env` 丢失，重新运行会从 `config/local.yaml` 重新生成
+- ⚠️ 如果 `config/local.yaml` 也丢失，会使用默认值重新生成（自动生成的密钥会改变）
+
+### Q3: 为什么修改 .env 后重新运行脚本配置被覆盖？
+
+**A**: `.env` 是自动生成的文件，**不应该手动编辑**。
+
+**正确做法**:
+1. 编辑 `config/local.yaml`
+2. 运行 `./scripts/setup.sh`
+3. `.env` 会自动更新
+
+### Q4: 如何在不同环境使用不同配置？
+
+**A**: 使用环境变量覆盖：
+
+**开发环境** (`.env`):
+```bash
+LIGHTRAG_API_DEBUG=true
+LIGHTRAG_LLM_MODEL=gpt-4o-mini
+```
+
+**生产环境** (在服务器上设置环境变量):
+```bash
+export LIGHTRAG_API_DEBUG=false
+export LIGHTRAG_LLM_MODEL=gpt-4o
+```
+
+环境变量的优先级 > `.env` 文件
+
+### Q5: 密钥泄露了怎么办？
+
+**A**: 重新生成密钥：
+
+1. 删除 `config/local.yaml` 中的密钥字段
+2. 运行 `./scripts/setup.sh`
+3. 新密钥会自动生成
+
+**示例**:
+```bash
+# 1. 编辑 config/local.yaml，删除这一行：
+# lightrag.api.secret_key: abc123...
+
+# 2. 重新生成
+./scripts/setup.sh
+
+# 3. 新密钥已生成并保存
+```
+
+### Q6: 如何检查配置是否正确？
+
+**A**: 查看生成的文件：
+
+```bash
+# 查看本地配置
+cat config/local.yaml
+
+# 查看环境变量
+cat .env
+
+# 查看特定配置项
+grep "TRILINGUAL_ENABLED" .env
+```
+
+### Q7: 脚本运行失败怎么办？
+
+**A**: 检查以下几点：
+
+1. **Python 依赖**:
+```bash
+pip install pyyaml
+```
+
+2. **Schema 文件存在**:
+```bash
+ls -la config/config.schema.yaml
+```
+
+3. **脚本权限**:
+```bash
+chmod +x scripts/setup.sh
+chmod +x scripts/lib/generate_from_schema.py
+chmod +x scripts/lib/generate_env.py
+```
+
+4. **查看详细错误**:
+```bash
+./scripts/setup.sh 2>&1 | tee setup.log
+```
+
+---
+
+## 文件结构
+
+```
+LightRAG/
+├── config/
+│   ├── config.schema.yaml   # 配置元数据（Git 追踪）
+│   └── local.yaml           # 本地配置（Git 忽略，自动生成）
+├── scripts/
+│   ├── setup.sh             # 一键设置脚本
+│   └── lib/
+│       ├── generate_from_schema.py  # Schema → local.yaml
+│       └── generate_env.py          # local.yaml → .env
+├── .env                     # 环境变量（Git 忽略，自动生成）
+└── .gitignore               # 忽略 local.yaml 和 .env
+```
+
+---
+
+## 工作流示意图
+
+```
+┌──────────────────────┐
+│ config.schema.yaml   │ ← 配置元数据（Git 追踪）
+│ (单一数据源)        │
+└──────────┬───────────┘
+           │
+           │ 运行 ./scripts/setup.sh
+           │
+           ▼
+┌──────────────────────┐
+│ generate_from_schema │ ← 读取 schema，生成配置
+│                      │   - 深度合并
+│                      │   - 自动生成密钥
+│                      │   - 保留现有值
+└──────────┬───────────┘
+           │
+           ▼
+┌──────────────────────┐
+│ config/local.yaml    │ ← 本地配置（Git 忽略）
+│                      │   - 可手动编辑
+│                      │   - 包含密钥
+└──────────┬───────────┘
+           │
+           │ 运行 ./scripts/setup.sh
+           │
+           ▼
+┌──────────────────────┐
+│ generate_env.py      │ ← 转换为环境变量格式
+│                      │   - 扁平化嵌套结构
+│                      │   - 大写 + 下划线
+└──────────┬───────────┘
+           │
+           ▼
+┌──────────────────────┐
+│ .env                 │ ← 环境变量（Git 忽略）
+│                      │   - 不要手动编辑
+└──────────────────────┘
+```
+
+---
+
+## 最佳实践
+
+### ✅ 推荐做法
+
+1. **修改配置**: 编辑 `config/local.yaml`，然后运行 `./scripts/setup.sh`
+2. **添加字段**: 编辑 `config.schema.yaml`，然后运行 `./scripts/setup.sh`
+3. **版本控制**: 只提交 `config.schema.yaml`，不要提交 `config/local.yaml` 和 `.env`
+4. **密钥管理**: 使用 `auto_generate: true` 自动生成密钥
+5. **环境隔离**: 使用环境变量覆盖 `.env` 中的配置
+
+### ❌ 避免做法
+
+1. **手动编辑 .env**: 修改会被覆盖
+2. **提交密钥**: `config/local.yaml` 和 `.env` 包含敏感信息
+3. **硬编码配置**: 在代码中硬编码配置值
+4. **跳过脚本**: 手动创建配置文件（会丢失深度合并等特性）
+
+---
+
+## 总结
+
+### 核心优势
+
+✅ **单一数据源**: 所有配置元数据集中管理
+
+✅ **自动化**: 一键生成配置，无需手动管理
+
+✅ **安全性**: 配置文件不会提交到 Git
+
+✅ **灵活性**: 支持深度合并、自动生成密钥、类型推断
+
+✅ **可维护性**: 配置修改清晰可追溯
+
+### 适用场景
+
+- ✓ 需要管理大量配置项
+- ✓ 需要自动生成密钥
+- ✓ 需要在多个环境部署
+- ✓ 需要配置版本控制
+- ✓ 团队协作开发
+
+---
+
+## 参考资源
+
+- **Schema 文件**: `config/config.schema.yaml`
+- **生成脚本**: `scripts/lib/generate_from_schema.py`
+- **环境变量脚本**: `scripts/lib/generate_env.py`
+- **设置脚本**: `scripts/setup.sh`
+
+---
+
+## 支持和反馈
+
+如有问题或建议，请：
+1. 查看本文档的常见问题部分
+2. 查看生成脚本的源代码和注释
+3. 提交 Issue 到 LightRAG 仓库
--- a/scripts/lib/generate_env.py
+++ b/scripts/lib/generate_env.py
@ -0,0 +1,206 @@
+#!/usr/bin/env python3
+"""
+环境变量生成器 - 将 YAML 配置转换为 .env 格式
+
+从 config/local.yaml 读取配置，生成 .env 文件。
+支持嵌套 YAML 扁平化为环境变量。
+"""
+
+import sys
+from pathlib import Path
+from typing import Any, Dict, List
+import yaml
+
+
+def flatten_dict(data: Dict, parent_key: str = '', sep: str = '_') -> Dict[str, Any]:
+    """
+    扁平化嵌套字典
+
+    Args:
+        data: 嵌套字典
+        parent_key: 父级键名
+        sep: 分隔符
+
+    Returns:
+        扁平化后的字典
+
+    Example:
+        {'trilingual': {'enabled': True}} -> {'TRILINGUAL_ENABLED': True}
+    """
+    items = []
+
+    for key, value in data.items():
+        # 转换为大写并组合键名
+        new_key = f"{parent_key}{sep}{key}".upper() if parent_key else key.upper()
+
+        if isinstance(value, dict):
+            # 递归处理嵌套字典
+            items.extend(flatten_dict(value, new_key, sep=sep).items())
+        else:
+            items.append((new_key, value))
+
+    return dict(items)
+
+
+def format_env_value(value: Any) -> str:
+    """
+    格式化环境变量值
+
+    Args:
+        value: 原始值
+
+    Returns:
+        格式化后的字符串
+
+    Example:
+        True -> 'true'
+        123 -> '123'
+        'hello world' -> 'hello world'
+    """
+    if isinstance(value, bool):
+        return 'true' if value else 'false'
+    elif isinstance(value, (int, float)):
+        return str(value)
+    elif isinstance(value, str):
+        # 如果字符串包含空格或特殊字符，添加引号
+        if ' ' in value or any(c in value for c in ['#', '$', '&', '|', ';']):
+            # 转义内部引号
+            escaped = value.replace('"', '\\"')
+            return f'"{escaped}"'
+        return value
+    elif value is None:
+        return ''
+    else:
+        return str(value)
+
+
+def load_config(config_path: Path) -> Dict:
+    """
+    加载 YAML 配置
+
+    Args:
+        config_path: 配置文件路径
+
+    Returns:
+        配置字典
+    """
+    if not config_path.exists():
+        raise FileNotFoundError(f"配置文件不存在: {config_path}")
+
+    with open(config_path, 'r', encoding='utf-8') as f:
+        config = yaml.safe_load(f)
+
+    return config if config else {}
+
+
+def generate_env_content(config: Dict) -> str:
+    """
+    生成 .env 文件内容
+
+    Args:
+        config: 配置字典
+
+    Returns:
+        .env 格式的字符串
+    """
+    # 扁平化配置
+    flat_config = flatten_dict(config)
+
+    # 按键名排序
+    sorted_items = sorted(flat_config.items())
+
+    # 生成 .env 内容
+    lines = [
+        "# LightRAG 环境变量配置",
+        "# 此文件由 scripts/setup.sh 自动生成，请勿手动编辑",
+        "# 修改 config/local.yaml 后重新运行 ./scripts/setup.sh 更新此文件",
+        "",
+    ]
+
+    current_section = None
+
+    for key, value in sorted_items:
+        # 提取顶级 section（第一个下划线之前的部分）
+        section = key.split('_')[0]
+
+        # 如果切换到新 section，添加分隔注释
+        if section != current_section:
+            if current_section is not None:
+                lines.append("")  # 添加空行分隔
+            lines.append(f"# {section}")
+            current_section = section
+
+        # 添加键值对
+        formatted_value = format_env_value(value)
+        lines.append(f"{key}={formatted_value}")
+
+    return '\n'.join(lines) + '\n'
+
+
+def save_env_file(content: str, env_path: Path) -> None:
+    """
+    保存 .env 文件
+
+    Args:
+        content: .env 文件内容
+        env_path: 输出文件路径
+    """
+    with open(env_path, 'w', encoding='utf-8') as f:
+        f.write(content)
+
+
+def main():
+    """Read config/local.yaml and write the matching .env file, reporting progress on stdout.
+
+    On any exception the error and traceback are printed and the process
+    exits with status 1.
+    """
+    # Project root: three levels up from this file.
+    project_root = Path(__file__).parent.parent.parent
+
+    # Input and output file paths
+    config_path = project_root / 'config' / 'local.yaml'
+    env_path = project_root / '.env'
+
+    print("=" * 70)
+    print("  环境变量生成器")
+    print("=" * 70)
+    print()
+
+    try:
+        # Load the configuration
+        print(f"📖 读取配置: {config_path.relative_to(project_root)}")
+        config = load_config(config_path)
+        print(f"   找到 {len(config)} 个顶级配置节")
+
+        # Build the .env content
+        print(f"\n⚙️  生成环境变量...")
+        env_content = generate_env_content(config)
+
+        # Count the generated variables: every output line containing '='
+        env_count = len([line for line in env_content.split('\n') if '=' in line])
+        print(f"   生成 {env_count} 个环境变量")
+
+        # Write the .env file
+        print(f"\n💾 保存文件: {env_path.relative_to(project_root)}")
+        save_env_file(env_content, env_path)
+
+        print()
+        print("=" * 70)
+        print("  ✅ 环境变量生成成功")
+        print("=" * 70)
+        print()
+        print(f"输出文件: {env_path}")
+        print()
+        print("提示:")
+        print("  - .env 文件已添加到 .gitignore，不会提交到 Git")
+        print("  - 修改 config/local.yaml 后重新运行此脚本更新 .env")
+        print("  - 环境变量命名规则: 嵌套路径转大写并用下划线连接")
+        print("    例如: trilingual.chinese.enabled -> TRILINGUAL_CHINESE_ENABLED")
+        print()
+
+    except Exception as e:
+        # Report the failure on stderr with a traceback and exit non-zero.
+        print(f"\n❌ 错误: {e}", file=sys.stderr)
+        import traceback
+        traceback.print_exc()
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    main()
--- a/scripts/lib/generate_from_schema.py
+++ b/scripts/lib/generate_from_schema.py
@ -0,0 +1,294 @@
+#!/usr/bin/env python3
+"""
+配置生成器 - 从 Schema 生成本地配置
+
+从 config/config.schema.yaml 读取配置元数据，生成 config/local.yaml。
+支持深度合并、自动生成密钥、保留现有值。
+"""
+
+import sys
+import secrets
+import string
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+import yaml
+
+
+def generate_secret(length: int = 32) -> str:
+    """生成随机密钥"""
+    alphabet = string.ascii_letters + string.digits
+    return ''.join(secrets.choice(alphabet) for _ in range(length))
+
+
+def set_nested_value(data: Dict, path: str, value: Any) -> None:
+    """
+    设置嵌套字典的值
+
+    Args:
+        data: 目标字典
+        path: 点分隔的路径，如 "trilingual.chinese.enabled"
+        value: 要设置的值
+    """
+    keys = path.split('.')
+    current = data
+
+    for key in keys[:-1]:
+        if key not in current:
+            current[key] = {}
+        current = current[key]
+
+    current[keys[-1]] = value
+
+
+def get_nested_value(data: Dict, path: str, default: Any = None) -> Any:
+    """
+    获取嵌套字典的值
+
+    Args:
+        data: 源字典
+        path: 点分隔的路径
+        default: 默认值
+
+    Returns:
+        找到的值或默认值
+    """
+    keys = path.split('.')
+    current = data
+
+    try:
+        for key in keys:
+            current = current[key]
+        return current
+    except (KeyError, TypeError):
+        return default
+
+
+def deep_merge(base: Dict, overlay: Dict) -> Dict:
+    """
+    深度合并两个字典
+
+    overlay 中的值会覆盖 base 中的值，但会保留 base 中 overlay 没有的键。
+
+    Args:
+        base: 基础字典
+        overlay: 覆盖字典
+
+    Returns:
+        合并后的字典
+    """
+    result = base.copy()
+
+    for key, value in overlay.items():
+        if key in result and isinstance(result[key], dict) and isinstance(value, dict):
+            result[key] = deep_merge(result[key], value)
+        else:
+            result[key] = value
+
+    return result
+
+
+def infer_type(value: Any) -> Any:
+    """
+    推断并转换值的类型
+
+    Args:
+        value: 原始值
+
+    Returns:
+        转换后的值
+    """
+    if isinstance(value, bool):
+        return value
+    elif isinstance(value, int):
+        return value
+    elif isinstance(value, float):
+        return value
+    elif isinstance(value, str):
+        # 尝试转换为数字
+        try:
+            if '.' in value:
+                return float(value)
+            else:
+                return int(value)
+        except ValueError:
+            # 尝试转换为布尔值
+            if value.lower() in ('true', 'yes', 'on'):
+                return True
+            elif value.lower() in ('false', 'no', 'off'):
+                return False
+            return value
+    else:
+        return value
+
+
+def load_schema(schema_path: Path) -> List[Dict]:
+    """
+    加载配置 Schema
+
+    Args:
+        schema_path: Schema 文件路径
+
+    Returns:
+        Schema 字段列表
+    """
+    if not schema_path.exists():
+        raise FileNotFoundError(f"Schema 文件不存在: {schema_path}")
+
+    with open(schema_path, 'r', encoding='utf-8') as f:
+        schema = yaml.safe_load(f)
+
+    if not isinstance(schema, list):
+        raise ValueError("Schema 必须是列表格式")
+
+    return schema
+
+
+def load_existing_config(config_path: Path) -> Dict:
+    """
+    加载现有配置
+
+    Args:
+        config_path: 配置文件路径
+
+    Returns:
+        现有配置字典（如果文件不存在则返回空字典）
+    """
+    if not config_path.exists():
+        return {}
+
+    with open(config_path, 'r', encoding='utf-8') as f:
+        config = yaml.safe_load(f)
+
+    return config if config else {}
+
+
+def generate_config(schema: List[Dict], existing_config: Dict) -> Dict:
+    """
+    从 Schema 生成配置
+
+    Args:
+        schema: Schema 字段列表
+        existing_config: 现有配置
+
+    Returns:
+        生成的配置字典
+    """
+    config = {}
+
+    for field in schema:
+        section = field.get('section')
+        if not section:
+            continue
+
+        # 检查是否有现有值
+        existing_value = get_nested_value(existing_config, section)
+
+        if existing_value is not None:
+            # 保留现有值
+            set_nested_value(config, section, existing_value)
+        else:
+            # 生成新值
+            field_type = field.get('type', '')
+            auto_generate = field.get('auto_generate', False)
+            default_value = field.get('default')
+
+            if field_type == 'secret' and auto_generate:
+                # 自动生成密钥
+                value = generate_secret(32)
+            elif default_value is not None:
+                # 使用默认值
+                value = infer_type(default_value)
+            else:
+                # 跳过没有默认值的字段
+                continue
+
+            set_nested_value(config, section, value)
+
+    return config
+
+
+def save_config(config: Dict, config_path: Path) -> None:
+    """
+    保存配置到 YAML 文件
+
+    Args:
+        config: 配置字典
+        config_path: 输出文件路径
+    """
+    # 确保目录存在
+    config_path.parent.mkdir(parents=True, exist_ok=True)
+
+    with open(config_path, 'w', encoding='utf-8') as f:
+        yaml.dump(
+            config,
+            f,
+            default_flow_style=False,
+            allow_unicode=True,
+            sort_keys=False,
+            indent=2
+        )
+
+
+def main():
+    """Generate config/local.yaml from config/config.schema.yaml, reporting progress on stdout.
+
+    Existing values in local.yaml are kept. On any exception the error and
+    traceback are printed and the process exits with status 1.
+    """
+    # Project root: three levels up from this file.
+    project_root = Path(__file__).parent.parent.parent
+
+    # Input and output file paths
+    schema_path = project_root / 'config' / 'config.schema.yaml'
+    config_path = project_root / 'config' / 'local.yaml'
+
+    print("=" * 70)
+    print("  配置生成器")
+    print("=" * 70)
+    print()
+
+    try:
+        # Load the schema
+        print(f"📖 读取 Schema: {schema_path.relative_to(project_root)}")
+        schema = load_schema(schema_path)
+        print(f"   找到 {len(schema)} 个配置字段")
+
+        # Load the existing configuration (empty dict if there is none)
+        print(f"\n🔍 检查现有配置: {config_path.relative_to(project_root)}")
+        existing_config = load_existing_config(config_path)
+
+        if existing_config:
+            print(f"   找到现有配置，将保留已有值")
+        else:
+            print(f"   未找到现有配置，将使用默认值")
+
+        # Generate the configuration from the schema
+        print(f"\n⚙️  生成配置...")
+        config = generate_config(schema, existing_config)
+
+        # Deep merge so that fields present in the existing configuration
+        # but not defined in the schema are kept.
+        if existing_config:
+            config = deep_merge(existing_config, config)
+
+        # Save the configuration
+        print(f"\n💾 保存配置: {config_path.relative_to(project_root)}")
+        save_config(config, config_path)
+
+        print()
+        print("=" * 70)
+        print("  ✅ 配置生成成功")
+        print("=" * 70)
+        print()
+        print(f"配置文件: {config_path}")
+        print()
+        print("提示:")
+        print("  - 配置文件已添加到 .gitignore，不会提交到 Git")
+        print("  - 修改配置后重新运行此脚本可更新配置")
+        print("  - 现有值会被保留，新字段会使用默认值")
+        print()
+
+    except Exception as e:
+        # Report the failure on stderr with a traceback and exit non-zero.
+        print(f"\n❌ 错误: {e}", file=sys.stderr)
+        import traceback
+        traceback.print_exc()
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    main()
--- a/scripts/setup.sh
+++ b/scripts/setup.sh
@ -0,0 +1,87 @@
+#!/bin/bash
+# LightRAG configuration bootstrap script.
+# Generates config/local.yaml and .env from config/config.schema.yaml.
+
+set -e  # Exit immediately when a command fails
+
+# Resolve the directory containing this script and the project root above it
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
+
+echo "======================================================================"
+echo "  LightRAG 配置初始化"
+echo "======================================================================"
+echo ""
+
+# Make sure python3 is available
+echo "🔍 检查 Python 环境..."
+if ! command -v python3 &> /dev/null; then
+    echo "❌ 错误: 未找到 python3"
+    echo "   请先安装 Python 3.7 或更高版本"
+    exit 1
+fi
+
+python_version=$(python3 --version 2>&1 | awk '{print $2}')
+echo "   Python 版本: $python_version"
+
+# Make sure the required Python package is importable
+echo ""
+echo "🔍 检查 Python 依赖..."
+if ! python3 -c "import yaml" 2>/dev/null; then
+    echo "   ⚠️  未找到 PyYAML，正在安装..."
+    # Install through the same interpreter that runs the generators below;
+    # a bare `pip` may belong to a different Python installation.
+    python3 -m pip install pyyaml
+else
+    echo "   ✓ PyYAML 已安装"
+fi
+
+# Make sure the schema file exists
+echo ""
+echo "🔍 检查配置 Schema..."
+SCHEMA_FILE="${PROJECT_ROOT}/config/config.schema.yaml"
+if [ ! -f "$SCHEMA_FILE" ]; then
+    echo "❌ 错误: 未找到 $SCHEMA_FILE"
+    exit 1
+fi
+echo "   ✓ Schema 文件存在: config/config.schema.yaml"
+
+# Generate config/local.yaml
+echo ""
+echo "📝 生成本地配置..."
+python3 "${SCRIPT_DIR}/lib/generate_from_schema.py"
+
+# Verify that the file was produced
+if [ ! -f "${PROJECT_ROOT}/config/local.yaml" ]; then
+    echo "❌ 错误: config/local.yaml 生成失败"
+    exit 1
+fi
+
+# Generate .env
+echo ""
+echo "📝 生成环境变量..."
+python3 "${SCRIPT_DIR}/lib/generate_env.py"
+
+# Verify that the file was produced
+if [ ! -f "${PROJECT_ROOT}/.env" ]; then
+    echo "❌ 错误: .env 生成失败"
+    exit 1
+fi
+
+echo ""
+echo "======================================================================"
+echo "  ✅ 配置初始化完成"
+echo "======================================================================"
+echo ""
+echo "生成的文件:"
+echo "  - config/local.yaml  (本地配置文件)"
+echo "  - .env               (环境变量文件)"
+echo ""
+echo "下一步:"
+echo "  1. 编辑 config/local.yaml 修改配置（可选）"
+echo "  2. 重新运行 ./scripts/setup.sh 更新 .env（如果修改了配置）"
+echo "  3. 启动 LightRAG 服务"
+echo ""
+echo "提示:"
+echo "  - 这两个文件已添加到 .gitignore，不会提交到 Git"
+echo "  - 密钥已自动生成，请妥善保管"
+echo "  - 修改配置后重新运行此脚本即可更新"
+echo ""