LightRAG/config/config.schema.yaml
Claude 0a48c633cd
Add Schema-Driven Configuration Pattern
Implement comprehensive configuration management system with:

**Core Components:**
- config/config.schema.yaml: Configuration metadata (single source of truth)
- scripts/lib/generate_from_schema.py: Schema → local.yaml generator
- scripts/lib/generate_env.py: local.yaml → .env converter
- scripts/setup.sh: One-click configuration initialization

**Key Features:**
- Deep merge logic preserves existing values
- Auto-generation of secrets (32-char random strings)
- Type inference for configuration values
- Nested YAML → flat environment variables
- Git-safe: local.yaml and .env excluded from version control

**Configuration Coverage:**
- Trilingual entity extractor (Chinese/English/Swedish)
- LightRAG API, database, vector DB settings
- LLM provider configuration
- Entity/relation extraction settings
- Security and performance tuning

**Documentation:**
- docs/ConfigurationGuide-zh.md: Complete usage guide with examples

**Usage:**
```bash
./scripts/setup.sh  # Generate config/local.yaml and .env
```

This enables centralized configuration management with automatic
secret generation and safe handling of sensitive data.
2025-11-19 19:33:13 +00:00

199 lines
5.6 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

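# LightRAG trilingual entity extractor configuration schema
#
# This file defines the metadata for each configuration field, including:
# - Field path (section)
# - Default value (default)
# - Type (type: leave empty for automatic inference; "secret" marks a secret)
# - Auto-generation (auto_generate: whether a secret is generated automatically)
# - Description (description)
#
# Run ./scripts/setup.sh to generate config/local.yaml and .env automatically.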
# ============================================================
# Trilingual entity extractor configuration
# ============================================================
# General settings
# Each list item is one schema entry: `section` is the dotted field path,
# `default` its default value, and `description` a human-readable summary.
- section: trilingual.enabled
  default: true
  description: "Enable trilingual entity extractor (Chinese/English/Swedish)"
- section: trilingual.default_language
  default: "en"
  description: "Default language if not specified (zh/en/sv)"
- section: trilingual.lazy_loading
  default: true
  description: "Enable lazy loading (load models on-demand to save memory)"
# Chinese settings (HanLP)
- section: trilingual.chinese.enabled
  default: true
  description: "Enable Chinese entity extraction (HanLP)"
- section: trilingual.chinese.model
  default: "CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_BASE_ZH"
  description: "HanLP model name for Chinese"
# An empty string selects the default cache location (see description).
- section: trilingual.chinese.cache_dir
  default: ""
  description: "HanLP model cache directory (empty = default ~/.hanlp)"
# English settings (spaCy)
- section: trilingual.english.enabled
  default: true
  description: "Enable English entity extraction (spaCy)"
- section: trilingual.english.model
  default: "en_core_web_trf"
  description: "spaCy model name for English (en_core_web_trf/en_core_web_lg/en_core_web_sm)"
- section: trilingual.english.batch_size
  default: 32
  description: "spaCy batch size for English processing"
# Swedish settings (spaCy)
- section: trilingual.swedish.enabled
  default: true
  description: "Enable Swedish entity extraction (spaCy)"
- section: trilingual.swedish.model
  default: "sv_core_news_lg"
  description: "spaCy model name for Swedish (sv_core_news_lg/sv_core_news_md/sv_core_news_sm)"
- section: trilingual.swedish.batch_size
  default: 32
  description: "spaCy batch size for Swedish processing"
# Performance settings
- section: trilingual.performance.max_text_length
  default: 1000000
  description: "Maximum text length to process (characters)"
- section: trilingual.performance.enable_gpu
  default: false
  description: "Enable GPU acceleration if available"
- section: trilingual.performance.num_threads
  default: 4
  description: "Number of threads for parallel processing"
# Cache settings
- section: trilingual.cache.enabled
  default: true
  description: "Enable result caching"
# TTL is expressed in seconds; 0 means entries never expire (see description).
- section: trilingual.cache.ttl
  default: 3600
  description: "Cache TTL in seconds (0 = no expiry)"
- section: trilingual.cache.max_size
  default: 1000
  description: "Maximum number of cached results"
# Logging settings
- section: trilingual.logging.level
  default: "INFO"
  description: "Logging level (DEBUG/INFO/WARNING/ERROR)"
# The format value must stay quoted: a plain scalar may not start with "%".
- section: trilingual.logging.format
  default: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
  description: "Logging format string"
# ============================================================
# LightRAG general configuration example
# ============================================================
# API secret key (auto-generated)
# No `default` is given: the entry is typed as a secret and flagged for
# automatic generation instead.
- section: lightrag.api.secret_key
  type: secret
  auto_generate: true
  description: "API secret key (auto-generated, 32 characters)"
# API settings
- section: lightrag.api.host
  default: "0.0.0.0"
  description: "API server host"
- section: lightrag.api.port
  default: 9621
  description: "API server port"
- section: lightrag.api.debug
  default: false
  description: "Enable debug mode"
# Database settings
- section: lightrag.database.type
  default: "sqlite"
  description: "Database type (sqlite/postgres/mysql)"
- section: lightrag.database.path
  default: "./data/lightrag.db"
  description: "Database file path (for SQLite)"
# Vector database settings
- section: lightrag.vector_db.type
  default: "nano"
  description: "Vector database type (nano/milvus/qdrant/chroma)"
- section: lightrag.vector_db.dimension
  default: 1536
  description: "Vector dimension"
# LLM settings
- section: lightrag.llm.provider
  default: "openai"
  description: "LLM provider (openai/anthropic/ollama/custom)"
- section: lightrag.llm.model
  default: "gpt-4o-mini"
  description: "LLM model name"
# Typed as a secret but not auto-generated: the user supplies the value.
- section: lightrag.llm.api_key
  type: secret
  auto_generate: false
  description: "LLM API key (user-provided)"
- section: lightrag.llm.base_url
  default: ""
  description: "Custom LLM base URL (optional)"
- section: lightrag.llm.max_tokens
  default: 4096
  description: "Maximum tokens per request"
- section: lightrag.llm.temperature
  default: 0.0
  description: "LLM temperature (0.0-1.0)"
# Entity extraction settings
# NOTE(review): the description words this as an on/off flag (0/1) while also
# calling it "rounds" — confirm whether values above 1 are accepted.
- section: lightrag.entity_extraction.max_gleaning
  default: 1
  description: "Entity extraction gleaning rounds (0=disabled, 1=enabled)"
- section: lightrag.entity_extraction.use_trilingual
  default: false
  description: "Use trilingual extractor instead of LLM (requires setup)"
# Relation extraction settings
- section: lightrag.relation_extraction.enabled
  default: true
  description: "Enable relation extraction"
- section: lightrag.relation_extraction.method
  default: "llm"
  description: "Relation extraction method (llm/pattern/hybrid)"
# Security settings
# NOTE(review): the defaults below are permissive (API key not required, any
# CORS origin allowed) — confirm this is intended outside local development.
- section: lightrag.security.enable_api_key
  default: false
  description: "Require API key for requests"
# "*" must stay quoted: an unquoted leading "*" is parsed as a YAML alias.
- section: lightrag.security.allowed_origins
  default: "*"
  description: "CORS allowed origins (comma-separated)"
- section: lightrag.security.rate_limit
  default: 100
  description: "API rate limit (requests per minute per IP)"