MLO-340: Update lightrag settings (#10)

* Updates LLM and embedding configurations to use OpenRouter and Gemini

* Renames and significantly expands environment configuration template

Renames the environment example file to a standard hidden env template to align with common conventions.

Extensively updates and reorganizes configuration options, adding detailed setup for LLM, embedding, storage backends, PostgreSQL, and overall LightRAG processing parameters.

Comments out some legacy and optional configuration lines to streamline initial setup and clarify default recommended values.

Updates gitignore to exclude various env-related files to protect sensitive keys and improve environment management.

* Updates default config with improved LLM and processing settings

* Adds openai-compatible environment file to .gitignore

* Adds new environment files to ignore list
This commit is contained in:
Taddeus 2025-07-24 17:49:50 +03:00 committed by GitHub
parent e2a894ee5e
commit f494b96a7b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 114 additions and 51 deletions

View file

@ -1,12 +1,62 @@
### This is a sample file of .env
# ==============================================
# LIGHTRAG CONFIGURATION
# ==============================================
OPENAI_API_KEY=sk-your-openai-api-key-here
### Server Configuration
# Server Configuration
HOST=0.0.0.0
PORT=9621
WEBUI_TITLE='My Graph KB'
WEBUI_DESCRIPTION="Simple and Fast Graph Based RAG System"
OLLAMA_EMULATING_MODEL_TAG=latest
WEBUI_TITLE=LightRAG - Graph KB
WEBUI_DESCRIPTION=Graph-based RAG with PostgreSQL and pgvector
# LLM Configuration for LightRAG (OpenRouter)
LLM_BINDING=openai
LLM_MODEL=openai/o3-mini
LLM_BINDING_HOST=https://openrouter.ai/api/v1
LLM_BINDING_API_KEY=sk-your-openai-api-key-here
# Embedding Configuration for LightRAG (OpenAI direct)
# EMBEDDING_BINDING=openai
# EMBEDDING_MODEL=text-embedding-ada-002
# EMBEDDING_DIM=1536
# EMBEDDING_BINDING_HOST=https://api.openai.com/v1
# EMBEDDING_BINDING_API_KEY=sk-your-openai-api-key-here
EMBEDDING_BINDING=openai
EMBEDDING_MODEL=gemini-embedding-001
EMBEDDING_DIM=3072
EMBEDDING_BINDING_HOST=https://generativelanguage.googleapis.com/v1beta/openai/
EMBEDDING_BINDING_API_KEY=AI-your-gemini-api-key-here
# Storage Backend Configuration
LIGHTRAG_KV_STORAGE=PGKVStorage
LIGHTRAG_VECTOR_STORAGE=PGVectorStorage
LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage
LIGHTRAG_GRAPH_STORAGE=NetworkXStorage
# PostgreSQL Configuration
POSTGRES_HOST=postgres
POSTGRES_PORT=5432
POSTGRES_USER=lightrag_user
POSTGRES_PASSWORD=lightrag_pass
POSTGRES_DATABASE=lightrag
POSTGRES_MAX_CONNECTIONS=12
# LightRAG Processing Configuration
ENABLE_LLM_CACHE=true
ENABLE_LLM_CACHE_FOR_EXTRACT=true
TIMEOUT=240
TEMPERATURE=0
MAX_ASYNC=4
MAX_TOKENS=32768
CHUNK_SIZE=1200
CHUNK_OVERLAP_SIZE=100
TOP_K=60
### ====================================================================================
# OLLAMA_EMULATING_MODEL_TAG=latest
# WORKERS=2
# CORS_ORIGINS=http://localhost:3000,http://localhost:8080
@ -53,7 +103,7 @@ OLLAMA_EMULATING_MODEL_TAG=latest
### Entity and relation summarization configuration
### Language: English, Chinese, French, German ...
SUMMARY_LANGUAGE=English
# SUMMARY_LANGUAGE=English
### Number of duplicated entities/edges to trigger LLM re-summary on merge (at least 3 is recommended)
# FORCE_LLM_SUMMARY_ON_MERGE=6
### Max tokens for entity/relations description after merge
@ -66,34 +116,34 @@ SUMMARY_LANGUAGE=English
# CHUNK_OVERLAP_SIZE=100
### LLM Configuration
ENABLE_LLM_CACHE=true
ENABLE_LLM_CACHE_FOR_EXTRACT=true
# ENABLE_LLM_CACHE=true
# ENABLE_LLM_CACHE_FOR_EXTRACT=true
### Time out in seconds for LLM, None for infinite timeout
TIMEOUT=240
# TIMEOUT=240
### Some models like o1-mini require temperature to be set to 1
TEMPERATURE=0
# TEMPERATURE=0
### Max concurrency requests of LLM
MAX_ASYNC=4
# MAX_ASYNC=4
### MAX_TOKENS: max tokens send to LLM for entity relation summaries (less than context size of the model)
### MAX_TOKENS: set as num_ctx option for Ollama by API Server
MAX_TOKENS=32768
# MAX_TOKENS=32768
### LLM Binding type: openai, ollama, lollms, azure_openai
LLM_BINDING=openai
LLM_MODEL=gpt-4o
LLM_BINDING_HOST=https://api.openai.com/v1
LLM_BINDING_API_KEY=your_api_key
# LLM_BINDING=openai
# LLM_MODEL=gpt-4o
# LLM_BINDING_HOST=https://api.openai.com/v1
# LLM_BINDING_API_KEY=your_api_key
### Optional for Azure
# AZURE_OPENAI_API_VERSION=2024-08-01-preview
# AZURE_OPENAI_DEPLOYMENT=gpt-4o
### Embedding Configuration
### Embedding Binding type: openai, ollama, lollms, azure_openai
EMBEDDING_BINDING=ollama
EMBEDDING_MODEL=bge-m3:latest
EMBEDDING_DIM=1024
EMBEDDING_BINDING_API_KEY=your_api_key
# EMBEDDING_BINDING=ollama
# EMBEDDING_MODEL=bge-m3:latest
# EMBEDDING_DIM=1024
# EMBEDDING_BINDING_API_KEY=your_api_key
# If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
EMBEDDING_BINDING_HOST=http://localhost:11434
# EMBEDDING_BINDING_HOST=http://localhost:11434
### Num of chunks send to Embedding in single request
# EMBEDDING_BATCH_NUM=32
### Max concurrency requests for Embedding
@ -120,19 +170,19 @@ EMBEDDING_BINDING_HOST=http://localhost:11434
# TIDB_WORKSPACE=default
### PostgreSQL Configuration
POSTGRES_HOST=localhost
POSTGRES_PORT=5432
POSTGRES_USER=your_username
POSTGRES_PASSWORD='your_password'
POSTGRES_DATABASE=your_database
POSTGRES_MAX_CONNECTIONS=12
# POSTGRES_HOST=localhost
# POSTGRES_PORT=5432
# POSTGRES_USER=your_username
# POSTGRES_PASSWORD='your_password'
# POSTGRES_DATABASE=your_database
# POSTGRES_MAX_CONNECTIONS=12
### Separates all data from different LightRAG instances (deprecated)
# POSTGRES_WORKSPACE=default
### Neo4j Configuration
NEO4J_URI=neo4j+s://xxxxxxxx.databases.neo4j.io
NEO4J_USERNAME=neo4j
NEO4J_PASSWORD='your_password'
# NEO4J_URI=neo4j+s://xxxxxxxx.databases.neo4j.io
# NEO4J_USERNAME=neo4j
# NEO4J_PASSWORD='your_password'
### Independent AGE Configuration (not for AGE embedded in PostgreSQL)
# AGE_POSTGRES_DB=
@ -146,21 +196,21 @@ NEO4J_PASSWORD='your_password'
# AGE_GRAPH_NAME=lightrag
### MongoDB Configuration
MONGO_URI=mongodb://root:root@localhost:27017/
MONGO_DATABASE=LightRAG
# MONGO_URI=mongodb://root:root@localhost:27017/
# MONGO_DATABASE=LightRAG
### Separates all data from different LightRAG instances (deprecated)
# MONGODB_GRAPH=false
### Milvus Configuration
MILVUS_URI=http://localhost:19530
MILVUS_DB_NAME=lightrag
# MILVUS_URI=http://localhost:19530
# MILVUS_DB_NAME=lightrag
# MILVUS_USER=root
# MILVUS_PASSWORD=your_password
# MILVUS_TOKEN=your_token
### Qdrant
QDRANT_URL=http://localhost:16333
# QDRANT_URL=http://localhost:16333
# QDRANT_API_KEY=your-api-key
### Redis
REDIS_URI=redis://localhost:6379
# REDIS_URI=redis://localhost:6379

5
.gitignore vendored
View file

@ -15,8 +15,9 @@ rag_storage_test/
.venv/
env/
venv/
*.env*
.env_example
.env*
!.env.template
# Build / Distribution
dist/

View file

@ -83,42 +83,54 @@ env:
# Server configuration
HOST: "0.0.0.0"
PORT: "9621"
# Web UI configuration
WEBUI_TITLE: "Apolo Copilot - LightRAG"
WEBUI_TITLE: "LightRAG - Graph Knowledge Base"
WEBUI_DESCRIPTION: "Simple and Fast Graph Based RAG System for Apolo Documentation"
# LLM configuration (OpenRouter)
LLM_BINDING: "openai"
LLM_MODEL: "openai/o3-mini"
LLM_MODEL: "openai/gpt-4o"
LLM_BINDING_HOST: "https://openrouter.ai/api/v1"
# LLM_BINDING_API_KEY: Set via secret
# OPENAI_API_KEY: Set via secret
# Embedding configuration (Gemini)
EMBEDDING_BINDING: "openai"
EMBEDDING_MODEL: "gemini-embedding-001"
EMBEDDING_DIM: "3072"
EMBEDDING_BINDING_HOST: "https://generativelanguage.googleapis.com/v1beta/openai/"
# EMBEDDING_BINDING_API_KEY: Set via secret
# Storage configuration - Minimal setup
LIGHTRAG_KV_STORAGE: "PGKVStorage"
LIGHTRAG_VECTOR_STORAGE: "PGVectorStorage"
LIGHTRAG_DOC_STATUS_STORAGE: "PGDocStatusStorage"
LIGHTRAG_GRAPH_STORAGE: "NetworkXStorage" # Local storage, no external DB needed
LIGHTRAG_GRAPH_STORAGE: "NetworkXStorage" # Local storage, no external DB needed
# PostgreSQL connection (internal service)
POSTGRES_HOST: "{{ include \"lightrag-minimal.fullname\" . }}-postgresql"
POSTGRES_HOST: '{{ include "lightrag-minimal.fullname" . }}-postgresql'
POSTGRES_PORT: "5432"
POSTGRES_USER: "lightrag_user"
POSTGRES_DATABASE: "lightrag"
POSTGRES_WORKSPACE: "default"
# LightRAG Processing Configuration
ENABLE_LLM_CACHE: true
ENABLE_LLM_CACHE_FOR_EXTRACT: true
TIMEOUT: 240
TEMPERATURE: 0
MAX_ASYNC: 4
MAX_TOKENS: 32768
CHUNK_SIZE: 1200
CHUNK_OVERLAP_SIZE: 100
TOP_K: 60
# Secret configuration for API keys
secrets:
# Create a secret with your OpenAI API key
openaiApiKey: "" # Set this or create manually
openaiApiKey: "" # Set this or create manually
# Node selector and affinity
nodeSelector: {}
tolerations: []
@ -152,4 +164,4 @@ healthCheck:
initialDelaySeconds: 60
periodSeconds: 30
timeoutSeconds: 10
failureThreshold: 5
failureThreshold: 5