MLO-340: Update lightrag settings (#10)

* Updates LLM and embedding configurations to use OpenRouter and Gemini

* Renames and significantly expands environment configuration template

Renames the environment example file to a standard hidden env template to align with common conventions.

Extensively updates and reorganizes configuration options, adding detailed setup for LLM, embedding, storage backends, PostgreSQL, and overall LightRAG processing parameters.

Comments out some legacy and optional configuration lines to streamline initial setup and clarify default recommended values.

Updates gitignore to exclude various env-related files to protect sensitive keys and improve environment management.

* Updates default config with improved LLM and processing settings

* Adds openai-compatible environment file to .gitignore

* Adds new environment files to ignore list
This commit is contained in:
Taddeus 2025-07-24 17:49:50 +03:00 committed by GitHub
parent e2a894ee5e
commit f494b96a7b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 114 additions and 51 deletions

View file

@ -1,12 +1,62 @@
### This is a sample file of .env
# ==============================================
# LIGHTRAG CONFIGURATION
# ==============================================
OPENAI_API_KEY=sk-your-openai-api-key-here
### Server Configuration
# Server Configuration
HOST=0.0.0.0
PORT=9621
WEBUI_TITLE='My Graph KB'
WEBUI_DESCRIPTION="Simple and Fast Graph Based RAG System"
OLLAMA_EMULATING_MODEL_TAG=latest
WEBUI_TITLE=LightRAG - Graph KB
WEBUI_DESCRIPTION=Graph-based RAG with PostgreSQL and pgvector
# LLM Configuration for LightRAG (OpenRouter)
LLM_BINDING=openai
LLM_MODEL=openai/o3-mini
LLM_BINDING_HOST=https://openrouter.ai/api/v1
LLM_BINDING_API_KEY=sk-your-openai-api-key-here
# Embedding Configuration for LightRAG (OpenAI direct)
# EMBEDDING_BINDING=openai
# EMBEDDING_MODEL=text-embedding-ada-002
# EMBEDDING_DIM=1536
# EMBEDDING_BINDING_HOST=https://api.openai.com/v1
# EMBEDDING_BINDING_API_KEY=sk-your-openai-api-key-here
EMBEDDING_BINDING=openai
EMBEDDING_MODEL=gemini-embedding-001
EMBEDDING_DIM=3072
EMBEDDING_BINDING_HOST=https://generativelanguage.googleapis.com/v1beta/openai/
EMBEDDING_BINDING_API_KEY=AI-your-gemini-api-key-here
# Storage Backend Configuration
LIGHTRAG_KV_STORAGE=PGKVStorage
LIGHTRAG_VECTOR_STORAGE=PGVectorStorage
LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage
LIGHTRAG_GRAPH_STORAGE=NetworkXStorage
# PostgreSQL Configuration
POSTGRES_HOST=postgres
POSTGRES_PORT=5432
POSTGRES_USER=lightrag_user
POSTGRES_PASSWORD=lightrag_pass
POSTGRES_DATABASE=lightrag
POSTGRES_MAX_CONNECTIONS=12
# LightRAG Processing Configuration
ENABLE_LLM_CACHE=true
ENABLE_LLM_CACHE_FOR_EXTRACT=true
TIMEOUT=240
TEMPERATURE=0
MAX_ASYNC=4
MAX_TOKENS=32768
CHUNK_SIZE=1200
CHUNK_OVERLAP_SIZE=100
TOP_K=60
### ====================================================================================
# OLLAMA_EMULATING_MODEL_TAG=latest
# WORKERS=2
# CORS_ORIGINS=http://localhost:3000,http://localhost:8080
@ -53,7 +103,7 @@ OLLAMA_EMULATING_MODEL_TAG=latest
### Entity and relation summarization configuration
### Language: English, Chinese, French, German ...
SUMMARY_LANGUAGE=English
# SUMMARY_LANGUAGE=English
### Number of duplicated entities/edges to trigger LLM re-summary on merge (at least 3 is recommended)
# FORCE_LLM_SUMMARY_ON_MERGE=6
### Max tokens for entity/relations description after merge
@ -66,34 +116,34 @@ SUMMARY_LANGUAGE=English
# CHUNK_OVERLAP_SIZE=100
### LLM Configuration
ENABLE_LLM_CACHE=true
ENABLE_LLM_CACHE_FOR_EXTRACT=true
# ENABLE_LLM_CACHE=true
# ENABLE_LLM_CACHE_FOR_EXTRACT=true
### Time out in seconds for LLM, None for infinite timeout
TIMEOUT=240
# TIMEOUT=240
### Some models like o1-mini require temperature to be set to 1
TEMPERATURE=0
# TEMPERATURE=0
### Max concurrency requests of LLM
MAX_ASYNC=4
# MAX_ASYNC=4
### MAX_TOKENS: max tokens send to LLM for entity relation summaries (less than context size of the model)
### MAX_TOKENS: set as num_ctx option for Ollama by API Server
MAX_TOKENS=32768
# MAX_TOKENS=32768
### LLM Binding type: openai, ollama, lollms, azure_openai
LLM_BINDING=openai
LLM_MODEL=gpt-4o
LLM_BINDING_HOST=https://api.openai.com/v1
LLM_BINDING_API_KEY=your_api_key
# LLM_BINDING=openai
# LLM_MODEL=gpt-4o
# LLM_BINDING_HOST=https://api.openai.com/v1
# LLM_BINDING_API_KEY=your_api_key
### Optional for Azure
# AZURE_OPENAI_API_VERSION=2024-08-01-preview
# AZURE_OPENAI_DEPLOYMENT=gpt-4o
### Embedding Configuration
### Embedding Binding type: openai, ollama, lollms, azure_openai
EMBEDDING_BINDING=ollama
EMBEDDING_MODEL=bge-m3:latest
EMBEDDING_DIM=1024
EMBEDDING_BINDING_API_KEY=your_api_key
# EMBEDDING_BINDING=ollama
# EMBEDDING_MODEL=bge-m3:latest
# EMBEDDING_DIM=1024
# EMBEDDING_BINDING_API_KEY=your_api_key
# If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
EMBEDDING_BINDING_HOST=http://localhost:11434
# EMBEDDING_BINDING_HOST=http://localhost:11434
### Num of chunks send to Embedding in single request
# EMBEDDING_BATCH_NUM=32
### Max concurrency requests for Embedding
@ -120,19 +170,19 @@ EMBEDDING_BINDING_HOST=http://localhost:11434
# TIDB_WORKSPACE=default
### PostgreSQL Configuration
POSTGRES_HOST=localhost
POSTGRES_PORT=5432
POSTGRES_USER=your_username
POSTGRES_PASSWORD='your_password'
POSTGRES_DATABASE=your_database
POSTGRES_MAX_CONNECTIONS=12
# POSTGRES_HOST=localhost
# POSTGRES_PORT=5432
# POSTGRES_USER=your_username
# POSTGRES_PASSWORD='your_password'
# POSTGRES_DATABASE=your_database
# POSTGRES_MAX_CONNECTIONS=12
### Separates all data from different LightRAG instances (deprecated)
# POSTGRES_WORKSPACE=default
### Neo4j Configuration
NEO4J_URI=neo4j+s://xxxxxxxx.databases.neo4j.io
NEO4J_USERNAME=neo4j
NEO4J_PASSWORD='your_password'
# NEO4J_URI=neo4j+s://xxxxxxxx.databases.neo4j.io
# NEO4J_USERNAME=neo4j
# NEO4J_PASSWORD='your_password'
### Independent AGE Configuration (not for AGE embedded in PostgreSQL)
# AGE_POSTGRES_DB=
@ -146,21 +196,21 @@ NEO4J_PASSWORD='your_password'
# AGE_GRAPH_NAME=lightrag
### MongoDB Configuration
MONGO_URI=mongodb://root:root@localhost:27017/
MONGO_DATABASE=LightRAG
# MONGO_URI=mongodb://root:root@localhost:27017/
# MONGO_DATABASE=LightRAG
### Separates all data from different LightRAG instances (deprecated)
# MONGODB_GRAPH=false
### Milvus Configuration
MILVUS_URI=http://localhost:19530
MILVUS_DB_NAME=lightrag
# MILVUS_URI=http://localhost:19530
# MILVUS_DB_NAME=lightrag
# MILVUS_USER=root
# MILVUS_PASSWORD=your_password
# MILVUS_TOKEN=your_token
### Qdrant
QDRANT_URL=http://localhost:16333
# QDRANT_URL=http://localhost:16333
# QDRANT_API_KEY=your-api-key
### Redis
REDIS_URI=redis://localhost:6379
# REDIS_URI=redis://localhost:6379

5
.gitignore vendored
View file

@ -15,8 +15,9 @@ rag_storage_test/
.venv/
env/
venv/
*.env*
.env_example
.env*
!.env.template
# Build / Distribution
dist/

View file

@ -83,42 +83,54 @@ env:
# Server configuration
HOST: "0.0.0.0"
PORT: "9621"
# Web UI configuration
WEBUI_TITLE: "Apolo Copilot - LightRAG"
WEBUI_TITLE: "LightRAG - Graph Knowledge Base"
WEBUI_DESCRIPTION: "Simple and Fast Graph Based RAG System for Apolo Documentation"
# LLM configuration (OpenRouter)
LLM_BINDING: "openai"
LLM_MODEL: "openai/o3-mini"
LLM_MODEL: "openai/gpt-4o"
LLM_BINDING_HOST: "https://openrouter.ai/api/v1"
# LLM_BINDING_API_KEY: Set via secret
# OPENAI_API_KEY: Set via secret
# Embedding configuration (Gemini)
EMBEDDING_BINDING: "openai"
EMBEDDING_MODEL: "gemini-embedding-001"
EMBEDDING_DIM: "3072"
EMBEDDING_BINDING_HOST: "https://generativelanguage.googleapis.com/v1beta/openai/"
# EMBEDDING_BINDING_API_KEY: Set via secret
# Storage configuration - Minimal setup
LIGHTRAG_KV_STORAGE: "PGKVStorage"
LIGHTRAG_VECTOR_STORAGE: "PGVectorStorage"
LIGHTRAG_DOC_STATUS_STORAGE: "PGDocStatusStorage"
LIGHTRAG_GRAPH_STORAGE: "NetworkXStorage" # Local storage, no external DB needed
LIGHTRAG_GRAPH_STORAGE: "NetworkXStorage" # Local storage, no external DB needed
# PostgreSQL connection (internal service)
POSTGRES_HOST: "{{ include \"lightrag-minimal.fullname\" . }}-postgresql"
POSTGRES_HOST: '{{ include "lightrag-minimal.fullname" . }}-postgresql'
POSTGRES_PORT: "5432"
POSTGRES_USER: "lightrag_user"
POSTGRES_DATABASE: "lightrag"
POSTGRES_WORKSPACE: "default"
# LightRAG Processing Configuration
ENABLE_LLM_CACHE: true
ENABLE_LLM_CACHE_FOR_EXTRACT: true
TIMEOUT: 240
TEMPERATURE: 0
MAX_ASYNC: 4
MAX_TOKENS: 32768
CHUNK_SIZE: 1200
CHUNK_OVERLAP_SIZE: 100
TOP_K: 60
# Secret configuration for API keys
secrets:
# Create a secret with your OpenAI API key
openaiApiKey: "" # Set this or create manually
openaiApiKey: "" # Set this or create manually
# Node selector and affinity
nodeSelector: {}
tolerations: []
@ -152,4 +164,4 @@ healthCheck:
initialDelaySeconds: 60
periodSeconds: 30
timeoutSeconds: 10
failureThreshold: 5
failureThreshold: 5