LightRAG/.env.example
Taddeus 2255b91210 Adds initial LightRAG app integration with schema and processors
Introduces the LightRAG Retrieval-Augmented Generation framework as an Apolo app, including input/output schemas, types, and processors.
Adds Helm chart value processing, environment and persistence configurations, and output service discovery for deployment.
Includes scripts for generating type schemas and testing support, along with CI and linting setup tailored for the new app.
Provides a documentation loader script to ingest markdown files into LightRAG with flexible referencing modes.

Relates to MLO-469
2025-11-03 13:59:52 +02:00

216 lines
6.4 KiB
Text

# ==============================================
# LIGHTRAG CONFIGURATION
# ==============================================
# Placeholder OpenAI key. Put the real value in your private .env only; never commit a real key.
OPENAI_API_KEY=sk-your-openai-api-key-here
# Server Configuration
# HOST=0.0.0.0 listens on all network interfaces; PORT is presumably the TCP port the server binds to.
HOST=0.0.0.0
PORT=9621
# Title and description text, presumably displayed by the web UI.
# NOTE(review): both values contain unquoted spaces. That is not safe if this file is ever
# sourced by a shell; confirm which loader reads it before adding or removing quotes.
WEBUI_TITLE=LightRAG - Graph KB
WEBUI_DESCRIPTION=Graph-based RAG with PostgreSQL and pgvector
# LLM Configuration for LightRAG (OpenRouter)
# Binding types listed in the reference section of this file: openai, ollama, lollms, azure_openai.
# Here the openai binding is pointed at OpenRouter's endpoint through LLM_BINDING_HOST.
LLM_BINDING=openai
LLM_MODEL=openai/o3-mini
LLM_BINDING_HOST=https://openrouter.ai/api/v1
# NOTE(review): the placeholder below is worded as an OpenAI key, but the host above is
# OpenRouter; presumably an OpenRouter key belongs here. Confirm.
LLM_BINDING_API_KEY=sk-your-openai-api-key-here
# Embedding Configuration for LightRAG
# Alternative (disabled): OpenAI direct. To use it, uncomment these five lines and comment out
# the active Gemini block that follows, so each key is assigned only once.
# EMBEDDING_BINDING=openai
# EMBEDDING_MODEL=text-embedding-ada-002
# EMBEDDING_DIM=1536
# EMBEDDING_BINDING_HOST=https://api.openai.com/v1
# EMBEDDING_BINDING_API_KEY=sk-your-openai-api-key-here
# Active: Gemini embedding model, used through the openai binding with EMBEDDING_BINDING_HOST
# set to Google's generativelanguage.googleapis.com ".../openai/" path.
# NOTE(review): EMBEDDING_DIM differs between the two blocks (1536 vs 3072). Presumably it must
# match the selected model's output size and any vectors already stored; confirm before switching.
EMBEDDING_BINDING=openai
EMBEDDING_MODEL=gemini-embedding-001
EMBEDDING_DIM=3072
EMBEDDING_BINDING_HOST=https://generativelanguage.googleapis.com/v1beta/openai/
EMBEDDING_BINDING_API_KEY=AI-your-gemini-api-key-here
# Storage Backend Configuration
# KV, vector and document-status data use the PG* backends (presumably configured by the
# POSTGRES_* keys in this file); the graph uses NetworkXStorage.
# NOTE(review): NetworkXStorage is not one of the PG* backends, so graph data is presumably
# not kept in PostgreSQL. Confirm where it persists in your deployment. The reference section
# of this file shows Neo4JStorage as another graph storage value.
LIGHTRAG_KV_STORAGE=PGKVStorage
LIGHTRAG_VECTOR_STORAGE=PGVectorStorage
LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage
LIGHTRAG_GRAPH_STORAGE=NetworkXStorage
# PostgreSQL Configuration
# NOTE(review): "postgres" looks like a container/service name; the reference section of this
# file uses localhost. Confirm the right host for your deployment.
POSTGRES_HOST=postgres
POSTGRES_PORT=5432
# Example credentials only. Replace the user and password in any real deployment.
POSTGRES_USER=lightrag_user
POSTGRES_PASSWORD=lightrag_pass
POSTGRES_DATABASE=lightrag
# Upper bound on database connections (presumably the client-side pool size; confirm).
POSTGRES_MAX_CONNECTIONS=12
# LightRAG Processing Configuration
# LLM cache switches; the second one applies to extraction
# (presumably entity/relation extraction; confirm).
ENABLE_LLM_CACHE=true
ENABLE_LLM_CACHE_FOR_EXTRACT=true
# LLM timeout in seconds.
TIMEOUT=240
# NOTE(review): the reference section of this file says some models (e.g. o1-mini) require
# temperature 1. Confirm that the configured LLM_MODEL accepts 0.
TEMPERATURE=0
# Maximum number of concurrent LLM requests.
MAX_ASYNC=4
# Max tokens sent to the LLM for entity/relation summaries; keep below the model's context size.
MAX_TOKENS=32768
# Chunk size for document splitting (500~1500 is recommended).
CHUNK_SIZE=1200
# Presumably the overlap between adjacent chunks; confirm.
CHUNK_OVERLAP_SIZE=100
# RAG query setting (listed under "Settings for RAG query" in the reference section).
TOP_K=60
### ====================================================================================
# OLLAMA_EMULATING_MODEL_TAG=latest
# WORKERS=2
# CORS_ORIGINS=http://localhost:3000,http://localhost:8080
### Login Configuration
# AUTH_ACCOUNTS='admin:admin123,user1:pass456'
# TOKEN_SECRET=Your-Key-For-LightRAG-API-Server
# TOKEN_EXPIRE_HOURS=48
# GUEST_TOKEN_EXPIRE_HOURS=24
# JWT_ALGORITHM=HS256
### API-Key to access LightRAG Server API
# LIGHTRAG_API_KEY=your-secure-api-key-here
# WHITELIST_PATHS=/health,/api/*
### Optional SSL Configuration
# SSL=true
# SSL_CERTFILE=/path/to/cert.pem
# SSL_KEYFILE=/path/to/key.pem
### Directory Configuration (defaults to current working directory)
### Should not be set when deploying with Docker (set by the Dockerfile instead of .env)
### Default value is ./inputs and ./rag_storage
# INPUT_DIR=<absolute_path_for_doc_input_dir>
# WORKING_DIR=<absolute_path_for_working_dir>
### Max nodes returned from graph retrieval
# MAX_GRAPH_NODES=1000
### Logging level
# LOG_LEVEL=INFO
# VERBOSE=False
# LOG_MAX_BYTES=10485760
# LOG_BACKUP_COUNT=5
### Logfile location (defaults to current working directory)
# LOG_DIR=/path/to/log/directory
### Settings for RAG query
# HISTORY_TURNS=3
# COSINE_THRESHOLD=0.2
# TOP_K=60
# MAX_TOKEN_TEXT_CHUNK=4000
# MAX_TOKEN_RELATION_DESC=4000
# MAX_TOKEN_ENTITY_DESC=4000
### Entity and relation summarization configuration
### Language: English, Chinese, French, German ...
# SUMMARY_LANGUAGE=English
### Number of duplicated entities/edges to trigger LLM re-summary on merge (at least 3 is recommended)
# FORCE_LLM_SUMMARY_ON_MERGE=6
### Max tokens for entity/relations description after merge
# MAX_TOKEN_SUMMARY=500
### Number of documents processed in parallel (less than MAX_ASYNC/2 is recommended)
# MAX_PARALLEL_INSERT=2
### Chunk size for document splitting, 500~1500 is recommended
# CHUNK_SIZE=1200
# CHUNK_OVERLAP_SIZE=100
### LLM Configuration
# ENABLE_LLM_CACHE=true
# ENABLE_LLM_CACHE_FOR_EXTRACT=true
### Timeout in seconds for LLM, None for infinite timeout
# TIMEOUT=240
### Some models like o1-mini require temperature to be set to 1
# TEMPERATURE=0
### Max concurrency requests of LLM
# MAX_ASYNC=4
### MAX_TOKENS: max tokens sent to LLM for entity relation summaries (less than context size of the model)
### MAX_TOKENS: set as num_ctx option for Ollama by API Server
# MAX_TOKENS=32768
### LLM Binding type: openai, ollama, lollms, azure_openai
# LLM_BINDING=openai
# LLM_MODEL=gpt-4o
# LLM_BINDING_HOST=https://api.openai.com/v1
# LLM_BINDING_API_KEY=your_api_key
### Optional for Azure
# AZURE_OPENAI_API_VERSION=2024-08-01-preview
# AZURE_OPENAI_DEPLOYMENT=gpt-4o
### Embedding Configuration
### Embedding Binding type: openai, ollama, lollms, azure_openai
# EMBEDDING_BINDING=ollama
# EMBEDDING_MODEL=bge-m3:latest
# EMBEDDING_DIM=1024
# EMBEDDING_BINDING_API_KEY=your_api_key
# If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
# EMBEDDING_BINDING_HOST=http://localhost:11434
### Number of chunks sent to Embedding in a single request
# EMBEDDING_BATCH_NUM=32
### Max concurrency requests for Embedding
# EMBEDDING_FUNC_MAX_ASYNC=16
### Maximum tokens sent to Embedding for each chunk (no longer in use?)
# MAX_EMBED_TOKENS=8192
### Optional for Azure
# AZURE_EMBEDDING_DEPLOYMENT=text-embedding-3-large
# AZURE_EMBEDDING_API_VERSION=2023-05-15
### Data storage selection
# LIGHTRAG_KV_STORAGE=PGKVStorage
# LIGHTRAG_VECTOR_STORAGE=PGVectorStorage
# LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage
# LIGHTRAG_GRAPH_STORAGE=Neo4JStorage
### TiDB Configuration (Deprecated)
# TIDB_HOST=localhost
# TIDB_PORT=4000
# TIDB_USER=your_username
# TIDB_PASSWORD='your_password'
# TIDB_DATABASE=your_database
### Separates data belonging to different LightRAG instances (being deprecated)
# TIDB_WORKSPACE=default
### PostgreSQL Configuration
# POSTGRES_HOST=localhost
# POSTGRES_PORT=5432
# POSTGRES_USER=your_username
# POSTGRES_PASSWORD='your_password'
# POSTGRES_DATABASE=your_database
# POSTGRES_MAX_CONNECTIONS=12
### Separates data belonging to different LightRAG instances (being deprecated)
# POSTGRES_WORKSPACE=default
### Neo4j Configuration
# NEO4J_URI=neo4j+s://xxxxxxxx.databases.neo4j.io
# NEO4J_USERNAME=neo4j
# NEO4J_PASSWORD='your_password'
### Independent AGE Configuration (not for AGE embedded in PostgreSQL)
# AGE_POSTGRES_DB=
# AGE_POSTGRES_USER=
# AGE_POSTGRES_PASSWORD=
# AGE_POSTGRES_HOST=
# AGE_POSTGRES_PORT=8529
# AGE Graph Name (applies to PostgreSQL and independent AGE)
### AGE_GRAPH_NAME is deprecated
# AGE_GRAPH_NAME=lightrag
### MongoDB Configuration
# MONGO_URI=mongodb://root:root@localhost:27017/
# MONGO_DATABASE=LightRAG
### Separates data belonging to different LightRAG instances (being deprecated)
# MONGODB_GRAPH=false
### Milvus Configuration
# MILVUS_URI=http://localhost:19530
# MILVUS_DB_NAME=lightrag
# MILVUS_USER=root
# MILVUS_PASSWORD=your_password
# MILVUS_TOKEN=your_token
### Qdrant
# QDRANT_URL=http://localhost:16333
# QDRANT_API_KEY=your-api-key
### Redis
# REDIS_URI=redis://localhost:6379