###########################################################################################
### LightRAG Configuration - Optimized for Books, Articles, and Podcast Transcripts
###
### FILL IN THE FOLLOWING PLACEHOLDERS:
### - YOUR_OPENAI_API_KEY_HERE (for GPT-5-mini)
### - YOUR_VOYAGE_API_KEY_HERE (for voyage-3-large embeddings)
### - YOUR_JINA_API_KEY_HERE (for Jina reranker)
### - Neo4j connection details (URI, username, password, database)
### - Postgres connection details (host, port, user, password, database)
###########################################################################################

###########################
### Server Configuration
###########################
HOST=0.0.0.0
PORT=9621
WEBUI_TITLE='My Graph KB'
WEBUI_DESCRIPTION="Knowledge Graph for Books, Articles & Podcasts"

### Worker Configuration
### Optimized for Dell T140 (6-core Xeon E-2226G, 32GB RAM)
WORKERS=3
TIMEOUT=180

### Directory Configuration
### Uncomment and set if you want custom paths (otherwise defaults to ./inputs and ./rag_storage)
# INPUT_DIR=/path/to/your/inputs
# WORKING_DIR=/path/to/your/working_dir

### Logging
LOG_LEVEL=INFO
# LOG_DIR=/path/to/logs

#####################################
### Authentication (Optional)
#####################################
### Uncomment if you want to secure your API
# LIGHTRAG_API_KEY=your-secure-api-key-here
# AUTH_ACCOUNTS='admin:admin123'
# TOKEN_SECRET=Your-Secret-Key-For-JWT
# TOKEN_EXPIRE_HOURS=48

######################################################################################
### Query Configuration
### Optimized for long-form content (books, articles, transcripts)
######################################################################################
ENABLE_LLM_CACHE=true

### Increase these for better context with long documents
TOP_K=50
CHUNK_TOP_K=25
MAX_ENTITY_TOKENS=8000
MAX_RELATION_TOKENS=10000
MAX_TOTAL_TOKENS=40000

### Use VECTOR method for better semantic matching
KG_CHUNK_PICK_METHOD=VECTOR
#########################################################
### Reranking Configuration - JINA AI
#########################################################
RERANK_BINDING=jina
RERANK_BY_DEFAULT=true
RERANK_MODEL=jina-reranker-v2-base-multilingual
RERANK_BINDING_HOST=https://api.jina.ai/v1/rerank
RERANK_BINDING_API_KEY=YOUR_JINA_API_KEY_HERE

### Keep all chunks for reranking (let Jina do the filtering)
MIN_RERANK_SCORE=0.0

########################################
### Document Processing Configuration
########################################
ENABLE_LLM_CACHE_FOR_EXTRACT=true
SUMMARY_LANGUAGE=English

### Chunk size optimized for long-form content
CHUNK_SIZE=1200
CHUNK_OVERLAP_SIZE=100

### Entity types optimized for mixed content (books, articles, podcasts)
ENTITY_TYPES='["Person", "Organization", "Location", "Event", "Concept", "Method", "Theory", "Technology", "Product", "Research", "Topic"]'

### Summary configuration
FORCE_LLM_SUMMARY_ON_MERGE=8
SUMMARY_MAX_TOKENS=1500
SUMMARY_LENGTH_RECOMMENDED=800
SUMMARY_CONTEXT_SIZE=15000

### Related chunks per entity/relation
RELATED_CHUNK_NUMBER=5

###############################
### Concurrency Configuration
### Optimized for Dell T140 (6-core Xeon E-2226G, 32GB RAM)
###############################
MAX_ASYNC=6
MAX_PARALLEL_INSERT=2
EMBEDDING_FUNC_MAX_ASYNC=12
EMBEDDING_BATCH_NUM=10

###########################################################################
### LLM Configuration - OpenAI GPT-5-mini
###########################################################################
LLM_BINDING=openai
LLM_MODEL=gpt-5-mini
LLM_BINDING_HOST=https://api.openai.com/v1
LLM_BINDING_API_KEY=YOUR_OPENAI_API_KEY_HERE
LLM_TIMEOUT=180

### GPT-5 configuration
OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
OPENAI_LLM_TEMPERATURE=0.7

#######################################################################################
### Embedding Configuration - Voyage AI (voyage-3-large)
### ⚠️ IMPORTANT: Do NOT change after processing your first file!
#######################################################################################
EMBEDDING_BINDING=openai
EMBEDDING_MODEL=voyage-3-large
EMBEDDING_DIM=2048
EMBEDDING_SEND_DIM=false
EMBEDDING_TOKEN_LIMIT=32000
EMBEDDING_BINDING_HOST=https://api.voyageai.com/v1
EMBEDDING_BINDING_API_KEY=YOUR_VOYAGE_API_KEY_HERE
EMBEDDING_TIMEOUT=60

####################################################################
### Workspace Configuration (Optional)
### Uncomment to isolate data between different LightRAG instances
####################################################################
# WORKSPACE=my_knowledge_base

############################
### Storage Configuration
############################
### Using Neo4j for graph + Postgres for everything else

### Graph Storage
LIGHTRAG_GRAPH_STORAGE=Neo4JStorage

### Vector, KV, and Doc Status Storage (All in Postgres)
LIGHTRAG_VECTOR_STORAGE=PGVectorStorage
LIGHTRAG_KV_STORAGE=PGKVStorage
LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage

###########################
### PostgreSQL Configuration
### FILL IN YOUR DETAILS:
### - POSTGRES_HOST: Docker container name (e.g., 'postgres') or IP address
### - POSTGRES_PORT: Usually 5432
### - POSTGRES_USER: Your postgres username
### - POSTGRES_PASSWORD: Your postgres password
### - POSTGRES_DATABASE: Database name (e.g., 'lightrag')
###########################
POSTGRES_HOST=YOUR_POSTGRES_HOST_HERE
POSTGRES_PORT=5432
POSTGRES_USER=YOUR_POSTGRES_USER_HERE
POSTGRES_PASSWORD='YOUR_POSTGRES_PASSWORD_HERE'
POSTGRES_DATABASE=YOUR_POSTGRES_DATABASE_HERE
POSTGRES_MAX_CONNECTIONS=20

### PostgreSQL Vector Storage - HNSW index (best for quality)
POSTGRES_VECTOR_INDEX_TYPE=HNSW
POSTGRES_HNSW_M=16
POSTGRES_HNSW_EF=200

### Connection retry configuration (good for Docker networking)
POSTGRES_CONNECTION_RETRIES=3
POSTGRES_CONNECTION_RETRY_BACKOFF=0.5
POSTGRES_CONNECTION_RETRY_BACKOFF_MAX=5.0
POSTGRES_POOL_CLOSE_TIMEOUT=5.0

###########################
### Neo4j Configuration
### FILL IN YOUR DETAILS:
### - NEO4J_URI: Connection string
###   Examples:
###     bolt://neo4j:7687 (Docker, no SSL)
###     neo4j://neo4j:7687 (Docker, no SSL)
###     neo4j+s://your-instance.databases.neo4j.io (Cloud with SSL)
### - NEO4J_USERNAME: Usually 'neo4j'
### - NEO4J_PASSWORD: Your neo4j password
### - NEO4J_DATABASE: Database name (usually 'neo4j')
###########################
NEO4J_URI=YOUR_NEO4J_URI_HERE
NEO4J_USERNAME=neo4j
NEO4J_PASSWORD='YOUR_NEO4J_PASSWORD_HERE'
NEO4J_DATABASE=neo4j

### Neo4j connection pool settings (optimized for your Dell T140)
NEO4J_MAX_CONNECTION_POOL_SIZE=75
NEO4J_CONNECTION_TIMEOUT=30
NEO4J_CONNECTION_ACQUISITION_TIMEOUT=30
NEO4J_MAX_TRANSACTION_RETRY_TIME=30
NEO4J_MAX_CONNECTION_LIFETIME=300
NEO4J_LIVENESS_CHECK_TIMEOUT=30
NEO4J_KEEP_ALIVE=true

###########################################################################################
### WORKER CONFIGURATION - OPTIMIZED FOR YOUR DELL T140
###########################################################################################
### Current settings optimized for:
### - CPU: Intel Xeon E-2226G (6 cores @ 3.40GHz)
### - RAM: 32GB (currently 65% utilized, ~11GB free)
### - Current load: 10-15% CPU usage
###
### Settings:
### WORKERS=3                   - 3 Gunicorn server processes (good for 6-core CPU)
### MAX_ASYNC=6                 - 6 concurrent LLM API requests (balanced for API limits)
### MAX_PARALLEL_INSERT=2       - 2 documents processed in parallel (prevents API overload)
### EMBEDDING_FUNC_MAX_ASYNC=12 - 12 concurrent embedding requests (embeddings are lighter)
###
### These settings utilize ~40-50% of your CPU during heavy processing, leaving plenty
### of headroom for your other Unraid services. API rate limits are the bottleneck, not
### your server hardware.
###
### If you experience OpenAI rate limit errors, reduce MAX_ASYNC to 4.
### If your server load stays very low (<20%), you can increase MAX_ASYNC to 8.
###########################################################################################

###########################################################################################
### QUICK START CHECKLIST
###########################################################################################
### 1. Fill in API keys:
###    - YOUR_OPENAI_API_KEY_HERE (for GPT-5-mini)
###    - YOUR_VOYAGE_API_KEY_HERE (for voyage-3-large embeddings)
###    - YOUR_JINA_API_KEY_HERE (for reranking)
###
### 2. Fill in Postgres details:
###    - POSTGRES_HOST (e.g., 'postgres' if Docker container name)
###    - POSTGRES_USER
###    - POSTGRES_PASSWORD
###    - POSTGRES_DATABASE (create a database named 'lightrag' or similar)
###
### 3. Fill in Neo4j details:
###    - NEO4J_URI (e.g., bolt://neo4j:7687 or neo4j://neo4j:7687)
###    - NEO4J_PASSWORD
###    - NEO4J_DATABASE (usually 'neo4j')
###
### 4. Verify your Postgres has pgvector extension enabled:
###    Run in psql: CREATE EXTENSION IF NOT EXISTS vector;
###
### 5. Start LightRAG and verify it connects to both databases
###
### 6. (Optional) Adjust WORKERS/MAX_ASYNC based on your server specs (see guide above)
###
### 7. (Optional) Enable authentication by uncommenting LIGHTRAG_API_KEY
###########################################################################################