Fix logging output in evaluation test harness and examples:
- Replace print() statements with logger calls in e2e_test_harness.py
- Update copy_llm_cache_to_another_storage.py to use logger instead of print
- Remove redundant logging configuration in copy_llm_cache_to_another_storage.py

Fix path handling and typos:
- Correct makedirs() call in lightrag_openai_demo.py to create log_dir directly
- Update constants.py comments to clarify SOURCE_IDS_LIMIT_METHOD options
- Remove duplicate return statement in utils.py normalize_extracted_info()
- Fix error string formatting in chroma_impl.py with !s conversion
- Remove unused pipmaster import from chroma_impl.py
111 lines
3.9 KiB
Python
111 lines
3.9 KiB
Python
"""
Centralized configuration constants for LightRAG.

This module defines default values for configuration constants used across
different parts of the LightRAG system. Centralizing these values ensures
consistency and makes maintenance easier.
"""

# Default values for server settings
# NOTE: "WOKERS" is a misspelling of "WORKERS". The original name is kept for
# backward compatibility and a correctly spelled alias is provided.
DEFAULT_WOKERS = 2
DEFAULT_WORKERS = DEFAULT_WOKERS
DEFAULT_MAX_GRAPH_NODES = 1000

# Default values for extraction settings
DEFAULT_SUMMARY_LANGUAGE = 'English'  # Default language for document processing
DEFAULT_MAX_GLEANING = 1
DEFAULT_ENTITY_NAME_MAX_LENGTH = 256

# Number of description fragments to trigger LLM summary
DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE = 8
# Max description token size to trigger LLM summary
DEFAULT_SUMMARY_MAX_TOKENS = 1200
# Recommended LLM summary output length in tokens
DEFAULT_SUMMARY_LENGTH_RECOMMENDED = 600
# Maximum token size sent to LLM for summary
DEFAULT_SUMMARY_CONTEXT_SIZE = 12000
# Default entities to extract if ENTITY_TYPES is not specified in .env
DEFAULT_ENTITY_TYPES = [
    'Person',
    'Creature',
    'Organization',
    'Location',
    'Event',
    'Concept',
    'Method',
    'Content',
    'Data',
    'Artifact',
    'NaturalObject',
]

# Separator for the description, source_id and relation-key fields
# (cannot be changed after data has been inserted)
GRAPH_FIELD_SEP = '<SEP>'

# Query and retrieval configuration defaults
DEFAULT_TOP_K = 40
DEFAULT_CHUNK_TOP_K = 20
DEFAULT_MAX_ENTITY_TOKENS = 6000
DEFAULT_MAX_RELATION_TOKENS = 8000
DEFAULT_MAX_TOTAL_TOKENS = 30000
DEFAULT_COSINE_THRESHOLD = 0.40  # Balanced: 0.35 too permissive, 0.45 breaks local mode
DEFAULT_RELATED_CHUNK_NUMBER = 8  # Increased from 5 for better context coverage
DEFAULT_KG_CHUNK_PICK_METHOD = 'VECTOR'

# TODO: Deprecated. All conversation_history messages are sent to the LLM.
DEFAULT_HISTORY_TURNS = 0

# Rerank configuration defaults
DEFAULT_MIN_RERANK_SCORE = 0.0
DEFAULT_RERANK_BINDING = 'null'

# Default source ids limit in metadata for entity and relation
DEFAULT_MAX_SOURCE_IDS_PER_ENTITY = 300
DEFAULT_MAX_SOURCE_IDS_PER_RELATION = 300
# Method used to limit stored chunk_ids: KEEP or FIFO
#   KEEP: keep oldest (fewer merge actions and faster)
#   FIFO: first in, first out
SOURCE_IDS_LIMIT_METHOD_KEEP = 'KEEP'
SOURCE_IDS_LIMIT_METHOD_FIFO = 'FIFO'
DEFAULT_SOURCE_IDS_LIMIT_METHOD = SOURCE_IDS_LIMIT_METHOD_FIFO
VALID_SOURCE_IDS_LIMIT_METHODS = {
    SOURCE_IDS_LIMIT_METHOD_KEEP,
    SOURCE_IDS_LIMIT_METHOD_FIFO,
}
# Maximum number of file paths stored in entity/relation file_path field
# (for display only; does not affect query performance)
DEFAULT_MAX_FILE_PATHS = 100

# Field length of file_path in Milvus Schema for entity and relation (should not be changed)
# file_path must store all file paths up to the DEFAULT_MAX_FILE_PATHS limit within the metadata.
DEFAULT_MAX_FILE_PATH_LENGTH = 32768
# Placeholder for more file paths in metadata for entity and relation (should not be changed)
DEFAULT_FILE_PATH_MORE_PLACEHOLDER = 'truncated'

# Default temperature for LLM
DEFAULT_TEMPERATURE = 1.0

# Async configuration defaults
DEFAULT_MAX_ASYNC = 4  # Default maximum async operations
DEFAULT_MAX_PARALLEL_INSERT = 2  # Default maximum parallel insert operations

# Embedding configuration defaults
DEFAULT_EMBEDDING_FUNC_MAX_ASYNC = 8  # Default max async for embedding functions
DEFAULT_EMBEDDING_BATCH_NUM = 10  # Default batch size for embedding computations

# Gunicorn worker timeout
DEFAULT_TIMEOUT = 300

# Default LLM and embedding timeouts
DEFAULT_LLM_TIMEOUT = 180
DEFAULT_EMBEDDING_TIMEOUT = 30

# Logging configuration defaults
DEFAULT_LOG_MAX_BYTES = 10 * 1024 * 1024  # Default 10MB (10485760 bytes)
DEFAULT_LOG_BACKUP_COUNT = 5  # Default 5 backups
DEFAULT_LOG_FILENAME = 'lightrag.log'  # Default log filename

# Ollama server configuration defaults
DEFAULT_OLLAMA_MODEL_NAME = 'lightrag'
DEFAULT_OLLAMA_MODEL_TAG = 'latest'
DEFAULT_OLLAMA_MODEL_SIZE = 7365960935
DEFAULT_OLLAMA_CREATED_AT = '2024-01-15T00:00:00Z'
DEFAULT_OLLAMA_DIGEST = 'sha256:lightrag'