Add citation tracking and display system across backend and frontend components. Backend changes include citation.py for document attribution, enhanced query routes with citation metadata, improved prompt templates, and PostgreSQL schema updates. Frontend includes CitationMarker component, HoverCard UI, QuerySettings refinements, and ChatMessage enhancements for displaying document sources. Update dependencies and docker-compose test configuration for improved development workflow.
111 lines
3.9 KiB
Python
111 lines
3.9 KiB
Python
"""
Centralized configuration constants for LightRAG.

This module defines default values for configuration constants used across
different parts of the LightRAG system. Centralizing these values ensures
consistency and makes maintenance easier.
"""

# Default values for server settings
DEFAULT_WOKERS = 2
# Correctly spelled alias. The misspelled DEFAULT_WOKERS name is kept so that
# any existing import of it keeps working.
DEFAULT_WORKERS = DEFAULT_WOKERS
DEFAULT_MAX_GRAPH_NODES = 1000

# Default values for extraction settings
DEFAULT_SUMMARY_LANGUAGE = "English"  # Default language for document processing
DEFAULT_MAX_GLEANING = 1
DEFAULT_ENTITY_NAME_MAX_LENGTH = 256

# Number of description fragments to trigger LLM summary
DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE = 8
# Max description token size to trigger LLM summary
DEFAULT_SUMMARY_MAX_TOKENS = 1200
# Recommended LLM summary output length in tokens
DEFAULT_SUMMARY_LENGTH_RECOMMENDED = 600
# Maximum token size sent to LLM for summary
DEFAULT_SUMMARY_CONTEXT_SIZE = 12000
# Default entities to extract if ENTITY_TYPES is not specified in .env
DEFAULT_ENTITY_TYPES = [
    "Person",
    "Creature",
    "Organization",
    "Location",
    "Event",
    "Concept",
    "Method",
    "Content",
    "Data",
    "Artifact",
    "NaturalObject",
]

# Separator for the description, source_id and relation-key fields
# (cannot be changed after data has been inserted)
GRAPH_FIELD_SEP = "<SEP>"

# Query and retrieval configuration defaults
DEFAULT_TOP_K = 40
DEFAULT_CHUNK_TOP_K = 20
DEFAULT_MAX_ENTITY_TOKENS = 6000
DEFAULT_MAX_RELATION_TOKENS = 8000
DEFAULT_MAX_TOTAL_TOKENS = 30000
DEFAULT_COSINE_THRESHOLD = 0.40  # Balanced: 0.35 too permissive, 0.45 breaks local mode
DEFAULT_RELATED_CHUNK_NUMBER = 8  # Increased from 5 for better context coverage
DEFAULT_KG_CHUNK_PICK_METHOD = "VECTOR"

# TODO: Deprecated. All conversation_history messages are sent to the LLM.
DEFAULT_HISTORY_TURNS = 0

# Rerank configuration defaults
DEFAULT_MIN_RERANK_SCORE = 0.0
DEFAULT_RERANK_BINDING = "null"

# Default source ids limit in metadata for entity and relation
DEFAULT_MAX_SOURCE_IDS_PER_ENTITY = 300
DEFAULT_MAX_SOURCE_IDS_PER_RELATION = 300
### Control chunk_ids limitation method: KEEP, FIFO
###    FIFO: First in first out
###    KEEP: Keep oldest (less merge action and faster)
SOURCE_IDS_LIMIT_METHOD_KEEP = "KEEP"
SOURCE_IDS_LIMIT_METHOD_FIFO = "FIFO"
DEFAULT_SOURCE_IDS_LIMIT_METHOD = SOURCE_IDS_LIMIT_METHOD_FIFO
# Set of the accepted limit-method values defined above
VALID_SOURCE_IDS_LIMIT_METHODS = {
    SOURCE_IDS_LIMIT_METHOD_KEEP,
    SOURCE_IDS_LIMIT_METHOD_FIFO,
}
# Maximum number of file paths stored in entity/relation file_path field
# (for display only, does not affect query performance)
DEFAULT_MAX_FILE_PATHS = 100

# Field length of file_path in Milvus Schema for entity and relation (Should not be changed)
# file_path must store all file paths up to the DEFAULT_MAX_FILE_PATHS limit within the metadata.
DEFAULT_MAX_FILE_PATH_LENGTH = 32768
# Placeholder for more file paths in metadata for entity and relation (Should not be changed)
DEFAULT_FILE_PATH_MORE_PLACEHOLDER = "truncated"

# Default temperature for LLM
DEFAULT_TEMPERATURE = 1.0

# Async configuration defaults
DEFAULT_MAX_ASYNC = 4  # Default maximum async operations
DEFAULT_MAX_PARALLEL_INSERT = 2  # Default maximum parallel insert operations

# Embedding configuration defaults
DEFAULT_EMBEDDING_FUNC_MAX_ASYNC = 8  # Default max async for embedding functions
DEFAULT_EMBEDDING_BATCH_NUM = 10  # Default batch size for embedding computations

# Gunicorn worker timeout
DEFAULT_TIMEOUT = 300

# Default LLM and embedding timeouts
DEFAULT_LLM_TIMEOUT = 180
DEFAULT_EMBEDDING_TIMEOUT = 30

# Logging configuration defaults
DEFAULT_LOG_MAX_BYTES = 10 * 1024 * 1024  # Default 10MB (10485760 bytes)
DEFAULT_LOG_BACKUP_COUNT = 5  # Default 5 backups
DEFAULT_LOG_FILENAME = "lightrag.log"  # Default log filename

# Ollama server configuration defaults
DEFAULT_OLLAMA_MODEL_NAME = "lightrag"
DEFAULT_OLLAMA_MODEL_TAG = "latest"
DEFAULT_OLLAMA_MODEL_SIZE = 7365960935
DEFAULT_OLLAMA_CREATED_AT = "2024-01-15T00:00:00Z"
DEFAULT_OLLAMA_DIGEST = "sha256:lightrag"