From e0fd31a60d5e346b4cd9566d114789fab915fcbd Mon Sep 17 00:00:00 2001 From: yangdx Date: Mon, 20 Oct 2025 22:09:09 +0800 Subject: [PATCH] Fix logging message formatting --- env.example | 26 +++++++++++++------------- lightrag/constants.py | 6 +++--- lightrag/operate.py | 16 ++++++++++------ 3 files changed, 26 insertions(+), 22 deletions(-) diff --git a/env.example b/env.example index 3529cf58..73f2d7b7 100644 --- a/env.example +++ b/env.example @@ -74,19 +74,6 @@ ENABLE_LLM_CACHE=true ### control the maximum tokens send to LLM (include entities, relations and chunks) # MAX_TOTAL_TOKENS=30000 -### control the maximum chunk_ids stored in vector and graph db -# MAX_SOURCE_IDS_PER_ENTITY=300 -# MAX_SOURCE_IDS_PER_RELATION=300 -### control chunk_ids limitation method: KEEP, FIFO (KEPP: Ingore New Chunks, FIFO: New chunks replace old chunks) -# SOURCE_IDS_LIMIT_METHOD=KEEP -### Maximum number of file paths stored in entity/relation file_path field -# MAX_FILE_PATHS=30 - -### maximum number of related chunks per source entity or relation -### The chunk picker uses this value to determine the total number of chunks selected from KG(knowledge graph) -### Higher values increase re-ranking time -# RELATED_CHUNK_NUMBER=5 - ### chunk selection strategies ### VECTOR: Pick KG chunks by vector similarity, delivered chunks to the LLM aligning more closely with naive retrieval ### WEIGHT: Pick KG chunks by entity and chunk weight, delivered more solely KG related chunks to the LLM @@ -148,6 +135,19 @@ SUMMARY_LANGUAGE=English ### Maximum context size sent to LLM for description summary # SUMMARY_CONTEXT_SIZE=12000 +### control the maximum chunk_ids stored in vector and graph db +# MAX_SOURCE_IDS_PER_ENTITY=300 +# MAX_SOURCE_IDS_PER_RELATION=300 +### control chunk_ids limitation method: KEEP, FIFO (KEEP: Keep oldest, FIFO: First in first out) +# SOURCE_IDS_LIMIT_METHOD=KEEP +### Maximum number of file paths stored in entity/relation file_path field +# MAX_FILE_PATHS=30 + +### maximum number of related chunks per source entity or relation +### The chunk picker uses this value to determine the total number of chunks selected from KG(knowledge graph) +### Higher values increase re-ranking time +# RELATED_CHUNK_NUMBER=5 + ############################### ### Concurrency Configuration ############################### diff --git a/lightrag/constants.py b/lightrag/constants.py index 62ca1888..ad12cccf 100644 --- a/lightrag/constants.py +++ b/lightrag/constants.py @@ -60,14 +60,14 @@ DEFAULT_RERANK_BINDING = "null" # Default source ids limit in meta data for entity and relation DEFAULT_MAX_SOURCE_IDS_PER_ENTITY = 3 DEFAULT_MAX_SOURCE_IDS_PER_RELATION = 3 -SOURCE_IDS_LIMIT_METHOD_KEEP = "KEEP" -SOURCE_IDS_LIMIT_METHOD_FIFO = "FIFO" +SOURCE_IDS_LIMIT_METHOD_KEEP = "KEEP" # Keep oldest +SOURCE_IDS_LIMIT_METHOD_FIFO = "FIFO" # First In First Out (Keep newest) DEFAULT_SOURCE_IDS_LIMIT_METHOD = SOURCE_IDS_LIMIT_METHOD_KEEP VALID_SOURCE_IDS_LIMIT_METHODS = { SOURCE_IDS_LIMIT_METHOD_KEEP, SOURCE_IDS_LIMIT_METHOD_FIFO, } -# Default file_path limit in meta data for entity and relation +# Default file_path limit in meta data for entity and relation (Use same limit method as source_ids) DEFAULT_MAX_FILE_PATHS = 2 # Field length of file_path in Milvus Schema for entity and relation (Should not be changed) diff --git a/lightrag/operate.py b/lightrag/operate.py index 6b409f21..3e889eb7 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -1188,7 +1188,7 @@ async def _rebuild_single_entity( file_paths_list = file_paths_list[:max_file_paths] file_paths_list.append( - f"...{file_path_placeholder}(showing {max_file_paths} of {original_count})..." + f"...{file_path_placeholder}({limit_method} {max_file_paths}/{original_count})..." ) logger.info( f"Limited `{entity_name}`: file_path {original_count} -> {max_file_paths} ({limit_method})" @@ -1347,7 +1347,7 @@ async def _rebuild_single_relationship( file_paths_list = file_paths_list[:max_file_paths] file_paths_list.append( - f"...{file_path_placeholder}(showing {max_file_paths} of {original_count})..." + f"...{file_path_placeholder}({limit_method} {max_file_paths}/{original_count})..." ) logger.info( f"Limited `{src}`~`{tgt}`: file_path {original_count} -> {max_file_paths} ({limit_method})" @@ -1623,7 +1623,9 @@ async def _merge_nodes_then_upsert( truncation_info = f"{limit_method}:{len(source_ids)}/{len(full_source_ids)}" if dd_message or truncation_info: - status_message += f" ({', '.join([truncation_info, dd_message])})" + status_message += ( + f" ({', '.join(filter(None, [truncation_info, dd_message]))})" + ) if already_fragment > 0 or llm_was_used: logger.info(status_message) @@ -1692,7 +1694,7 @@ async def _merge_nodes_then_upsert( file_paths_list = file_paths_list[:max_file_paths] file_paths_list.append( - f"...{file_path_placeholder}(showing {max_file_paths} of {original_count})..." + f"...{file_path_placeholder}({limit_method} {max_file_paths}/{original_count})..." ) logger.info( f"Limited `{entity_name}`: file_path {original_count} -> {max_file_paths} ({limit_method})" @@ -1907,7 +1909,9 @@ async def _merge_edges_then_upsert( truncation_info = f"{limit_method}:{len(source_ids)}/{len(full_source_ids)}" if dd_message or truncation_info: - status_message += f" ({', '.join([truncation_info, dd_message])})" + status_message += ( + f" ({', '.join(filter(None, [truncation_info, dd_message]))})" + ) if already_fragment > 0 or llm_was_used: logger.info(status_message) @@ -1991,7 +1995,7 @@ async def _merge_edges_then_upsert( file_paths_list = file_paths_list[:max_file_paths] file_paths_list.append( - f"...{file_path_placeholder}(showing {max_file_paths} of {original_count})..." + f"...{file_path_placeholder}({limit_method} {max_file_paths}/{original_count})..." ) logger.info( f"Limited `{src_id}`~`{tgt_id}`: file_path {original_count} -> {max_file_paths} ({limit_method})"