From 3acb32f5479d7827f1477dd6b5a2857c02ce44c8 Mon Sep 17 00:00:00 2001 From: yangdx Date: Fri, 15 Aug 2025 02:19:01 +0800 Subject: [PATCH] Add comments explaining chunk deduplication behavior in query context --- lightrag/operate.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lightrag/operate.py b/lightrag/operate.py index ddd70872..1096f28d 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -2354,6 +2354,7 @@ async def _build_query_context( seen_edges.add(pair) # Get text chunks based on final filtered data + # To preserve the influence of entity order, entity-based chunks should not be deduplicated by vector_chunks if final_node_datas: entity_chunks = await _find_related_text_unit_from_entities( final_node_datas, @@ -2365,6 +2366,8 @@ async def _build_query_context( chunk_tracking=chunk_tracking, ) + # Find deduplicated chunks from edges + # Deduplication causes solely relation-based chunks to be prioritized and sent to the LLM when re-ranking is disabled if final_edge_datas: relation_chunks = await _find_related_text_unit_from_relations( final_edge_datas,