From ec2ea4fd3ffbabf37b5cfbec273eaa98f87e596a Mon Sep 17 00:00:00 2001
From: yangdx
Date: Sat, 1 Nov 2025 12:15:24 +0800
Subject: [PATCH] Rename function and variables for clarity in context building

- Rename _build_llm_context to _build_context_str
- Change text_units_context to chunks_context
- Move string building before early return
- Update log messages and comments
- Consistent variable naming throughout
---
 lightrag/operate.py | 46 +++++++++++++++++++++++-----------------------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/lightrag/operate.py b/lightrag/operate.py
index 3354a2ae..5018ae0d 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -3832,7 +3832,7 @@ async def _merge_all_chunks(
     return merged_chunks
 
 
-async def _build_llm_context(
+async def _build_context_str(
     entities_context: list[dict],
     relations_context: list[dict],
     merged_chunks: list[dict],
@@ -3932,23 +3932,32 @@ async def _build_llm_context(
         truncated_chunks
     )
 
-    # Rebuild text_units_context with truncated chunks
+    # Rebuild chunks_context with truncated chunks
     # The actual tokens may be slightly less than available_chunk_tokens due to deduplication logic
-    text_units_context = []
+    chunks_context = []
     for i, chunk in enumerate(truncated_chunks):
-        text_units_context.append(
+        chunks_context.append(
             {
                 "reference_id": chunk["reference_id"],
                 "content": chunk["content"],
             }
         )
 
+    text_units_str = "\n".join(
+        json.dumps(text_unit, ensure_ascii=False) for text_unit in chunks_context
+    )
+    reference_list_str = "\n".join(
+        f"[{ref['reference_id']}] {ref['file_path']}"
+        for ref in reference_list
+        if ref["reference_id"]
+    )
+
     logger.info(
-        f"Final context: {len(entities_context)} entities, {len(relations_context)} relations, {len(text_units_context)} chunks"
+        f"Final context: {len(entities_context)} entities, {len(relations_context)} relations, {len(chunks_context)} chunks"
     )
 
     # not necessary to use LLM to generate a response
-    if not entities_context and not relations_context:
+    if not entities_context and not relations_context and not chunks_context:
         # Return empty raw data structure when no entities/relations
         empty_raw_data = convert_to_user_format(
             [],
@@ -3979,15 +3988,6 @@
     if chunk_tracking_log:
         logger.info(f"Final chunks S+F/O: {' '.join(chunk_tracking_log)}")
 
-    text_units_str = "\n".join(
-        json.dumps(text_unit, ensure_ascii=False) for text_unit in text_units_context
-    )
-    reference_list_str = "\n".join(
-        f"[{ref['reference_id']}] {ref['file_path']}"
-        for ref in reference_list
-        if ref["reference_id"]
-    )
-
     result = kg_context_template.format(
         entities_str=entities_str,
         relations_str=relations_str,
@@ -3997,7 +3997,7 @@
 
     # Always return both context and complete data structure (unified approach)
     logger.debug(
-        f"[_build_llm_context] Converting to user format: {len(entities_context)} entities, {len(relations_context)} relations, {len(truncated_chunks)} chunks"
+        f"[_build_context_str] Converting to user format: {len(entities_context)} entities, {len(relations_context)} relations, {len(truncated_chunks)} chunks"
     )
     final_data = convert_to_user_format(
         entities_context,
@@ -4009,7 +4009,7 @@
         relation_id_to_original,
     )
     logger.debug(
-        f"[_build_llm_context] Final data after conversion: {len(final_data.get('entities', []))} entities, {len(final_data.get('relationships', []))} relationships, {len(final_data.get('chunks', []))} chunks"
+        f"[_build_context_str] Final data after conversion: {len(final_data.get('entities', []))} entities, {len(final_data.get('relationships', []))} relationships, {len(final_data.get('chunks', []))} chunks"
     )
 
     return result, final_data
@@ -4086,8 +4086,8 @@ async def _build_query_context(
         return None
 
     # Stage 4: Build final LLM context with dynamic token processing
-    # _build_llm_context now always returns tuple[str, dict]
-    context, raw_data = await _build_llm_context(
+    # _build_context_str now always returns tuple[str, dict]
+    context, raw_data = await _build_context_str(
         entities_context=truncation_result["entities_context"],
         relations_context=truncation_result["relations_context"],
         merged_chunks=merged_chunks,
@@ -4860,10 +4860,10 @@ async def naive_query(
         "final_chunks_count": len(processed_chunks_with_ref_ids),
     }
 
-    # Build text_units_context from processed chunks with reference IDs
-    text_units_context = []
+    # Build chunks_context from processed chunks with reference IDs
+    chunks_context = []
     for i, chunk in enumerate(processed_chunks_with_ref_ids):
-        text_units_context.append(
+        chunks_context.append(
             {
                 "reference_id": chunk["reference_id"],
                 "content": chunk["content"],
             }
         )
 
     text_units_str = "\n".join(
-        json.dumps(text_unit, ensure_ascii=False) for text_unit in text_units_context
+        json.dumps(text_unit, ensure_ascii=False) for text_unit in chunks_context
     )
     reference_list_str = "\n".join(
         f"[{ref['reference_id']}] {ref['file_path']}"