Rename function and variables for clarity in context building
- Rename _build_llm_context to _build_context_str
- Change text_units_context to chunks_context
- Move string building before early return
- Update log messages and comments
- Consistent variable naming throughout
This commit is contained in:
parent
9a8742da59
commit
ec2ea4fd3f
1 changed file with 23 additions and 23 deletions
|
|
@ -3832,7 +3832,7 @@ async def _merge_all_chunks(
|
||||||
return merged_chunks
|
return merged_chunks
|
||||||
|
|
||||||
|
|
||||||
async def _build_llm_context(
|
async def _build_context_str(
|
||||||
entities_context: list[dict],
|
entities_context: list[dict],
|
||||||
relations_context: list[dict],
|
relations_context: list[dict],
|
||||||
merged_chunks: list[dict],
|
merged_chunks: list[dict],
|
||||||
|
|
@ -3932,23 +3932,32 @@ async def _build_llm_context(
|
||||||
truncated_chunks
|
truncated_chunks
|
||||||
)
|
)
|
||||||
|
|
||||||
# Rebuild text_units_context with truncated chunks
|
# Rebuild chunks_context with truncated chunks
|
||||||
# The actual tokens may be slightly less than available_chunk_tokens due to deduplication logic
|
# The actual tokens may be slightly less than available_chunk_tokens due to deduplication logic
|
||||||
text_units_context = []
|
chunks_context = []
|
||||||
for i, chunk in enumerate(truncated_chunks):
|
for i, chunk in enumerate(truncated_chunks):
|
||||||
text_units_context.append(
|
chunks_context.append(
|
||||||
{
|
{
|
||||||
"reference_id": chunk["reference_id"],
|
"reference_id": chunk["reference_id"],
|
||||||
"content": chunk["content"],
|
"content": chunk["content"],
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
text_units_str = "\n".join(
|
||||||
|
json.dumps(text_unit, ensure_ascii=False) for text_unit in chunks_context
|
||||||
|
)
|
||||||
|
reference_list_str = "\n".join(
|
||||||
|
f"[{ref['reference_id']}] {ref['file_path']}"
|
||||||
|
for ref in reference_list
|
||||||
|
if ref["reference_id"]
|
||||||
|
)
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Final context: {len(entities_context)} entities, {len(relations_context)} relations, {len(text_units_context)} chunks"
|
f"Final context: {len(entities_context)} entities, {len(relations_context)} relations, {len(chunks_context)} chunks"
|
||||||
)
|
)
|
||||||
|
|
||||||
# not necessary to use LLM to generate a response
|
# not necessary to use LLM to generate a response
|
||||||
if not entities_context and not relations_context:
|
if not entities_context and not relations_context and not chunks_context:
|
||||||
# Return empty raw data structure when no entities/relations
|
# Return empty raw data structure when no entities/relations
|
||||||
empty_raw_data = convert_to_user_format(
|
empty_raw_data = convert_to_user_format(
|
||||||
[],
|
[],
|
||||||
|
|
@ -3979,15 +3988,6 @@ async def _build_llm_context(
|
||||||
if chunk_tracking_log:
|
if chunk_tracking_log:
|
||||||
logger.info(f"Final chunks S+F/O: {' '.join(chunk_tracking_log)}")
|
logger.info(f"Final chunks S+F/O: {' '.join(chunk_tracking_log)}")
|
||||||
|
|
||||||
text_units_str = "\n".join(
|
|
||||||
json.dumps(text_unit, ensure_ascii=False) for text_unit in text_units_context
|
|
||||||
)
|
|
||||||
reference_list_str = "\n".join(
|
|
||||||
f"[{ref['reference_id']}] {ref['file_path']}"
|
|
||||||
for ref in reference_list
|
|
||||||
if ref["reference_id"]
|
|
||||||
)
|
|
||||||
|
|
||||||
result = kg_context_template.format(
|
result = kg_context_template.format(
|
||||||
entities_str=entities_str,
|
entities_str=entities_str,
|
||||||
relations_str=relations_str,
|
relations_str=relations_str,
|
||||||
|
|
@ -3997,7 +3997,7 @@ async def _build_llm_context(
|
||||||
|
|
||||||
# Always return both context and complete data structure (unified approach)
|
# Always return both context and complete data structure (unified approach)
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f"[_build_llm_context] Converting to user format: {len(entities_context)} entities, {len(relations_context)} relations, {len(truncated_chunks)} chunks"
|
f"[_build_context_str] Converting to user format: {len(entities_context)} entities, {len(relations_context)} relations, {len(truncated_chunks)} chunks"
|
||||||
)
|
)
|
||||||
final_data = convert_to_user_format(
|
final_data = convert_to_user_format(
|
||||||
entities_context,
|
entities_context,
|
||||||
|
|
@ -4009,7 +4009,7 @@ async def _build_llm_context(
|
||||||
relation_id_to_original,
|
relation_id_to_original,
|
||||||
)
|
)
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f"[_build_llm_context] Final data after conversion: {len(final_data.get('entities', []))} entities, {len(final_data.get('relationships', []))} relationships, {len(final_data.get('chunks', []))} chunks"
|
f"[_build_context_str] Final data after conversion: {len(final_data.get('entities', []))} entities, {len(final_data.get('relationships', []))} relationships, {len(final_data.get('chunks', []))} chunks"
|
||||||
)
|
)
|
||||||
return result, final_data
|
return result, final_data
|
||||||
|
|
||||||
|
|
@ -4086,8 +4086,8 @@ async def _build_query_context(
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Stage 4: Build final LLM context with dynamic token processing
|
# Stage 4: Build final LLM context with dynamic token processing
|
||||||
# _build_llm_context now always returns tuple[str, dict]
|
# _build_context_str now always returns tuple[str, dict]
|
||||||
context, raw_data = await _build_llm_context(
|
context, raw_data = await _build_context_str(
|
||||||
entities_context=truncation_result["entities_context"],
|
entities_context=truncation_result["entities_context"],
|
||||||
relations_context=truncation_result["relations_context"],
|
relations_context=truncation_result["relations_context"],
|
||||||
merged_chunks=merged_chunks,
|
merged_chunks=merged_chunks,
|
||||||
|
|
@ -4860,10 +4860,10 @@ async def naive_query(
|
||||||
"final_chunks_count": len(processed_chunks_with_ref_ids),
|
"final_chunks_count": len(processed_chunks_with_ref_ids),
|
||||||
}
|
}
|
||||||
|
|
||||||
# Build text_units_context from processed chunks with reference IDs
|
# Build chunks_context from processed chunks with reference IDs
|
||||||
text_units_context = []
|
chunks_context = []
|
||||||
for i, chunk in enumerate(processed_chunks_with_ref_ids):
|
for i, chunk in enumerate(processed_chunks_with_ref_ids):
|
||||||
text_units_context.append(
|
chunks_context.append(
|
||||||
{
|
{
|
||||||
"reference_id": chunk["reference_id"],
|
"reference_id": chunk["reference_id"],
|
||||||
"content": chunk["content"],
|
"content": chunk["content"],
|
||||||
|
|
@ -4871,7 +4871,7 @@ async def naive_query(
|
||||||
)
|
)
|
||||||
|
|
||||||
text_units_str = "\n".join(
|
text_units_str = "\n".join(
|
||||||
json.dumps(text_unit, ensure_ascii=False) for text_unit in text_units_context
|
json.dumps(text_unit, ensure_ascii=False) for text_unit in chunks_context
|
||||||
)
|
)
|
||||||
reference_list_str = "\n".join(
|
reference_list_str = "\n".join(
|
||||||
f"[{ref['reference_id']}] {ref['file_path']}"
|
f"[{ref['reference_id']}] {ref['file_path']}"
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue