Improve context item identification with meaningful IDs

- Add EN prefix to entity IDs
- Add RE prefix to relation IDs
- Add DC prefix to chunk IDs
- Enhance traceability across contexts
This commit is contained in:
yangdx 2025-09-21 20:19:14 +08:00
parent f88c2fbdff
commit 18e886d7e9
2 changed files with 12 additions and 5 deletions

View file

@ -2870,7 +2870,7 @@ async def _apply_token_truncation(
entities_context.append( entities_context.append(
{ {
"id": i + 1, "id": f"EN{i + 1}",
"entity": entity_name, "entity": entity_name,
"type": entity.get("entity_type", "UNKNOWN"), "type": entity.get("entity_type", "UNKNOWN"),
"description": entity.get("description", "UNKNOWN"), "description": entity.get("description", "UNKNOWN"),
@ -2898,7 +2898,7 @@ async def _apply_token_truncation(
relations_context.append( relations_context.append(
{ {
"id": i + 1, "id": f"RE{i + 1}",
"entity1": entity1, "entity1": entity1,
"entity2": entity2, "entity2": entity2,
"description": relation.get("description", "UNKNOWN"), "description": relation.get("description", "UNKNOWN"),
@ -3246,7 +3246,7 @@ async def _build_llm_context(
for i, chunk in enumerate(truncated_chunks): for i, chunk in enumerate(truncated_chunks):
text_units_context.append( text_units_context.append(
{ {
"id": i + 1, "id": chunk["id"],
"content": chunk["content"], "content": chunk["content"],
"file_path": chunk.get("file_path", "unknown_source"), "file_path": chunk.get("file_path", "unknown_source"),
} }
@ -4216,7 +4216,7 @@ async def naive_query(
for i, chunk in enumerate(processed_chunks): for i, chunk in enumerate(processed_chunks):
text_units_context.append( text_units_context.append(
{ {
"id": i + 1, "id": chunk["id"],
"content": chunk["content"], "content": chunk["content"],
"file_path": chunk.get("file_path", "unknown_source"), "file_path": chunk.get("file_path", "unknown_source"),
} }

View file

@ -2455,7 +2455,14 @@ async def process_chunks_unified(
f"(chunk available tokens: {chunk_token_limit}, source: {source_type})" f"(chunk available tokens: {chunk_token_limit}, source: {source_type})"
) )
return unique_chunks # 5. add id field to each chunk
final_chunks = []
for i, chunk in enumerate(unique_chunks):
chunk_with_id = chunk.copy()
chunk_with_id["id"] = f"DC{i + 1}"
final_chunks.append(chunk_with_id)
return final_chunks
def build_file_path(already_file_paths, data_list, target): def build_file_path(already_file_paths, data_list, target):