Improve context item identification with meaningful IDs

- Add EN prefix to entity IDs
- Add RE prefix to relation IDs
- Add DC prefix to chunk IDs
- Enhance traceability across contexts
This commit is contained in:
yangdx 2025-09-21 20:19:14 +08:00
parent f88c2fbdff
commit 18e886d7e9
2 changed files with 12 additions and 5 deletions

View file

@ -2870,7 +2870,7 @@ async def _apply_token_truncation(
entities_context.append(
{
"id": i + 1,
"id": f"EN{i + 1}",
"entity": entity_name,
"type": entity.get("entity_type", "UNKNOWN"),
"description": entity.get("description", "UNKNOWN"),
@ -2898,7 +2898,7 @@ async def _apply_token_truncation(
relations_context.append(
{
"id": i + 1,
"id": f"RE{i + 1}",
"entity1": entity1,
"entity2": entity2,
"description": relation.get("description", "UNKNOWN"),
@ -3246,7 +3246,7 @@ async def _build_llm_context(
for i, chunk in enumerate(truncated_chunks):
text_units_context.append(
{
"id": i + 1,
"id": chunk["id"],
"content": chunk["content"],
"file_path": chunk.get("file_path", "unknown_source"),
}
@ -4216,7 +4216,7 @@ async def naive_query(
for i, chunk in enumerate(processed_chunks):
text_units_context.append(
{
"id": i + 1,
"id": chunk["id"],
"content": chunk["content"],
"file_path": chunk.get("file_path", "unknown_source"),
}

View file

@ -2455,7 +2455,14 @@ async def process_chunks_unified(
f"(chunk available tokens: {chunk_token_limit}, source: {source_type})"
)
return unique_chunks
# 5. add id field to each chunk
final_chunks = []
for i, chunk in enumerate(unique_chunks):
chunk_with_id = chunk.copy()
chunk_with_id["id"] = f"DC{i + 1}"
final_chunks.append(chunk_with_id)
return final_chunks
def build_file_path(already_file_paths, data_list, target):