diff --git a/lightrag/operate.py b/lightrag/operate.py
index 1e2a9ddb..22e49066 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -2870,7 +2870,7 @@ async def _apply_token_truncation(
 
         entities_context.append(
             {
-                "id": i + 1,
+                "id": f"EN{i + 1}",
                 "entity": entity_name,
                 "type": entity.get("entity_type", "UNKNOWN"),
                 "description": entity.get("description", "UNKNOWN"),
@@ -2898,7 +2898,7 @@ async def _apply_token_truncation(
 
         relations_context.append(
             {
-                "id": i + 1,
+                "id": f"RE{i + 1}",
                 "entity1": entity1,
                 "entity2": entity2,
                 "description": relation.get("description", "UNKNOWN"),
@@ -3246,7 +3246,7 @@ async def _build_llm_context(
     for i, chunk in enumerate(truncated_chunks):
         text_units_context.append(
             {
-                "id": i + 1,
+                "id": chunk["id"],
                 "content": chunk["content"],
                 "file_path": chunk.get("file_path", "unknown_source"),
             }
@@ -4216,7 +4216,7 @@ async def naive_query(
     for i, chunk in enumerate(processed_chunks):
         text_units_context.append(
             {
-                "id": i + 1,
+                "id": chunk["id"],
                 "content": chunk["content"],
                 "file_path": chunk.get("file_path", "unknown_source"),
             }
diff --git a/lightrag/utils.py b/lightrag/utils.py
index 504a830a..7692cb2a 100644
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@@ -2455,7 +2455,14 @@ async def process_chunks_unified(
         f"(chunk available tokens: {chunk_token_limit}, source: {source_type})"
     )
 
-    return unique_chunks
+    # 5. add id field to each chunk
+    final_chunks = []
+    for i, chunk in enumerate(unique_chunks):
+        chunk_with_id = chunk.copy()
+        chunk_with_id["id"] = f"DC{i + 1}"
+        final_chunks.append(chunk_with_id)
+
+    return final_chunks
 
 
 def build_file_path(already_file_paths, data_list, target):
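
For context, the sketch below illustrates the ID scheme this patch introduces: entities and relations are numbered with EN/RE prefixes, while document chunks receive a DC id at dedup time in process_chunks_unified and carry that id into text_units_context instead of being re-enumerated. The sample data and the tag_chunks/build_contexts helpers are illustrative stand-ins, not part of the LightRAG API; only the id formats mirror the change above.

# Illustrative sketch of the prefixed-ID scheme; sample data and helper
# names are hypothetical, only the EN/RE/DC formats follow the patch.

def tag_chunks(unique_chunks: list[dict]) -> list[dict]:
    # Mirrors the new step 5 in process_chunks_unified: each deduplicated
    # chunk gets a "DC<n>" id that later flows into text_units_context.
    tagged = []
    for i, chunk in enumerate(unique_chunks):
        chunk_with_id = chunk.copy()
        chunk_with_id["id"] = f"DC{i + 1}"
        tagged.append(chunk_with_id)
    return tagged


def build_contexts(entities: list[dict], relations: list[dict], chunks: list[dict]):
    # Entities and relations are numbered with EN/RE prefixes instead of bare
    # integers, so the three context tables no longer share id values.
    entities_context = [
        {"id": f"EN{i + 1}", "entity": e["entity_name"]}
        for i, e in enumerate(entities)
    ]
    relations_context = [
        {"id": f"RE{i + 1}", "entity1": r["src"], "entity2": r["tgt"]}
        for i, r in enumerate(relations)
    ]
    # Chunks reuse the DC ids assigned at dedup time.
    text_units_context = [
        {"id": c["id"], "content": c["content"]} for c in tag_chunks(chunks)
    ]
    return entities_context, relations_context, text_units_context


if __name__ == "__main__":
    ents, rels, texts = build_contexts(
        [{"entity_name": "Alice"}, {"entity_name": "Bob"}],
        [{"src": "Alice", "tgt": "Bob"}],
        [{"content": "Alice met Bob."}],
    )
    print(ents)   # [{'id': 'EN1', ...}, {'id': 'EN2', ...}]
    print(rels)   # [{'id': 'RE1', ...}]
    print(texts)  # [{'id': 'DC1', ...}]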