diff --git a/lightrag/operate.py b/lightrag/operate.py index 22e49066..fb46610f 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -2956,19 +2956,26 @@ async def _apply_token_truncation( filtered_entities = [] filtered_entity_id_to_original = {} if entities_context: - final_entity_names = {e["entity"] for e in entities_context} + entity_name_to_id = {e["entity"]: e["id"] for e in entities_context} + final_entity_names = set(entity_name_to_id.keys()) seen_nodes = set() for entity in final_entities: name = entity.get("entity_name") if name in final_entity_names and name not in seen_nodes: - filtered_entities.append(entity) - filtered_entity_id_to_original[name] = entity + entity_with_id = entity.copy() + entity_with_id["id"] = entity_name_to_id.get(name) + + filtered_entities.append(entity_with_id) + filtered_entity_id_to_original[name] = entity_with_id seen_nodes.add(name) filtered_relations = [] filtered_relation_id_to_original = {} if relations_context: - final_relation_pairs = {(r["entity1"], r["entity2"]) for r in relations_context} + relation_pair_to_id = { + (r["entity1"], r["entity2"]): r["id"] for r in relations_context + } + final_relation_pairs = set(relation_pair_to_id.keys()) seen_edges = set() for relation in final_relations: src, tgt = relation.get("src_id"), relation.get("tgt_id") @@ -2977,8 +2984,11 @@ async def _apply_token_truncation( pair = (src, tgt) if pair in final_relation_pairs and pair not in seen_edges: - filtered_relations.append(relation) - filtered_relation_id_to_original[pair] = relation + relation_with_id = relation.copy() + relation_with_id["id"] = relation_pair_to_id.get(pair) + + filtered_relations.append(relation_with_id) + filtered_relation_id_to_original[pair] = relation_with_id seen_edges.add(pair) return { diff --git a/lightrag/utils.py b/lightrag/utils.py index 7692cb2a..43b8c0a3 100644 --- a/lightrag/utils.py +++ b/lightrag/utils.py @@ -2744,6 +2744,7 @@ def _convert_to_user_format( # Use original database data formatted_entities.append( { + "id": original_entity.get("id", "unknown"), "entity_name": original_entity.get("entity_name", entity_name), "entity_type": original_entity.get("entity_type", "UNKNOWN"), "description": original_entity.get("description", ""), @@ -2756,6 +2757,7 @@ def _convert_to_user_format( # Fallback to LLM context data (for backward compatibility) formatted_entities.append( { + "id": entity.get("id", "unknown"), "entity_name": entity_name, "entity_type": entity.get("type", "UNKNOWN"), "description": entity.get("description", ""), @@ -2781,6 +2783,7 @@ def _convert_to_user_format( # Use original database data formatted_relationships.append( { + "id": original_relation.get("id", "unknown"), "src_id": original_relation.get("src_id", entity1), "tgt_id": original_relation.get("tgt_id", entity2), "description": original_relation.get("description", ""), @@ -2795,6 +2798,7 @@ def _convert_to_user_format( # Fallback to LLM context data (for backward compatibility) formatted_relationships.append( { + "id": relation.get("id", "unknown"), "src_id": entity1, "tgt_id": entity2, "description": relation.get("description", ""), @@ -2810,6 +2814,7 @@ def _convert_to_user_format( formatted_chunks = [] for i, chunk in enumerate(final_chunks): chunk_data = { + "id": chunk.get("id", "unknown"), "content": chunk.get("content", ""), "file_path": chunk.get("file_path", "unknown_source"), "chunk_id": chunk.get("chunk_id", ""),