From 29c4a91dc39b4fef49132e1a9849225d1cf4b81d Mon Sep 17 00:00:00 2001 From: yangdx Date: Tue, 28 Oct 2025 19:13:48 +0800 Subject: [PATCH] Move relationship ID sorting to before vector DB operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit • Remove verbose entity rebuild logging • Sort IDs before vector DB updates • Keep graph storage with original order --- lightrag/operate.py | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index 7363ab7a..9a66f309 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -677,14 +677,6 @@ async def rebuild_knowledge_from_chunks( entity_chunks_storage=entity_chunks_storage, ) rebuilt_entities_count += 1 - status_message = ( - f"Rebuild `{entity_name}` from {len(chunk_ids)} chunks" - ) - logger.info(status_message) - if pipeline_status is not None and pipeline_status_lock is not None: - async with pipeline_status_lock: - pipeline_status["latest_message"] = status_message - pipeline_status["history_messages"].append(status_message) except Exception as e: failed_entities_count += 1 status_message = f"Failed to rebuild `{entity_name}`: {e}" @@ -1432,10 +1424,6 @@ async def _rebuild_single_relationship( else: truncation_info = "" - # Sort src and tgt to ensure consistent ordering (smaller string first) - if src > tgt: - src, tgt = tgt, src - # Update relationship in graph storage updated_relationship_data = { **current_relationship, @@ -1510,6 +1498,9 @@ async def _rebuild_single_relationship( await knowledge_graph_inst.upsert_edge(src, tgt, updated_relationship_data) # Update relationship in vector database + # Sort src and tgt to ensure consistent ordering (smaller string first) + if src > tgt: + src, tgt = tgt, src try: rel_vdb_id = compute_mdhash_id(src + tgt, prefix="rel-") rel_vdb_id_reverse = compute_mdhash_id(tgt + src, prefix="rel-") @@ -2145,10 +2136,6 @@ async def _merge_edges_then_upsert( else: logger.debug(status_message) - # Sort src_id and tgt_id to ensure consistent ordering (smaller string first) - if src_id > tgt_id: - src_id, tgt_id = tgt_id, src_id - # 11. Update both graph and vector db for need_insert_id in [src_id, tgt_id]: if not (await knowledge_graph_inst.has_node(need_insert_id)): @@ -2236,6 +2223,10 @@ async def _merge_edges_then_upsert( weight=weight, ) + # Sort src_id and tgt_id to ensure consistent ordering (smaller string first) + if src_id > tgt_id: + src_id, tgt_id = tgt_id, src_id + if relationships_vdb is not None: rel_vdb_id = compute_mdhash_id(src_id + tgt_id, prefix="rel-") rel_vdb_id_reverse = compute_mdhash_id(tgt_id + src_id, prefix="rel-")