Move relationship ID sorting to before vector DB operations

• Remove verbose entity rebuild logging
• Sort IDs before vector DB updates
• Keep graph storage with original order
2025-10-28 19:13:48 +08:00 · 2025-10-28 19:13:48 +08:00 · 29c4a91dc3
commit 29c4a91dc3
parent c81a56a113
1 changed file with 7 additions and 16 deletions
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -677,14 +677,6 @@ async def rebuild_knowledge_from_chunks(
                        entity_chunks_storage=entity_chunks_storage,
                    )
                    rebuilt_entities_count += 1
                    status_message = (
                        f"Rebuild `{entity_name}` from {len(chunk_ids)} chunks"
                    )
                    logger.info(status_message)
                    if pipeline_status is not None and pipeline_status_lock is not None:
                        async with pipeline_status_lock:
                            pipeline_status["latest_message"] = status_message
                            pipeline_status["history_messages"].append(status_message)
                except Exception as e:
                    failed_entities_count += 1
                    status_message = f"Failed to rebuild `{entity_name}`: {e}"
@@ -1432,10 +1424,6 @@ async def _rebuild_single_relationship(
    else:
        truncation_info = ""
    # Sort src and tgt to ensure consistent ordering (smaller string first)
    if src > tgt:
        src, tgt = tgt, src
    # Update relationship in graph storage
    updated_relationship_data = {
        **current_relationship,
@@ -1510,6 +1498,9 @@ async def _rebuild_single_relationship(
    await knowledge_graph_inst.upsert_edge(src, tgt, updated_relationship_data)
    # Update relationship in vector database
    # Sort src and tgt to ensure consistent ordering (smaller string first)
    if src > tgt:
        src, tgt = tgt, src
    try:
        rel_vdb_id = compute_mdhash_id(src + tgt, prefix="rel-")
        rel_vdb_id_reverse = compute_mdhash_id(tgt + src, prefix="rel-")
@@ -2145,10 +2136,6 @@ async def _merge_edges_then_upsert(
    else:
        logger.debug(status_message)
    # Sort src_id and tgt_id to ensure consistent ordering (smaller string first)
    if src_id > tgt_id:
        src_id, tgt_id = tgt_id, src_id
    # 11. Update both graph and vector db
    for need_insert_id in [src_id, tgt_id]:
        if not (await knowledge_graph_inst.has_node(need_insert_id)):
@@ -2236,6 +2223,10 @@ async def _merge_edges_then_upsert(
        weight=weight,
    )
    # Sort src_id and tgt_id to ensure consistent ordering (smaller string first)
    if src_id > tgt_id:
        src_id, tgt_id = tgt_id, src_id
    if relationships_vdb is not None:
        rel_vdb_id = compute_mdhash_id(src_id + tgt_id, prefix="rel-")
        rel_vdb_id_reverse = compute_mdhash_id(tgt_id + src_id, prefix="rel-")