Refactor entity edit and merge functions to support merge-on-rename

• Extract internal implementation helpers • Add allow_merge parameter to aedit_entity • Support merging when renaming to existing name • Improve code reusability and modularity • Maintain backward compatibility
2025-10-27 14:23:51 +08:00 · 2025-10-27 14:23:51 +08:00 · 11a1631d76
commit 11a1631d76
parent 411e92e6b9
1 changed files with 683 additions and 650 deletions
--- a/lightrag/utils_graph.py
+++ b/lightrag/utils_graph.py
@ -246,6 +246,272 @@ async def adelete_by_relation(
            )


+async def _edit_entity_impl(
+    chunk_entity_relation_graph,
+    entities_vdb,
+    relationships_vdb,
+    entity_name: str,
+    updated_data: dict[str, str],
+    *,
+    entity_chunks_storage=None,
+    relation_chunks_storage=None,
+) -> dict[str, Any]:
+    """Internal helper that edits an entity without acquiring storage locks.
+
+    This function performs the actual entity edit operations without lock management.
+    It should only be called by public APIs that have already acquired necessary locks.
+
+    Args:
+        chunk_entity_relation_graph: Graph storage instance
+        entities_vdb: Vector database storage for entities
+        relationships_vdb: Vector database storage for relationships
+        entity_name: Name of the entity to edit
+        updated_data: Dictionary containing updated attributes (including optional entity_name for renaming)
+        entity_chunks_storage: Optional KV storage for tracking chunks
+        relation_chunks_storage: Optional KV storage for tracking relation chunks
+
+    Returns:
+        Dictionary containing updated entity information
+
+    Note:
+        Caller must acquire appropriate locks before calling this function.
+        If renaming (entity_name in updated_data), this function will check if the new name exists.
+    """
+    new_entity_name = updated_data.get("entity_name", entity_name)
+    is_renaming = new_entity_name != entity_name
+
+    original_entity_name = entity_name
+
+    node_exists = await chunk_entity_relation_graph.has_node(entity_name)
+    if not node_exists:
+        raise ValueError(f"Entity '{entity_name}' does not exist")
+    node_data = await chunk_entity_relation_graph.get_node(entity_name)
+
+    if is_renaming:
+        existing_node = await chunk_entity_relation_graph.has_node(new_entity_name)
+        if existing_node:
+            raise ValueError(
+                f"Entity name '{new_entity_name}' already exists, cannot rename"
+            )
+
+    new_node_data = {**node_data, **updated_data}
+    new_node_data["entity_id"] = new_entity_name
+
+    if "entity_name" in new_node_data:
+        del new_node_data[
+            "entity_name"
+        ]  # Node data should not contain entity_name field
+
+    if is_renaming:
+        logger.info(f"Entity Edit: renaming `{entity_name}` to `{new_entity_name}`")
+
+        await chunk_entity_relation_graph.upsert_node(new_entity_name, new_node_data)
+
+        relations_to_update = []
+        relations_to_delete = []
+        edges = await chunk_entity_relation_graph.get_node_edges(entity_name)
+        if edges:
+            for source, target in edges:
+                edge_data = await chunk_entity_relation_graph.get_edge(source, target)
+                if edge_data:
+                    relations_to_delete.append(
+                        compute_mdhash_id(source + target, prefix="rel-")
+                    )
+                    relations_to_delete.append(
+                        compute_mdhash_id(target + source, prefix="rel-")
+                    )
+                    if source == entity_name:
+                        await chunk_entity_relation_graph.upsert_edge(
+                            new_entity_name, target, edge_data
+                        )
+                        relations_to_update.append((new_entity_name, target, edge_data))
+                    else:  # target == entity_name
+                        await chunk_entity_relation_graph.upsert_edge(
+                            source, new_entity_name, edge_data
+                        )
+                        relations_to_update.append((source, new_entity_name, edge_data))
+
+        await chunk_entity_relation_graph.delete_node(entity_name)
+
+        old_entity_id = compute_mdhash_id(entity_name, prefix="ent-")
+        await entities_vdb.delete([old_entity_id])
+
+        await relationships_vdb.delete(relations_to_delete)
+
+        for src, tgt, edge_data in relations_to_update:
+            normalized_src, normalized_tgt = sorted([src, tgt])
+
+            description = edge_data.get("description", "")
+            keywords = edge_data.get("keywords", "")
+            source_id = edge_data.get("source_id", "")
+            weight = float(edge_data.get("weight", 1.0))
+
+            content = f"{normalized_src}\t{normalized_tgt}\n{keywords}\n{description}"
+
+            relation_id = compute_mdhash_id(
+                normalized_src + normalized_tgt, prefix="rel-"
+            )
+
+            relation_data = {
+                relation_id: {
+                    "content": content,
+                    "src_id": normalized_src,
+                    "tgt_id": normalized_tgt,
+                    "source_id": source_id,
+                    "description": description,
+                    "keywords": keywords,
+                    "weight": weight,
+                }
+            }
+
+            await relationships_vdb.upsert(relation_data)
+
+        entity_name = new_entity_name
+    else:
+        await chunk_entity_relation_graph.upsert_node(entity_name, new_node_data)
+
+    description = new_node_data.get("description", "")
+    source_id = new_node_data.get("source_id", "")
+    entity_type = new_node_data.get("entity_type", "")
+    content = entity_name + "\n" + description
+
+    entity_id = compute_mdhash_id(entity_name, prefix="ent-")
+
+    entity_data = {
+        entity_id: {
+            "content": content,
+            "entity_name": entity_name,
+            "source_id": source_id,
+            "description": description,
+            "entity_type": entity_type,
+        }
+    }
+
+    await entities_vdb.upsert(entity_data)
+
+    if entity_chunks_storage is not None or relation_chunks_storage is not None:
+        from .utils import make_relation_chunk_key, compute_incremental_chunk_ids
+
+        if entity_chunks_storage is not None:
+            storage_key = original_entity_name if is_renaming else entity_name
+            stored_data = await entity_chunks_storage.get_by_id(storage_key)
+            has_stored_data = (
+                stored_data
+                and isinstance(stored_data, dict)
+                and stored_data.get("chunk_ids")
+            )
+
+            old_source_id = node_data.get("source_id", "")
+            old_chunk_ids = [cid for cid in old_source_id.split(GRAPH_FIELD_SEP) if cid]
+
+            new_source_id = new_node_data.get("source_id", "")
+            new_chunk_ids = [cid for cid in new_source_id.split(GRAPH_FIELD_SEP) if cid]
+
+            source_id_changed = set(new_chunk_ids) != set(old_chunk_ids)
+
+            if source_id_changed or not has_stored_data:
+                existing_full_chunk_ids = []
+                if has_stored_data:
+                    existing_full_chunk_ids = [
+                        cid for cid in stored_data.get("chunk_ids", []) if cid
+                    ]
+
+                if not existing_full_chunk_ids:
+                    existing_full_chunk_ids = old_chunk_ids.copy()
+
+                updated_chunk_ids = compute_incremental_chunk_ids(
+                    existing_full_chunk_ids, old_chunk_ids, new_chunk_ids
+                )
+
+                if is_renaming:
+                    await entity_chunks_storage.delete([original_entity_name])
+                    await entity_chunks_storage.upsert(
+                        {
+                            entity_name: {
+                                "chunk_ids": updated_chunk_ids,
+                                "count": len(updated_chunk_ids),
+                            }
+                        }
+                    )
+                else:
+                    await entity_chunks_storage.upsert(
+                        {
+                            entity_name: {
+                                "chunk_ids": updated_chunk_ids,
+                                "count": len(updated_chunk_ids),
+                            }
+                        }
+                    )
+
+                logger.info(
+                    f"Entity Edit: find {len(updated_chunk_ids)} chunks related to `{entity_name}`"
+                )
+
+        if is_renaming and relation_chunks_storage is not None and relations_to_update:
+            for src, tgt, edge_data in relations_to_update:
+                old_src = original_entity_name if src == entity_name else src
+                old_tgt = original_entity_name if tgt == entity_name else tgt
+
+                old_normalized_src, old_normalized_tgt = sorted([old_src, old_tgt])
+                new_normalized_src, new_normalized_tgt = sorted([src, tgt])
+
+                old_storage_key = make_relation_chunk_key(
+                    old_normalized_src, old_normalized_tgt
+                )
+                new_storage_key = make_relation_chunk_key(
+                    new_normalized_src, new_normalized_tgt
+                )
+
+                if old_storage_key != new_storage_key:
+                    old_stored_data = await relation_chunks_storage.get_by_id(
+                        old_storage_key
+                    )
+                    relation_chunk_ids = []
+
+                    if old_stored_data and isinstance(old_stored_data, dict):
+                        relation_chunk_ids = [
+                            cid for cid in old_stored_data.get("chunk_ids", []) if cid
+                        ]
+                    else:
+                        relation_source_id = edge_data.get("source_id", "")
+                        relation_chunk_ids = [
+                            cid
+                            for cid in relation_source_id.split(GRAPH_FIELD_SEP)
+                            if cid
+                        ]
+
+                    await relation_chunks_storage.delete([old_storage_key])
+
+                    if relation_chunk_ids:
+                        await relation_chunks_storage.upsert(
+                            {
+                                new_storage_key: {
+                                    "chunk_ids": relation_chunk_ids,
+                                    "count": len(relation_chunk_ids),
+                                }
+                            }
+                        )
+            logger.info(
+                f"Entity Edit: migrate {len(relations_to_update)} relations after rename"
+            )
+
+    await _persist_graph_updates(
+        entities_vdb=entities_vdb,
+        relationships_vdb=relationships_vdb,
+        chunk_entity_relation_graph=chunk_entity_relation_graph,
+        entity_chunks_storage=entity_chunks_storage,
+        relation_chunks_storage=relation_chunks_storage,
+    )
+
+    logger.info(f"Entity Edit: `{entity_name}` successfully updated")
+    return await get_entity_info(
+        chunk_entity_relation_graph,
+        entities_vdb,
+        entity_name,
+        include_vector_data=True,
+    )
+
+
 async def aedit_entity(
    chunk_entity_relation_graph,
    entities_vdb,
@ -253,6 +519,7 @@ async def aedit_entity(
    entity_name: str,
    updated_data: dict[str, str],
    allow_rename: bool = True,
+    allow_merge: bool = False,
    entity_chunks_storage=None,
    relation_chunks_storage=None,
 ) -> dict[str, Any]:
@ -268,338 +535,82 @@ async def aedit_entity(
        entity_name: Name of the entity to edit
        updated_data: Dictionary containing updated attributes, e.g. {"description": "new description", "entity_type": "new type"}
        allow_rename: Whether to allow entity renaming, defaults to True
+        allow_merge: Whether to merge into an existing entity when renaming to an existing name, defaults to False
        entity_chunks_storage: Optional KV storage for tracking chunks that reference this entity
        relation_chunks_storage: Optional KV storage for tracking chunks that reference relations

    Returns:
        Dictionary containing updated entity information
    """
-    # Determine entities to lock
    new_entity_name = updated_data.get("entity_name", entity_name)
    is_renaming = new_entity_name != entity_name

-    # Lock both original and new entity names if renaming
-    lock_keys = sorted([entity_name, new_entity_name]) if is_renaming else [entity_name]
+    lock_keys = sorted({entity_name, new_entity_name}) if is_renaming else [entity_name]

-    # Use keyed lock for entity to ensure atomic graph and vector db operations
    workspace = entities_vdb.global_config.get("workspace", "")
    namespace = f"{workspace}:GraphDB" if workspace else "GraphDB"
    async with get_storage_keyed_lock(
        lock_keys, namespace=namespace, enable_logging=False
    ):
        try:
-            # Save original entity name for chunk tracking updates
-            original_entity_name = entity_name
-
-            # 1. Get current entity information
-            node_exists = await chunk_entity_relation_graph.has_node(entity_name)
-            if not node_exists:
-                raise ValueError(f"Entity '{entity_name}' does not exist")
-            node_data = await chunk_entity_relation_graph.get_node(entity_name)
-
-            # If renaming, check if new name already exists
-            if is_renaming:
-                if not allow_rename:
+            if is_renaming and not allow_rename:
                raise ValueError(
                    "Entity renaming is not allowed. Set allow_rename=True to enable this feature"
                )

-                existing_node = await chunk_entity_relation_graph.has_node(
+            if is_renaming:
+                target_exists = await chunk_entity_relation_graph.has_node(
                    new_entity_name
                )
-                if existing_node:
+                if target_exists:
+                    if not allow_merge:
                        raise ValueError(
                            f"Entity name '{new_entity_name}' already exists, cannot rename"
                        )

-            # 2. Update entity information in the graph
-            new_node_data = {**node_data, **updated_data}
-            new_node_data["entity_id"] = new_entity_name
-
-            if "entity_name" in new_node_data:
-                del new_node_data[
-                    "entity_name"
-                ]  # Node data should not contain entity_name field
-
-            # If renaming entity
-            if is_renaming:
                    logger.info(
-                    f"Entity Edit: renaming `{entity_name}` to `{new_entity_name}`"
+                        f"Entity Edit: `{entity_name}` will be merged into `{new_entity_name}`"
                    )

-                # Create new entity
-                await chunk_entity_relation_graph.upsert_node(
-                    new_entity_name, new_node_data
-                )
-
-                # Store relationships that need to be updated
-                relations_to_update = []
-                relations_to_delete = []
-                # Get all edges related to the original entity
-                edges = await chunk_entity_relation_graph.get_node_edges(entity_name)
-                if edges:
-                    # Recreate edges for the new entity
-                    for source, target in edges:
-                        edge_data = await chunk_entity_relation_graph.get_edge(
-                            source, target
-                        )
-                        if edge_data:
-                            relations_to_delete.append(
-                                compute_mdhash_id(source + target, prefix="rel-")
-                            )
-                            relations_to_delete.append(
-                                compute_mdhash_id(target + source, prefix="rel-")
-                            )
-                            if source == entity_name:
-                                await chunk_entity_relation_graph.upsert_edge(
-                                    new_entity_name, target, edge_data
-                                )
-                                relations_to_update.append(
-                                    (new_entity_name, target, edge_data)
-                                )
-                            else:  # target == entity_name
-                                await chunk_entity_relation_graph.upsert_edge(
-                                    source, new_entity_name, edge_data
-                                )
-                                relations_to_update.append(
-                                    (source, new_entity_name, edge_data)
-                                )
-
-                # Delete old entity
-                await chunk_entity_relation_graph.delete_node(entity_name)
-
-                # Delete old entity record from vector database
-                old_entity_id = compute_mdhash_id(entity_name, prefix="ent-")
-                await entities_vdb.delete([old_entity_id])
-
-                # Delete old relation records from vector database
-                await relationships_vdb.delete(relations_to_delete)
-
-                # Update relationship vector representations
-                for src, tgt, edge_data in relations_to_update:
-                    # Normalize entity order for consistent vector ID generation
-                    normalized_src, normalized_tgt = sorted([src, tgt])
-
-                    description = edge_data.get("description", "")
-                    keywords = edge_data.get("keywords", "")
-                    source_id = edge_data.get("source_id", "")
-                    weight = float(edge_data.get("weight", 1.0))
-
-                    # Create content using normalized order
-                    content = (
-                        f"{normalized_src}\t{normalized_tgt}\n{keywords}\n{description}"
-                    )
-
-                    # Calculate relationship ID using normalized order
-                    relation_id = compute_mdhash_id(
-                        normalized_src + normalized_tgt, prefix="rel-"
-                    )
-
-                    # Prepare data for vector database update
-                    relation_data = {
-                        relation_id: {
-                            "content": content,
-                            "src_id": normalized_src,
-                            "tgt_id": normalized_tgt,
-                            "source_id": source_id,
-                            "description": description,
-                            "keywords": keywords,
-                            "weight": weight,
+                    non_name_updates = {
+                        key: value
+                        for key, value in updated_data.items()
+                        if key != "entity_name"
                    }
-                    }
-
-                    # Update vector database
-                    await relationships_vdb.upsert(relation_data)
-
-                # Update working entity name to new name
-                entity_name = new_entity_name
-
-            else:
-                # If not renaming, directly update node data
-                await chunk_entity_relation_graph.upsert_node(
-                    entity_name, new_node_data
-                )
-
-            # 3. Recalculate entity's vector representation and update vector database
-            description = new_node_data.get("description", "")
-            source_id = new_node_data.get("source_id", "")
-            entity_type = new_node_data.get("entity_type", "")
-            content = entity_name + "\n" + description
-
-            # Calculate entity ID
-            entity_id = compute_mdhash_id(entity_name, prefix="ent-")
-
-            # Prepare data for vector database update
-            entity_data = {
-                entity_id: {
-                    "content": content,
-                    "entity_name": entity_name,
-                    "source_id": source_id,
-                    "description": description,
-                    "entity_type": entity_type,
-                }
-            }
-
-            # Update vector database
-            await entities_vdb.upsert(entity_data)
-
-            # 4. Update chunk tracking storages
-            if entity_chunks_storage is not None or relation_chunks_storage is not None:
-                from .utils import (
-                    make_relation_chunk_key,
-                    compute_incremental_chunk_ids,
-                )
-
-                # 4.1 Handle entity chunk tracking
-                if entity_chunks_storage is not None:
-                    # Get storage key (use original name for renaming scenario)
-                    storage_key = original_entity_name if is_renaming else entity_name
-                    stored_data = await entity_chunks_storage.get_by_id(storage_key)
-                    has_stored_data = (
-                        stored_data
-                        and isinstance(stored_data, dict)
-                        and stored_data.get("chunk_ids")
-                    )
-
-                    # Get old and new source_id
-                    old_source_id = node_data.get("source_id", "")
-                    old_chunk_ids = [
-                        cid for cid in old_source_id.split(GRAPH_FIELD_SEP) if cid
-                    ]
-
-                    new_source_id = new_node_data.get("source_id", "")
-                    new_chunk_ids = [
-                        cid for cid in new_source_id.split(GRAPH_FIELD_SEP) if cid
-                    ]
-
-                    source_id_changed = set(new_chunk_ids) != set(old_chunk_ids)
-
-                    # Update if: source_id changed OR storage has no data
-                    if source_id_changed or not has_stored_data:
-                        # Get existing full chunk_ids from storage
-                        existing_full_chunk_ids = []
-                        if has_stored_data:
-                            existing_full_chunk_ids = [
-                                cid for cid in stored_data.get("chunk_ids", []) if cid
-                            ]
-
-                        # If no stored data exists, use old source_id as baseline
-                        if not existing_full_chunk_ids:
-                            existing_full_chunk_ids = old_chunk_ids.copy()
-
-                        # Use utility function to compute incremental updates
-                        updated_chunk_ids = compute_incremental_chunk_ids(
-                            existing_full_chunk_ids, old_chunk_ids, new_chunk_ids
-                        )
-
-                        # Update storage (even if updated_chunk_ids is empty)
-                        if is_renaming:
-                            # Renaming: delete old + create new
-                            await entity_chunks_storage.delete([original_entity_name])
-                            await entity_chunks_storage.upsert(
-                                {
-                                    entity_name: {
-                                        "chunk_ids": updated_chunk_ids,
-                                        "count": len(updated_chunk_ids),
-                                    }
-                                }
-                            )
-                        else:
-                            # Non-renaming: direct update
-                            await entity_chunks_storage.upsert(
-                                {
-                                    entity_name: {
-                                        "chunk_ids": updated_chunk_ids,
-                                        "count": len(updated_chunk_ids),
-                                    }
-                                }
-                            )
-
+                    if non_name_updates:
                        logger.info(
-                            f"Entity Edit: find {len(updated_chunk_ids)} chunks related to `{entity_name}`"
+                            "Entity Edit: applying non-name updates before merge"
                        )
-
-                # 4.2 Handle relation chunk tracking if entity was renamed
-                if (
-                    is_renaming
-                    and relation_chunks_storage is not None
-                    and relations_to_update
-                ):
-                    for src, tgt, edge_data in relations_to_update:
-                        # Determine old entity pair (before rename)
-                        old_src = original_entity_name if src == entity_name else src
-                        old_tgt = original_entity_name if tgt == entity_name else tgt
-
-                        # Normalize entity order for both old and new keys
-                        old_normalized_src, old_normalized_tgt = sorted(
-                            [old_src, old_tgt]
-                        )
-                        new_normalized_src, new_normalized_tgt = sorted([src, tgt])
-
-                        # Generate storage keys
-                        old_storage_key = make_relation_chunk_key(
-                            old_normalized_src, old_normalized_tgt
-                        )
-                        new_storage_key = make_relation_chunk_key(
-                            new_normalized_src, new_normalized_tgt
-                        )
-
-                        # If keys are different, we need to move the chunk tracking
-                        if old_storage_key != new_storage_key:
-                            # Get complete chunk IDs from storage first (preserves all existing references)
-                            old_stored_data = await relation_chunks_storage.get_by_id(
-                                old_storage_key
-                            )
-                            relation_chunk_ids = []
-
-                            if old_stored_data and isinstance(old_stored_data, dict):
-                                # Use complete chunk_ids from storage
-                                relation_chunk_ids = [
-                                    cid
-                                    for cid in old_stored_data.get("chunk_ids", [])
-                                    if cid
-                                ]
-                            else:
-                                # Fallback: if storage has no data, use graph's source_id
-                                relation_source_id = edge_data.get("source_id", "")
-                                relation_chunk_ids = [
-                                    cid
-                                    for cid in relation_source_id.split(GRAPH_FIELD_SEP)
-                                    if cid
-                                ]
-
-                            # Delete old relation chunk tracking
-                            await relation_chunks_storage.delete([old_storage_key])
-
-                            # Create new relation chunk tracking (migrate complete data)
-                            if relation_chunk_ids:
-                                await relation_chunks_storage.upsert(
-                                    {
-                                        new_storage_key: {
-                                            "chunk_ids": relation_chunk_ids,
-                                            "count": len(relation_chunk_ids),
-                                        }
-                                    }
-                                )
-                    logger.info(
-                        f"Entity Edit: migrate {len(relations_to_update)} relations after rename"
-                    )
-
-            # 5. Save changes
-            await _persist_graph_updates(
-                entities_vdb=entities_vdb,
-                relationships_vdb=relationships_vdb,
-                chunk_entity_relation_graph=chunk_entity_relation_graph,
+                        await _edit_entity_impl(
+                            chunk_entity_relation_graph,
+                            entities_vdb,
+                            relationships_vdb,
+                            entity_name,
+                            non_name_updates,
                            entity_chunks_storage=entity_chunks_storage,
                            relation_chunks_storage=relation_chunks_storage,
                        )

-            logger.info(f"Entity Edit: `{entity_name}` successfully updated")
-            return await get_entity_info(
+                    return await _merge_entities_impl(
                        chunk_entity_relation_graph,
                        entities_vdb,
+                        relationships_vdb,
+                        [entity_name],
+                        new_entity_name,
+                        merge_strategy=None,
+                        target_entity_data=None,
+                        entity_chunks_storage=entity_chunks_storage,
+                        relation_chunks_storage=relation_chunks_storage,
+                    )
+
+            return await _edit_entity_impl(
+                chunk_entity_relation_graph,
+                entities_vdb,
+                relationships_vdb,
                entity_name,
-                include_vector_data=True,
+                updated_data,
+                entity_chunks_storage=entity_chunks_storage,
+                relation_chunks_storage=relation_chunks_storage,
            )
        except Exception as e:
            logger.error(f"Error while editing entity '{entity_name}': {e}")
@ -1054,22 +1065,22 @@ async def acreate_relation(
            raise


-async def amerge_entities(
+async def _merge_entities_impl(
    chunk_entity_relation_graph,
    entities_vdb,
    relationships_vdb,
    source_entities: list[str],
    target_entity: str,
+    *,
    merge_strategy: dict[str, str] = None,
    target_entity_data: dict[str, Any] = None,
    entity_chunks_storage=None,
    relation_chunks_storage=None,
 ) -> dict[str, Any]:
-    """Asynchronously merge multiple entities into one entity.
+    """Internal helper that merges entities without acquiring storage locks.

-    Merges multiple source entities into a target entity, handling all relationships,
-    and updating both the knowledge graph and vector database.
-    Also merges chunk tracking information from entity_chunks_storage and relation_chunks_storage.
+    This function performs the actual entity merge operations without lock management.
+    It should only be called by public APIs that have already acquired necessary locks.

    Args:
        chunk_entity_relation_graph: Graph storage instance
@ -1077,28 +1088,18 @@ async def amerge_entities(
        relationships_vdb: Vector database storage for relationships
        source_entities: List of source entity names to merge
        target_entity: Name of the target entity after merging
-        merge_strategy: Deprecated (Each field uses its own default strategy). If provided,
-            customizations are applied but a warning is logged.
-        target_entity_data: Dictionary of specific values to set for the target entity,
-            overriding any merged values, e.g. {"description": "custom description", "entity_type": "PERSON"}
-        entity_chunks_storage: Optional KV storage for tracking chunks that reference entities
-        relation_chunks_storage: Optional KV storage for tracking chunks that reference relations
+        merge_strategy: Deprecated. Merge strategy for each field (optional)
+        target_entity_data: Dictionary of specific values to set for target entity (optional)
+        entity_chunks_storage: Optional KV storage for tracking chunks
+        relation_chunks_storage: Optional KV storage for tracking relation chunks

    Returns:
        Dictionary containing the merged entity information
-    """
-    # Collect all entities involved (source + target) and lock them all in sorted order
-    all_entities = set(source_entities)
-    all_entities.add(target_entity)
-    lock_keys = sorted(all_entities)

-    # Use keyed lock for all entities to ensure atomic graph and vector db operations
-    workspace = entities_vdb.global_config.get("workspace", "")
-    namespace = f"{workspace}:GraphDB" if workspace else "GraphDB"
-    async with get_storage_keyed_lock(
-        lock_keys, namespace=namespace, enable_logging=False
-    ):
-        try:
+    Note:
+        Caller must acquire appropriate locks before calling this function.
+        All source entities and the target entity should be locked together.
+    """
    # Default merge strategy for entities
    default_entity_merge_strategy = {
        "description": "concatenate",
@ -1115,9 +1116,7 @@ async def amerge_entities(
            **default_entity_merge_strategy,
            **merge_strategy,
        }
-            target_entity_data = (
-                {} if target_entity_data is None else target_entity_data
-            )
+    target_entity_data = {} if target_entity_data is None else target_entity_data

    # 1. Check if all source entities exist
    source_entities_data = {}
@ -1132,8 +1131,8 @@ async def amerge_entities(
    target_exists = await chunk_entity_relation_graph.has_node(target_entity)
    existing_target_entity_data = {}
    if target_exists:
-                existing_target_entity_data = (
-                    await chunk_entity_relation_graph.get_node(target_entity)
+        existing_target_entity_data = await chunk_entity_relation_graph.get_node(
+            target_entity
        )
        logger.info(
            "Entity Merge: target entity already exists, source and target entities will be merged"
@ -1166,22 +1165,16 @@ async def amerge_entities(
            for src, tgt in edges:
                # Ensure src is the current entity
                if src == entity_name:
-                            edge_data = await chunk_entity_relation_graph.get_edge(
-                                src, tgt
-                            )
+                    edge_data = await chunk_entity_relation_graph.get_edge(src, tgt)
                    all_relations.append((src, tgt, edge_data))

    # 5. Create or update the target entity
    merged_entity_data["entity_id"] = target_entity
    if not target_exists:
-                await chunk_entity_relation_graph.upsert_node(
-                    target_entity, merged_entity_data
-                )
+        await chunk_entity_relation_graph.upsert_node(target_entity, merged_entity_data)
        logger.info(f"Entity Merge: created target '{target_entity}'")
    else:
-                await chunk_entity_relation_graph.upsert_node(
-                    target_entity, merged_entity_data
-                )
+        await chunk_entity_relation_graph.upsert_node(target_entity, merged_entity_data)
        logger.info(f"Entity Merge: Updated target '{target_entity}'")

    # 6. Recreate all relations pointing to the target entity in KG
@ -1209,9 +1202,7 @@ async def amerge_entities(

        # Skip relationships between source entities to avoid self-loops
        if new_src == new_tgt:
-                    logger.info(
-                        f"Entity Merge: skipping `{src}`~`{tgt}` to avoid self-loop"
-                    )
+            logger.info(f"Entity Merge: skipping `{src}`~`{tgt}` to avoid self-loop")
            continue

        # Normalize entity order for consistent duplicate detection (undirected relationships)
@ -1220,9 +1211,7 @@ async def amerge_entities(

        # Process chunk tracking for this relation
        if relation_chunks_storage is not None:
-                    storage_key = make_relation_chunk_key(
-                        normalized_src, normalized_tgt
-                    )
+            storage_key = make_relation_chunk_key(normalized_src, normalized_tgt)

            # Get chunk_ids from storage for this original relation
            stored = await relation_chunks_storage.get_by_id(old_storage_key)
@ -1232,9 +1221,7 @@ async def amerge_entities(
            else:
                # Fallback to source_id from graph
                source_id = edge_data.get("source_id", "")
-                        chunk_ids = [
-                            cid for cid in source_id.split(GRAPH_FIELD_SEP) if cid
-                        ]
+                chunk_ids = [cid for cid in source_id.split(GRAPH_FIELD_SEP) if cid]

            # Accumulate chunk_ids with ordered deduplication
            if storage_key not in relation_chunk_tracking:
@ -1301,9 +1288,7 @@ async def amerge_entities(
            )

    # 7. Update relationship vector representations
-            logger.info(
-                f"Entity Merge: deleting {len(relations_to_delete)} relations from vdb"
-            )
+    logger.info(f"Entity Merge: deleting {len(relations_to_delete)} relations from vdb")
    await relationships_vdb.delete(relations_to_delete)
    for rel_data in relation_updates.values():
        edge_data = rel_data["data"]
@ -1316,12 +1301,8 @@ async def amerge_entities(
        weight = float(edge_data.get("weight", 1.0))

        # Use normalized order for content and relation ID
-                content = (
-                    f"{keywords}\t{normalized_src}\n{normalized_tgt}\n{description}"
-                )
-                relation_id = compute_mdhash_id(
-                    normalized_src + normalized_tgt, prefix="rel-"
-                )
+        content = f"{keywords}\t{normalized_src}\n{normalized_tgt}\n{description}"
+        relation_id = compute_mdhash_id(normalized_src + normalized_tgt, prefix="rel-")

        relation_data_for_vdb = {
            relation_id: {
@ -1335,9 +1316,7 @@ async def amerge_entities(
            }
        }
        await relationships_vdb.upsert(relation_data_for_vdb)
-                logger.info(
-                    f"Entity Merge: updating vdb `{normalized_src}`~`{normalized_tgt}`"
-                )
+        logger.info(f"Entity Merge: updating vdb `{normalized_src}`~`{normalized_tgt}`")

    # 8. Update entity vector representation
    description = merged_entity_data.get("description", "")
@ -1392,9 +1371,7 @@ async def amerge_entities(
                    merged_chunk_ids.append(chunk_id)

        # Delete source entities' chunk tracking records
-                entity_keys_to_delete = [
-                    e for e in source_entities if e != target_entity
-                ]
+        entity_keys_to_delete = [e for e in source_entities if e != target_entity]
        if entity_keys_to_delete:
            await entity_chunks_storage.delete(entity_keys_to_delete)

@ -1448,6 +1425,62 @@ async def amerge_entities(
        include_vector_data=True,
    )

+
+async def amerge_entities(
+    chunk_entity_relation_graph,
+    entities_vdb,
+    relationships_vdb,
+    source_entities: list[str],
+    target_entity: str,
+    merge_strategy: dict[str, str] = None,
+    target_entity_data: dict[str, Any] = None,
+    entity_chunks_storage=None,
+    relation_chunks_storage=None,
+) -> dict[str, Any]:
+    """Asynchronously merge multiple entities into one entity.
+
+    Merges multiple source entities into a target entity, handling all relationships,
+    and updating both the knowledge graph and vector database.
+    Also merges chunk tracking information from entity_chunks_storage and relation_chunks_storage.
+
+    Args:
+        chunk_entity_relation_graph: Graph storage instance
+        entities_vdb: Vector database storage for entities
+        relationships_vdb: Vector database storage for relationships
+        source_entities: List of source entity names to merge
+        target_entity: Name of the target entity after merging
+        merge_strategy: Deprecated (Each field uses its own default strategy). If provided,
+            customizations are applied but a warning is logged.
+        target_entity_data: Dictionary of specific values to set for the target entity,
+            overriding any merged values, e.g. {"description": "custom description", "entity_type": "PERSON"}
+        entity_chunks_storage: Optional KV storage for tracking chunks that reference entities
+        relation_chunks_storage: Optional KV storage for tracking chunks that reference relations
+
+    Returns:
+        Dictionary containing the merged entity information
+    """
+    # Collect all entities involved (source + target) and lock them all in sorted order
+    all_entities = set(source_entities)
+    all_entities.add(target_entity)
+    lock_keys = sorted(all_entities)
+
+    workspace = entities_vdb.global_config.get("workspace", "")
+    namespace = f"{workspace}:GraphDB" if workspace else "GraphDB"
+    async with get_storage_keyed_lock(
+        lock_keys, namespace=namespace, enable_logging=False
+    ):
+        try:
+            return await _merge_entities_impl(
+                chunk_entity_relation_graph,
+                entities_vdb,
+                relationships_vdb,
+                source_entities,
+                target_entity,
+                merge_strategy=merge_strategy,
+                target_entity_data=target_entity_data,
+                entity_chunks_storage=entity_chunks_storage,
+                relation_chunks_storage=relation_chunks_storage,
+            )
        except Exception as e:
            logger.error(f"Error merging entities: {e}")
            raise