Normalize entity order for undirected graph consistency

• Normalize entity pairs for storage
• Update API docs for undirected edges
2025-10-26 15:53:31 +08:00 · 2025-10-26 15:53:31 +08:00 · bf1897a67e
commit bf1897a67e
parent 3fbd704bf9
2 changed files with 41 additions and 9 deletions
--- a/lightrag/api/routers/graph_routes.py
+++ b/lightrag/api/routers/graph_routes.py
@ -299,6 +299,7 @@ def create_graph_routes(rag, api_key: Optional[str] = None):
            entity_data (dict): Entity properties including:
                - description (str): Textual description of the entity
                - entity_type (str): Category/type of the entity (e.g., PERSON, ORGANIZATION, LOCATION)
+                - source_id (str): Related chunk_id from which the description originates
                - Additional custom properties as needed

        Response Schema:
@ -309,6 +310,7 @@ def create_graph_routes(rag, api_key: Optional[str] = None):
                    "entity_name": "Tesla",
                    "description": "Electric vehicle manufacturer",
                    "entity_type": "ORGANIZATION",
+                    "source_id": "chunk-123<SEP>chunk-456"
                    ... (other entity properties)
                }
            }
@ -361,10 +363,11 @@ def create_graph_routes(rag, api_key: Optional[str] = None):
        """
        Create a new relationship between two entities in the knowledge graph

-        This endpoint establishes a directed relationship between two existing entities.
-        Both the source and target entities must already exist in the knowledge graph.
-        The system automatically generates vector embeddings for the relationship to
-        enable semantic search and graph traversal.
+        This endpoint establishes an undirected relationship between two existing entities.
+        The provided source/target order is accepted for convenience, but the edge
+        stored in the backend is undirected and may be returned with the entities swapped.
+        Both entities must already exist in the knowledge graph. The system automatically
+        generates vector embeddings for the relationship to enable semantic search and graph traversal.

        Prerequisites:
            - Both source_entity and target_entity must exist in the knowledge graph
@ -376,6 +379,7 @@ def create_graph_routes(rag, api_key: Optional[str] = None):
            relation_data (dict): Relationship properties including:
                - description (str): Textual description of the relationship
                - keywords (str): Comma-separated keywords describing the relationship type
+                - source_id (str): Related chunk_id from which the description originates
                - weight (float): Relationship strength/importance (default: 1.0)
                - Additional custom properties as needed

@ -388,6 +392,7 @@ def create_graph_routes(rag, api_key: Optional[str] = None):
                    "tgt_id": "Tesla",
                    "description": "Elon Musk is the CEO of Tesla",
                    "keywords": "CEO, founder",
+                    "source_id": "chunk-123<SEP>chunk-456",
                    "weight": 1.0,
                    ... (other relationship properties)
                }
--- a/lightrag/utils_graph.py
+++ b/lightrag/utils_graph.py
@ -100,6 +100,10 @@ async def adelete_by_relation(
    # Use graph database lock to ensure atomic graph and vector db operations
    async with graph_db_lock:
        try:
+            # Normalize entity order for undirected graph (ensures consistent key generation)
+            if source_entity > target_entity:
+                source_entity, target_entity = target_entity, source_entity
+
            # Check if the relation exists
            edge_exists = await chunk_entity_relation_graph.has_edge(
                source_entity, target_entity
@ -878,6 +882,10 @@ async def acreate_relation(
                source_entity, target_entity, edge_data
            )

+            # Normalize entity order for undirected relation vector (ensures consistent key generation)
+            if source_entity > target_entity:
+                source_entity, target_entity = target_entity, source_entity
+
            # Prepare content for embedding
            description = edge_data.get("description", "")
            keywords = edge_data.get("keywords", "")
@ -1121,19 +1129,27 @@ async def amerge_entities(
                tgt = rel_data["tgt"]
                edge_data = rel_data["data"]

+                # Normalize entity order for consistent vector storage
+                normalized_src, normalized_tgt = sorted([src, tgt])
+
                description = edge_data.get("description", "")
                keywords = edge_data.get("keywords", "")
                source_id = edge_data.get("source_id", "")
                weight = float(edge_data.get("weight", 1.0))

-                content = f"{keywords}\t{src}\n{tgt}\n{description}"
-                relation_id = compute_mdhash_id(src + tgt, prefix="rel-")
+                # Use normalized order for content and relation ID
+                content = (
+                    f"{keywords}\t{normalized_src}\n{normalized_tgt}\n{description}"
+                )
+                relation_id = compute_mdhash_id(
+                    normalized_src + normalized_tgt, prefix="rel-"
+                )

                relation_data_for_vdb = {
                    relation_id: {
                        "content": content,
-                        "src_id": src,
-                        "tgt_id": tgt,
+                        "src_id": normalized_src,
+                        "tgt_id": normalized_tgt,
                        "source_id": source_id,
                        "description": description,
                        "keywords": keywords,
@ -1340,7 +1356,18 @@ async def get_relation_info(
    tgt_entity: str,
    include_vector_data: bool = False,
 ) -> dict[str, str | None | dict[str, str]]:
-    """Get detailed information of a relationship"""
+    """
+    Get detailed information of a relationship between two entities.
+    The relationship is undirected: swapping src_entity and tgt_entity does not change the relationship.
+
+    Args:
+        src_entity: Source entity name
+        tgt_entity: Target entity name
+        include_vector_data: Whether to include vector database information
+
+    Returns:
+        Dictionary containing relationship information
+    """

    # Get information from the graph
    edge_data = await chunk_entity_relation_graph.get_edge(src_entity, tgt_entity)