From bf1897a67eebc668565f1f1e099e0b8073934552 Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 26 Oct 2025 15:53:31 +0800 Subject: [PATCH] Normalize entity order for undirected graph consistency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit • Normalize entity pairs for storage • Update API docs for undirected edges --- lightrag/api/routers/graph_routes.py | 13 +++++++--- lightrag/utils_graph.py | 37 ++++++++++++++++++++++++---- 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/lightrag/api/routers/graph_routes.py b/lightrag/api/routers/graph_routes.py index dbf4527e..f4c29fc2 100644 --- a/lightrag/api/routers/graph_routes.py +++ b/lightrag/api/routers/graph_routes.py @@ -299,6 +299,7 @@ def create_graph_routes(rag, api_key: Optional[str] = None): entity_data (dict): Entity properties including: - description (str): Textual description of the entity - entity_type (str): Category/type of the entity (e.g., PERSON, ORGANIZATION, LOCATION) + - source_id (str): Related chunk_id from which the description originates - Additional custom properties as needed Response Schema: @@ -309,6 +310,7 @@ def create_graph_routes(rag, api_key: Optional[str] = None): "entity_name": "Tesla", "description": "Electric vehicle manufacturer", "entity_type": "ORGANIZATION", + "source_id": "chunk-123<SEP>chunk-456", ... (other entity properties) } } @@ -361,10 +363,11 @@ def create_graph_routes(rag, api_key: Optional[str] = None): """ Create a new relationship between two entities in the knowledge graph - This endpoint establishes a directed relationship between two existing entities. - Both the source and target entities must already exist in the knowledge graph. - The system automatically generates vector embeddings for the relationship to - enable semantic search and graph traversal. + This endpoint establishes an undirected relationship between two existing entities. 
+ The provided source/target order is accepted for convenience, but the backend + stored edge is undirected and may be returned with the entities swapped. + Both entities must already exist in the knowledge graph. The system automatically + generates vector embeddings for the relationship to enable semantic search and graph traversal. Prerequisites: - Both source_entity and target_entity must exist in the knowledge graph @@ -376,6 +379,7 @@ def create_graph_routes(rag, api_key: Optional[str] = None): relation_data (dict): Relationship properties including: - description (str): Textual description of the relationship - keywords (str): Comma-separated keywords describing the relationship type + - source_id (str): Related chunk_id from which the description originates - weight (float): Relationship strength/importance (default: 1.0) - Additional custom properties as needed @@ -388,6 +392,7 @@ def create_graph_routes(rag, api_key: Optional[str] = None): "tgt_id": "Tesla", "description": "Elon Musk is the CEO of Tesla", "keywords": "CEO, founder", + "source_id": "chunk-123<SEP>chunk-456", "weight": 1.0, ... 
(other relationship properties) } diff --git a/lightrag/utils_graph.py b/lightrag/utils_graph.py index 97d3dc7a..68d65617 100644 --- a/lightrag/utils_graph.py +++ b/lightrag/utils_graph.py @@ -100,6 +100,10 @@ async def adelete_by_relation( # Use graph database lock to ensure atomic graph and vector db operations async with graph_db_lock: try: + # Normalize entity order for undirected graph (ensures consistent key generation) + if source_entity > target_entity: + source_entity, target_entity = target_entity, source_entity + # Check if the relation exists edge_exists = await chunk_entity_relation_graph.has_edge( source_entity, target_entity @@ -878,6 +882,10 @@ async def acreate_relation( source_entity, target_entity, edge_data ) + # Normalize entity order for undirected relation vector (ensures consistent key generation) + if source_entity > target_entity: + source_entity, target_entity = target_entity, source_entity + # Prepare content for embedding description = edge_data.get("description", "") keywords = edge_data.get("keywords", "") @@ -1121,19 +1129,27 @@ async def amerge_entities( tgt = rel_data["tgt"] edge_data = rel_data["data"] + # Normalize entity order for consistent vector storage + normalized_src, normalized_tgt = sorted([src, tgt]) + description = edge_data.get("description", "") keywords = edge_data.get("keywords", "") source_id = edge_data.get("source_id", "") weight = float(edge_data.get("weight", 1.0)) - content = f"{keywords}\t{src}\n{tgt}\n{description}" - relation_id = compute_mdhash_id(src + tgt, prefix="rel-") + # Use normalized order for content and relation ID + content = ( + f"{keywords}\t{normalized_src}\n{normalized_tgt}\n{description}" + ) + relation_id = compute_mdhash_id( + normalized_src + normalized_tgt, prefix="rel-" + ) relation_data_for_vdb = { relation_id: { "content": content, - "src_id": src, - "tgt_id": tgt, + "src_id": normalized_src, + "tgt_id": normalized_tgt, "source_id": source_id, "description": description, "keywords": 
keywords, @@ -1340,7 +1356,18 @@ async def get_relation_info( tgt_entity: str, include_vector_data: bool = False, ) -> dict[str, str | None | dict[str, str]]: - """Get detailed information of a relationship""" + """ + Get detailed information of a relationship between two entities. + The relationship is undirected: swapping src_entity and tgt_entity does not change the relationship. + + Args: + src_entity: Source entity name + tgt_entity: Target entity name + include_vector_data: Whether to include vector database information + + Returns: + Dictionary containing relationship information + """ # Get information from the graph edge_data = await chunk_entity_relation_graph.get_edge(src_entity, tgt_entity)