From bf9a6d699b0747b75c299b8240f86ae8aaedc40d Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 3 Aug 2025 22:14:24 +0800 Subject: [PATCH] Fix(lightrag): Handle undirected edges in data migration The `_migrate_entity_relation_data` function previously processed directed edges from `get_all_edges`, which could lead to duplicates (e.g., (A,B) and (B,A)) and an incorrect relation count. This commit normalizes edges by sorting their source and target nodes before adding them to the relation set. This ensures all edges are treated as undirected and are properly deduplicated. It also stores an explicit `count` next to `entity_names` and `relation_pairs` in the records written to `full_entities` and `full_relations`. --- lightrag/lightrag.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 288655f7..2bd710df 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -719,7 +719,7 @@ class LightRAG: if doc_id not in doc_relations: doc_relations[doc_id] = set() # Use tuple for set operations, convert to list later - doc_relations[doc_id].add((src, tgt)) + doc_relations[doc_id].add(tuple(sorted((src, tgt)))) # Store the results in full_entities and full_relations migration_count = 0 @@ -728,7 +728,10 @@ class LightRAG: if doc_entities: entities_data = {} for doc_id, entity_set in doc_entities.items(): - entities_data[doc_id] = {"entity_names": list(entity_set)} + entities_data[doc_id] = { + "entity_names": list(entity_set), + "count": len(entity_set), + } await self.full_entities.upsert(entities_data) # Store relations @@ -737,7 +740,8 @@ class LightRAG: for doc_id, relation_set in doc_relations.items(): # Convert tuples back to lists relations_data[doc_id] = { - "relation_pairs": [list(pair) for pair in relation_set] + "relation_pairs": [list(pair) for pair in relation_set], + "count": len(relation_set), } await self.full_relations.upsert(relations_data)