Fix(lightrag): Handle undirected edges in data migration
The `_migrate_entity_relation_data` function previously added each edge returned by `get_all_edges` to the relation set as a directed `(src, tgt)` pair, which could lead to duplicates (e.g., (A,B) and (B,A)) and an incorrect relation count. This commit normalizes each edge by sorting its source and target nodes before adding it to the relation set, so all edges are treated as undirected and are properly deduplicated. It also stores a `count` field alongside `entity_names` and `relation_pairs` in the migrated records.
This commit is contained in:
parent
e8d8afa846
commit
bf9a6d699b
1 changed file with 7 additions and 3 deletions
|
|
@@ -719,7 +719,7 @@ class LightRAG:
|
|||
if doc_id not in doc_relations:
|
||||
doc_relations[doc_id] = set()
|
||||
# Use tuple for set operations, convert to list later
|
||||
doc_relations[doc_id].add((src, tgt))
|
||||
doc_relations[doc_id].add(tuple(sorted((src, tgt))))
|
||||
|
||||
# Store the results in full_entities and full_relations
|
||||
migration_count = 0
|
||||
|
|
@@ -728,7 +728,10 @@ class LightRAG:
|
|||
if doc_entities:
|
||||
entities_data = {}
|
||||
for doc_id, entity_set in doc_entities.items():
|
||||
entities_data[doc_id] = {"entity_names": list(entity_set)}
|
||||
entities_data[doc_id] = {
|
||||
"entity_names": list(entity_set),
|
||||
"count": len(entity_set),
|
||||
}
|
||||
await self.full_entities.upsert(entities_data)
|
||||
|
||||
# Store relations
|
||||
|
|
@@ -737,7 +740,8 @@ class LightRAG:
|
|||
for doc_id, relation_set in doc_relations.items():
|
||||
# Convert tuples back to lists
|
||||
relations_data[doc_id] = {
|
||||
"relation_pairs": [list(pair) for pair in relation_set]
|
||||
"relation_pairs": [list(pair) for pair in relation_set],
|
||||
"count": len(relation_set),
|
||||
}
|
||||
await self.full_relations.upsert(relations_data)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue