From c5cba01d3c22072a177b074d73f100346f4cdfb2 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 23 Apr 2025 11:55:37 +0200 Subject: [PATCH] fix: fixes cases when llm generates inconsistent output (#773) ## Description Fixes cases where the LLM-generated edge list references nodes that don't exist in the node list. ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. --- cognee/tasks/graph/extract_graph_from_data.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cognee/tasks/graph/extract_graph_from_data.py b/cognee/tasks/graph/extract_graph_from_data.py index 3bca1a257..01d8bb618 100644 --- a/cognee/tasks/graph/extract_graph_from_data.py +++ b/cognee/tasks/graph/extract_graph_from_data.py @@ -62,6 +62,16 @@ async def extract_graph_from_data( *[extract_content_graph(chunk.text, graph_model) for chunk in data_chunks] ) + # Note: Filter edges with missing source or target nodes + if graph_model == KnowledgeGraph: + for graph in chunk_graphs: + valid_node_ids = {node.id for node in graph.nodes} + graph.edges = [ + edge + for edge in graph.edges + if edge.source_node_id in valid_node_ids and edge.target_node_id in valid_node_ids + ] + return await integrate_chunk_graphs( data_chunks, chunk_graphs, graph_model, ontology_adapter or OntologyResolver() )