fix: fixes cases when llm generates inconsistent output (#773)

<!-- .github/pull_request_template.md -->

## Description
Fixes cases where the LLM-generated edge list references nodes that do
not exist in the node list.

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
hajdul88 2025-04-23 11:55:37 +02:00 committed by GitHub
parent 012f1e0a0b
commit c5cba01d3c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -62,6 +62,16 @@ async def extract_graph_from_data(
*[extract_content_graph(chunk.text, graph_model) for chunk in data_chunks]
)
# Note: Drop edges whose source or target node is missing from the graph's node list
if graph_model == KnowledgeGraph:
for graph in chunk_graphs:
valid_node_ids = {node.id for node in graph.nodes}
graph.edges = [
edge
for edge in graph.edges
if edge.source_node_id in valid_node_ids and edge.target_node_id in valid_node_ids
]
return await integrate_chunk_graphs(
data_chunks, chunk_graphs, graph_model, ontology_adapter or OntologyResolver()
)