fix: Prevent duplicated edges in cognify by limiting the recursion depth in add_data_points

This commit is contained in:
hajdul88 2025-01-07 13:33:05 +01:00
parent fe672ce0e4
commit bd644a1434
3 changed files with 5 additions and 3 deletions

View file

@@ -94,7 +94,7 @@ async def run_cognify_pipeline(dataset: Dataset, user: User, graph_model: BaseMo
summarization_model = cognee_config.summarization_model,
task_config = { "batch_size": 10 }
),
-Task(add_data_points, task_config = { "batch_size": 10 }),
+Task(add_data_points, only_root = True, task_config = { "batch_size": 10 }),
]
pipeline = run_tasks(tasks, data_documents, "cognify_pipeline")

View file

@@ -7,6 +7,7 @@ async def get_graph_from_model(
added_nodes: dict,
added_edges: dict,
visited_properties: dict = None,
+only_root = False,
include_root = True,
):
if str(data_point.id) in added_nodes:
@@ -86,7 +87,7 @@ async def get_graph_from_model(
}))
added_edges[str(edge_key)] = True
-if str(field_value.id) in added_nodes:
+if str(field_value.id) in added_nodes or only_root:
continue
property_nodes, property_edges = await get_graph_from_model(

View file

@@ -5,7 +5,7 @@ from cognee.modules.graph.utils import deduplicate_nodes_and_edges, get_graph_fr
from .index_data_points import index_data_points
-async def add_data_points(data_points: list[DataPoint]):
+async def add_data_points(data_points: list[DataPoint], only_root = False):
nodes = []
edges = []
@@ -19,6 +19,7 @@ async def add_data_points(data_points: list[DataPoint]):
added_nodes = added_nodes,
added_edges = added_edges,
visited_properties = visited_properties,
+only_root = only_root,
) for data_point in data_points
])