fix: Prevent duplicated edges in cognify by limiting the recursion depth in add_data_points
This commit is contained in:
parent
fe672ce0e4
commit
bd644a1434
3 changed files with 5 additions and 3 deletions
|
|
@ -94,7 +94,7 @@ async def run_cognify_pipeline(dataset: Dataset, user: User, graph_model: BaseMo
|
||||||
summarization_model = cognee_config.summarization_model,
|
summarization_model = cognee_config.summarization_model,
|
||||||
task_config = { "batch_size": 10 }
|
task_config = { "batch_size": 10 }
|
||||||
),
|
),
|
||||||
Task(add_data_points, task_config = { "batch_size": 10 }),
|
Task(add_data_points, only_root = True, task_config = { "batch_size": 10 }),
|
||||||
]
|
]
|
||||||
|
|
||||||
pipeline = run_tasks(tasks, data_documents, "cognify_pipeline")
|
pipeline = run_tasks(tasks, data_documents, "cognify_pipeline")
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ async def get_graph_from_model(
|
||||||
added_nodes: dict,
|
added_nodes: dict,
|
||||||
added_edges: dict,
|
added_edges: dict,
|
||||||
visited_properties: dict = None,
|
visited_properties: dict = None,
|
||||||
|
only_root = False,
|
||||||
include_root = True,
|
include_root = True,
|
||||||
):
|
):
|
||||||
if str(data_point.id) in added_nodes:
|
if str(data_point.id) in added_nodes:
|
||||||
|
|
@ -86,7 +87,7 @@ async def get_graph_from_model(
|
||||||
}))
|
}))
|
||||||
added_edges[str(edge_key)] = True
|
added_edges[str(edge_key)] = True
|
||||||
|
|
||||||
if str(field_value.id) in added_nodes:
|
if str(field_value.id) in added_nodes or only_root:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
property_nodes, property_edges = await get_graph_from_model(
|
property_nodes, property_edges = await get_graph_from_model(
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ from cognee.modules.graph.utils import deduplicate_nodes_and_edges, get_graph_fr
|
||||||
from .index_data_points import index_data_points
|
from .index_data_points import index_data_points
|
||||||
|
|
||||||
|
|
||||||
async def add_data_points(data_points: list[DataPoint]):
|
async def add_data_points(data_points: list[DataPoint], only_root = False):
|
||||||
nodes = []
|
nodes = []
|
||||||
edges = []
|
edges = []
|
||||||
|
|
||||||
|
|
@ -19,6 +19,7 @@ async def add_data_points(data_points: list[DataPoint]):
|
||||||
added_nodes = added_nodes,
|
added_nodes = added_nodes,
|
||||||
added_edges = added_edges,
|
added_edges = added_edges,
|
||||||
visited_properties = visited_properties,
|
visited_properties = visited_properties,
|
||||||
|
only_root = only_root,
|
||||||
) for data_point in data_points
|
) for data_point in data_points
|
||||||
])
|
])
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue