cognee/cognee/tasks/graph/extract_graph_from_data_v2.py
Daniel Molnar 9ba12b25ef
feat: add delete by document (#668)
<!-- .github/pull_request_template.md -->

## Description
Delete by document.

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin
2025-04-17 15:42:10 +02:00

48 lines
1.7 KiB
Python

import asyncio
from typing import List
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
from cognee.shared.data_models import KnowledgeGraph
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
from cognee.tasks.graph.cascade_extract.utils.extract_nodes import extract_nodes
from cognee.tasks.graph.cascade_extract.utils.extract_content_nodes_and_relationship_names import (
extract_content_nodes_and_relationship_names,
)
from cognee.tasks.graph.cascade_extract.utils.extract_edge_triplets import (
extract_edge_triplets,
)
from cognee.tasks.graph.extract_graph_from_data import integrate_chunk_graphs
async def extract_graph_from_data(
    data_chunks: List[DocumentChunk],
    n_rounds: int = 2,
    ontology_adapter: OntologyResolver = None,
) -> List[DocumentChunk]:
    """Extract and update graph data from document chunks in multiple steps.

    The extraction is a three-stage cascade. Each stage is fanned out over
    all chunks concurrently with ``asyncio.gather`` and feeds the next one:

    1. ``extract_nodes`` on each chunk's text.
    2. ``extract_content_nodes_and_relationship_names`` on the text plus the
       nodes from stage 1; each result is unpacked as a
       ``(nodes, relationship_names)`` pair.
    3. ``extract_edge_triplets`` on the text, the stage-2 nodes and the
       stage-2 relationship names, producing one graph per chunk.

    The per-chunk graphs are then handed to ``integrate_chunk_graphs``
    together with the original chunks.

    Args:
        data_chunks: Chunks to process; only ``chunk.text`` is read here.
        n_rounds: Forwarded unchanged to each of the three extraction helpers.
        ontology_adapter: Forwarded to ``integrate_chunk_graphs``. When it is
            falsy (e.g. ``None``), a new ``OntologyResolver()`` is created.

    Returns:
        The result of ``integrate_chunk_graphs``, or an empty list when
        ``data_chunks`` is empty.
    """
    if not data_chunks:
        # With no chunks, ``zip(*chunk_results)`` below yields nothing and the
        # two-name unpacking would raise ValueError; there is nothing to extract.
        return []

    # Stage 1: candidate nodes for every chunk.
    chunk_nodes = await asyncio.gather(
        *[extract_nodes(chunk.text, n_rounds) for chunk in data_chunks]
    )

    # Stage 2: refine nodes and collect relationship names per chunk.
    chunk_results = await asyncio.gather(
        *[
            extract_content_nodes_and_relationship_names(chunk.text, nodes, n_rounds)
            for chunk, nodes in zip(data_chunks, chunk_nodes)
        ]
    )

    # Transpose the list of (nodes, relationship_names) pairs into two
    # parallel sequences aligned with ``data_chunks``.
    updated_nodes, relationships = zip(*chunk_results)

    # Stage 3: build the edge triplets (one graph per chunk).
    chunk_graphs = await asyncio.gather(
        *[
            extract_edge_triplets(chunk.text, nodes, rels, n_rounds)
            for chunk, nodes, rels in zip(data_chunks, updated_nodes, relationships)
        ]
    )

    return await integrate_chunk_graphs(
        data_chunks=data_chunks,
        chunk_graphs=chunk_graphs,
        graph_model=KnowledgeGraph,
        ontology_adapter=ontology_adapter or OntologyResolver(),
    )