# Changelog (from merged PR):
# - fix: handle rate limit error coming from llm model
# - fix: fixes lost edges and nodes in get_graph_from_model
# - fix: fixes database pruning issue in pgvector (#261)
# - fix: cognee_demo notebook pipeline is not saving summaries
# Co-authored-by: hajdul88 <52442977+hajdul88@users.noreply.github.com>
import asyncio
from typing import Type

from pydantic import BaseModel

from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
from cognee.modules.data.extraction.knowledge_graph import extract_content_graph
from cognee.modules.graph.utils import (
    expand_with_nodes_and_edges,
    retrieve_existing_edges,
)
from cognee.tasks.storage import add_data_points
async def extract_graph_from_data(
    data_chunks: list[DocumentChunk], graph_model: Type[BaseModel]
):
    """Extract a knowledge graph from each document chunk and persist it.

    A content graph is extracted for every chunk concurrently via
    ``extract_content_graph``, edges already known to the graph engine are
    retrieved, and the combined result is expanded into nodes and edges that
    are then stored.

    Args:
        data_chunks: Document chunks whose ``text`` is fed to graph extraction.
        graph_model: Pydantic model type describing the graph structure the
            extractor should produce.

    Returns:
        The input ``data_chunks`` list, unchanged, so the task can be chained
        in a pipeline.
    """
    # Extract one content graph per chunk concurrently; asyncio.gather
    # preserves order, so chunk_graphs[i] corresponds to data_chunks[i].
    chunk_graphs = await asyncio.gather(
        *[extract_content_graph(chunk.text, graph_model) for chunk in data_chunks]
    )

    graph_engine = await get_graph_engine()

    # Look up edges already present for these chunks/graphs so the expansion
    # step can take them into account (presumably to avoid re-adding them).
    existing_edges_map = await retrieve_existing_edges(
        data_chunks,
        chunk_graphs,
        graph_engine,
    )

    graph_nodes, graph_edges = expand_with_nodes_and_edges(
        data_chunks,
        chunk_graphs,
        existing_edges_map,
    )

    # Truthiness check is the idiomatic form of `len(x) > 0`; skip the storage
    # calls entirely when there is nothing new to persist.
    if graph_nodes:
        await add_data_points(graph_nodes)

    if graph_edges:
        await graph_engine.add_edges(graph_edges)

    return data_chunks