cognee/cognee/tasks/temporal_graph/extract_events_and_entities.py
2025-08-29 16:07:18 +02:00

32 lines
1.3 KiB
Python

import asyncio
from typing import Type, List
from cognee.infrastructure.llm.LLMGateway import LLMGateway
from cognee.modules.chunking.models import DocumentChunk
from cognee.tasks.temporal_graph.models import EventList
from cognee.modules.engine.utils.generate_event_datapoint import generate_event_datapoint
async def extract_events_and_timestamps(data_chunks: List[DocumentChunk]) -> List[DocumentChunk]:
    """Extract events from document chunks and attach them as Event datapoints.

    One ``LLMGateway.extract_event_graph`` call is issued per chunk, and all
    calls are awaited together with ``asyncio.gather``. Each extracted event is
    converted with ``generate_event_datapoint`` and added to the ``contains``
    list of the chunk it came from.

    Args:
        data_chunks: Document chunks whose ``text`` is sent to the extractor.
            The chunks are mutated in place.

    Returns:
        The same ``data_chunks`` list, with every chunk's ``contains`` extended
        by the Event datapoints extracted from it.

    Raises:
        Exception: Whatever the first failing extraction call raises;
            ``asyncio.gather`` is used without ``return_exceptions``, so the
            error propagates to the caller.
    """
    # gather() returns results in the order the awaitables were passed, so the
    # i-th result belongs to the i-th chunk and zip() below pairs them safely.
    extracted_event_lists = await asyncio.gather(
        *(LLMGateway.extract_event_graph(chunk.text, EventList) for chunk in data_chunks)
    )

    for data_chunk, event_list in zip(data_chunks, extracted_event_lists):
        # Convert every event first so a chunk is only touched once all of its
        # events have been turned into datapoints successfully.
        event_datapoints = [generate_event_datapoint(event) for event in event_list.events]
        if not event_datapoints:
            continue

        # Guard against a chunk whose `contains` was never initialised; calling
        # .append()/.extend() on None would raise AttributeError.
        if data_chunk.contains is None:
            data_chunk.contains = []
        data_chunk.contains.extend(event_datapoints)

    return data_chunks