From 97abdeeb2a81e72548fd01c2de918e98f6c9fb1b Mon Sep 17 00:00:00 2001
From: hajdul88 <52442977+hajdul88@users.noreply.github.com>
Date: Wed, 27 Aug 2025 18:04:10 +0200
Subject: [PATCH] feat: adds entity kg from events logic

---
Review notes (this area is not part of the commit message and is skipped
when the patch is applied):

  * Layout: line breaks and 4-space indentation were reconstructed from a
    whitespace-collapsed copy of this patch. Added-line counts for the three
    new files (55 / 21 / 26) match their hunk headers. The single blank
    context line implied by the __init__.py hunk (-1,2 +1,3) could not be
    positioned from the collapsed text and is placed last -- TODO confirm
    against pre-image blob 163fb6840.
  * extract_knowledge_graph_from_events pairs each Event with the LLM output
    via zip(), which stops at the shorter input. This assumes
    LLMGateway.extract_event_entities returns exactly one item per input
    event, in input order -- TODO confirm, otherwise entities are silently
    dropped or attached to the wrong event.
  * extract_knowledge_graph_from_events iterates chunk.contains without a
    None check -- confirm it is always a list by the time this task runs.
  * The docstring of the renamed extract_events_and_timestamps still reads
    "Extracts events and entities ..." (unchanged context line below).
  * __init__.py stops exporting extract_events_and_entities; any importer of
    the old name (not visible in this patch) must be updated.
  * enrich_events imports json at function scope rather than module top.

 cognee/tasks/temporal_graph/__init__.py      |  3 +-
 .../temporal_graph/add_entities_to_event.py  | 55 +++++++++++++++++++
 cognee/tasks/temporal_graph/enrich_events.py | 21 +++++++
 .../extract_events_and_entities.py           |  2 +-
 .../extract_knowledge_graph_from_events.py   | 26 +++++++++
 5 files changed, 105 insertions(+), 2 deletions(-)
 create mode 100644 cognee/tasks/temporal_graph/add_entities_to_event.py
 create mode 100644 cognee/tasks/temporal_graph/enrich_events.py
 create mode 100644 cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py

diff --git a/cognee/tasks/temporal_graph/__init__.py b/cognee/tasks/temporal_graph/__init__.py
index 163fb6840..991553605 100644
--- a/cognee/tasks/temporal_graph/__init__.py
+++ b/cognee/tasks/temporal_graph/__init__.py
@@ -1,2 +1,3 @@
-from .extract_events_and_entities import extract_events_and_entities
+from .extract_events_and_entities import extract_events_and_timestamps
+from .extract_knowledge_graph_from_events import extract_knowledge_graph_from_events
 
diff --git a/cognee/tasks/temporal_graph/add_entities_to_event.py b/cognee/tasks/temporal_graph/add_entities_to_event.py
new file mode 100644
index 000000000..5585a1b50
--- /dev/null
+++ b/cognee/tasks/temporal_graph/add_entities_to_event.py
@@ -0,0 +1,55 @@
+from cognee.modules.engine.models import Event
+from cognee.tasks.temporal_graph.models import EventWithEntities
+from cognee.modules.engine.models.Entity import Entity
+from cognee.modules.engine.models.EntityType import EntityType
+from cognee.infrastructure.engine.models.Edge import Edge
+from cognee.modules.engine.utils import generate_node_id, generate_node_name
+
+def add_entities_to_event(event: Event, event_with_entities: EventWithEntities) -> None:
+    """Add entities to event via attributes field."""
+    if not event_with_entities.attributes:
+        return
+
+    # Create entity types cache
+    entity_types = {}
+
+    # Process each attribute
+    for attribute in event_with_entities.attributes:
+        # Get or create entity type
+        entity_type = get_or_create_entity_type(entity_types, attribute.entity_type)
+
+        # Create entity
+        entity_id = generate_node_id(attribute.entity)
+        entity_name = generate_node_name(attribute.entity)
+        entity = Entity(
+            id=entity_id,
+            name=entity_name,
+            is_a=entity_type,
+            description=f"Entity {attribute.entity} of type {attribute.entity_type}",
+            ontology_valid=False,
+            belongs_to_set=None,
+        )
+
+        # Create edge
+        edge = Edge(relationship_type=attribute.relationship)
+
+        # Add to event attributes
+        if event.attributes is None:
+            event.attributes = []
+        event.attributes.append((edge, [entity]))
+
+def get_or_create_entity_type(entity_types: dict, entity_type_name: str) -> EntityType:
+    """Get existing entity type or create new one."""
+    if entity_type_name not in entity_types:
+        type_id = generate_node_id(entity_type_name)
+        type_name = generate_node_name(entity_type_name)
+        entity_type = EntityType(
+            id=type_id,
+            name=type_name,
+            type=type_name,
+            description=f"Type for {entity_type_name}",
+            ontology_valid=False,
+        )
+        entity_types[entity_type_name] = entity_type
+
+    return entity_types[entity_type_name]
\ No newline at end of file
diff --git a/cognee/tasks/temporal_graph/enrich_events.py b/cognee/tasks/temporal_graph/enrich_events.py
new file mode 100644
index 000000000..4c9edb2bb
--- /dev/null
+++ b/cognee/tasks/temporal_graph/enrich_events.py
@@ -0,0 +1,21 @@
+from typing import List
+
+from cognee.infrastructure.llm import LLMGateway
+from cognee.modules.engine.models import Event
+from cognee.tasks.temporal_graph.models import EventWithEntities,EventEntityList
+
+async def enrich_events(events: List[Event]) -> List[EventWithEntities]:
+    """Extract entities from events and return enriched events."""
+    import json
+
+    # Convert events to JSON format for LLM processing
+    events_json = [
+        {"event_name": event.name, "description": event.description or ""} for event in events
+    ]
+
+    events_json_str = json.dumps(events_json)
+
+    # Extract entities from events
+    entity_result = await LLMGateway.extract_event_entities(events_json_str, EventEntityList)
+
+    return entity_result.events
\ No newline at end of file
diff --git a/cognee/tasks/temporal_graph/extract_events_and_entities.py b/cognee/tasks/temporal_graph/extract_events_and_entities.py
index 37e113d56..bf4367f6a 100644
--- a/cognee/tasks/temporal_graph/extract_events_and_entities.py
+++ b/cognee/tasks/temporal_graph/extract_events_and_entities.py
@@ -6,7 +6,7 @@ from cognee.tasks.temporal_graph.models import EventList
 from cognee.modules.engine.utils.generate_event_datapoint import generate_event_datapoint
 
 
-async def extract_events_and_entities(data_chunks: List[DocumentChunk]) -> List[DocumentChunk]:
+async def extract_events_and_timestamps(data_chunks: List[DocumentChunk]) -> List[DocumentChunk]:
     """Extracts events and entities from a chunk of documents."""
     events = await asyncio.gather(
         *[LLMGateway.extract_event_graph(chunk.text, EventList) for chunk in data_chunks]
diff --git a/cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py b/cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py
new file mode 100644
index 000000000..0e49c5296
--- /dev/null
+++ b/cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py
@@ -0,0 +1,26 @@
+from typing import List
+from cognee.modules.chunking.models import DocumentChunk
+from cognee.modules.engine.models import Event
+from cognee.tasks.temporal_graph.enrich_events import enrich_events
+from cognee.tasks.temporal_graph.add_entities_to_event import add_entities_to_event
+
+async def extract_knowledge_graph_from_events(data_chunks: List[DocumentChunk]) -> List[DocumentChunk]:
+    """Extract events from chunks and enrich them with entities."""
+    # Extract events from chunks
+    all_events = []
+    for chunk in data_chunks:
+        for item in chunk.contains:
+            if isinstance(item, Event):
+                all_events.append(item)
+
+    if not all_events:
+        return data_chunks
+
+    # Enrich events with entities
+    enriched_events = await enrich_events(all_events)
+
+    # Add entities to events
+    for event, enriched_event in zip(all_events, enriched_events):
+        add_entities_to_event(event, enriched_event)
+
+    return data_chunks
\ No newline at end of file