feat: adds event entity extraction

This commit is contained in:
hajdul88 2025-08-27 18:03:38 +02:00
parent 3482f353a9
commit 7468ef6e53
5 changed files with 69 additions and 0 deletions

View file

@ -144,3 +144,12 @@ class LLMGateway:
)
return extract_event_graph(content=content, response_model=response_model)
@staticmethod
def extract_event_entities(content: str, response_model: Type[BaseModel]) -> Coroutine:
    """Delegate event-entity extraction to the litellm/instructor backend.

    Args:
        content: Text forwarded unchanged to the backend extractor.
        response_model: Pydantic model class forwarded unchanged to the
            backend extractor.

    Returns:
        The value produced by calling the backend ``extract_event_entities``;
        it is returned as-is, without being awaited here.
    """
    # TODO: Add BAML version of category and extraction and update function (consulted with Igor)
    # Imported at call time rather than at module import time.
    # NOTE(review): presumably this avoids a circular import with the
    # extraction package - confirm.
    from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.extraction import (
        extract_event_entities as _backend_extract_event_entities,
    )

    pending = _backend_extract_event_entities(content=content, response_model=response_model)
    return pending

View file

@ -53,6 +53,7 @@ class LLMConfig(BaseSettings):
transcription_model: str = "whisper-1"
graph_prompt_path: str = "generate_graph_prompt.txt"
temporal_graph_prompt_path: str = "generate_event_graph_prompt.txt"
event_entity_prompt_path: str = "generate_event_entity_prompt.txt"
llm_rate_limit_enabled: bool = False
llm_rate_limit_requests: int = 60
llm_rate_limit_interval: int = 60 # in seconds (default is 60 requests per minute)

View file

@ -0,0 +1,25 @@
For the purposes of building event-based knowledge graphs, you are tasked with extracting highly granular entities from event text. An entity is any distinct, identifiable thing, person, place, object, organization, concept, or phenomenon that can be named, referenced, or described in the event context. This includes but is not limited to: people, places, objects, organizations, concepts, events, processes, states, conditions, properties, attributes, roles, functions, and any other meaningful referents that contribute to understanding the event.
**Temporal Entity Exclusion**: Do not extract timestamp-like entities (dates, times, durations) as these are handled separately. However, extract named temporal periods, eras, historical epochs, and culturally significant time references.
## Input Format
The input will be a list of dictionaries, each containing:
- `event_name`: The name of the event
- `description`: The description of the event
## Task
For each event, extract all entities mentioned in the event description and determine their relationship to the event.
## Output Format
Return the same enriched JSON with an additional key in each dictionary: `attributes`.
The `attributes` should be a list of dictionaries, each containing:
- `entity`: The name of the entity
- `entity_type`: The type/category of the entity (person, place, organization, object, concept, etc.)
- `relationship`: A concise description of how the entity relates to the event
## Requirements
- **Be extremely thorough** - extract EVERY non-temporal entity mentioned, no matter how small, obvious, or seemingly insignificant
- **After you are done with obvious entities, every noun, pronoun, proper noun, and named reference = one entity**
- We expect rich entity networks from any event, easily reaching dozens of entities per event
- Granularity and richness of the entity extraction is key to our success and is of utmost importance
- **Do not skip any entities** - if you're unsure whether something is an entity, extract it anyway
- Use the event name for context when determining relationships
- Relationships should be technical with one or at most two words. If two words, join them with an underscore in lowercase snake_case style
- Relationships could imply general meaning like: subject, object, participant, recipient, agent, instrument, tool, source, cause, effect, purpose, manner, resource, etc.
- You can combine two words to form a relationship name: subject_role, previous_owner, etc.
- Focus on how the entity specifically relates to the event

View file

@ -2,3 +2,4 @@ from .knowledge_graph.extract_content_graph import extract_content_graph
from .knowledge_graph.extract_event_graph import extract_event_graph
from .extract_categories import extract_categories
from .extract_summary import extract_summary, extract_code_summary
from .extract_event_entities import extract_event_entities

View file

@ -0,0 +1,33 @@
import os
from typing import List, Type
from pydantic import BaseModel
from cognee.infrastructure.llm.LLMGateway import LLMGateway
from cognee.infrastructure.llm.config import (
get_llm_config,
)
async def extract_event_entities(
    content: str, response_model: Type[BaseModel]
):
    """Ask the LLM for structured event entities found in ``content``.

    The system prompt is rendered from the ``event_entity_prompt_path``
    setting of the LLM config. When that setting is an absolute path it is
    split into a directory and a file name, which are handed to
    ``LLMGateway.render_prompt`` separately; otherwise the setting is used
    as the file name and ``base_directory`` is ``None``.

    Args:
        content: Text passed to the LLM as the input to analyse.
        response_model: Pydantic model class passed to
            ``LLMGateway.acreate_structured_output``.

    Returns:
        The awaited result of ``LLMGateway.acreate_structured_output``
        (presumably an instance of ``response_model`` - confirm against the
        gateway implementation).
    """
    configured_path = get_llm_config().event_entity_prompt_path

    if not os.path.isabs(configured_path):
        # Bare file name / relative path: pass it through untouched and give
        # render_prompt no explicit base directory.
        prompt_dir, prompt_file = None, configured_path
    else:
        # Absolute path: separate the containing directory from the file name.
        prompt_dir, prompt_file = os.path.split(configured_path)

    system_prompt = LLMGateway.render_prompt(prompt_file, {}, base_directory=prompt_dir)

    return await LLMGateway.acreate_structured_output(
        content, system_prompt, response_model
    )