From 9bb36f37c0edb1a89b359cdb87ac142994840654 Mon Sep 17 00:00:00 2001
From: hajdul88 <52442977+hajdul88@users.noreply.github.com>
Date: Wed, 27 Aug 2025 15:17:32 +0200
Subject: [PATCH] feat: adds event graph extraction to LLMGateway for litellm

---
 cognee/infrastructure/llm/LLMGateway.py       |  9 +++++
 cognee/infrastructure/llm/config.py           |  1 +
 .../prompts/generate_event_graph_prompt.txt   | 30 ++++++++++++++++
 .../litellm_instructor/extraction/__init__.py |  1 +
 .../extraction/knowledge_graph/__init__.py    |  1 +
 .../knowledge_graph/extract_event_graph.py    | 34 +++++++++++++++++++
 6 files changed, 76 insertions(+)
 create mode 100644 cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt
 create mode 100644 cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py

diff --git a/cognee/infrastructure/llm/LLMGateway.py b/cognee/infrastructure/llm/LLMGateway.py
index a88cfb85d..d8364e9ef 100644
--- a/cognee/infrastructure/llm/LLMGateway.py
+++ b/cognee/infrastructure/llm/LLMGateway.py
@@ -135,3 +135,12 @@ class LLMGateway:
         )
 
         return extract_summary(content=content, response_model=response_model)
+
+    @staticmethod
+    def extract_event_graph(content: str, response_model: Type[BaseModel]) -> Coroutine:
+        # TODO: Add BAML version of event graph extraction and update this function (consulted with Igor)
+        from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.extraction import (
+            extract_event_graph,
+        )
+
+        return extract_event_graph(content=content, response_model=response_model)
diff --git a/cognee/infrastructure/llm/config.py b/cognee/infrastructure/llm/config.py
index de2e2168e..199ede986 100644
--- a/cognee/infrastructure/llm/config.py
+++ b/cognee/infrastructure/llm/config.py
@@ -52,6 +52,7 @@ class LLMConfig(BaseSettings):
     transcription_model: str = "whisper-1"
 
     graph_prompt_path: str = "generate_graph_prompt.txt"
+    temporal_graph_prompt_path: str = "generate_event_graph_prompt.txt"
     llm_rate_limit_enabled: bool = False
     llm_rate_limit_requests: int = 60
     llm_rate_limit_interval: int = 60  # in seconds (default is 60 requests per minute)
diff --git a/cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt b/cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt
new file mode 100644
index 000000000..c81ae6d3d
--- /dev/null
+++ b/cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt
@@ -0,0 +1,30 @@
+For the purposes of building event-based knowledge graphs, you are tasked with extracting a highly granular stream of events from a text. The events are defined as follows:
+## Event Definition
+- Anything with a date or a timestamp is an event
+- Anything that took place in time (even if the time is unknown) is an event
+- Anything that lasted over a period of time, or happened in an instant, is an event: from historical milestones (wars, presidencies, olympiads) to personal milestones (birth, death, employment, etc.), to mundane actions (a walk, a conversation, etc.)
+- **ANY action or verb represents an event** - this is the most important rule
+- Every single verb in the text corresponds to an event that must be extracted
+- This includes: thinking, feeling, seeing, hearing, moving, speaking, writing, reading, eating, sleeping, working, playing, studying, traveling, meeting, calling, texting, buying, selling, creating, destroying, building, breaking, starting, stopping, beginning, ending, etc.
+- Even the most mundane or obvious actions are events: "he walked", "she sat", "they talked", "I thought", "we waited"
+## Requirements
+- **Be extremely thorough** - extract EVERY event mentioned, no matter how small or obvious
+- **Timestamped events first** - every timestamp or date should have at least one event
+- **Verbs/actions = one event** - once you are done with the timestamped events, every verb that describes an action should have a corresponding event
+- We expect long streams of events from any piece of text, easily reaching a hundred events
+- Granularity and richness of the stream is key to our success and is of utmost importance
+- Not all events will have timestamps; add timestamps only to events whose time is known
+- For events that were instantaneous, attach just the time_from or the time_to property; don't create both
+- **Do not skip any events** - if you're unsure whether something is an event, extract it anyway
+- **Quantity over filtering** - it's better to extract too many events than to miss any
+- **Descriptions** - always include the event description together with the entities involved (who did what? what happened? what is the event?). If you can, include the corresponding part of the text.
+## Output Format
+Your reply should be JSON: a list of dictionaries with the following structure:
+```python
+class Event(BaseModel):
+    name: str  # concise
+    description: Optional[str] = None
+    time_from: Optional[Timestamp] = None
+    time_to: Optional[Timestamp] = None
+    location: Optional[str] = None
+```
\ No newline at end of file
diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py
index 3d4edab27..002246a77 100644
--- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py
+++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py
@@ -1,3 +1,4 @@
 from .knowledge_graph.extract_content_graph import extract_content_graph
+from .knowledge_graph.extract_event_graph import extract_event_graph
 from .extract_categories import extract_categories
 from .extract_summary import extract_summary, extract_code_summary
diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/__init__.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/__init__.py
index 0939b2b34..f758b8909 100644
--- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/__init__.py
+++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/__init__.py
@@ -1 +1,2 @@
 from .extract_content_graph import extract_content_graph
+from .extract_event_graph import extract_event_graph
diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py
new file mode 100644
index 000000000..2a0c0cab8
--- /dev/null
+++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py
@@ -0,0 +1,34 @@
+import os
+from pydantic import BaseModel
+from typing import Optional, Type
+from cognee.infrastructure.llm.LLMGateway import LLMGateway
+from cognee.infrastructure.llm.config import get_llm_config
+
+
+async def extract_event_graph(
+    content: str, response_model: Type[BaseModel], system_prompt: Optional[str] = None
+):
+    """Extract an event graph from content using the configured LLM."""
+
+    llm_config = get_llm_config()
+
+    prompt_path = llm_config.temporal_graph_prompt_path
+
+    # Check if the prompt path is an absolute path or just a filename
+    if os.path.isabs(prompt_path):
+        # directory containing the file
+        base_directory = os.path.dirname(prompt_path)
+        # just the filename itself
+        prompt_path = os.path.basename(prompt_path)
+    else:
+        base_directory = None
+
+    # Render the default event graph prompt only when no system prompt was provided
+    if system_prompt is None:
+        system_prompt = LLMGateway.render_prompt(prompt_path, {}, base_directory=base_directory)
+
+    content_graph = await LLMGateway.acreate_structured_output(
+        content, system_prompt, response_model
+    )
+
+    return content_graph
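
Hypothetical usage sketch (not part of the patch): the snippet below shows how the new LLMGateway.extract_event_graph entry point might be exercised once this change lands. The EventList wrapper model and the string-typed timestamp fields are illustrative assumptions; the patch only sketches the Event shape inside the prompt text, so the real response model may differ.

```python
# Hypothetical usage sketch for LLMGateway.extract_event_graph (added in this patch).
# EventList and the string-typed timestamps are illustrative assumptions; the patch
# only describes the Event shape inside the prompt, so real models may differ.
import asyncio
from typing import List, Optional

from pydantic import BaseModel

from cognee.infrastructure.llm.LLMGateway import LLMGateway


class Event(BaseModel):
    name: str  # concise event name
    description: Optional[str] = None
    time_from: Optional[str] = None  # stand-in for the prompt's Timestamp type
    time_to: Optional[str] = None
    location: Optional[str] = None


class EventList(BaseModel):
    events: List[Event]


async def main():
    text = "On 27 August 2025 the patch was merged, and afterwards the team celebrated."
    # extract_event_graph returns a coroutine, so it must be awaited
    result = await LLMGateway.extract_event_graph(content=text, response_model=EventList)
    for event in result.events:
        print(event.name, event.time_from)


if __name__ == "__main__":
    asyncio.run(main())
```

Passing a wrapper model rather than List[Event] directly mirrors how structured-output frameworks such as instructor typically expect a single BaseModel as the response type.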