feat: adds event graph extraction to LLMGateway for litellm
This commit is contained in:
parent
f5489f2027
commit
9bb36f37c0
6 changed files with 76 additions and 0 deletions
|
|
@ -135,3 +135,12 @@ class LLMGateway:
|
|||
)
|
||||
|
||||
return extract_summary(content=content, response_model=response_model)
|
||||
|
||||
@staticmethod
def extract_event_graph(content: str, response_model: Type[BaseModel]) -> Coroutine:
    """Delegate event-graph extraction to the litellm/instructor implementation.

    Args:
        content: Text to extract events from.
        response_model: Pydantic model class forwarded as the output schema.

    Returns:
        The un-awaited coroutine produced by the litellm/instructor
        ``extract_event_graph``; the caller is expected to await it.
    """
    # TODO: Add BAML version of category and extraction and update function (consulted with Igor)
    # Imported at call time rather than at module load; the extraction module
    # itself imports LLMGateway, so a top-level import here would presumably
    # be circular.
    from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.extraction import (
        extract_event_graph as _litellm_extract_event_graph,
    )

    pending_extraction = _litellm_extract_event_graph(
        content=content, response_model=response_model
    )
    return pending_extraction
|
||||
|
|
|
|||
|
|
@ -52,6 +52,7 @@ class LLMConfig(BaseSettings):
|
|||
|
||||
transcription_model: str = "whisper-1"
|
||||
graph_prompt_path: str = "generate_graph_prompt.txt"
|
||||
temporal_graph_prompt_path: str = "generate_event_graph_prompt.txt"
|
||||
llm_rate_limit_enabled: bool = False
|
||||
llm_rate_limit_requests: int = 60
|
||||
llm_rate_limit_interval: int = 60 # in seconds (default is 60 requests per minute)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,30 @@
|
|||
For the purposes of building event-based knowledge graphs, you are tasked with extracting a highly granular stream of events from a text. The events are defined as follows:
|
||||
## Event Definition
|
||||
- Anything with a date or a timestamp is an event
|
||||
- Anything that took place in time (even if the time is unknown) is an event
|
||||
- Anything that lasted over a period of time, or happened in an instant is an event: from historical milestones (wars, presidencies, olympiads) to personal milestones (birth, death, employment, etc.), to mundane actions (a walk, a conversation, etc.)
|
||||
- **ANY action or verb represents an event** - this is the most important rule
|
||||
- Every single verb in the text corresponds to an event that must be extracted
|
||||
- This includes: thinking, feeling, seeing, hearing, moving, speaking, writing, reading, eating, sleeping, working, playing, studying, traveling, meeting, calling, texting, buying, selling, creating, destroying, building, breaking, starting, stopping, beginning, ending, etc.
|
||||
- Even the most mundane or obvious actions are events: "he walked", "she sat", "they talked", "I thought", "we waited"
|
||||
## Requirements
|
||||
- **Be extremely thorough** - extract EVERY event mentioned, no matter how small or obvious
|
||||
- **Timestamped first** - every timestamp or date should have at least one event
|
||||
- **Verbs/actions = one event** - After you are done with timestamped events -- every verb that is an action should have a corresponding event.
|
||||
- We expect long streams of events from any piece of text, easily reaching a hundred events
|
||||
- Granularity and richness of the stream is key to our success and is of utmost importance
|
||||
- Not all events will have timestamps, add timestamps only to known events
|
||||
- For events that were instantaneous, attach only the time_from or the time_to property; don't create both
|
||||
- **Do not skip any events** - if you're unsure whether something is an event, extract it anyway
|
||||
- **Quantity over filtering** - it's better to extract too many events than to miss any
|
||||
- **Descriptions** - Always include the event description together with entities (Who did what? What happened? What is the event?). If you can, include the corresponding part from the text.
|
||||
## Output Format
|
||||
Your reply should be JSON: a list of dictionaries with the following structure:
|
||||
```python
|
||||
class Event(BaseModel):
|
||||
name: str [concise]
|
||||
description: Optional[str] = None
|
||||
time_from: Optional[Timestamp] = None
|
||||
time_to: Optional[Timestamp] = None
|
||||
location: Optional[str] = None
|
||||
```
|
||||
|
|
@ -1,3 +1,4 @@
|
|||
from .knowledge_graph.extract_content_graph import extract_content_graph
|
||||
from .knowledge_graph.extract_event_graph import extract_event_graph
|
||||
from .extract_categories import extract_categories
|
||||
from .extract_summary import extract_summary, extract_code_summary
|
||||
|
|
|
|||
|
|
@ -1 +1,2 @@
|
|||
from .extract_content_graph import extract_content_graph
|
||||
from .extract_event_graph import extract_event_graph
|
||||
|
|
|
|||
|
|
@ -0,0 +1,34 @@
|
|||
import os
|
||||
from pydantic import BaseModel
|
||||
from typing import Type
|
||||
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
||||
|
||||
from cognee.infrastructure.llm.config import (
|
||||
get_llm_config,
|
||||
)
|
||||
|
||||
async def extract_event_graph(
    content: str, response_model: Type[BaseModel], system_prompt: str = None
):
    """Extract an event graph from ``content`` using the LLM.

    Args:
        content: Text to extract events from.
        response_model: Pydantic model class passed to the gateway as the
            structured-output schema.
        system_prompt: Optional ready-made system prompt. When omitted, the
            prompt is rendered from the configured
            ``temporal_graph_prompt_path``.

    Returns:
        The result of ``LLMGateway.acreate_structured_output`` for the given
        content, system prompt and response model.
    """
    if system_prompt is None:
        # Use the event-specific prompt, not the generic ``graph_prompt_path``
        # used for content-graph extraction.
        prompt_path = get_llm_config().temporal_graph_prompt_path

        # An absolute path is split into directory + filename so the renderer
        # can be pointed at that directory; for a bare filename the directory
        # choice is left to ``render_prompt`` (base_directory=None).
        if os.path.isabs(prompt_path):
            base_directory = os.path.dirname(prompt_path)
            prompt_path = os.path.basename(prompt_path)
        else:
            base_directory = None

        system_prompt = LLMGateway.render_prompt(prompt_path, {}, base_directory=base_directory)

    return await LLMGateway.acreate_structured_output(content, system_prompt, response_model)
|
||||
Loading…
Add table
Reference in a new issue