chore: adds docstrings
This commit is contained in:
parent
90faf22dd0
commit
1970106f1e
10 changed files with 157 additions and 14 deletions
|
|
@ -247,6 +247,25 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
|
|||
async def get_temporal_tasks(
|
||||
user: User = None, chunker=TextChunker, chunk_size: int = None
|
||||
) -> list[Task]:
|
||||
"""
|
||||
Builds and returns a list of temporal processing tasks to be executed in sequence.
|
||||
|
||||
The pipeline includes:
|
||||
1. Document classification.
|
||||
2. Dataset permission checks (requires "write" access).
|
||||
3. Document chunking with a specified or default chunk size.
|
||||
4. Event and timestamp extraction from chunks.
|
||||
5. Knowledge graph extraction from events.
|
||||
6. Batched insertion of data points.
|
||||
|
||||
Args:
|
||||
user (User, optional): The user requesting task execution, used for permission checks.
|
||||
chunker (Callable, optional): A text chunking function/class to split documents. Defaults to TextChunker.
|
||||
chunk_size (int, optional): Maximum token size per chunk. If not provided, uses system default.
|
||||
|
||||
Returns:
|
||||
list[Task]: A list of Task objects representing the temporal processing pipeline.
|
||||
"""
|
||||
temporal_tasks = [
|
||||
Task(classify_documents),
|
||||
Task(check_permissions_on_dataset, user=user, permissions=["write"]),
|
||||
|
|
|
|||
|
|
@ -8,7 +8,20 @@ from cognee.infrastructure.llm.config import (
|
|||
|
||||
|
||||
async def extract_event_entities(content: str, response_model: Type[BaseModel]):
|
||||
"""Extract event entities from content using LLM."""
|
||||
"""
|
||||
Extracts event-related entities from the given content using an LLM with structured output.
|
||||
|
||||
This function loads an event entity extraction prompt from the LLM configuration,
|
||||
renders it into a system prompt, and queries the LLM to produce structured entities
|
||||
that conform to the specified response model.
|
||||
|
||||
Args:
|
||||
content (str): The input text from which to extract event entities.
|
||||
response_model (Type[BaseModel]): A Pydantic model defining the structure of the expected output.
|
||||
|
||||
Returns:
|
||||
BaseModel: An instance of the response_model populated with extracted event entities.
|
||||
"""
|
||||
llm_config = get_llm_config()
|
||||
|
||||
prompt_path = llm_config.event_entity_prompt_path
|
||||
|
|
|
|||
|
|
@ -8,10 +8,21 @@ from cognee.infrastructure.llm.config import (
|
|||
)
|
||||
|
||||
|
||||
async def extract_event_graph(
|
||||
content: str, response_model: Type[BaseModel], system_prompt: str = None
|
||||
):
|
||||
"""Extract event graph from content using LLM."""
|
||||
async def extract_event_graph(content: str, response_model: Type[BaseModel]):
|
||||
"""
|
||||
Extracts an event graph from the given content using an LLM with a structured output format.
|
||||
|
||||
This function loads a temporal graph extraction prompt from the LLM configuration,
|
||||
renders it as a system prompt, and queries the LLM to produce a structured event
|
||||
graph matching the specified response model.
|
||||
|
||||
Args:
|
||||
content (str): The input text from which to extract the event graph.
|
||||
response_model (Type[BaseModel]): A Pydantic model defining the structure of the expected output.
|
||||
|
||||
Returns:
|
||||
BaseModel: An instance of the response_model populated with the extracted event graph.
|
||||
"""
|
||||
|
||||
llm_config = get_llm_config()
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,22 @@ from cognee.modules.engine.utils.generate_timestamp_datapoint import generate_ti
|
|||
|
||||
|
||||
def generate_event_datapoint(event) -> Event:
|
||||
"""Create an Event datapoint from an event model."""
|
||||
"""
|
||||
Generates an Event datapoint from a given event model, including temporal metadata if available.
|
||||
|
||||
The function maps the basic attributes (name, description, location) from the input event
|
||||
and enriches them with temporal information. If start and end times are provided, an
|
||||
Interval is created. If only one timestamp is available, it is added directly. Temporal
|
||||
information is also appended to the event description for context.
|
||||
|
||||
Args:
|
||||
event: An event model instance containing attributes such as name, description,
|
||||
location, time_from, and time_to.
|
||||
|
||||
Returns:
|
||||
Event: A structured Event object with name, description, location, and enriched
|
||||
temporal details.
|
||||
"""
|
||||
# Base event data
|
||||
event_data = {
|
||||
"name": event.name,
|
||||
|
|
|
|||
|
|
@ -4,7 +4,21 @@ from cognee.modules.engine.utils import generate_node_id
|
|||
|
||||
|
||||
def generate_timestamp_datapoint(ts: Timestamp) -> Timestamp:
|
||||
"""Create a Timestamp datapoint from a Timestamp model."""
|
||||
"""
|
||||
Generates a normalized Timestamp datapoint from a given Timestamp model.
|
||||
|
||||
The function converts the provided timestamp into an integer representation,
|
||||
constructs a human-readable string format, and creates a new Timestamp object
|
||||
with a unique identifier.
|
||||
|
||||
Args:
|
||||
ts (Timestamp): The input Timestamp model containing date and time components.
|
||||
|
||||
Returns:
|
||||
Timestamp: A new Timestamp object with a generated ID, integer representation,
|
||||
original components, and formatted string.
|
||||
"""
|
||||
|
||||
time_at = date_to_int(ts)
|
||||
timestamp_str = (
|
||||
f"{ts.year:04d}-{ts.month:02d}-{ts.day:02d} {ts.hour:02d}:{ts.minute:02d}:{ts.second:02d}"
|
||||
|
|
@ -23,7 +37,15 @@ def generate_timestamp_datapoint(ts: Timestamp) -> Timestamp:
|
|||
|
||||
|
||||
def date_to_int(ts: Timestamp) -> int:
    """
    Converts a Timestamp model into integer milliseconds since the Unix epoch (UTC).

    The date and time components are interpreted as a UTC wall-clock time, so the
    result does not depend on the local timezone of the machine running the code.

    Args:
        ts (Timestamp): The input Timestamp model containing year, month, day, hour, minute, and second.

    Returns:
        int: The UTC timestamp in milliseconds since January 1, 1970. The value is
            negative for instants before the epoch.

    Raises:
        ValueError: If the components do not form a valid calendar date/time
            (raised by the ``datetime`` constructor).
    """
    # Attach UTC explicitly: a naive datetime would be interpreted in local time by timestamp().
    dt = datetime(ts.year, ts.month, ts.day, ts.hour, ts.minute, ts.second, tzinfo=timezone.utc)
    # timestamp() returns float seconds; scale to milliseconds and convert to int.
    return int(dt.timestamp() * 1000)
|
||||
|
|
|
|||
|
|
@ -7,7 +7,23 @@ from cognee.modules.engine.utils import generate_node_id, generate_node_name
|
|||
|
||||
|
||||
def add_entities_to_event(event: Event, event_with_entities: EventWithEntities) -> None:
|
||||
"""Add entities to event via attributes field."""
|
||||
"""
|
||||
Adds extracted entities to an Event object by populating its attributes field.
|
||||
|
||||
For each attribute in the provided EventWithEntities, the function ensures that
|
||||
the corresponding entity type exists, creates an Entity node with metadata, and
|
||||
links it to the event via an Edge representing the relationship. Entities are
|
||||
cached by type to avoid duplication.
|
||||
|
||||
Args:
|
||||
event (Event): The target Event object to enrich with entities.
|
||||
event_with_entities (EventWithEntities): An event model containing extracted
|
||||
attributes with entity, type, and relationship metadata.
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
|
||||
if not event_with_entities.attributes:
|
||||
return
|
||||
|
||||
|
|
@ -41,7 +57,19 @@ def add_entities_to_event(event: Event, event_with_entities: EventWithEntities)
|
|||
|
||||
|
||||
def get_or_create_entity_type(entity_types: dict, entity_type_name: str) -> EntityType:
|
||||
"""Get existing entity type or create new one."""
|
||||
"""
|
||||
Retrieves an existing EntityType from the cache or creates a new one if it does not exist.
|
||||
|
||||
If the given entity type name is not already in the cache, a new EntityType is generated
|
||||
with a unique ID, normalized name, and description, then added to the cache.
|
||||
|
||||
Args:
|
||||
entity_types (dict): A cache mapping entity type names to EntityType objects.
|
||||
entity_type_name (str): The name of the entity type to retrieve or create.
|
||||
|
||||
Returns:
|
||||
EntityType: The existing or newly created EntityType object.
|
||||
"""
|
||||
if entity_type_name not in entity_types:
|
||||
type_id = generate_node_id(entity_type_name)
|
||||
type_name = generate_node_name(entity_type_name)
|
||||
|
|
|
|||
|
|
@ -6,7 +6,19 @@ from cognee.tasks.temporal_graph.models import EventWithEntities, EventEntityLis
|
|||
|
||||
|
||||
async def enrich_events(events: List[Event]) -> List[EventWithEntities]:
|
||||
"""Extract entities from events and return enriched events."""
|
||||
"""
|
||||
Enriches a list of events by extracting entities using an LLM.
|
||||
|
||||
The function serializes event data into JSON, sends it to the LLM for
|
||||
entity extraction, and returns enriched events with associated entities.
|
||||
|
||||
Args:
|
||||
events (List[Event]): A list of Event objects to be enriched.
|
||||
|
||||
Returns:
|
||||
List[EventWithEntities]: A list of events augmented with extracted entities.
|
||||
"""
|
||||
|
||||
import json
|
||||
|
||||
# Convert events to JSON format for LLM processing
|
||||
|
|
|
|||
|
|
@ -7,7 +7,19 @@ from cognee.modules.engine.utils.generate_event_datapoint import generate_event_
|
|||
|
||||
|
||||
async def extract_events_and_timestamps(data_chunks: List[DocumentChunk]) -> List[DocumentChunk]:
|
||||
"""Extracts events and entities from a chunk of documents."""
|
||||
"""
|
||||
Extracts events and their timestamps from document chunks using an LLM.
|
||||
|
||||
Each document chunk is processed with the event graph extractor to identify events.
|
||||
The extracted events are converted into Event datapoints and appended to the
|
||||
chunk's `contains` list.
|
||||
|
||||
Args:
|
||||
data_chunks (List[DocumentChunk]): A list of document chunks containing text to process.
|
||||
|
||||
Returns:
|
||||
List[DocumentChunk]: The same list of document chunks, enriched with extracted Event datapoints.
|
||||
"""
|
||||
events = await asyncio.gather(
|
||||
*[LLMGateway.extract_event_graph(chunk.text, EventList) for chunk in data_chunks]
|
||||
)
|
||||
|
|
|
|||
|
|
@ -8,7 +8,19 @@ from cognee.tasks.temporal_graph.add_entities_to_event import add_entities_to_ev
|
|||
async def extract_knowledge_graph_from_events(
|
||||
data_chunks: List[DocumentChunk],
|
||||
) -> List[DocumentChunk]:
|
||||
"""Extract events from chunks and enrich them with entities."""
|
||||
"""
|
||||
Extracts events from document chunks and enriches them with entities to form a knowledge graph.
|
||||
|
||||
The function collects all Event objects from the given document chunks,
|
||||
uses an LLM to extract and attach related entities, and updates the events
|
||||
with these enriched attributes.
|
||||
|
||||
Args:
|
||||
data_chunks (List[DocumentChunk]): A list of document chunks containing extracted events.
|
||||
|
||||
Returns:
|
||||
List[DocumentChunk]: The same list of document chunks, with their events enriched by entities.
|
||||
"""
|
||||
# Extract events from chunks
|
||||
all_events = []
|
||||
for chunk in data_chunks:
|
||||
|
|
|
|||
|
|
@ -61,7 +61,6 @@ biography_2 = """
|
|||
- Gyldendals Endowment ( 1935 )
|
||||
- Dobloug Prize ( 1951 )
|
||||
- Mads Wiel Nygaards legat ( 1961 )
|
||||
|
||||
"""
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue