chore: adds docstrings

This commit is contained in:
hajdul88 2025-08-29 16:07:18 +02:00
parent 90faf22dd0
commit 1970106f1e
10 changed files with 157 additions and 14 deletions

View file

@ -247,6 +247,25 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
async def get_temporal_tasks(
user: User = None, chunker=TextChunker, chunk_size: int = None
) -> list[Task]:
"""
Builds and returns a list of temporal processing tasks to be executed in sequence.
The pipeline includes:
1. Document classification.
2. Dataset permission checks (requires "write" access).
3. Document chunking with a specified or default chunk size.
4. Event and timestamp extraction from chunks.
5. Knowledge graph extraction from events.
6. Batched insertion of data points.
Args:
user (User, optional): The user requesting task execution, used for permission checks.
chunker (Callable, optional): A text chunking function/class to split documents. Defaults to TextChunker.
chunk_size (int, optional): Maximum token size per chunk. If not provided, uses system default.
Returns:
list[Task]: A list of Task objects representing the temporal processing pipeline.
"""
temporal_tasks = [
Task(classify_documents),
Task(check_permissions_on_dataset, user=user, permissions=["write"]),

View file

@ -8,7 +8,20 @@ from cognee.infrastructure.llm.config import (
async def extract_event_entities(content: str, response_model: Type[BaseModel]):
"""Extract event entities from content using LLM."""
"""
Extracts event-related entities from the given content using an LLM with structured output.
This function loads an event entity extraction prompt from the LLM configuration,
renders it into a system prompt, and queries the LLM to produce structured entities
that conform to the specified response model.
Args:
content (str): The input text from which to extract event entities.
response_model (Type[BaseModel]): A Pydantic model defining the structure of the expected output.
Returns:
BaseModel: An instance of the response_model populated with extracted event entities.
"""
llm_config = get_llm_config()
prompt_path = llm_config.event_entity_prompt_path

View file

@ -8,10 +8,21 @@ from cognee.infrastructure.llm.config import (
)
async def extract_event_graph(
content: str, response_model: Type[BaseModel], system_prompt: str = None
):
"""Extract event graph from content using LLM."""
async def extract_event_graph(content: str, response_model: Type[BaseModel]):
"""
Extracts an event graph from the given content using an LLM with a structured output format.
This function loads a temporal graph extraction prompt from the LLM configuration,
renders it as a system prompt, and queries the LLM to produce a structured event
graph matching the specified response model.
Args:
content (str): The input text from which to extract the event graph.
response_model (Type[BaseModel]): A Pydantic model defining the structure of the expected output.
Returns:
BaseModel: An instance of the response_model populated with the extracted event graph.
"""
llm_config = get_llm_config()

View file

@ -3,7 +3,22 @@ from cognee.modules.engine.utils.generate_timestamp_datapoint import generate_ti
def generate_event_datapoint(event) -> Event:
"""Create an Event datapoint from an event model."""
"""
Generates an Event datapoint from a given event model, including temporal metadata if available.
The function maps the basic attributes (name, description, location) from the input event
and enriches them with temporal information. If start and end times are provided, an
Interval is created. If only one timestamp is available, it is added directly. Temporal
information is also appended to the event description for context.
Args:
event: An event model instance containing attributes such as name, description,
location, time_from, and time_to.
Returns:
Event: A structured Event object with name, description, location, and enriched
temporal details.
"""
# Base event data
event_data = {
"name": event.name,

View file

@ -4,7 +4,21 @@ from cognee.modules.engine.utils import generate_node_id
def generate_timestamp_datapoint(ts: Timestamp) -> Timestamp:
"""Create a Timestamp datapoint from a Timestamp model."""
"""
Generates a normalized Timestamp datapoint from a given Timestamp model.
The function converts the provided timestamp into an integer representation,
constructs a human-readable string format, and creates a new Timestamp object
with a unique identifier.
Args:
ts (Timestamp): The input Timestamp model containing date and time components.
Returns:
Timestamp: A new Timestamp object with a generated ID, integer representation,
original components, and formatted string.
"""
time_at = date_to_int(ts)
timestamp_str = (
f"{ts.year:04d}-{ts.month:02d}-{ts.day:02d} {ts.hour:02d}:{ts.minute:02d}:{ts.second:02d}"
@ -23,7 +37,15 @@ def generate_timestamp_datapoint(ts: Timestamp) -> Timestamp:
def date_to_int(ts: Timestamp) -> int:
    """
    Converts a Timestamp model into integer milliseconds since the Unix epoch (UTC).

    The date and time components are interpreted as a UTC wall-clock time (the
    datetime is built with ``tzinfo=timezone.utc``), so the result does not depend
    on the local timezone of the machine running the code.

    Args:
        ts (Timestamp): The input Timestamp model; its year, month, day, hour,
            minute, and second attributes are read.

    Returns:
        int: The UTC timestamp in milliseconds since January 1, 1970. Moments
            before the epoch yield negative values.

    Raises:
        ValueError: If the components do not form a valid calendar date/time
            (raised by the ``datetime`` constructor).
    """
    dt = datetime(ts.year, ts.month, ts.day, ts.hour, ts.minute, ts.second, tzinfo=timezone.utc)
    # Named epoch_ms rather than `time` so the stdlib module name is not shadowed.
    epoch_ms = int(dt.timestamp() * 1000)
    return epoch_ms

View file

@ -7,7 +7,23 @@ from cognee.modules.engine.utils import generate_node_id, generate_node_name
def add_entities_to_event(event: Event, event_with_entities: EventWithEntities) -> None:
"""Add entities to event via attributes field."""
"""
Adds extracted entities to an Event object by populating its attributes field.
For each attribute in the provided EventWithEntities, the function ensures that
the corresponding entity type exists, creates an Entity node with metadata, and
links it to the event via an Edge representing the relationship. Entities are
cached by type to avoid duplication.
Args:
event (Event): The target Event object to enrich with entities.
event_with_entities (EventWithEntities): An event model containing extracted
attributes with entity, type, and relationship metadata.
Returns:
None
"""
if not event_with_entities.attributes:
return
@ -41,7 +57,19 @@ def add_entities_to_event(event: Event, event_with_entities: EventWithEntities)
def get_or_create_entity_type(entity_types: dict, entity_type_name: str) -> EntityType:
"""Get existing entity type or create new one."""
"""
Retrieves an existing EntityType from the cache or creates a new one if it does not exist.
If the given entity type name is not already in the cache, a new EntityType is generated
with a unique ID, normalized name, and description, then added to the cache.
Args:
entity_types (dict): A cache mapping entity type names to EntityType objects.
entity_type_name (str): The name of the entity type to retrieve or create.
Returns:
EntityType: The existing or newly created EntityType object.
"""
if entity_type_name not in entity_types:
type_id = generate_node_id(entity_type_name)
type_name = generate_node_name(entity_type_name)

View file

@ -6,7 +6,19 @@ from cognee.tasks.temporal_graph.models import EventWithEntities, EventEntityLis
async def enrich_events(events: List[Event]) -> List[EventWithEntities]:
"""Extract entities from events and return enriched events."""
"""
Enriches a list of events by extracting entities using an LLM.
The function serializes event data into JSON, sends it to the LLM for
entity extraction, and returns enriched events with associated entities.
Args:
events (List[Event]): A list of Event objects to be enriched.
Returns:
List[EventWithEntities]: A list of events augmented with extracted entities.
"""
import json
# Convert events to JSON format for LLM processing

View file

@ -7,7 +7,19 @@ from cognee.modules.engine.utils.generate_event_datapoint import generate_event_
async def extract_events_and_timestamps(data_chunks: List[DocumentChunk]) -> List[DocumentChunk]:
"""Extracts events and entities from a chunk of documents."""
"""
Extracts events and their timestamps from document chunks using an LLM.
Each document chunk is processed with the event graph extractor to identify events.
The extracted events are converted into Event datapoints and appended to the
chunk's `contains` list.
Args:
data_chunks (List[DocumentChunk]): A list of document chunks containing text to process.
Returns:
List[DocumentChunk]: The same list of document chunks, enriched with extracted Event datapoints.
"""
events = await asyncio.gather(
*[LLMGateway.extract_event_graph(chunk.text, EventList) for chunk in data_chunks]
)

View file

@ -8,7 +8,19 @@ from cognee.tasks.temporal_graph.add_entities_to_event import add_entities_to_ev
async def extract_knowledge_graph_from_events(
data_chunks: List[DocumentChunk],
) -> List[DocumentChunk]:
"""Extract events from chunks and enrich them with entities."""
"""
Extracts events from document chunks and enriches them with entities to form a knowledge graph.
The function collects all Event objects from the given document chunks,
uses an LLM to extract and attach related entities, and updates the events
with these enriched attributes.
Args:
data_chunks (List[DocumentChunk]): A list of document chunks containing extracted events.
Returns:
List[DocumentChunk]: The same list of document chunks, with their events enriched by entities.
"""
# Extract events from chunks
all_events = []
for chunk in data_chunks:

View file

@ -61,7 +61,6 @@ biography_2 = """
- Gyldendals Endowment ( 1935 )
- Dobloug Prize ( 1951 )
- Mads Wiel Nygaards legat ( 1961 )
"""