From 1970106f1e7b21db97c8ba952e807b986086f56f Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Fri, 29 Aug 2025 16:07:18 +0200 Subject: [PATCH] chore: adds docstrings --- cognee/api/v1/cognify/cognify.py | 19 +++++++++++ .../extraction/extract_event_entities.py | 15 ++++++++- .../knowledge_graph/extract_event_graph.py | 19 ++++++++--- .../engine/utils/generate_event_datapoint.py | 17 +++++++++- .../utils/generate_timestamp_datapoint.py | 26 +++++++++++++-- .../temporal_graph/add_entities_to_event.py | 32 +++++++++++++++++-- cognee/tasks/temporal_graph/enrich_events.py | 14 +++++++- .../extract_events_and_entities.py | 14 +++++++- .../extract_knowledge_graph_from_events.py | 14 +++++++- examples/python/temporal_example.py | 1 - 10 files changed, 157 insertions(+), 14 deletions(-) diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index 31a357afa..e4f91b44c 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -247,6 +247,25 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's async def get_temporal_tasks( user: User = None, chunker=TextChunker, chunk_size: int = None ) -> list[Task]: + """ + Builds and returns a list of temporal processing tasks to be executed in sequence. + + The pipeline includes: + 1. Document classification. + 2. Dataset permission checks (requires "write" access). + 3. Document chunking with a specified or default chunk size. + 4. Event and timestamp extraction from chunks. + 5. Knowledge graph extraction from events. + 6. Batched insertion of data points. + + Args: + user (User, optional): The user requesting task execution, used for permission checks. + chunker (Callable, optional): A text chunking function/class to split documents. Defaults to TextChunker. + chunk_size (int, optional): Maximum token size per chunk. If not provided, uses system default. + + Returns: + list[Task]: A list of Task objects representing the temporal processing pipeline. + """ temporal_tasks = [ Task(classify_documents), Task(check_permissions_on_dataset, user=user, permissions=["write"]), diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py index ad33863b0..b1dd6910d 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py @@ -8,7 +8,20 @@ from cognee.infrastructure.llm.config import ( async def extract_event_entities(content: str, response_model: Type[BaseModel]): - """Extract event entities from content using LLM.""" + """ + Extracts event-related entities from the given content using an LLM with structured output. + + This function loads an event entity extraction prompt from the LLM configuration, + renders it into a system prompt, and queries the LLM to produce structured entities + that conform to the specified response model. + + Args: + content (str): The input text from which to extract event entities. + response_model (Type[BaseModel]): A Pydantic model defining the structure of the expected output. + + Returns: + BaseModel: An instance of the response_model populated with extracted event entities. + """ llm_config = get_llm_config() prompt_path = llm_config.event_entity_prompt_path diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py index 667e2eb7d..9a40ea855 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py @@ -8,10 +8,21 @@ from cognee.infrastructure.llm.config import ( ) -async def extract_event_graph( - content: str, response_model: Type[BaseModel], system_prompt: str = None -): - """Extract event graph from content using LLM.""" +async def extract_event_graph(content: str, response_model: Type[BaseModel]): + """ + Extracts an event graph from the given content using an LLM with a structured output format. + + This function loads a temporal graph extraction prompt from the LLM configuration, + renders it as a system prompt, and queries the LLM to produce a structured event + graph matching the specified response model. + + Args: + content (str): The input text from which to extract the event graph. + response_model (Type[BaseModel]): A Pydantic model defining the structure of the expected output. + + Returns: + BaseModel: An instance of the response_model populated with the extracted event graph. + """ llm_config = get_llm_config() diff --git a/cognee/modules/engine/utils/generate_event_datapoint.py b/cognee/modules/engine/utils/generate_event_datapoint.py index cc56763ae..7768b06ac 100644 --- a/cognee/modules/engine/utils/generate_event_datapoint.py +++ b/cognee/modules/engine/utils/generate_event_datapoint.py @@ -3,7 +3,22 @@ from cognee.modules.engine.utils.generate_timestamp_datapoint import generate_ti def generate_event_datapoint(event) -> Event: - """Create an Event datapoint from an event model.""" + """ + Generates an Event datapoint from a given event model, including temporal metadata if available. + + The function maps the basic attributes (name, description, location) from the input event + and enriches them with temporal information. If start and end times are provided, an + Interval is created. If only one timestamp is available, it is added directly. Temporal + information is also appended to the event description for context. + + Args: + event: An event model instance containing attributes such as name, description, + location, time_from, and time_to. + + Returns: + Event: A structured Event object with name, description, location, and enriched + temporal details. + """ # Base event data event_data = { "name": event.name, diff --git a/cognee/modules/engine/utils/generate_timestamp_datapoint.py b/cognee/modules/engine/utils/generate_timestamp_datapoint.py index 6f2cdf6d1..b078e161e 100644 --- a/cognee/modules/engine/utils/generate_timestamp_datapoint.py +++ b/cognee/modules/engine/utils/generate_timestamp_datapoint.py @@ -4,7 +4,21 @@ from cognee.modules.engine.utils import generate_node_id def generate_timestamp_datapoint(ts: Timestamp) -> Timestamp: - """Create a Timestamp datapoint from a Timestamp model.""" + """ + Generates a normalized Timestamp datapoint from a given Timestamp model. + + The function converts the provided timestamp into an integer representation, + constructs a human-readable string format, and creates a new Timestamp object + with a unique identifier. + + Args: + ts (Timestamp): The input Timestamp model containing date and time components. + + Returns: + Timestamp: A new Timestamp object with a generated ID, integer representation, + original components, and formatted string. + """ + time_at = date_to_int(ts) timestamp_str = ( f"{ts.year:04d}-{ts.month:02d}-{ts.day:02d} {ts.hour:02d}:{ts.minute:02d}:{ts.second:02d}" @@ -23,7 +37,15 @@ def generate_timestamp_datapoint(ts: Timestamp) -> Timestamp: def date_to_int(ts: Timestamp) -> int: - """Convert timestamp to integer milliseconds.""" + """ + Converts a Timestamp model into an integer representation in milliseconds since the Unix epoch (UTC). + + Args: + ts (Timestamp): The input Timestamp model containing year, month, day, hour, minute, and second. + + Returns: + int: The UTC timestamp in milliseconds since January 1, 1970. + """ dt = datetime(ts.year, ts.month, ts.day, ts.hour, ts.minute, ts.second, tzinfo=timezone.utc) time = int(dt.timestamp() * 1000) return time diff --git a/cognee/tasks/temporal_graph/add_entities_to_event.py b/cognee/tasks/temporal_graph/add_entities_to_event.py index 2cb4b1425..8c1146a9e 100644 --- a/cognee/tasks/temporal_graph/add_entities_to_event.py +++ b/cognee/tasks/temporal_graph/add_entities_to_event.py @@ -7,7 +7,23 @@ from cognee.modules.engine.utils import generate_node_id, generate_node_name def add_entities_to_event(event: Event, event_with_entities: EventWithEntities) -> None: - """Add entities to event via attributes field.""" + """ + Adds extracted entities to an Event object by populating its attributes field. + + For each attribute in the provided EventWithEntities, the function ensures that + the corresponding entity type exists, creates an Entity node with metadata, and + links it to the event via an Edge representing the relationship. Entities are + cached by type to avoid duplication. + + Args: + event (Event): The target Event object to enrich with entities. + event_with_entities (EventWithEntities): An event model containing extracted + attributes with entity, type, and relationship metadata. + + Returns: + None + """ + if not event_with_entities.attributes: return @@ -41,7 +57,19 @@ def add_entities_to_event(event: Event, event_with_entities: EventWithEntities) def get_or_create_entity_type(entity_types: dict, entity_type_name: str) -> EntityType: - """Get existing entity type or create new one.""" + """ + Retrieves an existing EntityType from the cache or creates a new one if it does not exist. + + If the given entity type name is not already in the cache, a new EntityType is generated + with a unique ID, normalized name, and description, then added to the cache. + + Args: + entity_types (dict): A cache mapping entity type names to EntityType objects. + entity_type_name (str): The name of the entity type to retrieve or create. + + Returns: + EntityType: The existing or newly created EntityType object. + """ if entity_type_name not in entity_types: type_id = generate_node_id(entity_type_name) type_name = generate_node_name(entity_type_name) diff --git a/cognee/tasks/temporal_graph/enrich_events.py b/cognee/tasks/temporal_graph/enrich_events.py index bedd642eb..ef93da462 100644 --- a/cognee/tasks/temporal_graph/enrich_events.py +++ b/cognee/tasks/temporal_graph/enrich_events.py @@ -6,7 +6,19 @@ from cognee.tasks.temporal_graph.models import EventWithEntities, EventEntityLis async def enrich_events(events: List[Event]) -> List[EventWithEntities]: - """Extract entities from events and return enriched events.""" + """ + Enriches a list of events by extracting entities using an LLM. + + The function serializes event data into JSON, sends it to the LLM for + entity extraction, and returns enriched events with associated entities. + + Args: + events (List[Event]): A list of Event objects to be enriched. + + Returns: + List[EventWithEntities]: A list of events augmented with extracted entities. + """ + import json # Convert events to JSON format for LLM processing diff --git a/cognee/tasks/temporal_graph/extract_events_and_entities.py b/cognee/tasks/temporal_graph/extract_events_and_entities.py index de0cdd601..8babc0ee5 100644 --- a/cognee/tasks/temporal_graph/extract_events_and_entities.py +++ b/cognee/tasks/temporal_graph/extract_events_and_entities.py @@ -7,7 +7,19 @@ from cognee.modules.engine.utils.generate_event_datapoint import generate_event_ async def extract_events_and_timestamps(data_chunks: List[DocumentChunk]) -> List[DocumentChunk]: - """Extracts events and entities from a chunk of documents.""" + """ + Extracts events and their timestamps from document chunks using an LLM. + + Each document chunk is processed with the event graph extractor to identify events. + The extracted events are converted into Event datapoints and appended to the + chunk's `contains` list. + + Args: + data_chunks (List[DocumentChunk]): A list of document chunks containing text to process. + + Returns: + List[DocumentChunk]: The same list of document chunks, enriched with extracted Event datapoints. + """ events = await asyncio.gather( *[LLMGateway.extract_event_graph(chunk.text, EventList) for chunk in data_chunks] ) diff --git a/cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py b/cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py index 8cbcc3c22..e50fa4ae2 100644 --- a/cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py +++ b/cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py @@ -8,7 +8,19 @@ from cognee.tasks.temporal_graph.add_entities_to_event import add_entities_to_ev async def extract_knowledge_graph_from_events( data_chunks: List[DocumentChunk], ) -> List[DocumentChunk]: - """Extract events from chunks and enrich them with entities.""" + """ + Extracts events from document chunks and enriches them with entities to form a knowledge graph. + + The function collects all Event objects from the given document chunks, + uses an LLM to extract and attach related entities, and updates the events + with these enriched attributes. + + Args: + data_chunks (List[DocumentChunk]): A list of document chunks containing extracted events. + + Returns: + List[DocumentChunk]: The same list of document chunks, with their events enriched by entities. + """ # Extract events from chunks all_events = [] for chunk in data_chunks: diff --git a/examples/python/temporal_example.py b/examples/python/temporal_example.py index 4b54b72ed..c79e3c1db 100644 --- a/examples/python/temporal_example.py +++ b/examples/python/temporal_example.py @@ -61,7 +61,6 @@ biography_2 = """ - Gyldendals Endowment ( 1935 ) - Dobloug Prize ( 1951 ) - Mads Wiel Nygaards legat ( 1961 ) - """