From bb68d6a0dff8caedf12161dc1eca7dbe5cf8c559 Mon Sep 17 00:00:00 2001
From: Daniel Molnar
Date: Tue, 27 May 2025 21:33:16 +0200
Subject: [PATCH] Docstring tasks. (#878)

## Description

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
---
 cognee/tasks/chunks/chunk_by_paragraph.py     |  23 ++-
 cognee/tasks/chunks/chunk_by_sentence.py      |  37 ++++-
 cognee/tasks/chunks/chunk_by_word.py          |  33 +++--
 .../code/enrich_dependency_graph_checker.py   |   8 +
 .../code/get_repo_dependency_graph_checker.py |   8 +
 .../tasks/completion/exceptions/exceptions.py |  15 ++
 cognee/tasks/documents/classify_documents.py  |  39 ++++-
 cognee/tasks/graph/infer_data_ontology.py     | 106 +++++++++++++-
 cognee/tasks/graph/models.py                  |  53 +++++++
 cognee/tasks/ingestion/get_dlt_destination.py |  12 +-
 cognee/tasks/ingestion/transform_data.py      |  20 +++
 .../repo_processor/get_local_dependencies.py  | 137 +++++++++++++++++-
 .../repo_processor/get_non_code_files.py      |  35 ++++-
 .../get_repo_file_dependencies.py             |  55 ++++++-
 cognee/tasks/summarization/mock_summary.py    |  13 ++
 cognee/tasks/summarization/models.py          |  18 +++
 cognee/tasks/summarization/summarize_text.py  |  23 +++
 .../temporal_awareness/graphiti_model.py      |  12 ++
 18 files changed, 610 insertions(+), 37 deletions(-)

diff --git a/cognee/tasks/chunks/chunk_by_paragraph.py b/cognee/tasks/chunks/chunk_by_paragraph.py
index d4b434eee..1668276c5 100644
--- a/cognee/tasks/chunks/chunk_by_paragraph.py
+++ b/cognee/tasks/chunks/chunk_by_paragraph.py
@@ -10,14 +10,23 @@ def chunk_by_paragraph(
     batch_paragraphs: bool = True,
 ) -> Iterator[Dict[str, Any]]:
     """
-    Chunks text by paragraph while preserving exact text reconstruction capability.
-    When chunks are joined with empty string "", they reproduce the original text exactly.
+    Chunk the input text by paragraph while enabling exact text reconstruction.
 
-    Notes:
-    - Tokenization is handled using our tokenization adapters, ensuring compatibility with the vector engine's embedding model.
-    - If `batch_paragraphs` is False, each paragraph will be yielded as a separate chunk.
-    - Handles cases where paragraphs exceed the specified token or word limits by splitting them as needed.
-    - Remaining text at the end of the input will be yielded as a final chunk.
+    This function divides the given text data into smaller chunks based on the specified
+    maximum chunk size. It ensures that when the generated chunks are concatenated, they
+    reproduce the original text accurately. The tokenization process is handled by adapters
+    compatible with the vector engine's embedding model, and the function can operate in
+    either batched or per-paragraph mode, based on the `batch_paragraphs` flag.
+
+    Parameters:
+    -----------
+
+    - data (str): The input text to be chunked.
+    - max_chunk_size: The maximum allowed size for each chunk, in terms of tokens or
+      words.
+    - batch_paragraphs (bool): Flag indicating whether to batch multiple paragraphs
+      together into a single chunk. If set to False, each paragraph is yielded as its
+      own chunk. (default True)
     """
     current_chunk = ""
     chunk_index = 0
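A minimal usage sketch of the reconstruction guarantee documented above (editor's illustration, not part of this patch; it assumes each yielded dict exposes its chunk text under a "text" key):

import asyncio
from cognee.tasks.chunks.chunk_by_paragraph import chunk_by_paragraph

text = "First paragraph.\nSecond paragraph.\nThird one."
chunks = list(chunk_by_paragraph(text, max_chunk_size=128, batch_paragraphs=False))
# Joining the chunk texts with "" should reproduce the input exactly.
assert "".join(chunk["text"] for chunk in chunks) == text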
diff --git a/cognee/tasks/chunks/chunk_by_sentence.py b/cognee/tasks/chunks/chunk_by_sentence.py
index 75ffc39e8..d37aa2a66 100644
--- a/cognee/tasks/chunks/chunk_by_sentence.py
+++ b/cognee/tasks/chunks/chunk_by_sentence.py
@@ -5,6 +5,23 @@ from cognee.infrastructure.databases.vector.embeddings import get_embedding_engi
 
 def get_word_size(word: str) -> int:
+    """
+    Calculate the size of a given word in terms of tokens.
+
+    If an embedding engine's tokenizer is available, count the tokens for the provided word.
+    If the tokenizer is not available, assume the word counts as one token.
+
+    Parameters:
+    -----------
+
+    - word (str): The word for which the token size is to be calculated.
+
+    Returns:
+    --------
+
+    - int: The number of tokens for the word, or 1 when no tokenizer is available.
+    """
     embedding_engine = get_embedding_engine()
     if embedding_engine.tokenizer:
         return embedding_engine.tokenizer.count_tokens(word)
@@ -16,12 +33,22 @@ def chunk_by_sentence(
     data: str, maximum_size: Optional[int] = None
 ) -> Iterator[Tuple[UUID, str, int, Optional[str]]]:
     """
-    Splits the input text into sentences based on word-level processing, with optional sentence length constraints.
+    Splits text into sentences while preserving word and paragraph boundaries.
 
-    Notes:
-    - Relies on the `chunk_by_word` function for word-level tokenization and classification.
-    - Ensures sentences within paragraphs are uniquely identifiable using UUIDs.
-    - Handles cases where the text ends mid-sentence by appending a special "sentence_cut" type.
+    This function processes the input string, dividing it into sentences based on word-level
+    tokenization. Each sentence carries the UUID of its paragraph, and text that ends
+    mid-sentence is tagged with a special "sentence_cut" type. If a maximum sentence
+    length is specified, the function ensures that sentences do not exceed this length,
+    raising a ValueError if an individual word surpasses it. The function utilizes an
+    external word processing function `chunk_by_word` to determine the structure of the
+    text.
+
+    Parameters:
+    -----------
+
+    - data (str): The input text to be split into sentences.
+    - maximum_size (Optional[int]): An optional limit on the maximum size of sentences
+      generated. (default None)
     """
     sentence = ""
     paragraph_id = uuid4()
diff --git a/cognee/tasks/chunks/chunk_by_word.py b/cognee/tasks/chunks/chunk_by_word.py
index f0782cd9c..51ffd39d3 100644
--- a/cognee/tasks/chunks/chunk_by_word.py
+++ b/cognee/tasks/chunks/chunk_by_word.py
@@ -8,15 +8,23 @@ PARAGRAPH_ENDINGS = r"[\n\r]"
 
 def is_real_paragraph_end(last_char: str, current_pos: int, text: str) -> bool:
     """
-    Determines if the current position represents a real paragraph ending.
+    Determine if the current position represents a valid paragraph end.
 
-    Args:
-        last_char: The last processed character
-        current_pos: Current position in the text
-        text: The input text
+    The function checks if the last character indicates a possible sentence ending, then
+    verifies whether the characters that follow genuinely begin a new paragraph.
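A hedged sketch of how the sentence chunker above might be driven (editor's illustration; the tuple layout follows the annotated return type Iterator[Tuple[UUID, str, int, Optional[str]]], and the meaning of the third element is assumed to be the sentence's size):

from cognee.tasks.chunks.chunk_by_sentence import chunk_by_sentence

for paragraph_id, sentence, size, end_type in chunk_by_sentence(
    "One sentence. Another one.", maximum_size=64
):
    print(paragraph_id, repr(sentence), size, end_type)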
+ + Parameters: + ----------- + + - last_char (str): The last processed character + - current_pos (int): Current position in the text + - text (str): The input text Returns: - bool: True if this is a real paragraph end, False otherwise + -------- + + - bool: True if this is a real paragraph end, False otherwise """ if re.match(SENTENCE_ENDINGS, last_char): return True @@ -38,9 +46,16 @@ def is_real_paragraph_end(last_char: str, current_pos: int, text: str) -> bool: def chunk_by_word(data: str) -> Iterator[Tuple[str, str]]: """ - Chunks text into words and endings while preserving whitespace. - Whitespace is included with the preceding word. - Outputs can be joined with "" to recreate the original input. + Chunk text into words and sentence endings, preserving whitespace. + + Whitespace is included with the preceding word. Outputs can be joined with "" to + recreate the original input. + + Parameters: + ----------- + + - data (str): The input string of text to be chunked into words and sentence + endings. """ current_chunk = "" i = 0 diff --git a/cognee/tasks/code/enrich_dependency_graph_checker.py b/cognee/tasks/code/enrich_dependency_graph_checker.py index 7b04e0357..1b3a80210 100644 --- a/cognee/tasks/code/enrich_dependency_graph_checker.py +++ b/cognee/tasks/code/enrich_dependency_graph_checker.py @@ -6,6 +6,14 @@ from cognee.tasks.repo_processor.enrich_dependency_graph import enrich_dependenc def main(): + """ + Execute the main logic of the dependency graph processor. + + This function sets up argument parsing to retrieve the repository path, checks the + existence of the specified path, and processes the repository to produce a dependency + graph. If the repository path does not exist, it logs an error message and terminates + without further execution. + """ parser = argparse.ArgumentParser() parser.add_argument("repo_path", help="Path to the repository") args = parser.parse_args() diff --git a/cognee/tasks/code/get_repo_dependency_graph_checker.py b/cognee/tasks/code/get_repo_dependency_graph_checker.py index 3a393d3f3..0e68cf7fe 100644 --- a/cognee/tasks/code/get_repo_dependency_graph_checker.py +++ b/cognee/tasks/code/get_repo_dependency_graph_checker.py @@ -5,6 +5,14 @@ from cognee.tasks.repo_processor.get_repo_file_dependencies import get_repo_file def main(): + """ + Parse the command line arguments and print the repository file dependencies. + + This function sets up an argument parser to retrieve the path of a repository. It checks + if the provided path exists and if it doesn’t, it prints an error message and exits. If + the path is valid, it calls an asynchronous function to get the dependencies and prints + the nodes and their relations in the dependency graph. + """ parser = argparse.ArgumentParser() parser.add_argument("repo_path", help="Path to the repository") args = parser.parse_args() diff --git a/cognee/tasks/completion/exceptions/exceptions.py b/cognee/tasks/completion/exceptions/exceptions.py index aebece145..ac105a966 100644 --- a/cognee/tasks/completion/exceptions/exceptions.py +++ b/cognee/tasks/completion/exceptions/exceptions.py @@ -3,6 +3,21 @@ from fastapi import status class NoRelevantDataError(CogneeApiError): + """ + Represents an error when no relevant data is found during a search. This class is a + subclass of CogneeApiError. 
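For the word-level chunker documented above, a small illustrative check of its join-to-reconstruct property (editor's sketch, not part of the patch):

from cognee.tasks.chunks.chunk_by_word import chunk_by_word

data = "Hello there. Bye!"
words_and_types = list(chunk_by_word(data))
# Each item is a (word, type) tuple; whitespace travels with the preceding word,
# so joining the words with "" restores the original input.
assert "".join(word for word, word_type in words_and_types) == data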
+
+    Public methods:
+
+    - __init__
+
+    Instance variables:
+
+    - message
+    - name
+    - status_code
+    """
+
     def __init__(
         self,
         message: str = "Search did not find any data.",
diff --git a/cognee/tasks/documents/classify_documents.py b/cognee/tasks/documents/classify_documents.py
index 97ff3e483..673e17c75 100644
--- a/cognee/tasks/documents/classify_documents.py
+++ b/cognee/tasks/documents/classify_documents.py
@@ -52,7 +52,21 @@ EXTENSION_TO_DOCUMENT_CLASS = {
 
 def update_node_set(document):
-    """Extracts node_set from document's external_metadata."""
+    """
+    Extracts node_set from document's external_metadata.
+
+    Parses the external_metadata of the given document and updates the document's
+    belongs_to_set attribute with NodeSet objects generated from the node_set found in the
+    external_metadata. If the external_metadata is not valid JSON, is not a dictionary, does
+    not contain the 'node_set' key, or if node_set is not a list, the function has no effect
+    and will return early.
+
+    Parameters:
+    -----------
+
+    - document: The document object which contains external_metadata from which the
+      node_set will be extracted.
+    """
     try:
         external_metadata = json.loads(document.external_metadata)
     except json.JSONDecodeError:
@@ -76,11 +90,26 @@ def update_node_set(document):
 
 async def classify_documents(data_documents: list[Data]) -> list[Document]:
     """
-    Classifies a list of data items into specific document types based on file extensions.
+    Classifies a list of data items into specific document types based on their file
+    extensions.
 
-    Notes:
-    - The function relies on `get_metadata` to retrieve metadata information for each data item.
-    - Ensure the `Data` objects and their attributes (e.g., `extension`, `id`) are valid before calling this function.
+    This function processes each item in the provided list of data documents, retrieves
+    relevant metadata, and creates instances of document classes mapped to their extensions.
+    It expects the `Data` objects and their attributes (e.g., `extension`, `id`) to be
+    valid before classification, and it invokes `update_node_set` to extract and set
+    relevant node information from the document's external metadata.
+
+    Parameters:
+    -----------
+
+    - data_documents (list[Data]): A list of Data objects representing the documents to
+      be classified.
+
+    Returns:
+    --------
+
+    - list[Document]: A list of Document objects created based on the classified data
+      documents.
     """
     documents = []
     for data_item in data_documents:
diff --git a/cognee/tasks/graph/infer_data_ontology.py b/cognee/tasks/graph/infer_data_ontology.py
index bec7d6fb3..23ede6656 100644
--- a/cognee/tasks/graph/infer_data_ontology.py
+++ b/cognee/tasks/graph/infer_data_ontology.py
@@ -33,6 +33,25 @@ logger = get_logger("task:infer_data_ontology")
 
 async def extract_ontology(content: str, response_model: Type[BaseModel]):
+    """
+    Extracts structured ontology from the provided content using the configured LLM client.
+
+    This asynchronous function retrieves a system prompt from a file and utilizes an LLM
+    client to create a structured output based on the input content and specified response
+    model.
+
+    Parameters:
+    -----------
+
+    - content (str): The content from which to extract the ontology.
+    - response_model (Type[BaseModel]): The model that defines the structure of the
+      output ontology.
+
+    Returns:
+    --------
+
+    The structured ontology extracted from the content.
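A hypothetical end-to-end call for the classifier documented above (editor's sketch; it assumes Data rows were loaded elsewhere in the pipeline):

import asyncio
from cognee.tasks.documents.classify_documents import classify_documents

async def run(data_items):
    # data_items: list[Data], assumed to come from the ingestion layer
    documents = await classify_documents(data_items)
    for document in documents:
        print(type(document).__name__)

# asyncio.run(run(data_items))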
+ """ llm_client = get_llm_client() system_prompt = read_query_prompt("extract_ontology.txt") @@ -43,10 +62,38 @@ async def extract_ontology(content: str, response_model: Type[BaseModel]): class OntologyEngine: + """ + Manage ontology data and operations for graph structures, providing methods for data + loading, flattening models, and adding ontological relationships to a graph database. + + Public methods: + + - flatten_model + - recursive_flatten + - load_data + - add_graph_ontology + """ + async def flatten_model( self, model: NodeModel, parent_id: Optional[str] = None ) -> Dict[str, Any]: - """Flatten the model to a dictionary.""" + """ + Flatten the model to a dictionary including optional parent ID and relationship details + if available. + + Parameters: + ----------- + + - model (NodeModel): The NodeModel instance to flatten. + - parent_id (Optional[str]): An optional ID of the parent node for hierarchical + purposes. (default None) + + Returns: + -------- + + - Dict[str, Any]: A dictionary representation of the model with flattened + attributes. + """ result = model.dict() result["parent_id"] = parent_id if model.default_relationship: @@ -62,7 +109,23 @@ class OntologyEngine: async def recursive_flatten( self, items: Union[List[Dict[str, Any]], Dict[str, Any]], parent_id: Optional[str] = None ) -> List[Dict[str, Any]]: - """Recursively flatten the items.""" + """ + Recursively flatten a hierarchical structure of models into a flat list of dictionaries. + + Parameters: + ----------- + + - items (Union[List[Dict[str, Any]], Dict[str, Any]]): A list or dictionary + containing models to flatten. + - parent_id (Optional[str]): An optional ID of the parent node to maintain hierarchy + during flattening. (default None) + + Returns: + -------- + + - List[Dict[str, Any]]: A flat list of dictionaries representing the hierarchical + model structure. + """ flat_list = [] if isinstance(items, list): @@ -76,7 +139,20 @@ class OntologyEngine: return flat_list async def load_data(self, file_path: str) -> Union[List[Dict[str, Any]], Dict[str, Any]]: - """Load data from a JSON or CSV file.""" + """ + Load data from a specified JSON or CSV file and return it in a structured format. + + Parameters: + ----------- + + - file_path (str): The path to the file to load data from. + + Returns: + -------- + + - Union[List[Dict[str, Any]], Dict[str, Any]]: Parsed data from the file as either a + list of dictionaries or a single dictionary depending on content type. + """ try: if file_path.endswith(".json"): async with aiofiles.open(file_path, mode="r") as f: @@ -96,7 +172,18 @@ class OntologyEngine: ) async def add_graph_ontology(self, file_path: str = None, documents: list = None): - """Add graph ontology from a JSON or CSV file or infer from documents content.""" + """ + Add graph ontology from a JSON or CSV file, or infer relationships from provided + document content. Raise exceptions for invalid file types or missing entities. + + Parameters: + ----------- + + - file_path (str): Optional path to a file containing data to be loaded. (default + None) + - documents (list): Optional list of document objects for content extraction if no + file path is provided. (default None) + """ if file_path is None: initial_chunks_and_ids = [] @@ -202,6 +289,17 @@ class OntologyEngine: async def infer_data_ontology(documents, ontology_model=KnowledgeGraph, root_node_id=None): + """ + Infer data ontology from provided documents and optionally add it to a graph. 
+
+    Parameters:
+    -----------
+
+    - documents: The documents from which to infer the ontology.
+    - ontology_model: The ontology model to use for the inference. (default
+      KnowledgeGraph)
+    - root_node_id: An optional root node identifier for the ontology. (default None)
+    """
     if ontology_model == KnowledgeGraph:
         ontology_engine = OntologyEngine()
         root_node_id = await ontology_engine.add_graph_ontology(documents=documents)
diff --git a/cognee/tasks/graph/models.py b/cognee/tasks/graph/models.py
index bfffb0262..e7429824d 100644
--- a/cognee/tasks/graph/models.py
+++ b/cognee/tasks/graph/models.py
@@ -3,12 +3,40 @@ from pydantic import BaseModel, Field
 
 class RelationshipModel(BaseModel):
+    """
+    Represents a relationship between two entities in a model.
+
+    This class holds the type of the relationship and the identifiers for the source and
+    target entities. It includes the following public instance variables:
+
+    - type: A string indicating the type of relationship.
+    - source: A string representing the source entity of the relationship.
+    - target: A string representing the target entity of the relationship.
+    """
+
     type: str
     source: str
     target: str
 
 class NodeModel(BaseModel):
+    """
+    Represents a node in a hierarchical model structure with relationships to other nodes.
+
+    Instance variables:
+
+    - node_id: Unique identifier for the node.
+    - name: Name of the node.
+    - default_relationship: Default relationship associated with the node, if any.
+      (default None)
+    - children: List of child nodes or dictionaries representing children for this node.
+      (default empty list)
+    """
+
     node_id: str
     name: str
     default_relationship: Optional[RelationshipModel] = None
@@ -19,12 +47,28 @@ NodeModel.model_rebuild()
 
 class OntologyNode(BaseModel):
+    """
+    Represents a node in an ontology with a unique identifier, name, and description.
+    """
+
     id: str = Field(..., description="Unique identifier made from node name.")
     name: str
     description: str
 
 class OntologyEdge(BaseModel):
+    """
+    Represent an edge in an ontology, connecting a source and target with a specific
+    relationship type.
+
+    The class includes the following instance variables:
+    - id: A unique identifier for the edge.
+    - source_id: The identifier of the source node.
+    - target_id: The identifier of the target node.
+    - relationship_type: The type of relationship represented by this edge, defining how the
+      source and target are related.
+    """
+
     id: str
     source_id: str
     target_id: str
     relationship_type: str
@@ -32,5 +76,14 @@
 class GraphOntology(BaseModel):
+    """
+    Represents a graph-based structure of ontology consisting of nodes and edges.
+
+    The GraphOntology class contains a collection of OntologyNode instances representing the
+    nodes of the graph and OntologyEdge instances representing the relationships between
+    them. It defines no methods of its own; its instance variables are a list of nodes and
+    a list of edges.
+    """
+
     nodes: list[OntologyNode]
     edges: list[OntologyEdge]
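To make the models above concrete, a hypothetical construction (editor's sketch; field names are taken from this diff, values are invented):

from cognee.tasks.graph.models import GraphOntology, OntologyNode, OntologyEdge

ontology = GraphOntology(
    nodes=[
        OntologyNode(id="person", name="Person", description="A human being."),
        OntologyNode(id="city", name="City", description="A populated place."),
    ],
    edges=[
        OntologyEdge(
            id="person_lives_in_city",
            source_id="person",
            target_id="city",
            relationship_type="lives_in",
        )
    ],
)
print(len(ontology.nodes), len(ontology.edges))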
+ """ + nodes: list[OntologyNode] edges: list[OntologyEdge] diff --git a/cognee/tasks/ingestion/get_dlt_destination.py b/cognee/tasks/ingestion/get_dlt_destination.py index 2de0d7f0d..131834f0b 100644 --- a/cognee/tasks/ingestion/get_dlt_destination.py +++ b/cognee/tasks/ingestion/get_dlt_destination.py @@ -10,11 +10,19 @@ from cognee.infrastructure.databases.relational import get_relational_config @lru_cache def get_dlt_destination() -> Union[type[dlt.destinations.sqlalchemy], None]: """ - Handles propagation of the cognee database configuration to the dlt library + Handle the propagation of the cognee database configuration to the dlt library. + + This function determines the appropriate sqlalchemy destination based on the database + provider specified in the relational configuration. It constructs the destination + credentials for either sqlite or postgres databases accordingly. If the database + provider is neither sqlite nor postgres, it returns None. Returns: - sqlachemy: sqlachemy destination used by the dlt library + -------- + - Union[type[dlt.destinations.sqlalchemy], None]: An instance of sqlalchemy + destination used by the dlt library, or None if the database provider is + unsupported. """ relational_config = get_relational_config() diff --git a/cognee/tasks/ingestion/transform_data.py b/cognee/tasks/ingestion/transform_data.py index cc75c7a65..cd35364d7 100644 --- a/cognee/tasks/ingestion/transform_data.py +++ b/cognee/tasks/ingestion/transform_data.py @@ -5,6 +5,26 @@ from typing import Union def get_data_from_llama_index(data_point: Union[Document, ImageDocument], dataset_name: str) -> str: + """ + Retrieve the file path based on the data point type. + + Ensure the data point is an instance of either Document or ImageDocument. If the data + point has a metadata or image path file path, return it; otherwise, save the data + point's text to a file and return the newly created file path. + + Parameters: + ----------- + + - data_point (Union[Document, ImageDocument]): An instance of Document or + ImageDocument to extract data from. + - dataset_name (str): The name of the dataset associated with the data point. + + Returns: + -------- + + - str: The file path as a string where the data is stored or the existing path from + the data point. + """ # Specific type checking is used to ensure it's not a child class from Document if isinstance(data_point, Document) and type(data_point) is Document: file_path = data_point.metadata.get("file_path") diff --git a/cognee/tasks/repo_processor/get_local_dependencies.py b/cognee/tasks/repo_processor/get_local_dependencies.py index a406d6023..ed8e4e14b 100644 --- a/cognee/tasks/repo_processor/get_local_dependencies.py +++ b/cognee/tasks/repo_processor/get_local_dependencies.py @@ -19,10 +19,34 @@ logger = get_logger() class FileParser: + """ + Handles the parsing of files into source code and an abstract syntax tree + representation. Public methods include: + + - parse_file: Parses a file and returns its source code and syntax tree representation. + """ + def __init__(self): self.parsed_files = {} async def parse_file(self, file_path: str) -> tuple[str, Tree]: + """ + Parse a file and return its source code along with its syntax tree representation. + + If the file has already been parsed, retrieve the result from memory instead of reading + the file again. + + Parameters: + ----------- + + - file_path (str): The path of the file to parse. 
+ + Returns: + -------- + + - tuple[str, Tree]: A tuple containing the source code of the file and its + corresponding syntax tree representation. + """ PY_LANGUAGE = Language(tspython.language()) source_code_parser = Parser(PY_LANGUAGE) @@ -35,6 +59,24 @@ class FileParser: async def get_source_code(file_path: str): + """ + Read source code from a file asynchronously. + + This function attempts to open a file specified by the given file path, read its + contents, and return the source code. In case of any errors during the file reading + process, it logs an error message and returns None. + + Parameters: + ----------- + + - file_path (str): The path to the file from which to read the source code. + + Returns: + -------- + + Returns the contents of the file as a string if successful, or None if an error + occurs. + """ try: async with aiofiles.open(file_path, "r", encoding="utf-8") as f: source_code = await f.read() @@ -45,7 +87,22 @@ async def get_source_code(file_path: str): def resolve_module_path(module_name): - """Find the file path of a module.""" + """ + Find the file path of a module. + + Return the file path of the specified module if found, or return None if the module does + not exist or cannot be located. + + Parameters: + ----------- + + - module_name: The name of the module whose file path is to be resolved. + + Returns: + -------- + + The file path of the module as a string or None if the module is not found. + """ try: spec = importlib.util.find_spec(module_name) if spec and spec.origin: @@ -58,7 +115,23 @@ def resolve_module_path(module_name): def find_function_location( module_path: str, function_name: str, parser: FileParser ) -> Optional[tuple[str, str]]: - """Find the function definition in the module.""" + """ + Find the location of a function definition in a specified module. + + Parameters: + ----------- + + - module_path (str): The path to the module where the function is defined. + - function_name (str): The name of the function whose location is to be found. + - parser (FileParser): An instance of FileParser used to parse the module's source + code. + + Returns: + -------- + + - Optional[tuple[str, str]]: Returns a tuple containing the module path and the + start point of the function if found; otherwise, returns None. + """ if not module_path or not os.path.exists(module_path): return None @@ -78,6 +151,24 @@ def find_function_location( async def get_local_script_dependencies( repo_path: str, script_path: str, detailed_extraction: bool = False ) -> CodeFile: + """ + Retrieve local script dependencies and create a CodeFile object. + + Parameters: + ----------- + + - repo_path (str): The path to the repository that contains the script. + - script_path (str): The path of the script for which dependencies are being + extracted. + - detailed_extraction (bool): A flag indicating whether to perform a detailed + extraction of code components. + + Returns: + -------- + + - CodeFile: Returns a CodeFile object containing information about the script, + including its dependencies and definitions. + """ code_file_parser = FileParser() source_code, source_code_tree = await code_file_parser.parse_file(script_path) @@ -113,6 +204,24 @@ async def get_local_script_dependencies( def find_node(nodes: list[Node], condition: callable) -> Node: + """ + Find and return the first node that satisfies the given condition. + + Iterate through the provided list of nodes and return the first node for which the + condition callable returns True. If no such node is found, return None. 
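A hypothetical invocation of the dependency extractor documented above (editor's sketch; paths are placeholders):

import asyncio
from cognee.tasks.repo_processor.get_local_dependencies import (
    get_local_script_dependencies,
)

async def run():
    code_file = await get_local_script_dependencies(
        repo_path="/path/to/repo",
        script_path="/path/to/repo/pkg/module.py",
        detailed_extraction=True,
    )
    print(code_file)

asyncio.run(run())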
+ + Parameters: + ----------- + + - nodes (list[Node]): A list of Node objects to search through. + - condition (callable): A callable that takes a Node and returns a boolean + indicating if the node meets specified criteria. + + Returns: + -------- + + - Node: The first Node that matches the condition, or None if no such node exists. + """ for node in nodes: if condition(node): return node @@ -123,6 +232,30 @@ def find_node(nodes: list[Node], condition: callable) -> Node: async def extract_code_parts( tree_root: Node, script_path: str, existing_nodes: list[DataPoint] = {} ) -> AsyncGenerator[DataPoint, None]: + """ + Extract code parts from a given AST node tree asynchronously. + + Iteratively yields DataPoint nodes representing import statements, function definitions, + and class definitions found in the children of the specified tree root. The function + checks + if nodes are already present in the existing_nodes dictionary to prevent duplicates. + This function has to be used in an asynchronous context, and it requires a valid + tree_root + and proper initialization of existing_nodes. + + Parameters: + ----------- + + - tree_root (Node): The root node of the AST tree containing code parts to extract. + - script_path (str): The file path of the script from which the AST was generated. + - existing_nodes (list[DataPoint]): A dictionary that holds already extracted + DataPoint nodes to avoid duplicates. (default {}) + + Returns: + -------- + + Yields DataPoint nodes representing imported modules, functions, and classes. + """ for child_node in tree_root.children: if child_node.type == "import_statement" or child_node.type == "import_from_statement": parts = child_node.text.decode("utf-8").split() diff --git a/cognee/tasks/repo_processor/get_non_code_files.py b/cognee/tasks/repo_processor/get_non_code_files.py index 5117f261d..b9ab1d4c6 100644 --- a/cognee/tasks/repo_processor/get_non_code_files.py +++ b/cognee/tasks/repo_processor/get_non_code_files.py @@ -2,7 +2,24 @@ import os async def get_non_py_files(repo_path): - """Get files that are not .py files and their contents""" + """ + Get files that are not .py files and their contents. + + Check if the specified repository path exists and if so, traverse the directory, + collecting the paths of files that do not have a .py extension and meet the + criteria set in the allowed and ignored patterns. Return a list of paths to + those files. + + Parameters: + ----------- + + - repo_path: The file system path to the repository to scan for non-Python files. + + Returns: + -------- + + A list of file paths that are not Python files and meet the specified criteria. + """ if not os.path.exists(repo_path): return {} @@ -111,6 +128,22 @@ async def get_non_py_files(repo_path): } def should_process(path): + """ + Determine if a file should be processed based on its extension and path patterns. + + This function checks if the file extension is in the allowed list and ensures that none + of the ignored patterns are present in the provided file path. + + Parameters: + ----------- + + - path: The file path to check for processing eligibility. + + Returns: + -------- + + Returns True if the file should be processed; otherwise, False. 
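A minimal sketch of the non-code collector described above (editor's illustration; the repository path is a placeholder):

import asyncio
from cognee.tasks.repo_processor.get_non_code_files import get_non_py_files

non_py_paths = asyncio.run(get_non_py_files("/path/to/repo"))
print(len(non_py_paths), "non-Python files matched")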
+ """ _, ext = os.path.splitext(path) return ext in ALLOWED_EXTENSIONS and not any( pattern in path for pattern in IGNORED_PATTERNS diff --git a/cognee/tasks/repo_processor/get_repo_file_dependencies.py b/cognee/tasks/repo_processor/get_repo_file_dependencies.py index 92e2d7910..232850936 100644 --- a/cognee/tasks/repo_processor/get_repo_file_dependencies.py +++ b/cognee/tasks/repo_processor/get_repo_file_dependencies.py @@ -11,7 +11,24 @@ from cognee.shared.CodeGraphEntities import CodeFile, Repository async def get_source_code_files(repo_path): - """Get .py files and their source code""" + """ + Retrieve Python source code files from the specified repository path. + + This function scans the given repository path for files that have the .py extension + while excluding test files and files within a virtual environment. It returns a list of + absolute paths to the source code files that are not empty. + + Parameters: + ----------- + + - repo_path: The file path to the repository to search for Python source files. + + Returns: + -------- + + A list of absolute paths to .py files that contain source code, excluding empty + files, test files, and files from a virtual environment. + """ if not os.path.exists(repo_path): return {} @@ -40,6 +57,26 @@ async def get_source_code_files(repo_path): def run_coroutine(coroutine_func, *args, **kwargs): + """ + Run a coroutine function until it completes. + + This function creates a new asyncio event loop, sets it as the current loop, and + executes the given coroutine function with the provided arguments. Once the coroutine + completes, the loop is closed. Intended for use in environments where an existing event + loop is not available or desirable. + + Parameters: + ----------- + + - coroutine_func: The coroutine function to be run. + - *args: Positional arguments to pass to the coroutine function. + - **kwargs: Keyword arguments to pass to the coroutine function. + + Returns: + -------- + + The result returned by the coroutine after completion. + """ loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) result = loop.run_until_complete(coroutine_func(*args, **kwargs)) @@ -50,7 +87,21 @@ def run_coroutine(coroutine_func, *args, **kwargs): async def get_repo_file_dependencies( repo_path: str, detailed_extraction: bool = False ) -> AsyncGenerator[DataPoint, None]: - """Generate a dependency graph for Python files in the given repository path.""" + """ + Generate a dependency graph for Python files in the given repository path. + + Check the validity of the repository path and yield a repository object followed by the + dependencies of Python files within that repository. Raise a FileNotFoundError if the + provided path does not exist. The extraction of detailed dependencies can be controlled + via the `detailed_extraction` argument. + + Parameters: + ----------- + + - repo_path (str): The file path to the repository where Python files are located. + - detailed_extraction (bool): A flag indicating whether to perform a detailed + extraction of dependencies (default is False). 
    """
    if not os.path.exists(repo_path):
        raise FileNotFoundError(f"Repository path {repo_path} does not exist.")
diff --git a/cognee/tasks/summarization/mock_summary.py b/cognee/tasks/summarization/mock_summary.py
index f60ce2d82..6d288b927 100644
--- a/cognee/tasks/summarization/mock_summary.py
+++ b/cognee/tasks/summarization/mock_summary.py
@@ -2,6 +2,19 @@ from cognee.shared.data_models import SummarizedCode, SummarizedClass, Summarize
 
 def get_mock_summarized_code() -> SummarizedCode:
+    """
+    Return a summarized representation of mock code.
+
+    This function constructs and returns a `SummarizedCode` object that includes various
+    components such as file name, high-level summary, key features, imports, constants,
+    classes, and functions, all described with placeholders for mock data.
+
+    Returns:
+    --------
+
+    - SummarizedCode: A `SummarizedCode` object containing mock data for file summary,
+      features, imports, constants, classes, and functions.
+    """
     return SummarizedCode(
         file_name="mock_file.py",
         high_level_summary="This is a mock high-level summary.",
diff --git a/cognee/tasks/summarization/models.py b/cognee/tasks/summarization/models.py
index aac1d4178..75ed82d50 100644
--- a/cognee/tasks/summarization/models.py
+++ b/cognee/tasks/summarization/models.py
@@ -6,6 +6,15 @@ from cognee.shared.CodeGraphEntities import CodeFile, CodePart
 
 class TextSummary(DataPoint):
+    """
+    Represent a text summary derived from a document chunk.
+
+    This class encapsulates a text summary as well as its associated metadata. The public
+    instance variables include 'text' for the summary content and 'made_from' which
+    indicates the source document chunk. The 'metadata' instance variable contains
+    additional information such as indexed fields.
+    """
+
     text: str
     made_from: DocumentChunk
 
@@ -13,6 +22,15 @@
 class CodeSummary(DataPoint):
+    """
+    Summarizes code and its components.
+
+    This class inherits from DataPoint and contains a text representation alongside the
+    summarized content, which can either be a full code file or a part of it. The metadata
+    dictionary defines index fields for the class's instances, particularly focusing on the
+    'text' attribute. Public attributes include 'text', 'summarizes', and 'metadata'.
+    """
+
     text: str
     summarizes: Union[CodeFile, CodePart]
 
diff --git a/cognee/tasks/summarization/summarize_text.py b/cognee/tasks/summarization/summarize_text.py
index cca41ae88..9a8b7cbd7 100644
--- a/cognee/tasks/summarization/summarize_text.py
+++ b/cognee/tasks/summarization/summarize_text.py
@@ -11,6 +11,29 @@ from .models import TextSummary
 
 async def summarize_text(
     data_chunks: list[DocumentChunk], summarization_model: Type[BaseModel] = None
 ):
+    """
+    Summarize the text contained in the provided data chunks.
+
+    If no summarization model is provided, the function retrieves the default model from the
+    configuration. It processes the data chunks asynchronously and returns summaries for
+    each chunk. If the provided list of data chunks is empty, it simply returns the list as
+    is.
+
+    Parameters:
+    -----------
+
+    - data_chunks (list[DocumentChunk]): A list of DocumentChunk objects containing text
+      to be summarized.
+    - summarization_model (Type[BaseModel]): An optional model used for summarizing
+      text. If not provided, the default is fetched from the configuration. (default
+      None)
+
+    Returns:
+    --------
+
+    A list of TextSummary objects, each containing the summary of a corresponding
+    DocumentChunk.
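A hypothetical call for the summarizer above (editor's sketch; an empty input exercises the documented short-circuit, while real chunks would come from the chunking tasks earlier in this patch):

import asyncio
from cognee.tasks.summarization.summarize_text import summarize_text

document_chunks = []  # normally list[DocumentChunk] from the chunking pipeline
summaries = asyncio.run(summarize_text(document_chunks))
for summary in summaries:
    print(summary.text)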
+ """ if len(data_chunks) == 0: return data_chunks diff --git a/cognee/tasks/temporal_awareness/graphiti_model.py b/cognee/tasks/temporal_awareness/graphiti_model.py index 89aef540b..c2a2eeb69 100644 --- a/cognee/tasks/temporal_awareness/graphiti_model.py +++ b/cognee/tasks/temporal_awareness/graphiti_model.py @@ -3,6 +3,18 @@ from typing import Optional class GraphitiNode(DataPoint): + """ + Represent a node in a graph with optional content, name, and summary attributes. + + This class extends DataPoint and includes a metadata dictionary that specifies the index + fields for the node's data. The public instance variables are: + + - content: an optional string representing the content of the node. + - name: an optional string representing the name of the node. + - summary: an optional string providing a summary of the node. + - metadata: a dictionary outlining the fields used for indexing. + """ + content: Optional[str] = None name: Optional[str] = None summary: Optional[str] = None