Docstring tasks. (#878)
<!-- .github/pull_request_template.md --> ## Description <!-- Provide a clear description of the changes in this PR --> ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
parent
ec68e99438
commit
bb68d6a0df
18 changed files with 610 additions and 37 deletions
|
|
@ -10,14 +10,23 @@ def chunk_by_paragraph(
|
||||||
batch_paragraphs: bool = True,
|
batch_paragraphs: bool = True,
|
||||||
) -> Iterator[Dict[str, Any]]:
|
) -> Iterator[Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
Chunks text by paragraph while preserving exact text reconstruction capability.
|
Chunk the input text by paragraph while enabling exact text reconstruction.
|
||||||
When chunks are joined with empty string "", they reproduce the original text exactly.
|
|
||||||
|
|
||||||
Notes:
|
This function divides the given text data into smaller chunks based on the specified
|
||||||
- Tokenization is handled using our tokenization adapters, ensuring compatibility with the vector engine's embedding model.
|
maximum chunk size. It ensures that when the generated chunks are concatenated, they
|
||||||
- If `batch_paragraphs` is False, each paragraph will be yielded as a separate chunk.
|
reproduce the original text accurately. The tokenization process is handled by adapters
|
||||||
- Handles cases where paragraphs exceed the specified token or word limits by splitting them as needed.
|
compatible with the vector engine's embedding model, and the function can operate in
|
||||||
- Remaining text at the end of the input will be yielded as a final chunk.
|
either batch mode or paragraph mode, based on the `batch_paragraphs` flag.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- data (str): The input text to be chunked.
|
||||||
|
- max_chunk_size: The maximum allowed size for each chunk, in terms of tokens or
|
||||||
|
words.
|
||||||
|
- batch_paragraphs (bool): Flag indicating whether to batch multiple paragraphs into a
|
||||||
|
single chunk. If set to False, each paragraph is yielded as a separate chunk as it is
|
||||||
|
processed. (default True)
|
||||||
"""
|
"""
|
||||||
current_chunk = ""
|
current_chunk = ""
|
||||||
chunk_index = 0
|
chunk_index = 0
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,23 @@ from cognee.infrastructure.databases.vector.embeddings import get_embedding_engi
|
||||||
|
|
||||||
|
|
||||||
def get_word_size(word: str) -> int:
|
def get_word_size(word: str) -> int:
|
||||||
|
"""
|
||||||
|
Calculate the size of a given word in terms of tokens.
|
||||||
|
|
||||||
|
If an embedding engine's tokenizer is available, count the tokens for the provided word.
|
||||||
|
If the tokenizer is not available, assume the word counts as one token.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- word (str): The word for which the token size is to be calculated.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
--------
|
||||||
|
|
||||||
|
- int: The number of tokens the tokenizer counts for the word, or 1 when no
|
||||||
|
tokenizer is available.
|
||||||
|
"""
|
||||||
embedding_engine = get_embedding_engine()
|
embedding_engine = get_embedding_engine()
|
||||||
if embedding_engine.tokenizer:
|
if embedding_engine.tokenizer:
|
||||||
return embedding_engine.tokenizer.count_tokens(word)
|
return embedding_engine.tokenizer.count_tokens(word)
|
||||||
|
|
@ -16,12 +33,22 @@ def chunk_by_sentence(
|
||||||
data: str, maximum_size: Optional[int] = None
|
data: str, maximum_size: Optional[int] = None
|
||||||
) -> Iterator[Tuple[UUID, str, int, Optional[str]]]:
|
) -> Iterator[Tuple[UUID, str, int, Optional[str]]]:
|
||||||
"""
|
"""
|
||||||
Splits the input text into sentences based on word-level processing, with optional sentence length constraints.
|
Splits text into sentences while preserving word and paragraph boundaries.
|
||||||
|
|
||||||
Notes:
|
This function processes the input string, dividing it into sentences based on word-level
|
||||||
- Relies on the `chunk_by_word` function for word-level tokenization and classification.
|
tokenization. Each sentence is identified with a unique UUID, and it handles scenarios
|
||||||
- Ensures sentences within paragraphs are uniquely identifiable using UUIDs.
|
where the text may end mid-sentence by tagging it with a specific type. If a maximum
|
||||||
- Handles cases where the text ends mid-sentence by appending a special "sentence_cut" type.
|
sentence length is specified, the function ensures that sentences do not exceed this
|
||||||
|
length, raising a ValueError if an individual word surpasses it. The function utilizes
|
||||||
|
an external word processing function `chunk_by_word` to determine the structure of the
|
||||||
|
text.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- data (str): The input text to be split into sentences.
|
||||||
|
- maximum_size (Optional[int]): An optional limit on the maximum size of sentences
|
||||||
|
generated. (default None)
|
||||||
"""
|
"""
|
||||||
sentence = ""
|
sentence = ""
|
||||||
paragraph_id = uuid4()
|
paragraph_id = uuid4()
|
||||||
|
|
|
||||||
|
|
@ -8,15 +8,23 @@ PARAGRAPH_ENDINGS = r"[\n\r]"
|
||||||
|
|
||||||
def is_real_paragraph_end(last_char: str, current_pos: int, text: str) -> bool:
|
def is_real_paragraph_end(last_char: str, current_pos: int, text: str) -> bool:
|
||||||
"""
|
"""
|
||||||
Determines if the current position represents a real paragraph ending.
|
Determine if the current position represents a valid paragraph end.
|
||||||
|
|
||||||
Args:
|
The function checks if the last character indicates a possible sentence ending, then
|
||||||
last_char: The last processed character
|
verifies if the subsequent characters lead to a valid paragraph end based on specific
|
||||||
current_pos: Current position in the text
|
conditions.
|
||||||
text: The input text
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- last_char (str): The last processed character
|
||||||
|
- current_pos (int): Current position in the text
|
||||||
|
- text (str): The input text
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
bool: True if this is a real paragraph end, False otherwise
|
--------
|
||||||
|
|
||||||
|
- bool: True if this is a real paragraph end, False otherwise
|
||||||
"""
|
"""
|
||||||
if re.match(SENTENCE_ENDINGS, last_char):
|
if re.match(SENTENCE_ENDINGS, last_char):
|
||||||
return True
|
return True
|
||||||
|
|
@ -38,9 +46,16 @@ def is_real_paragraph_end(last_char: str, current_pos: int, text: str) -> bool:
|
||||||
|
|
||||||
def chunk_by_word(data: str) -> Iterator[Tuple[str, str]]:
|
def chunk_by_word(data: str) -> Iterator[Tuple[str, str]]:
|
||||||
"""
|
"""
|
||||||
Chunks text into words and endings while preserving whitespace.
|
Chunk text into words and sentence endings, preserving whitespace.
|
||||||
Whitespace is included with the preceding word.
|
|
||||||
Outputs can be joined with "" to recreate the original input.
|
Whitespace is included with the preceding word. Outputs can be joined with "" to
|
||||||
|
recreate the original input.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- data (str): The input string of text to be chunked into words and sentence
|
||||||
|
endings.
|
||||||
"""
|
"""
|
||||||
current_chunk = ""
|
current_chunk = ""
|
||||||
i = 0
|
i = 0
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,14 @@ from cognee.tasks.repo_processor.enrich_dependency_graph import enrich_dependenc
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
"""
|
||||||
|
Execute the main logic of the dependency graph processor.
|
||||||
|
|
||||||
|
This function sets up argument parsing to retrieve the repository path, checks the
|
||||||
|
existence of the specified path, and processes the repository to produce a dependency
|
||||||
|
graph. If the repository path does not exist, it logs an error message and terminates
|
||||||
|
without further execution.
|
||||||
|
"""
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument("repo_path", help="Path to the repository")
|
parser.add_argument("repo_path", help="Path to the repository")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,14 @@ from cognee.tasks.repo_processor.get_repo_file_dependencies import get_repo_file
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
"""
|
||||||
|
Parse the command line arguments and print the repository file dependencies.
|
||||||
|
|
||||||
|
This function sets up an argument parser to retrieve the path of a repository. It checks
|
||||||
|
if the provided path exists and if it doesn’t, it prints an error message and exits. If
|
||||||
|
the path is valid, it calls an asynchronous function to get the dependencies and prints
|
||||||
|
the nodes and their relations in the dependency graph.
|
||||||
|
"""
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument("repo_path", help="Path to the repository")
|
parser.add_argument("repo_path", help="Path to the repository")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,21 @@ from fastapi import status
|
||||||
|
|
||||||
|
|
||||||
class NoRelevantDataError(CogneeApiError):
|
class NoRelevantDataError(CogneeApiError):
|
||||||
|
"""
|
||||||
|
Represents an error when no relevant data is found during a search. This class is a
|
||||||
|
subclass of CogneeApiError.
|
||||||
|
|
||||||
|
Public methods:
|
||||||
|
|
||||||
|
- __init__
|
||||||
|
|
||||||
|
Instance variables:
|
||||||
|
|
||||||
|
- message
|
||||||
|
- name
|
||||||
|
- status_code
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
message: str = "Search did not find any data.",
|
message: str = "Search did not find any data.",
|
||||||
|
|
|
||||||
|
|
@ -52,7 +52,21 @@ EXTENSION_TO_DOCUMENT_CLASS = {
|
||||||
|
|
||||||
|
|
||||||
def update_node_set(document):
|
def update_node_set(document):
|
||||||
"""Extracts node_set from document's external_metadata."""
|
"""
|
||||||
|
Extracts node_set from document's external_metadata.
|
||||||
|
|
||||||
|
Parses the external_metadata of the given document and updates the document's
|
||||||
|
belongs_to_set attribute with NodeSet objects generated from the node_set found in the
|
||||||
|
external_metadata. If the external_metadata is not valid JSON, is not a dictionary, does
|
||||||
|
not contain the 'node_set' key, or if node_set is not a list, the function has no effect
|
||||||
|
and will return early.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- document: The document object which contains external_metadata from which the
|
||||||
|
node_set will be extracted.
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
external_metadata = json.loads(document.external_metadata)
|
external_metadata = json.loads(document.external_metadata)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
|
|
@ -76,11 +90,26 @@ def update_node_set(document):
|
||||||
|
|
||||||
async def classify_documents(data_documents: list[Data]) -> list[Document]:
|
async def classify_documents(data_documents: list[Data]) -> list[Document]:
|
||||||
"""
|
"""
|
||||||
Classifies a list of data items into specific document types based on file extensions.
|
Classifies a list of data items into specific document types based on their file
|
||||||
|
extensions.
|
||||||
|
|
||||||
Notes:
|
This function processes each item in the provided list of data documents, retrieves
|
||||||
- The function relies on `get_metadata` to retrieve metadata information for each data item.
|
relevant metadata, and creates instances of document classes mapped to their extensions.
|
||||||
- Ensure the `Data` objects and their attributes (e.g., `extension`, `id`) are valid before calling this function.
|
It ensures that the data items are valid before performing the classification and
|
||||||
|
invokes `update_node_set` to extract and set relevant node information from the
|
||||||
|
document's external metadata.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- data_documents (list[Data]): A list of Data objects representing the documents to
|
||||||
|
be classified.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
--------
|
||||||
|
|
||||||
|
- list[Document]: A list of Document objects created based on the classified data
|
||||||
|
documents.
|
||||||
"""
|
"""
|
||||||
documents = []
|
documents = []
|
||||||
for data_item in data_documents:
|
for data_item in data_documents:
|
||||||
|
|
|
||||||
|
|
@ -33,6 +33,25 @@ logger = get_logger("task:infer_data_ontology")
|
||||||
|
|
||||||
|
|
||||||
async def extract_ontology(content: str, response_model: Type[BaseModel]):
|
async def extract_ontology(content: str, response_model: Type[BaseModel]):
|
||||||
|
"""
|
||||||
|
Extracts structured ontology from the provided content using a pre-defined LLM client.
|
||||||
|
|
||||||
|
This asynchronous function retrieves a system prompt from a file and utilizes an LLM
|
||||||
|
client to create a structured output based on the input content and specified response
|
||||||
|
model.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- content (str): The content from which to extract the ontology.
|
||||||
|
- response_model (Type[BaseModel]): The model that defines the structure of the
|
||||||
|
output ontology.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
--------
|
||||||
|
|
||||||
|
The structured ontology extracted from the content.
|
||||||
|
"""
|
||||||
llm_client = get_llm_client()
|
llm_client = get_llm_client()
|
||||||
|
|
||||||
system_prompt = read_query_prompt("extract_ontology.txt")
|
system_prompt = read_query_prompt("extract_ontology.txt")
|
||||||
|
|
@ -43,10 +62,38 @@ async def extract_ontology(content: str, response_model: Type[BaseModel]):
|
||||||
|
|
||||||
|
|
||||||
class OntologyEngine:
|
class OntologyEngine:
|
||||||
|
"""
|
||||||
|
Manage ontology data and operations for graph structures, providing methods for data
|
||||||
|
loading, flattening models, and adding ontological relationships to a graph database.
|
||||||
|
|
||||||
|
Public methods:
|
||||||
|
|
||||||
|
- flatten_model
|
||||||
|
- recursive_flatten
|
||||||
|
- load_data
|
||||||
|
- add_graph_ontology
|
||||||
|
"""
|
||||||
|
|
||||||
async def flatten_model(
|
async def flatten_model(
|
||||||
self, model: NodeModel, parent_id: Optional[str] = None
|
self, model: NodeModel, parent_id: Optional[str] = None
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Flatten the model to a dictionary."""
|
"""
|
||||||
|
Flatten the model to a dictionary including optional parent ID and relationship details
|
||||||
|
if available.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- model (NodeModel): The NodeModel instance to flatten.
|
||||||
|
- parent_id (Optional[str]): An optional ID of the parent node for hierarchical
|
||||||
|
purposes. (default None)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
--------
|
||||||
|
|
||||||
|
- Dict[str, Any]: A dictionary representation of the model with flattened
|
||||||
|
attributes.
|
||||||
|
"""
|
||||||
result = model.dict()
|
result = model.dict()
|
||||||
result["parent_id"] = parent_id
|
result["parent_id"] = parent_id
|
||||||
if model.default_relationship:
|
if model.default_relationship:
|
||||||
|
|
@ -62,7 +109,23 @@ class OntologyEngine:
|
||||||
async def recursive_flatten(
|
async def recursive_flatten(
|
||||||
self, items: Union[List[Dict[str, Any]], Dict[str, Any]], parent_id: Optional[str] = None
|
self, items: Union[List[Dict[str, Any]], Dict[str, Any]], parent_id: Optional[str] = None
|
||||||
) -> List[Dict[str, Any]]:
|
) -> List[Dict[str, Any]]:
|
||||||
"""Recursively flatten the items."""
|
"""
|
||||||
|
Recursively flatten a hierarchical structure of models into a flat list of dictionaries.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- items (Union[List[Dict[str, Any]], Dict[str, Any]]): A list or dictionary
|
||||||
|
containing models to flatten.
|
||||||
|
- parent_id (Optional[str]): An optional ID of the parent node to maintain hierarchy
|
||||||
|
during flattening. (default None)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
--------
|
||||||
|
|
||||||
|
- List[Dict[str, Any]]: A flat list of dictionaries representing the hierarchical
|
||||||
|
model structure.
|
||||||
|
"""
|
||||||
flat_list = []
|
flat_list = []
|
||||||
|
|
||||||
if isinstance(items, list):
|
if isinstance(items, list):
|
||||||
|
|
@ -76,7 +139,20 @@ class OntologyEngine:
|
||||||
return flat_list
|
return flat_list
|
||||||
|
|
||||||
async def load_data(self, file_path: str) -> Union[List[Dict[str, Any]], Dict[str, Any]]:
|
async def load_data(self, file_path: str) -> Union[List[Dict[str, Any]], Dict[str, Any]]:
|
||||||
"""Load data from a JSON or CSV file."""
|
"""
|
||||||
|
Load data from a specified JSON or CSV file and return it in a structured format.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- file_path (str): The path to the file to load data from.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
--------
|
||||||
|
|
||||||
|
- Union[List[Dict[str, Any]], Dict[str, Any]]: Parsed data from the file as either a
|
||||||
|
list of dictionaries or a single dictionary depending on content type.
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
if file_path.endswith(".json"):
|
if file_path.endswith(".json"):
|
||||||
async with aiofiles.open(file_path, mode="r") as f:
|
async with aiofiles.open(file_path, mode="r") as f:
|
||||||
|
|
@ -96,7 +172,18 @@ class OntologyEngine:
|
||||||
)
|
)
|
||||||
|
|
||||||
async def add_graph_ontology(self, file_path: str = None, documents: list = None):
|
async def add_graph_ontology(self, file_path: str = None, documents: list = None):
|
||||||
"""Add graph ontology from a JSON or CSV file or infer from documents content."""
|
"""
|
||||||
|
Add graph ontology from a JSON or CSV file, or infer relationships from provided
|
||||||
|
document content. Raise exceptions for invalid file types or missing entities.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- file_path (str): Optional path to a file containing data to be loaded. (default
|
||||||
|
None)
|
||||||
|
- documents (list): Optional list of document objects for content extraction if no
|
||||||
|
file path is provided. (default None)
|
||||||
|
"""
|
||||||
if file_path is None:
|
if file_path is None:
|
||||||
initial_chunks_and_ids = []
|
initial_chunks_and_ids = []
|
||||||
|
|
||||||
|
|
@ -202,6 +289,17 @@ class OntologyEngine:
|
||||||
|
|
||||||
|
|
||||||
async def infer_data_ontology(documents, ontology_model=KnowledgeGraph, root_node_id=None):
|
async def infer_data_ontology(documents, ontology_model=KnowledgeGraph, root_node_id=None):
|
||||||
|
"""
|
||||||
|
Infer data ontology from provided documents and optionally add it to a graph.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- documents: The documents from which to infer the ontology.
|
||||||
|
- ontology_model: The ontology model to use for the inference.
|
||||||
|
(default KnowledgeGraph)
|
||||||
|
- root_node_id: An optional root node identifier for the ontology. (default None)
|
||||||
|
"""
|
||||||
if ontology_model == KnowledgeGraph:
|
if ontology_model == KnowledgeGraph:
|
||||||
ontology_engine = OntologyEngine()
|
ontology_engine = OntologyEngine()
|
||||||
root_node_id = await ontology_engine.add_graph_ontology(documents=documents)
|
root_node_id = await ontology_engine.add_graph_ontology(documents=documents)
|
||||||
|
|
|
||||||
|
|
@ -3,12 +3,40 @@ from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
class RelationshipModel(BaseModel):
|
class RelationshipModel(BaseModel):
|
||||||
|
"""
|
||||||
|
Represents a relationship between two entities in a model.
|
||||||
|
|
||||||
|
This class holds the type of the relationship and the identifiers for the source and
|
||||||
|
target entities. It includes the following public instance variables:
|
||||||
|
|
||||||
|
- type: A string indicating the type of relationship.
|
||||||
|
- source: A string representing the source entity of the relationship.
|
||||||
|
- target: A string representing the target entity of the relationship.
|
||||||
|
"""
|
||||||
|
|
||||||
type: str
|
type: str
|
||||||
source: str
|
source: str
|
||||||
target: str
|
target: str
|
||||||
|
|
||||||
|
|
||||||
class NodeModel(BaseModel):
|
class NodeModel(BaseModel):
|
||||||
|
"""
|
||||||
|
Represents a node in a hierarchical model structure with relationships to other nodes.
|
||||||
|
|
||||||
|
Public methods:
|
||||||
|
|
||||||
|
- __init__(self, node_id: str, name: str, default_relationship:
|
||||||
|
Optional[RelationshipModel] = None, children: List[Union[Dict[str, Any], NodeModel]] =
|
||||||
|
Field(default_factory=list))
|
||||||
|
|
||||||
|
Instance variables:
|
||||||
|
|
||||||
|
- node_id: Unique identifier for the node.
|
||||||
|
- name: Name of the node.
|
||||||
|
- default_relationship: Default relationship associated with the node, if any.
|
||||||
|
- children: List of child nodes or dictionaries representing children for this node.
|
||||||
|
"""
|
||||||
|
|
||||||
node_id: str
|
node_id: str
|
||||||
name: str
|
name: str
|
||||||
default_relationship: Optional[RelationshipModel] = None
|
default_relationship: Optional[RelationshipModel] = None
|
||||||
|
|
@ -19,12 +47,28 @@ NodeModel.model_rebuild()
|
||||||
|
|
||||||
|
|
||||||
class OntologyNode(BaseModel):
|
class OntologyNode(BaseModel):
|
||||||
|
"""
|
||||||
|
Represents a node in an ontology with a unique identifier, name, and description.
|
||||||
|
"""
|
||||||
|
|
||||||
id: str = Field(..., description="Unique identifier made from node name.")
|
id: str = Field(..., description="Unique identifier made from node name.")
|
||||||
name: str
|
name: str
|
||||||
description: str
|
description: str
|
||||||
|
|
||||||
|
|
||||||
class OntologyEdge(BaseModel):
|
class OntologyEdge(BaseModel):
|
||||||
|
"""
|
||||||
|
Represent an edge in an ontology, connecting a source and target with a specific
|
||||||
|
relationship type.
|
||||||
|
|
||||||
|
The class includes the following instance variables:
|
||||||
|
- id: A unique identifier for the edge.
|
||||||
|
- source_id: The identifier of the source node.
|
||||||
|
- target_id: The identifier of the target node.
|
||||||
|
- relationship_type: The type of relationship represented by this edge, defining how the
|
||||||
|
source and target are related.
|
||||||
|
"""
|
||||||
|
|
||||||
id: str
|
id: str
|
||||||
source_id: str
|
source_id: str
|
||||||
target_id: str
|
target_id: str
|
||||||
|
|
@ -32,5 +76,14 @@ class OntologyEdge(BaseModel):
|
||||||
|
|
||||||
|
|
||||||
class GraphOntology(BaseModel):
|
class GraphOntology(BaseModel):
|
||||||
|
"""
|
||||||
|
Represents a graph-based structure of ontology consisting of nodes and edges.
|
||||||
|
|
||||||
|
The GraphOntology class contains a collection of OntologyNode instances representing the
|
||||||
|
nodes of the graph and OntologyEdge instances representing the relationships between
|
||||||
|
them. It is a plain data container: its instance variables are a list of nodes (`nodes`)
|
||||||
|
and a list of edges (`edges`).
|
||||||
|
"""
|
||||||
|
|
||||||
nodes: list[OntologyNode]
|
nodes: list[OntologyNode]
|
||||||
edges: list[OntologyEdge]
|
edges: list[OntologyEdge]
|
||||||
|
|
|
||||||
|
|
@ -10,11 +10,19 @@ from cognee.infrastructure.databases.relational import get_relational_config
|
||||||
@lru_cache
|
@lru_cache
|
||||||
def get_dlt_destination() -> Union[type[dlt.destinations.sqlalchemy], None]:
|
def get_dlt_destination() -> Union[type[dlt.destinations.sqlalchemy], None]:
|
||||||
"""
|
"""
|
||||||
Handles propagation of the cognee database configuration to the dlt library
|
Handle the propagation of the cognee database configuration to the dlt library.
|
||||||
|
|
||||||
|
This function determines the appropriate sqlalchemy destination based on the database
|
||||||
|
provider specified in the relational configuration. It constructs the destination
|
||||||
|
credentials for either sqlite or postgres databases accordingly. If the database
|
||||||
|
provider is neither sqlite nor postgres, it returns None.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
sqlachemy: sqlachemy destination used by the dlt library
|
--------
|
||||||
|
|
||||||
|
- Union[type[dlt.destinations.sqlalchemy], None]: An instance of sqlalchemy
|
||||||
|
destination used by the dlt library, or None if the database provider is
|
||||||
|
unsupported.
|
||||||
"""
|
"""
|
||||||
relational_config = get_relational_config()
|
relational_config = get_relational_config()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,26 @@ from typing import Union
|
||||||
|
|
||||||
|
|
||||||
def get_data_from_llama_index(data_point: Union[Document, ImageDocument], dataset_name: str) -> str:
|
def get_data_from_llama_index(data_point: Union[Document, ImageDocument], dataset_name: str) -> str:
|
||||||
|
"""
|
||||||
|
Retrieve the file path based on the data point type.
|
||||||
|
|
||||||
|
Ensure the data point is an instance of either Document or ImageDocument. If the data
|
||||||
|
point already has a file path (in its metadata or as its image path), return it; otherwise, save the data
|
||||||
|
point's text to a file and return the newly created file path.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- data_point (Union[Document, ImageDocument]): An instance of Document or
|
||||||
|
ImageDocument to extract data from.
|
||||||
|
- dataset_name (str): The name of the dataset associated with the data point.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
--------
|
||||||
|
|
||||||
|
- str: The file path as a string where the data is stored or the existing path from
|
||||||
|
the data point.
|
||||||
|
"""
|
||||||
# Specific type checking is used to ensure it's not a child class from Document
|
# Specific type checking is used to ensure it's not a child class from Document
|
||||||
if isinstance(data_point, Document) and type(data_point) is Document:
|
if isinstance(data_point, Document) and type(data_point) is Document:
|
||||||
file_path = data_point.metadata.get("file_path")
|
file_path = data_point.metadata.get("file_path")
|
||||||
|
|
|
||||||
|
|
@ -19,10 +19,34 @@ logger = get_logger()
|
||||||
|
|
||||||
|
|
||||||
class FileParser:
|
class FileParser:
|
||||||
|
"""
|
||||||
|
Handles the parsing of files into source code and an abstract syntax tree
|
||||||
|
representation. Public methods include:
|
||||||
|
|
||||||
|
- parse_file: Parses a file and returns its source code and syntax tree representation.
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.parsed_files = {}
|
self.parsed_files = {}
|
||||||
|
|
||||||
async def parse_file(self, file_path: str) -> tuple[str, Tree]:
|
async def parse_file(self, file_path: str) -> tuple[str, Tree]:
|
||||||
|
"""
|
||||||
|
Parse a file and return its source code along with its syntax tree representation.
|
||||||
|
|
||||||
|
If the file has already been parsed, retrieve the result from memory instead of reading
|
||||||
|
the file again.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- file_path (str): The path of the file to parse.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
--------
|
||||||
|
|
||||||
|
- tuple[str, Tree]: A tuple containing the source code of the file and its
|
||||||
|
corresponding syntax tree representation.
|
||||||
|
"""
|
||||||
PY_LANGUAGE = Language(tspython.language())
|
PY_LANGUAGE = Language(tspython.language())
|
||||||
source_code_parser = Parser(PY_LANGUAGE)
|
source_code_parser = Parser(PY_LANGUAGE)
|
||||||
|
|
||||||
|
|
@ -35,6 +59,24 @@ class FileParser:
|
||||||
|
|
||||||
|
|
||||||
async def get_source_code(file_path: str):
|
async def get_source_code(file_path: str):
|
||||||
|
"""
|
||||||
|
Read source code from a file asynchronously.
|
||||||
|
|
||||||
|
This function attempts to open a file specified by the given file path, read its
|
||||||
|
contents, and return the source code. In case of any errors during the file reading
|
||||||
|
process, it logs an error message and returns None.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- file_path (str): The path to the file from which to read the source code.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
--------
|
||||||
|
|
||||||
|
Returns the contents of the file as a string if successful, or None if an error
|
||||||
|
occurs.
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
|
async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
|
||||||
source_code = await f.read()
|
source_code = await f.read()
|
||||||
|
|
@ -45,7 +87,22 @@ async def get_source_code(file_path: str):
|
||||||
|
|
||||||
|
|
||||||
def resolve_module_path(module_name):
|
def resolve_module_path(module_name):
|
||||||
"""Find the file path of a module."""
|
"""
|
||||||
|
Find the file path of a module.
|
||||||
|
|
||||||
|
Return the file path of the specified module if found, or return None if the module does
|
||||||
|
not exist or cannot be located.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- module_name: The name of the module whose file path is to be resolved.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
--------
|
||||||
|
|
||||||
|
The file path of the module as a string or None if the module is not found.
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
spec = importlib.util.find_spec(module_name)
|
spec = importlib.util.find_spec(module_name)
|
||||||
if spec and spec.origin:
|
if spec and spec.origin:
|
||||||
|
|
@ -58,7 +115,23 @@ def resolve_module_path(module_name):
|
||||||
def find_function_location(
|
def find_function_location(
|
||||||
module_path: str, function_name: str, parser: FileParser
|
module_path: str, function_name: str, parser: FileParser
|
||||||
) -> Optional[tuple[str, str]]:
|
) -> Optional[tuple[str, str]]:
|
||||||
"""Find the function definition in the module."""
|
"""
|
||||||
|
Find the location of a function definition in a specified module.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- module_path (str): The path to the module where the function is defined.
|
||||||
|
- function_name (str): The name of the function whose location is to be found.
|
||||||
|
- parser (FileParser): An instance of FileParser used to parse the module's source
|
||||||
|
code.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
--------
|
||||||
|
|
||||||
|
- Optional[tuple[str, str]]: Returns a tuple containing the module path and the
|
||||||
|
start point of the function if found; otherwise, returns None.
|
||||||
|
"""
|
||||||
if not module_path or not os.path.exists(module_path):
|
if not module_path or not os.path.exists(module_path):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
@ -78,6 +151,24 @@ def find_function_location(
|
||||||
async def get_local_script_dependencies(
|
async def get_local_script_dependencies(
|
||||||
repo_path: str, script_path: str, detailed_extraction: bool = False
|
repo_path: str, script_path: str, detailed_extraction: bool = False
|
||||||
) -> CodeFile:
|
) -> CodeFile:
|
||||||
|
"""
|
||||||
|
Retrieve local script dependencies and create a CodeFile object.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- repo_path (str): The path to the repository that contains the script.
|
||||||
|
- script_path (str): The path of the script for which dependencies are being
|
||||||
|
extracted.
|
||||||
|
- detailed_extraction (bool): A flag indicating whether to perform a detailed
|
||||||
|
extraction of code components.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
--------
|
||||||
|
|
||||||
|
- CodeFile: Returns a CodeFile object containing information about the script,
|
||||||
|
including its dependencies and definitions.
|
||||||
|
"""
|
||||||
code_file_parser = FileParser()
|
code_file_parser = FileParser()
|
||||||
source_code, source_code_tree = await code_file_parser.parse_file(script_path)
|
source_code, source_code_tree = await code_file_parser.parse_file(script_path)
|
||||||
|
|
||||||
|
|
@ -113,6 +204,24 @@ async def get_local_script_dependencies(
|
||||||
|
|
||||||
|
|
||||||
def find_node(nodes: list[Node], condition: callable) -> Node:
|
def find_node(nodes: list[Node], condition: callable) -> Node:
|
||||||
|
"""
|
||||||
|
Find and return the first node that satisfies the given condition.
|
||||||
|
|
||||||
|
Iterate through the provided list of nodes and return the first node for which the
|
||||||
|
condition callable returns True. If no such node is found, return None.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- nodes (list[Node]): A list of Node objects to search through.
|
||||||
|
- condition (callable): A callable that takes a Node and returns a boolean
|
||||||
|
indicating if the node meets specified criteria.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
--------
|
||||||
|
|
||||||
|
- Node: The first Node that matches the condition, or None if no such node exists.
|
||||||
|
"""
|
||||||
for node in nodes:
|
for node in nodes:
|
||||||
if condition(node):
|
if condition(node):
|
||||||
return node
|
return node
|
||||||
|
|
@ -123,6 +232,30 @@ def find_node(nodes: list[Node], condition: callable) -> Node:
|
||||||
async def extract_code_parts(
|
async def extract_code_parts(
|
||||||
tree_root: Node, script_path: str, existing_nodes: list[DataPoint] = {}
|
tree_root: Node, script_path: str, existing_nodes: list[DataPoint] = {}
|
||||||
) -> AsyncGenerator[DataPoint, None]:
|
) -> AsyncGenerator[DataPoint, None]:
|
||||||
|
"""
|
||||||
|
Extract code parts from a given AST node tree asynchronously.
|
||||||
|
|
||||||
|
Iteratively yields DataPoint nodes representing import statements, function definitions,
|
||||||
|
and class definitions found in the children of the specified tree root. The function
|
||||||
|
checks
|
||||||
|
if nodes are already present in the existing_nodes dictionary to prevent duplicates.
|
||||||
|
This function has to be used in an asynchronous context, and it requires a valid
|
||||||
|
tree_root
|
||||||
|
and proper initialization of existing_nodes.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- tree_root (Node): The root node of the AST tree containing code parts to extract.
|
||||||
|
- script_path (str): The file path of the script from which the AST was generated.
|
||||||
|
- existing_nodes (list[DataPoint]): A dictionary that holds already extracted
|
||||||
|
DataPoint nodes to avoid duplicates. (default {}; note that this mutable default is a single object shared across calls)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
--------
|
||||||
|
|
||||||
|
Yields DataPoint nodes representing imported modules, functions, and classes.
|
||||||
|
"""
|
||||||
for child_node in tree_root.children:
|
for child_node in tree_root.children:
|
||||||
if child_node.type == "import_statement" or child_node.type == "import_from_statement":
|
if child_node.type == "import_statement" or child_node.type == "import_from_statement":
|
||||||
parts = child_node.text.decode("utf-8").split()
|
parts = child_node.text.decode("utf-8").split()
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,24 @@ import os
|
||||||
|
|
||||||
|
|
||||||
async def get_non_py_files(repo_path):
|
async def get_non_py_files(repo_path):
|
||||||
"""Get files that are not .py files and their contents"""
|
"""
|
||||||
|
Get files that are not .py files and their contents.
|
||||||
|
|
||||||
|
Check if the specified repository path exists and if so, traverse the directory,
|
||||||
|
collecting the paths of files that do not have a .py extension and meet the
|
||||||
|
criteria set in the allowed and ignored patterns. Return a list of paths to
|
||||||
|
those files.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- repo_path: The file system path to the repository to scan for non-Python files.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
--------
|
||||||
|
|
||||||
|
A list of file paths that are not Python files and meet the specified criteria; an empty dict (`{}`) is returned if `repo_path` does not exist.
|
||||||
|
"""
|
||||||
if not os.path.exists(repo_path):
|
if not os.path.exists(repo_path):
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
@ -111,6 +128,22 @@ async def get_non_py_files(repo_path):
|
||||||
}
|
}
|
||||||
|
|
||||||
def should_process(path):
|
def should_process(path):
|
||||||
|
"""
|
||||||
|
Determine if a file should be processed based on its extension and path patterns.
|
||||||
|
|
||||||
|
This function checks if the file extension is in the allowed list and ensures that none
|
||||||
|
of the ignored patterns are present in the provided file path.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- path: The file path to check for processing eligibility.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
--------
|
||||||
|
|
||||||
|
Returns True if the file should be processed; otherwise, False.
|
||||||
|
"""
|
||||||
_, ext = os.path.splitext(path)
|
_, ext = os.path.splitext(path)
|
||||||
return ext in ALLOWED_EXTENSIONS and not any(
|
return ext in ALLOWED_EXTENSIONS and not any(
|
||||||
pattern in path for pattern in IGNORED_PATTERNS
|
pattern in path for pattern in IGNORED_PATTERNS
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,24 @@ from cognee.shared.CodeGraphEntities import CodeFile, Repository
|
||||||
|
|
||||||
|
|
||||||
async def get_source_code_files(repo_path):
|
async def get_source_code_files(repo_path):
|
||||||
"""Get .py files and their source code"""
|
"""
|
||||||
|
Retrieve Python source code files from the specified repository path.
|
||||||
|
|
||||||
|
This function scans the given repository path for files that have the .py extension
|
||||||
|
while excluding test files and files within a virtual environment. It returns a list of
|
||||||
|
absolute paths to the source code files that are not empty.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- repo_path: The file path to the repository to search for Python source files.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
--------
|
||||||
|
|
||||||
|
A list of absolute paths to .py files that contain source code, excluding empty
|
||||||
|
files, test files, and files from a virtual environment; an empty dict (`{}`) is returned if `repo_path` does not exist.
|
||||||
|
"""
|
||||||
if not os.path.exists(repo_path):
|
if not os.path.exists(repo_path):
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
@ -40,6 +57,26 @@ async def get_source_code_files(repo_path):
|
||||||
|
|
||||||
|
|
||||||
def run_coroutine(coroutine_func, *args, **kwargs):
|
def run_coroutine(coroutine_func, *args, **kwargs):
|
||||||
|
"""
|
||||||
|
Run a coroutine function until it completes.
|
||||||
|
|
||||||
|
This function creates a new asyncio event loop, sets it as the current loop, and
|
||||||
|
executes the given coroutine function with the provided arguments. Once the coroutine
|
||||||
|
completes, the loop is closed. Intended for use in environments where an existing event
|
||||||
|
loop is not available or desirable.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- coroutine_func: The coroutine function to be run.
|
||||||
|
- *args: Positional arguments to pass to the coroutine function.
|
||||||
|
- **kwargs: Keyword arguments to pass to the coroutine function.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
--------
|
||||||
|
|
||||||
|
The result returned by the coroutine after completion.
|
||||||
|
"""
|
||||||
loop = asyncio.new_event_loop()
|
loop = asyncio.new_event_loop()
|
||||||
asyncio.set_event_loop(loop)
|
asyncio.set_event_loop(loop)
|
||||||
result = loop.run_until_complete(coroutine_func(*args, **kwargs))
|
result = loop.run_until_complete(coroutine_func(*args, **kwargs))
|
||||||
|
|
@ -50,7 +87,21 @@ def run_coroutine(coroutine_func, *args, **kwargs):
|
||||||
async def get_repo_file_dependencies(
|
async def get_repo_file_dependencies(
|
||||||
repo_path: str, detailed_extraction: bool = False
|
repo_path: str, detailed_extraction: bool = False
|
||||||
) -> AsyncGenerator[DataPoint, None]:
|
) -> AsyncGenerator[DataPoint, None]:
|
||||||
"""Generate a dependency graph for Python files in the given repository path."""
|
"""
|
||||||
|
Generate a dependency graph for Python files in the given repository path.
|
||||||
|
|
||||||
|
Check the validity of the repository path and yield a repository object followed by the
|
||||||
|
dependencies of Python files within that repository. Raise a FileNotFoundError if the
|
||||||
|
provided path does not exist. The extraction of detailed dependencies can be controlled
|
||||||
|
via the `detailed_extraction` argument.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- repo_path (str): The file path to the repository where Python files are located.
|
||||||
|
- detailed_extraction (bool): A flag indicating whether to perform a detailed
|
||||||
|
extraction of dependencies. (default False)
|
||||||
|
"""
|
||||||
|
|
||||||
if not os.path.exists(repo_path):
|
if not os.path.exists(repo_path):
|
||||||
raise FileNotFoundError(f"Repository path {repo_path} does not exist.")
|
raise FileNotFoundError(f"Repository path {repo_path} does not exist.")
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,19 @@ from cognee.shared.data_models import SummarizedCode, SummarizedClass, Summarize
|
||||||
|
|
||||||
|
|
||||||
def get_mock_summarized_code() -> SummarizedCode:
|
def get_mock_summarized_code() -> SummarizedCode:
|
||||||
|
"""
|
||||||
|
Return a summarized representation of mock code.
|
||||||
|
|
||||||
|
This function constructs and returns a `SummarizedCode` object that includes various
|
||||||
|
components such as file name, high-level summary, key features, imports, constants,
|
||||||
|
classes, and functions, all described with placeholders for mock data.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
--------
|
||||||
|
|
||||||
|
- SummarizedCode: A `SummarizedCode` object containing mock data for file summary,
|
||||||
|
features, imports, constants, classes, and functions.
|
||||||
|
"""
|
||||||
return SummarizedCode(
|
return SummarizedCode(
|
||||||
file_name="mock_file.py",
|
file_name="mock_file.py",
|
||||||
high_level_summary="This is a mock high-level summary.",
|
high_level_summary="This is a mock high-level summary.",
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,15 @@ from cognee.shared.CodeGraphEntities import CodeFile, CodePart
|
||||||
|
|
||||||
|
|
||||||
class TextSummary(DataPoint):
|
class TextSummary(DataPoint):
|
||||||
|
"""
|
||||||
|
Represent a text summary derived from a document chunk.
|
||||||
|
|
||||||
|
This class encapsulates a text summary as well as its associated metadata. The public
|
||||||
|
instance variables include 'text' for the summary content and 'made_from' which
|
||||||
|
indicates the source document chunk. The 'metadata' instance variable contains
|
||||||
|
additional information such as indexed fields.
|
||||||
|
"""
|
||||||
|
|
||||||
text: str
|
text: str
|
||||||
made_from: DocumentChunk
|
made_from: DocumentChunk
|
||||||
|
|
||||||
|
|
@ -13,6 +22,15 @@ class TextSummary(DataPoint):
|
||||||
|
|
||||||
|
|
||||||
class CodeSummary(DataPoint):
|
class CodeSummary(DataPoint):
|
||||||
|
"""
|
||||||
|
Represent a summary of a code file or of a part of one.
|
||||||
|
|
||||||
|
This class inherits from DataPoint and contains a text representation alongside the
|
||||||
|
summarized content, which can either be a full code file or a part of it. The metadata
|
||||||
|
dictionary defines index fields for the class's instances, particularly focusing on the
|
||||||
|
'text' attribute. Public attributes include 'text', 'summarizes', and 'metadata'.
|
||||||
|
"""
|
||||||
|
|
||||||
text: str
|
text: str
|
||||||
summarizes: Union[CodeFile, CodePart]
|
summarizes: Union[CodeFile, CodePart]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,29 @@ from .models import TextSummary
|
||||||
async def summarize_text(
|
async def summarize_text(
|
||||||
data_chunks: list[DocumentChunk], summarization_model: Type[BaseModel] = None
|
data_chunks: list[DocumentChunk], summarization_model: Type[BaseModel] = None
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
Summarize the text contained in the provided data chunks.
|
||||||
|
|
||||||
|
If no summarization model is provided, the function retrieves the default model from the
|
||||||
|
configuration. It processes the data chunks asynchronously and returns summaries for
|
||||||
|
each chunk. If the provided list of data chunks is empty, it simply returns the list as
|
||||||
|
is.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- data_chunks (list[DocumentChunk]): A list of DocumentChunk objects containing text
|
||||||
|
to be summarized.
|
||||||
|
- summarization_model (Type[BaseModel]): An optional model used for summarizing
|
||||||
|
text. If not provided, the default is fetched from the configuration. (default
|
||||||
|
None)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
--------
|
||||||
|
|
||||||
|
A list of TextSummary objects, each containing the summary of a corresponding
|
||||||
|
DocumentChunk.
|
||||||
|
"""
|
||||||
if len(data_chunks) == 0:
|
if len(data_chunks) == 0:
|
||||||
return data_chunks
|
return data_chunks
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,18 @@ from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
class GraphitiNode(DataPoint):
|
class GraphitiNode(DataPoint):
|
||||||
|
"""
|
||||||
|
Represent a node in a graph with optional content, name, and summary attributes.
|
||||||
|
|
||||||
|
This class extends DataPoint and includes a metadata dictionary that specifies the index
|
||||||
|
fields for the node's data. The public instance variables are:
|
||||||
|
|
||||||
|
- content: an optional string representing the content of the node.
|
||||||
|
- name: an optional string representing the name of the node.
|
||||||
|
- summary: an optional string providing a summary of the node.
|
||||||
|
- metadata: a dictionary outlining the fields used for indexing.
|
||||||
|
"""
|
||||||
|
|
||||||
content: Optional[str] = None
|
content: Optional[str] = None
|
||||||
name: Optional[str] = None
|
name: Optional[str] = None
|
||||||
summary: Optional[str] = None
|
summary: Optional[str] = None
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue