""" This module contains utility functions for the cognitive architecture. """ import os import uuid import random import string import logging import graphistry from pathlib import Path from jinja2 import Environment, FileSystemLoader, select_autoescape from sqlalchemy import or_ from sqlalchemy.future import select from sqlalchemy.orm import contains_eager from sqlalchemy.ext.asyncio import AsyncSession from cognitive_architecture.database.relationaldb.models.docs import DocsModel from cognitive_architecture.database.relationaldb.models.memory import MemoryModel from cognitive_architecture.database.relationaldb.models.operation import Operation from cognitive_architecture.config import Config config = Config() config.load() class Node: def __init__(self, id, description, color): self.id = id self.description = description self.color = color class Edge: def __init__(self, source, target, label, color): self.source = source self.target = target self.label = label self.color = color def get_document_names(doc_input): """ Get a list of document names. This function takes doc_input, which can be a folder path, a single document file path, or a document name as a string. It returns a list of document names based on the doc_input. Args: doc_input (str): The doc_input can be a folder path, a single document file path, or a document name as a string. Returns: list: A list of document names. Example usage: - Folder path: get_document_names(".data") - Single document file path: get_document_names(".data/example.pdf") - Document name provided as a string: get_document_names("example.docx") """ if isinstance(doc_input, list): return doc_input if os.path.isdir(doc_input): # doc_input is a folder folder_path = doc_input document_names = [] for filename in os.listdir(folder_path): if os.path.isfile(os.path.join(folder_path, filename)): document_names.append(filename) return document_names elif os.path.isfile(doc_input): # doc_input is a single document file return [os.path.basename(doc_input)] elif isinstance(doc_input, str): # doc_input is a document name provided as a string return [doc_input] else: # doc_input is not valid return [] def format_dict(d): """ Format a dictionary as a string.""" # Initialize an empty list to store formatted items formatted_items = [] # Iterate through all key-value pairs for key, value in d.items(): # Format key-value pairs with a colon and space, and adding quotes for string values formatted_item = ( f"{key}: '{value}'" if isinstance(value, str) else f"{key}: {value}" ) formatted_items.append(formatted_item) # Join all formatted items with a comma and a space formatted_string = ", ".join(formatted_items) # Add curly braces to mimic a dictionary formatted_string = f"{{{formatted_string}}}" return formatted_string def append_uuid_to_variable_names(variable_mapping): """ Append a UUID to the variable names to make them unique.""" unique_variable_mapping = {} for original_name in variable_mapping.values(): unique_name = f"{original_name}_{uuid.uuid4().hex}" unique_variable_mapping[original_name] = unique_name return unique_variable_mapping # Update the functions to use the unique variable names def create_node_variable_mapping(nodes): """ Create a mapping of node identifiers to unique variable names.""" mapping = {} for node in nodes: variable_name = f"{node['category']}{node['id']}".lower() mapping[node["id"]] = variable_name return mapping def create_edge_variable_mapping(edges): """ Create a mapping of edge identifiers to unique variable names.""" mapping = {} for edge in edges: # Construct a unique identifier for the edge variable_name = f"edge{edge['source']}to{edge['target']}".lower() mapping[(edge["source"], edge["target"])] = variable_name return mapping def generate_letter_uuid(length=8): """Generate a random string of uppercase letters with the specified length.""" letters = string.ascii_uppercase # A-Z return "".join(random.choice(letters) for _ in range(length)) async def get_vectordb_namespace(session: AsyncSession, user_id: str): """ Asynchronously retrieves the latest memory names for a given user.""" try: result = await session.execute( select(MemoryModel.memory_name) .where(MemoryModel.user_id == user_id) .order_by(MemoryModel.created_at.desc()) ) namespace = [row[0] for row in result.fetchall()] return namespace except Exception as e: logging.error( f"An error occurred while retrieving the Vectordb_namespace: {str(e)}" ) return None async def get_vectordb_document_name(session: AsyncSession, user_id: str): """ Asynchronously retrieves the latest memory names for a given user.""" try: result = await session.execute( select(DocsModel.doc_name) .where(DocsModel.user_id == user_id) .order_by(DocsModel.created_at.desc()) ) doc_names = [row[0] for row in result.fetchall()] return doc_names except Exception as e: logging.error( f"An error occurred while retrieving the Vectordb_namespace: {str(e)}" ) return None async def get_model_id_name(session: AsyncSession, id: str): """ Asynchronously retrieves the latest memory names for a given user.""" try: result = await session.execute( select(MemoryModel.memory_name) .where(MemoryModel.id == id) .order_by(MemoryModel.created_at.desc()) ) doc_names = [row[0] for row in result.fetchall()] return doc_names except Exception as e: logging.error( f"An error occurred while retrieving the Vectordb_namespace: {str(e)}" ) return None async def get_unsumarized_vector_db_namespace(session: AsyncSession, user_id: str): """ Asynchronously retrieves the latest memory names and document details for a given user. This function executes a database query to fetch memory names and document details associated with operations performed by a specific user. It leverages explicit joins with the 'docs' and 'memories' tables and applies eager loading to optimize performance. Parameters: - session (AsyncSession): The database session for executing the query. - user_id (str): The unique identifier of the user. Returns: - Tuple[List[str], List[Tuple[str, str]]]: A tuple containing a list of memory names and a list of tuples with document names and their corresponding IDs. Returns None if an exception occurs. Raises: - Exception: Propagates any exceptions that occur during query execution. Example Usage: """ # try: result = await session.execute( select(Operation) .join(Operation.docs) # Explicit join with docs table .join(Operation.memories) # Explicit join with memories table .options( contains_eager(Operation.docs), # Informs ORM of the join for docs contains_eager(Operation.memories), # Informs ORM of the join for memories ) .where( (Operation.user_id == user_id) & or_( # Filter by user_id DocsModel.graph_summary == False, # Condition 1: graph_summary is False DocsModel.graph_summary == None, # Condition 3: graph_summary is None ) # Filter by user_id ) .order_by(Operation.created_at.desc()) # Order by creation date ) operations = result.unique().scalars().all() # Extract memory names and document names and IDs # memory_names = [memory.memory_name for op in operations for memory in op.memories] memory_details = [ (memory.memory_name, memory.memory_category) for op in operations for memory in op.memories ] docs = [(doc.doc_name, doc.id) for op in operations for doc in op.docs] return memory_details, docs async def get_memory_name_by_doc_id(session: AsyncSession, docs_id: str): """ Asynchronously retrieves memory names associated with a specific document ID. This function executes a database query to fetch memory names linked to a document through operations. The query is filtered based on a given document ID and retrieves only the memory names without loading the entire Operation entity. Parameters: - session (AsyncSession): The database session for executing the query. - docs_id (str): The unique identifier of the document. Returns: - List[str]: A list of memory names associated with the given document ID. Returns None if an exception occurs. Raises: - Exception: Propagates any exceptions that occur during query execution. """ try: result = await session.execute( select(MemoryModel.memory_name) .join( Operation, Operation.id == MemoryModel.operation_id ) # Join with Operation .join( DocsModel, DocsModel.operation_id == Operation.id ) # Join with DocsModel .where(DocsModel.id == docs_id) # Filtering based on the passed document ID .distinct() # To avoid duplicate memory names ) memory_names = [row[0] for row in result.fetchall()] return memory_names except Exception as e: # Handle the exception as needed print(f"An error occurred: {e}") return None async def read_query_prompt(filename: str) -> str: """Read a query prompt from a file. :param filename: The name of the file to read. :return: The content of the file as a string. """ script_directory = Path(__file__).parent # Set the base directory relative to the script's directory base_directory = script_directory.parent / "cognitive_architecture/infrastructure/llm/prompts" # Construct the full file path file_path = base_directory / filename try: return file_path.read_text() except FileNotFoundError: logging.error(f"File not found: {file_path.absolute()}") except Exception as e: logging.error(f"An error of type {type(e).__name__} occurred while reading file: {file_path.absolute()}. Error message: {e}") return None async def print_file_content(file_path): # Create a Path object for the file path path = Path(file_path) # Check if the file exists if path.is_file(): # Open and read the file, then print its content with path.open('r') as file: print(file.read()) else: # Print an error message if the file does not exist print(f"The file '{file_path}' does not exist.") async def async_render_template(filename: str, context: dict) -> str: """Render a Jinja2 template asynchronously. :param filename: The name of the template file to render. :param context: The context to render the template with. :return: The rendered template as a string.""" # Initialize the Jinja2 environment to load templates from the filesystem script_directory = Path(__file__).parent # Set the base directory relative to the script's directory base_directory = script_directory.parent / "cognitive_architecture/infrastructure/llm/prompts" # Construct the full file path file_path = base_directory / filename env = Environment( loader=FileSystemLoader(base_directory), autoescape=select_autoescape(['html', 'xml', 'txt']) ) # Load the template by name template = env.get_template(filename) # Render the template with the provided context rendered_template = template.render(context) return rendered_template async def render_graph(graph, graph_type): # Authenticate with your Graphistry API key import networkx as nx import pandas as pd graphistry.register(api=3, username=config.graphistry_username, password=config.graphistry_password) # Convert the NetworkX graph to a Pandas DataFrame representing the edge list edges = nx.to_pandas_edgelist(graph) # Visualize the graph using Graphistry plotter = graphistry.edges(edges, 'source', 'target') # Visualize the graph (this will open a URL in your default web browser) url = plotter.plot(render=False, as_files=True) print(f"Graph is visualized at: {url}") # import networkx as nx # # Create a simple NetworkX graph # G = nx.Graph() # # # Add nodes # G.add_node(1) # G.add_node(2) # # # Add an edge between nodes # G.add_edge(1, 2) # # import asyncio # # # Define the graph type (for this example, it's just a placeholder as the function doesn't use it yet) # graph_type = "simple" # # # Call the render_graph function # asyncio.run(render_graph(G, graph_type))