Added graph interface, added neo4j + networkx structure and updates to the notebook

This commit is contained in:
Vasilije 2024-03-06 21:42:50 +01:00
parent 5426f68d2c
commit 2433e4ed93
21 changed files with 618 additions and 21 deletions

View file

@ -4022,7 +4022,13 @@
"metadata": {},
"outputs": [],
"source": [
"\n"
"\n",
"#pre filtering\n",
"# each semantic layer -> make categories, dimensions, on semantic layer given on the LLM\n",
"# weights need to be used topk and cutoff\n",
"# entry through entities\n",
"# combine unstructured and structured\n",
"# address / entrypoint node/ "
]
},
{

View file

@ -1,4 +1,3 @@
import logging
import os
from neo4j import AsyncSession
@ -6,32 +5,23 @@ from neo4j.exceptions import Neo4jError
print(os.getcwd())
import networkx as nx
from langchain.graphs import Neo4jGraph
import os
import openai
import instructor
from openai import OpenAI
from openai import AsyncOpenAI
import pickle
from abc import ABC, abstractmethod
# Adds response_model to ChatCompletion
# Allows the return of Pydantic model rather than raw JSON
from pydantic import BaseModel, Field
from typing import List, Dict, Optional
from ...utils import (
format_dict,
append_uuid_to_variable_names,
create_edge_variable_mapping,
create_node_variable_mapping,
get_unsumarized_vector_db_namespace,
)
from ...llm.queries import generate_summary, generate_graph
from cognitive_architecture.infrastructure.llm.openai.queries import generate_summary, generate_graph
import logging
from neo4j import AsyncGraphDatabase
from contextlib import asynccontextmanager
@ -45,11 +35,8 @@ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
from ...config import Config
from ...shared.data_models import (
Node,
Edge,
KnowledgeGraph,
GraphQLQuery,
MemorySummary,
)
config = Config()

View file

@ -0,0 +1,10 @@
"""Get the LLM client."""
from cognitive_architecture.config import Config
from .openai.adapter import OpenAIAdapter
config = Config()
config.load()
def get_llm_client():
    """Build and return the default LLM client.

    Uses the module-level ``config`` (loaded at import time) to construct
    an :class:`OpenAIAdapter` with the configured API key and model.
    """
    adapter = OpenAIAdapter(config.openai_key, config.model)
    return adapter

View file

@ -0,0 +1,35 @@
""" LLM Interface """
from typing import List, Type, Protocol
from abc import abstractmethod
from pydantic import BaseModel
class LLMInterface(Protocol):
    """Structural interface that every LLM adapter must satisfy.

    Implementations (e.g. the OpenAI adapter) provide rate-limit-aware
    embedding helpers plus structured (Pydantic-validated) completion.
    Removed: commented-out completion stubs and a stray bare-string
    statement that was not a comment.
    """

    @abstractmethod
    async def async_get_embedding_with_backoff(self, text, model="text-embedding-ada-002"):
        """Return the embedding vector for *text* (async, rate-limit aware)."""
        raise NotImplementedError

    @abstractmethod
    def get_embedding_with_backoff(self, text: str, model: str = "text-embedding-ada-002"):
        """Return the embedding vector for *text* (sync, rate-limit aware)."""
        raise NotImplementedError

    @abstractmethod
    async def async_get_batch_embeddings_with_backoff(self, texts: List[str], models: List[str]):
        """Return embeddings for several texts, gathered in parallel."""
        raise NotImplementedError

    # Structured output
    @abstractmethod
    async def acreate_structured_output(self,
                                        text_input: str,
                                        system_prompt_path: str,
                                        response_model: Type[BaseModel]) -> BaseModel:
        """Return a *response_model* instance extracted from *text_input*."""
        raise NotImplementedError

View file

@ -0,0 +1,197 @@
"""Adapter for OpenAI's GPT-3, GPT=4 API."""
import os
import time
import random
import asyncio
from typing import List, Type
import openai
import instructor
from openai import OpenAI,AsyncOpenAI
from pydantic import BaseModel
from cognitive_architecture.config import Config
from cognitive_architecture.utils import read_query_prompt
from ..llm_interface import LLMInterface
#
# config = Config()
# config.load()
# aclient = instructor.apatch(AsyncOpenAI())
# OPENAI_API_KEY = config.openai_key
class OpenAIAdapter(LLMInterface):
    """Adapter for OpenAI's GPT-3 / GPT-4 chat-completion and embedding APIs.

    Wraps the ``openai`` v1 client with exponential-backoff retry helpers
    and implements the methods required by :class:`LLMInterface`.
    """

    def __init__(self, api_key: str, model: str):
        """Configure the OpenAI clients and remember the default chat model.

        :param api_key: OpenAI API key (installed module-wide via ``openai.api_key``).
        :param model: default chat model used when callers do not pass one.
        """
        openai.api_key = api_key
        # instructor.apatch adds `response_model=` support to the async client.
        self.aclient = instructor.apatch(AsyncOpenAI())
        self.model = model

    @staticmethod
    def retry_with_exponential_backoff(
        func,
        initial_delay: float = 1,
        exponential_base: float = 2,
        jitter: bool = True,
        max_retries: int = 20,
        errors: tuple = (openai.RateLimitError,),
    ):
        """Retry a sync function with exponential backoff on the given errors.

        NOTE: used as a decorator inside this class body; directly callable
        staticmethods require Python >= 3.10.
        """
        def wrapper(*args, **kwargs):
            num_retries = 0
            delay = initial_delay
            # Loop until success, max_retries exceeded, or a non-retried error.
            while True:
                try:
                    return func(*args, **kwargs)
                except errors:
                    num_retries += 1
                    if num_retries > max_retries:
                        raise Exception(
                            f"Maximum number of retries ({max_retries}) exceeded."
                        )
                    # Jittered exponential growth of the delay.
                    delay *= exponential_base * (1 + jitter * random.random())
                    time.sleep(delay)
                # Any other exception propagates unchanged.
        return wrapper

    @staticmethod
    def aretry_with_exponential_backoff(
        func,
        initial_delay: float = 1,
        exponential_base: float = 2,
        jitter: bool = True,
        max_retries: int = 20,
        errors: tuple = (openai.RateLimitError,),
    ):
        """Retry an async function with exponential backoff on the given errors.

        FIX: this was previously declared ``async def``; applying it as a
        decorator therefore produced a coroutine object instead of a wrapper,
        which made every decorated coroutine method uncallable. It is now a
        plain function that returns an async wrapper.
        """
        async def wrapper(*args, **kwargs):
            num_retries = 0
            delay = initial_delay
            while True:
                try:
                    return await func(*args, **kwargs)
                except errors as e:
                    print(f"acreate (backoff): caught error: {e}")
                    num_retries += 1
                    if num_retries > max_retries:
                        raise Exception(
                            f"Maximum number of retries ({max_retries}) exceeded."
                        )
                    delay *= exponential_base * (1 + jitter * random.random())
                    await asyncio.sleep(delay)
                # Any other exception propagates unchanged.
        return wrapper

    @retry_with_exponential_backoff
    def completions_with_backoff(self, **kwargs):
        """Wrapper around chat.completions.create w/ backoff."""
        return openai.chat.completions.create(**kwargs)

    @aretry_with_exponential_backoff
    async def acompletions_with_backoff(self, **kwargs):
        """Async wrapper around chat completions w/ backoff.

        FIX: openai v1 has no ``chat.completions.acreate``; async calls go
        through an ``AsyncOpenAI`` client instead.
        """
        client = openai.AsyncOpenAI(
            api_key=os.environ.get("OPENAI_API_KEY"),
        )
        return await client.chat.completions.create(**kwargs)

    @aretry_with_exponential_backoff
    async def acreate_embedding_with_backoff(self, **kwargs):
        """Async wrapper around embeddings.create w/ backoff."""
        client = openai.AsyncOpenAI(
            # This is the default and can be omitted
            api_key=os.environ.get("OPENAI_API_KEY"),
        )
        return await client.embeddings.create(**kwargs)

    async def async_get_embedding_with_backoff(self, text, model="text-embedding-ada-002"):
        """To get text embeddings, import/call this function.

        It specifies defaults + handles rate-limiting + is async.
        """
        # Newlines degrade embedding quality per OpenAI guidance; flatten them.
        text = text.replace("\n", " ")
        response = await self.acreate_embedding_with_backoff(input=[text], model=model)
        embedding = response.data[0].embedding
        return embedding

    @retry_with_exponential_backoff
    def create_embedding_with_backoff(self, **kwargs):
        """Wrapper around embeddings.create w/ backoff."""
        return openai.embeddings.create(**kwargs)

    def get_embedding_with_backoff(self, text: str, model: str = "text-embedding-ada-002"):
        """To get text embeddings, import/call this function.

        It specifies defaults + handles rate-limiting.

        FIX: the method was named ``get_embedding_with_backoffself`` (typo fused
        "self" into the name), which broke the LLMInterface contract.

        :param text: str
        :param model: str
        """
        text = text.replace("\n", " ")
        response = self.create_embedding_with_backoff(input=[text], model=model)
        embedding = response.data[0].embedding
        return embedding

    async def async_get_batch_embeddings_with_backoff(self, texts: List[str], models: List[str]):
        """To get multiple text embeddings in parallel, import/call this function.

        It specifies defaults + handles rate-limiting + is async.
        """
        # Create a generator of coroutines, one per (text, model) pair.
        coroutines = (self.async_get_embedding_with_backoff(text, model)
                      for text, model in zip(texts, models))
        # Run the coroutines in parallel and gather the results.
        embeddings = await asyncio.gather(*coroutines)
        return embeddings

    async def acreate_structured_output(self, text_input: str, system_prompt_path: str,
                                        response_model: Type[BaseModel],
                                        model: str = None) -> BaseModel:
        """Generate a Pydantic-validated response from a user query.

        :param text_input: raw text to extract information from.
        :param system_prompt_path: path to the system prompt file.
        :param response_model: Pydantic model class for the structured result.
        :param model: chat model override; defaults to the adapter's configured model.

        FIX: the instructor-patched call is now awaited (previously the
        un-awaited coroutine was returned), and ``model`` is optional so the
        signature is compatible with ``LLMInterface.acreate_structured_output``.
        """
        system_prompt = read_query_prompt(system_prompt_path)
        return await self.aclient.chat.completions.create(
            model=model or self.model,
            messages=[
                {
                    "role": "user",
                    "content": f"""Use the given format to
                    extract information from the following input: {text_input}. """,
                },
                {"role": "system", "content": system_prompt},
            ],
            response_model=response_model,
        )

View file

@ -158,7 +158,7 @@ def get_embedding_with_backoff(text:str, model:str="text-embedding-ada-002"):
async def async_get_multiple_embeddings_with_backoff(texts: List[str], models: List[str]) :
async def async_get_batch_embeddings_with_backoff(texts: List[str], models: List[str]) :
"""To get multiple text embeddings in parallel, import/call this function
It specifies defaults + handles rate-limiting + is async"""
# Create a generator of coroutines

View file

@ -3,8 +3,8 @@ import os
import instructor
from openai import OpenAI
import logging
from ..shared.data_models import KnowledgeGraph, MemorySummary
from ..config import Config
from cognitive_architecture.shared.data_models import KnowledgeGraph, MemorySummary
from cognitive_architecture.config import Config

View file

@ -0,0 +1,175 @@
{
"Natural Language Text": {
"type": "TEXT",
"subclass": [
"Articles, essays, and reports",
"Books and manuscripts",
"News stories and blog posts",
"Research papers and academic publications",
"Social media posts and comments",
"Website content and product descriptions",
"Personal narratives and stories"
]
},
"Structured Documents": {
"type": "TEXT",
"subclass": [
"Spreadsheets and tables",
"Forms and surveys",
"Databases and CSV files"
]
},
"Code and Scripts": {
"type": "TEXT",
"subclass": [
"Source code in various programming languages",
"Shell commands and scripts",
"Markup languages (HTML, XML)",
"Stylesheets (CSS) and configuration files (YAML, JSON, INI)"
]
},
"Conversational Data": {
"type": "TEXT",
"subclass": [
"Chat transcripts and messaging history",
"Customer service logs and interactions",
"Conversational AI training data"
]
},
"Educational Content": {
"type": "TEXT",
"subclass": [
"Textbook content and lecture notes",
"Exam questions and academic exercises",
"E-learning course materials"
]
},
"Creative Writing": {
"type": "TEXT",
"subclass": [
"Poetry and prose",
"Scripts for plays, movies, and television",
"Song lyrics"
]
},
"Technical Documentation": {
"type": "TEXT",
"subclass": [
"Manuals and user guides",
"Technical specifications and API documentation",
"Helpdesk articles and FAQs"
]
},
"Legal and Regulatory Documents": {
"type": "TEXT",
"subclass": [
"Contracts and agreements",
"Laws, regulations, and legal case documents",
"Policy documents and compliance materials"
]
},
"Medical and Scientific Texts": {
"type": "TEXT",
"subclass": [
"Clinical trial reports",
"Patient records and case notes",
"Scientific journal articles"
]
},
"Financial and Business Documents": {
"type": "TEXT",
"subclass": [
"Financial reports and statements",
"Business plans and proposals",
"Market research and analysis reports"
]
},
"Advertising and Marketing Materials": {
"type": "TEXT",
"subclass": [
"Ad copies and marketing slogans",
"Product catalogs and brochures",
"Press releases and promotional content"
]
},
"Emails and Correspondence": {
"type": "TEXT",
"subclass": [
"Professional and formal correspondence",
"Personal emails and letters"
]
},
"Metadata and Annotations": {
"type": "TEXT",
"subclass": [
"Image and video captions",
"Annotations and metadata for various media"
]
},
"Language Learning Materials": {
"type": "TEXT",
"subclass": [
"Vocabulary lists and grammar rules",
"Language exercises and quizzes"
]
},
"Audio Content": {
"type": "AUDIO",
"subclass": [
"Music tracks and albums",
"Podcasts and radio broadcasts",
"Audiobooks and audio guides",
"Recorded interviews and speeches",
"Sound effects and ambient sounds"
]
},
"Image Content": {
"type": "IMAGE",
"subclass": [
"Photographs and digital images",
"Illustrations, diagrams, and charts",
"Infographics and visual data representations",
"Artwork and paintings",
"Screenshots and graphical user interfaces"
]
},
"Video Content": {
"type": "VIDEO",
"subclass": [
"Movies and short films",
"Documentaries and educational videos",
"Video tutorials and how-to guides",
"Animated features and cartoons",
"Live event recordings and sports broadcasts"
]
},
"Multimedia Content": {
"type": "MULTIMEDIA",
"subclass": [
"Interactive web content and games",
"Virtual reality (VR) and augmented reality (AR) experiences",
"Mixed media presentations and slide decks",
"E-learning modules with integrated multimedia",
"Digital exhibitions and virtual tours"
]
},
"3D Models and CAD Content": {
"type": "3D_MODEL",
"subclass": [
"Architectural renderings and building plans",
"Product design models and prototypes",
"3D animations and character models",
"Scientific simulations and visualizations",
"Virtual objects for AR/VR environments"
]
},
"Procedural Content": {
"type": "PROCEDURAL",
"subclass": [
"Tutorials and step-by-step guides",
"Workflow and process descriptions",
"Simulation and training exercises",
"Recipes and crafting instructions"
]
}
}

View file

@ -0,0 +1,8 @@
You are tasked with analyzing {{data_type}} files, especially in a multilayer network context, for tasks such as analysis, categorization, and feature extraction. Various layers can be incorporated to capture the depth and breadth of information contained within the {{data_type}}.
These layers can help in understanding the content, context, and characteristics of the {{data_type}}
Your objective is to extract meaningful layers of information that will contribute to constructing a detailed multilayer network or knowledge graph.
Approach this task by considering the unique characteristics and inherent properties of the data at hand.
VERY IMPORTANT: The context you are working in is {required_layers.dict()['name']} and the specific domain you are extracting data on is {{layer_name}}.
Guidelines for Layer Extraction:
Take into account that the content type, which in this case is {{layer_name}}, should play a major role in how you decompose the data into layers.
Based on your analysis, define and describe the layers you've identified, explaining their relevance and contribution to understanding the dataset. Your independent identification of layers will enable a nuanced and multifaceted representation of the data, enhancing applications in knowledge discovery, content analysis, and information retrieval.

View file

@ -1,8 +1,10 @@
You are a top-tier algorithm
designed for extracting information in structured formats to build a knowledge graph.
- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.
- **Edges** represent relationships between concepts. They're akin to Wikipedia links.
- The aim is to achieve simplicity and clarity in the
knowledge graph, making it accessible for a vast audience.
YOU ARE ONLY EXTRACTING DATA FOR COGNITIVE LAYER {{layer}}
## 2. Labeling Nodes
- **Consistency**: Ensure you use basic or elementary types for node labels.
- For example, when you identify an entity representing a person,
@ -31,4 +33,4 @@ always use the most complete identifier for that entity throughout the knowledge
Remember, the knowledge graph should be coherent and easily understandable,
so maintaining consistency in entity references is crucial.
## 5. Strict Compliance
Adhere to the rules strictly. Non-compliance will result in termination
Adhere to the rules strictly. Non-compliance will result in termination

View file

@ -0,0 +1 @@
from .create_vector_memory import create_vector_memory

View file

@ -0,0 +1 @@
from .create_vector_memory import create_vector_memory

View file

@ -0,0 +1,13 @@
from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client
async def content_to_cog_layers(memory_name: str, payload: list):
    """Request cognitive-layer structured output from the configured LLM client.

    NOTE(review): this call does not match OpenAIAdapter.acreate_structured_output,
    whose parameters are (text_input, system_prompt_path, response_model, model):
    *memory_name* is passed as the text input, *payload* (a list) as the system
    prompt path, the required response_model argument is missing, and
    "text-embedding-ada-002" is an embedding model, not a chat model — this
    will fail at runtime. Confirm the intended arguments.
    """
    llm_client = get_llm_client()
    # data_points = list()
    # for point in map(create_data_point, payload):
    #     data_points.append(await point)
    return await llm_client.acreate_structured_output(memory_name, payload, model="text-embedding-ada-002")

View file

@ -0,0 +1,15 @@
from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client
async def content_to_cog_layers(memory_name: str, payload: list):
    """Request cognitive-layer structured output from the configured LLM client.

    NOTE(review): this file duplicates another module in this commit with the
    same function; consider keeping one copy. The call also does not match
    OpenAIAdapter.acreate_structured_output (text_input, system_prompt_path,
    response_model, model): *payload* (a list) is passed as the system prompt
    path, response_model is missing, and "text-embedding-ada-002" is an
    embedding model, not a chat model — this will fail at runtime.
    """
    llm_client = get_llm_client()
    # data_points = list()
    # for point in map(create_data_point, payload):
    #     data_points.append(await point)
    return await llm_client.acreate_structured_output(memory_name, payload, model="text-embedding-ada-002")

View file

@ -0,0 +1,9 @@
""" Content to Propositions"""
from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client
async def generate_graph(memory_name: str, payload: str):
    """Generate a knowledge graph from *payload* using the graph-generation prompt.

    NOTE(review): OpenAIAdapter does not define a ``generate_graph`` method, so
    this call will raise AttributeError at runtime; presumably it should go
    through ``acreate_structured_output`` with a KnowledgeGraph response model
    and this prompt path — confirm against the adapter's API.
    """
    doc_path = "cognitive_architecture/infrastructure/llm/prompts/generate_graph_prompt.txt"
    llm_client = get_llm_client()
    return await llm_client.generate_graph(memory_name, doc_path=doc_path,payload= payload)

View file

@ -2,7 +2,7 @@ import uuid
from typing import List
from qdrant_client.models import PointStruct
from cognitive_architecture.infrastructure.databases.vector.get_vector_database import get_vector_database
from cognitive_architecture.openai_tools import async_get_embedding_with_backoff
from cognitive_architecture.infrastructure.llm.openai.openai_tools import async_get_embedding_with_backoff
async def create_information_points(memory_name: str, payload: List[str]):
vector_db = get_vector_database()

View file

@ -1,5 +1,6 @@
"""Data models for the cognitive architecture."""
from typing import Optional, List
from enum import Enum
from typing import Optional, List, Union
from pydantic import BaseModel, Field
@ -39,3 +40,129 @@ class MemorySummary(BaseModel):
""" Memory summary. """
nodes: List[Node] = Field(..., default_factory=list)
edges: List[Edge] = Field(..., default_factory=list)
class TextSubclass(str, Enum):
    """Sub-categories of TEXT content (mirrors the data-type taxonomy JSON)."""
    ARTICLES = "Articles, essays, and reports"
    BOOKS = "Books and manuscripts"
    NEWS_STORIES = "News stories and blog posts"
    RESEARCH_PAPERS = "Research papers and academic publications"
    SOCIAL_MEDIA = "Social media posts and comments"
    WEBSITE_CONTENT = "Website content and product descriptions"
    PERSONAL_NARRATIVES = "Personal narratives and stories"
    SPREADSHEETS = "Spreadsheets and tables"
    FORMS = "Forms and surveys"
    DATABASES = "Databases and CSV files"
    SOURCE_CODE = "Source code in various programming languages"
    SHELL_SCRIPTS = "Shell commands and scripts"
    MARKUP_LANGUAGES = "Markup languages (HTML, XML)"
    STYLESHEETS = "Stylesheets (CSS) and configuration files (YAML, JSON, INI)"
    CHAT_TRANSCRIPTS = "Chat transcripts and messaging history"
    CUSTOMER_SERVICE_LOGS = "Customer service logs and interactions"
    CONVERSATIONAL_AI = "Conversational AI training data"
    TEXTBOOK_CONTENT = "Textbook content and lecture notes"
    EXAM_QUESTIONS = "Exam questions and academic exercises"
    E_LEARNING_MATERIALS = "E-learning course materials"
    POETRY = "Poetry and prose"
    SCRIPTS = "Scripts for plays, movies, and television"
    SONG_LYRICS = "Song lyrics"
    MANUALS = "Manuals and user guides"
    TECH_SPECS = "Technical specifications and API documentation"
    HELPDESK_ARTICLES = "Helpdesk articles and FAQs"
    LEGAL_CONTRACTS = "Contracts and agreements"
    LAWS = "Laws, regulations, and legal case documents"
    POLICY_DOCUMENTS = "Policy documents and compliance materials"
    CLINICAL_TRIALS = "Clinical trial reports"
    PATIENT_RECORDS = "Patient records and case notes"
    SCIENTIFIC_ARTICLES = "Scientific journal articles"
    FINANCIAL_REPORTS = "Financial reports and statements"
    BUSINESS_PLANS = "Business plans and proposals"
    MARKET_RESEARCH = "Market research and analysis reports"
    AD_COPIES = "Ad copies and marketing slogans"
    PRODUCT_CATALOGS = "Product catalogs and brochures"
    PRESS_RELEASES = "Press releases and promotional content"
    PROFESSIONAL_EMAILS = "Professional and formal correspondence"
    PERSONAL_EMAILS = "Personal emails and letters"
    IMAGE_CAPTIONS = "Image and video captions"
    ANNOTATIONS = "Annotations and metadata for various media"
    VOCAB_LISTS = "Vocabulary lists and grammar rules"
    LANGUAGE_EXERCISES = "Language exercises and quizzes"
class AudioSubclass(str, Enum):
    """Sub-categories of AUDIO content (mirrors the data-type taxonomy JSON)."""
    MUSIC_TRACKS = "Music tracks and albums"
    PODCASTS = "Podcasts and radio broadcasts"
    AUDIOBOOKS = "Audiobooks and audio guides"
    INTERVIEWS = "Recorded interviews and speeches"
    SOUND_EFFECTS = "Sound effects and ambient sounds"
class ImageSubclass(str, Enum):
    """Sub-categories of IMAGE content (mirrors the data-type taxonomy JSON)."""
    PHOTOGRAPHS = "Photographs and digital images"
    ILLUSTRATIONS = "Illustrations, diagrams, and charts"
    INFOGRAPHICS = "Infographics and visual data representations"
    ARTWORK = "Artwork and paintings"
    SCREENSHOTS = "Screenshots and graphical user interfaces"
class VideoSubclass(str, Enum):
    """Sub-categories of VIDEO content (mirrors the data-type taxonomy JSON)."""
    MOVIES = "Movies and short films"
    DOCUMENTARIES = "Documentaries and educational videos"
    TUTORIALS = "Video tutorials and how-to guides"
    ANIMATED_FEATURES = "Animated features and cartoons"
    LIVE_EVENTS = "Live event recordings and sports broadcasts"
class MultimediaSubclass(str, Enum):
    """Sub-categories of MULTIMEDIA content (mirrors the data-type taxonomy JSON)."""
    WEB_CONTENT = "Interactive web content and games"
    VR_EXPERIENCES = "Virtual reality (VR) and augmented reality (AR) experiences"
    MIXED_MEDIA = "Mixed media presentations and slide decks"
    E_LEARNING_MODULES = "E-learning modules with integrated multimedia"
    DIGITAL_EXHIBITIONS = "Digital exhibitions and virtual tours"
class Model3DSubclass(str, Enum):
    """Sub-categories of 3D_MODEL content (mirrors the data-type taxonomy JSON)."""
    ARCHITECTURAL_RENDERINGS = "Architectural renderings and building plans"
    PRODUCT_MODELS = "Product design models and prototypes"
    ANIMATIONS = "3D animations and character models"
    SCIENTIFIC_VISUALIZATIONS = "Scientific simulations and visualizations"
    # FIX: aligned with the taxonomy JSON shipped in this commit, which reads
    # "Virtual objects for AR/VR environments"; the enum previously said
    # "...AR/VR applications" and would never match the taxonomy string.
    VR_OBJECTS = "Virtual objects for AR/VR environments"
class ProceduralSubclass(str, Enum):
    """Sub-categories of PROCEDURAL content (mirrors the data-type taxonomy JSON)."""
    TUTORIALS_GUIDES = "Tutorials and step-by-step guides"
    WORKFLOW_DESCRIPTIONS = "Workflow and process descriptions"
    SIMULATIONS = "Simulation and training exercises"
    RECIPES = "Recipes and crafting instructions"
class ContentType(BaseModel):
    """Base class for different types of content."""
    # Discriminator string, e.g. "TEXT" or "AUDIO"; each subclass pins a default.
    type: str
class TextContent(ContentType):
    """TEXT content with its subclass labels."""
    # Annotation added: pydantic v2 rejects non-annotated field overrides.
    type: str = "TEXT"
    subclass: List[TextSubclass]
class AudioContent(ContentType):
    """AUDIO content with its subclass labels."""
    # Annotation added: pydantic v2 rejects non-annotated field overrides.
    type: str = "AUDIO"
    subclass: List[AudioSubclass]
class ImageContent(ContentType):
    """IMAGE content with its subclass labels."""
    # Annotation added: pydantic v2 rejects non-annotated field overrides.
    type: str = "IMAGE"
    subclass: List[ImageSubclass]
class VideoContent(ContentType):
    """VIDEO content with its subclass labels."""
    # Annotation added: pydantic v2 rejects non-annotated field overrides.
    type: str = "VIDEO"
    subclass: List[VideoSubclass]
class MultimediaContent(ContentType):
    """MULTIMEDIA content with its subclass labels."""
    # Annotation added: pydantic v2 rejects non-annotated field overrides.
    type: str = "MULTIMEDIA"
    subclass: List[MultimediaSubclass]
class Model3DContent(ContentType):
    """3D_MODEL content with its subclass labels."""
    # Annotation added: pydantic v2 rejects non-annotated field overrides.
    type: str = "3D_MODEL"
    subclass: List[Model3DSubclass]
class ProceduralContent(ContentType):
    """PROCEDURAL content with its subclass labels."""
    # Annotation added: pydantic v2 rejects non-annotated field overrides.
    type: str = "PROCEDURAL"
    subclass: List[ProceduralSubclass]
class SinglePrediction(BaseModel):
    """Class for a single class label prediction."""
    # NOTE(review): plain Union — pydantic tries members left-to-right; a
    # discriminated union on `type` would make validation unambiguous. Confirm.
    label: Union[TextContent, AudioContent, ImageContent, VideoContent, MultimediaContent, Model3DContent, ProceduralContent]

View file

@ -4,6 +4,7 @@ import os
import random
import string
import uuid
from pathlib import Path
from graphviz import Digraph
from sqlalchemy import or_
@ -284,3 +285,13 @@ async def get_memory_name_by_doc_id(session: AsyncSession, docs_id: str):
return None
def read_query_prompt(filename: str) -> str:
    """Read a query prompt from a file.

    :param filename: path to the prompt file.
    :return: the file's text content, or None if it could not be read.
    """
    file_path = Path(filename)
    try:
        return file_path.read_text()
    except FileNotFoundError:
        # Missing prompt files are logged, not fatal; callers receive None.
        logging.error("File not found: %s", file_path.absolute())
    except Exception as e:
        # Best-effort: log unexpected read errors (permissions, encoding)
        # lazily via %-args instead of eager f-strings.
        logging.error(
            "An error of type %s occurred while reading file: %s. Error message: %s",
            type(e).__name__, file_path.absolute(), e,
        )
    return None