diff --git a/Demo_graph.ipynb b/Demo_graph.ipynb index 777612008..e626ef4db 100644 --- a/Demo_graph.ipynb +++ b/Demo_graph.ipynb @@ -4022,7 +4022,13 @@ "metadata": {}, "outputs": [], "source": [ - "\n" + "\n", + "#pre filtering\n", + "# each semantic layer -> make categories, dimensions, on semantic layer given on the LLM\n", + "# weights need to be used topk and cutoff\n", + "# entry through entities\n", + "# combine unstructured and structured\n", + "# address / entrypoint node/ " ] }, { diff --git a/cognitive_architecture/database/graphdb/graph.py b/cognitive_architecture/database/graphdb/graph.py index 604e89d26..df69ca341 100644 --- a/cognitive_architecture/database/graphdb/graph.py +++ b/cognitive_architecture/database/graphdb/graph.py @@ -1,4 +1,3 @@ -import logging import os from neo4j import AsyncSession @@ -6,32 +5,23 @@ from neo4j.exceptions import Neo4jError print(os.getcwd()) -import networkx as nx - -from langchain.graphs import Neo4jGraph import os -import openai import instructor from openai import OpenAI -from openai import AsyncOpenAI -import pickle from abc import ABC, abstractmethod # Adds response_model to ChatCompletion # Allows the return of Pydantic model rather than raw JSON -from pydantic import BaseModel, Field -from typing import List, Dict, Optional from ...utils import ( format_dict, append_uuid_to_variable_names, create_edge_variable_mapping, create_node_variable_mapping, - get_unsumarized_vector_db_namespace, ) -from ...llm.queries import generate_summary, generate_graph +from cognitive_architecture.infrastructure.llm.openai.queries import generate_summary, generate_graph import logging from neo4j import AsyncGraphDatabase from contextlib import asynccontextmanager @@ -45,11 +35,8 @@ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") from ...config import Config from ...shared.data_models import ( - Node, - Edge, KnowledgeGraph, GraphQLQuery, - MemorySummary, ) config = Config() diff --git 
a/cognitive_architecture/infrastructure/__init__.py b/cognitive_architecture/infrastructure/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/cognitive_architecture/infrastructure/llm/__init__.py b/cognitive_architecture/infrastructure/llm/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/cognitive_architecture/infrastructure/llm/get_llm_client.py b/cognitive_architecture/infrastructure/llm/get_llm_client.py new file mode 100644 index 000000000..013f4455a --- /dev/null +++ b/cognitive_architecture/infrastructure/llm/get_llm_client.py @@ -0,0 +1,10 @@ +"""Get the LLM client.""" +from cognitive_architecture.config import Config +from .openai.adapter import OpenAIAdapter + +config = Config() +config.load() + +def get_llm_client(): + """Get the LLM client.""" + return OpenAIAdapter(config.openai_key, config.model) \ No newline at end of file diff --git a/cognitive_architecture/infrastructure/llm/llm_interface.py b/cognitive_architecture/infrastructure/llm/llm_interface.py new file mode 100644 index 000000000..f8d0e091b --- /dev/null +++ b/cognitive_architecture/infrastructure/llm/llm_interface.py @@ -0,0 +1,35 @@ +""" LLM Interface """ + +from typing import List, Type, Protocol +from abc import abstractmethod +from pydantic import BaseModel +class LLMInterface(Protocol): + """ LLM Interface """ + + @abstractmethod + async def async_get_embedding_with_backoff(self, text, model="text-embedding-ada-002"): + """To get text embeddings, import/call this function""" + raise NotImplementedError + + @abstractmethod + def get_embedding_with_backoff(self, text: str, model: str = "text-embedding-ada-002"): + """To get text embeddings, import/call this function""" + raise NotImplementedError + + @abstractmethod + async def async_get_batch_embeddings_with_backoff(self, texts: List[str], models: List[str]): + """To get multiple text embeddings in parallel, import/call this function""" + raise NotImplementedError + + # """ Get completions 
""" + # async def acompletions_with_backoff(self, **kwargs): + # raise NotImplementedError + # + """ Structured output """ + @abstractmethod + async def acreate_structured_output(self, + text_input: str, + system_prompt_path: str, + response_model: Type[BaseModel]) -> BaseModel: + """To get structured output, import/call this function""" + raise NotImplementedError diff --git a/cognitive_architecture/infrastructure/llm/openai/__init__.py b/cognitive_architecture/infrastructure/llm/openai/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/cognitive_architecture/infrastructure/llm/openai/adapter.py b/cognitive_architecture/infrastructure/llm/openai/adapter.py new file mode 100644 index 000000000..ef8fd8f3b --- /dev/null +++ b/cognitive_architecture/infrastructure/llm/openai/adapter.py @@ -0,0 +1,197 @@ +"""Adapter for OpenAI's GPT-3, GPT=4 API.""" +import os +import time +import random +import asyncio +from typing import List, Type +import openai +import instructor +from openai import OpenAI,AsyncOpenAI +from pydantic import BaseModel +from cognitive_architecture.config import Config +from cognitive_architecture.utils import read_query_prompt +from ..llm_interface import LLMInterface + +# +# config = Config() +# config.load() + +# aclient = instructor.apatch(AsyncOpenAI()) +# OPENAI_API_KEY = config.openai_key + +class OpenAIAdapter(LLMInterface): + """Adapter for OpenAI's GPT-3, GPT=4 API""" + def __init__(self, api_key: str, model:str): + openai.api_key = api_key + self.aclient = instructor.apatch(AsyncOpenAI()) + self.model = model + # OPENAI_API_KEY = config.openai_key + + @staticmethod + def retry_with_exponential_backoff( + func, + initial_delay: float = 1, + exponential_base: float = 2, + jitter: bool = True, + max_retries: int = 20, + errors: tuple = (openai.RateLimitError,), + ): + """Retry a function with exponential backoff.""" + + def wrapper(*args, **kwargs): + """Wrapper for sync functions.""" + # Initialize variables + num_retries 
= 0 + delay = initial_delay + + # Loop until a successful response or max_retries is hit or an exception is raised + while True: + try: + return func(*args, **kwargs) + + # Retry on specified errors + except errors: + # Increment retries + num_retries += 1 + + # Check if max retries has been reached + if num_retries > max_retries: + raise Exception( + f"Maximum number of retries ({max_retries}) exceeded." + ) + + # Increment the delay + delay *= exponential_base * (1 + jitter * random.random()) + + # Sleep for the delay + time.sleep(delay) + + # Raise exceptions for any errors not specified + except Exception as e: + raise e + + return wrapper + + + @staticmethod + async def aretry_with_exponential_backoff( + func, + initial_delay: float = 1, + exponential_base: float = 2, + jitter: bool = True, + max_retries: int = 20, + errors: tuple = (openai.RateLimitError,), + ): + """Retry a function with exponential backoff.""" + + async def wrapper(*args, **kwargs): + """Wrapper for async functions. + :param args: list + :param kwargs: dict""" + # Initialize variables + num_retries = 0 + delay = initial_delay + + # Loop until a successful response or max_retries is hit or an exception is raised + while True: + try: + return await func(*args, **kwargs) + + # Retry on specified errors + except errors as e: + print(f"acreate (backoff): caught error: {e}") + # Increment retries + num_retries += 1 + + # Check if max retries has been reached + if num_retries > max_retries: + raise Exception( + f"Maximum number of retries ({max_retries}) exceeded." 
+ ) + + # Increment the delay + delay *= exponential_base * (1 + jitter * random.random()) + + # Sleep for the delay + await asyncio.sleep(delay) + + # Raise exceptions for any errors not specified + except Exception as e: + raise e + + return wrapper + + + @retry_with_exponential_backoff + def completions_with_backoff(self, **kwargs): + """Wrapper around ChatCompletion.create w/ backoff""" + # Local model + return openai.chat.completions.create(**kwargs) + + @aretry_with_exponential_backoff + async def acompletions_with_backoff(self,**kwargs): + """Wrapper around ChatCompletion.acreate w/ backoff""" + return await openai.chat.completions.acreate(**kwargs) + + @aretry_with_exponential_backoff + async def acreate_embedding_with_backoff(self,**kwargs): + """Wrapper around Embedding.acreate w/ backoff""" + + client = openai.AsyncOpenAI( + # This is the default and can be omitted + api_key=os.environ.get("OPENAI_API_KEY"), + ) + + return await client.embeddings.create(**kwargs) + + async def async_get_embedding_with_backoff(self, text, model="text-embedding-ada-002"): + """To get text embeddings, import/call this function + It specifies defaults + handles rate-limiting + is async""" + text = text.replace("\n", " ") + response = await self.acreate_embedding_with_backoff(input=[text], model=model) + embedding = response.data[0].embedding + return embedding + + @retry_with_exponential_backoff + def create_embedding_with_backoff(self, **kwargs): + """Wrapper around Embedding.create w/ backoff""" + return openai.embeddings.create(**kwargs) + + def get_embedding_with_backoffself(self, text: str, model: str = "text-embedding-ada-002"): + """To get text embeddings, import/call this function + It specifies defaults + handles rate-limiting + :param text: str + :param model: str + """ + text = text.replace("\n", " ") + response = self.create_embedding_with_backoff(input=[text], model=model) + embedding = response.data[0].embedding + return embedding + + async def 
async_get_batch_embeddings_with_backoff(self, texts: List[str], models: List[str]): + """To get multiple text embeddings in parallel, import/call this function + It specifies defaults + handles rate-limiting + is async""" + # Create a generator of coroutines + coroutines = (self.async_get_embedding_with_backoff(text, model) + for text, model in zip(texts, models)) + + # Run the coroutines in parallel and gather the results + embeddings = await asyncio.gather(*coroutines) + + return embeddings + + async def acreate_structured_output(self, text_input: str, system_prompt_path: str, response_model: Type[BaseModel], model:str) -> BaseModel: + """Generate a response from a user query.""" + system_prompt = read_query_prompt(system_prompt_path) + return self.aclient.chat.completions.create( + model=model, + messages=[ + { + "role": "user", + "content": f"""Use the given format to + extract information from the following input: {text_input}. """, + }, + {"role": "system", "content": system_prompt}, + ], + response_model=response_model, + ) diff --git a/cognitive_architecture/openai_tools.py b/cognitive_architecture/infrastructure/llm/openai/openai_tools.py similarity index 98% rename from cognitive_architecture/openai_tools.py rename to cognitive_architecture/infrastructure/llm/openai/openai_tools.py index 8ae8663c2..8cc902d16 100644 --- a/cognitive_architecture/openai_tools.py +++ b/cognitive_architecture/infrastructure/llm/openai/openai_tools.py @@ -158,7 +158,7 @@ def get_embedding_with_backoff(text:str, model:str="text-embedding-ada-002"): -async def async_get_multiple_embeddings_with_backoff(texts: List[str], models: List[str]) : +async def async_get_batch_embeddings_with_backoff(texts: List[str], models: List[str]) : """To get multiple text embeddings in parallel, import/call this function It specifies defaults + handles rate-limiting + is async""" # Create a generator of coroutines diff --git a/cognitive_architecture/llm/queries.py 
b/cognitive_architecture/infrastructure/llm/openai/queries.py similarity index 95% rename from cognitive_architecture/llm/queries.py rename to cognitive_architecture/infrastructure/llm/openai/queries.py index 816a80167..10f3982d0 100644 --- a/cognitive_architecture/llm/queries.py +++ b/cognitive_architecture/infrastructure/llm/openai/queries.py @@ -3,8 +3,8 @@ import os import instructor from openai import OpenAI import logging -from ..shared.data_models import KnowledgeGraph, MemorySummary -from ..config import Config +from cognitive_architecture.shared.data_models import KnowledgeGraph, MemorySummary +from cognitive_architecture.config import Config diff --git a/cognitive_architecture/infrastructure/llm/prompts/classify_content.txt b/cognitive_architecture/infrastructure/llm/prompts/classify_content.txt new file mode 100644 index 000000000..5f6e8038d --- /dev/null +++ b/cognitive_architecture/infrastructure/llm/prompts/classify_content.txt @@ -0,0 +1,175 @@ +{ + "Natural Language Text": { + "type": "TEXT", + "subclass": [ + "Articles, essays, and reports", + "Books and manuscripts", + "News stories and blog posts", + "Research papers and academic publications", + "Social media posts and comments", + "Website content and product descriptions", + "Personal narratives and stories" + ] + }, + "Structured Documents": { + "type": "TEXT", + "subclass": [ + "Spreadsheets and tables", + "Forms and surveys", + "Databases and CSV files" + ] + }, + "Code and Scripts": { + "type": "TEXT", + "subclass": [ + "Source code in various programming languages", + "Shell commands and scripts", + "Markup languages (HTML, XML)", + "Stylesheets (CSS) and configuration files (YAML, JSON, INI)" + ] + }, + "Conversational Data": { + "type": "TEXT", + "subclass": [ + "Chat transcripts and messaging history", + "Customer service logs and interactions", + "Conversational AI training data" + ] + }, + "Educational Content": { + "type": "TEXT", + "subclass": [ + "Textbook content and lecture 
notes", + "Exam questions and academic exercises", + "E-learning course materials" + ] + }, + "Creative Writing": { + "type": "TEXT", + "subclass": [ + "Poetry and prose", + "Scripts for plays, movies, and television", + "Song lyrics" + ] + }, + "Technical Documentation": { + "type": "TEXT", + "subclass": [ + "Manuals and user guides", + "Technical specifications and API documentation", + "Helpdesk articles and FAQs" + ] + }, + "Legal and Regulatory Documents": { + "type": "TEXT", + "subclass": [ + "Contracts and agreements", + "Laws, regulations, and legal case documents", + "Policy documents and compliance materials" + ] + }, + "Medical and Scientific Texts": { + "type": "TEXT", + "subclass": [ + "Clinical trial reports", + "Patient records and case notes", + "Scientific journal articles" + ] + }, + "Financial and Business Documents": { + "type": "TEXT", + "subclass": [ + "Financial reports and statements", + "Business plans and proposals", + "Market research and analysis reports" + ] + }, + "Advertising and Marketing Materials": { + "type": "TEXT", + "subclass": [ + "Ad copies and marketing slogans", + "Product catalogs and brochures", + "Press releases and promotional content" + ] + }, + "Emails and Correspondence": { + "type": "TEXT", + "subclass": [ + "Professional and formal correspondence", + "Personal emails and letters" + ] + }, + "Metadata and Annotations": { + "type": "TEXT", + "subclass": [ + "Image and video captions", + "Annotations and metadata for various media" + ] + }, + "Language Learning Materials": { + "type": "TEXT", + "subclass": [ + "Vocabulary lists and grammar rules", + "Language exercises and quizzes" + ] + }, + "Audio Content": { + "type": "AUDIO", + "subclass": [ + "Music tracks and albums", + "Podcasts and radio broadcasts", + "Audiobooks and audio guides", + "Recorded interviews and speeches", + "Sound effects and ambient sounds" + ] + }, + "Image Content": { + "type": "IMAGE", + "subclass": [ + "Photographs and digital images", + 
"Illustrations, diagrams, and charts", + "Infographics and visual data representations", + "Artwork and paintings", + "Screenshots and graphical user interfaces" + ] + }, + "Video Content": { + "type": "VIDEO", + "subclass": [ + "Movies and short films", + "Documentaries and educational videos", + "Video tutorials and how-to guides", + "Animated features and cartoons", + "Live event recordings and sports broadcasts" + ] + }, + "Multimedia Content": { + "type": "MULTIMEDIA", + "subclass": [ + "Interactive web content and games", + "Virtual reality (VR) and augmented reality (AR) experiences", + "Mixed media presentations and slide decks", + "E-learning modules with integrated multimedia", + "Digital exhibitions and virtual tours" + ] + }, + "3D Models and CAD Content": { + "type": "3D_MODEL", + "subclass": [ + "Architectural renderings and building plans", + "Product design models and prototypes", + "3D animations and character models", + "Scientific simulations and visualizations", + "Virtual objects for AR/VR environments" + ] + }, + "Procedural Content": { + "type": "PROCEDURAL", + "subclass": [ + "Tutorials and step-by-step guides", + "Workflow and process descriptions", + "Simulation and training exercises", + "Recipes and crafting instructions" + ] + } +} \ No newline at end of file diff --git a/cognitive_architecture/infrastructure/llm/prompts/generate_cog_layers.txt b/cognitive_architecture/infrastructure/llm/prompts/generate_cog_layers.txt new file mode 100644 index 000000000..a56eee1ce --- /dev/null +++ b/cognitive_architecture/infrastructure/llm/prompts/generate_cog_layers.txt @@ -0,0 +1,8 @@ + You are tasked with analyzing a {{data_type}} files, especially in a multilayer network context for tasks such as analysis, categorization, and feature extraction, various layers can be incorporated to capture the depth and breadth of information contained within the {{data_type}} + These layers can help in understanding the content, context, and characteristics of 
the {{data_type}} + Your objective is to extract meaningful layers of information that will contribute to constructing a detailed multilayer network or knowledge graph. + Approach this task by considering the unique characteristics and inherent properties of the data at hand. + VERY IMPORTANT: The context you are working in is {required_layers.dict()['name']} and specific domain you are extracting data on is {{layer_name}} + Guidelines for Layer Extraction: + Take into account: The content type that in this case is: {{layer_name}} should play a major role in how you decompose into layers. + Based on your analysis, define and describe the layers you've identified, explaining their relevance and contribution to understanding the dataset. Your independent identification of layers will enable a nuanced and multifaceted representation of the data, enhancing applications in knowledge discovery, content analysis, and information retrieval. diff --git a/cognitive_architecture/llm/prompts/generate_graph_prompt.txt b/cognitive_architecture/infrastructure/llm/prompts/generate_graph_prompt.txt similarity index 92% rename from cognitive_architecture/llm/prompts/generate_graph_prompt.txt rename to cognitive_architecture/infrastructure/llm/prompts/generate_graph_prompt.txt index 744903cda..da6e0115a 100644 --- a/cognitive_architecture/llm/prompts/generate_graph_prompt.txt +++ b/cognitive_architecture/infrastructure/llm/prompts/generate_graph_prompt.txt @@ -1,8 +1,10 @@ You are a top-tier algorithm designed for extracting information in structured formats to build a knowledge graph. - **Nodes** represent entities and concepts. They're akin to Wikipedia nodes. +- **Edges** represent relationships between concepts. They're akin to Wikipedia links. - The aim is to achieve simplicity and clarity in the knowledge graph, making it accessible for a vast audience. +YOU ARE ONLY EXTRACTING DATA FOR COGNITIVE LAYER {{layer}} ## 2. 
Labeling Nodes - **Consistency**: Ensure you use basic or elementary types for node labels. - For example, when you identify an entity representing a person, @@ -31,4 +33,4 @@ always use the most complete identifier for that entity throughout the knowledge Remember, the knowledge graph should be coherent and easily understandable, so maintaining consistency in entity references is crucial. ## 5. Strict Compliance -Adhere to the rules strictly. Non-compliance will result in termination \ No newline at end of file +Adhere to the rules strictly. Non-compliance will result in termination \ No newline at end of file diff --git a/cognitive_architecture/modules/cognify/__init__.py b/cognitive_architecture/modules/cognify/__init__.py new file mode 100644 index 000000000..23a274d96 --- /dev/null +++ b/cognitive_architecture/modules/cognify/__init__.py @@ -0,0 +1 @@ +from .create_vector_memory import create_vector_memory diff --git a/cognitive_architecture/modules/cognify/llm/__init__.py b/cognitive_architecture/modules/cognify/llm/__init__.py new file mode 100644 index 000000000..23a274d96 --- /dev/null +++ b/cognitive_architecture/modules/cognify/llm/__init__.py @@ -0,0 +1 @@ +from .create_vector_memory import create_vector_memory diff --git a/cognitive_architecture/modules/cognify/llm/classify_content.py b/cognitive_architecture/modules/cognify/llm/classify_content.py new file mode 100644 index 000000000..65f8a4c41 --- /dev/null +++ b/cognitive_architecture/modules/cognify/llm/classify_content.py @@ -0,0 +1,13 @@ +from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client + + +async def content_to_cog_layers(memory_name: str, payload: list): + llm_client = get_llm_client() + + # data_points = list() + # for point in map(create_data_point, payload): + # data_points.append(await point) + + return await llm_client.acreate_structured_output(memory_name, payload, model="text-embedding-ada-002") + + diff --git
a/cognitive_architecture/modules/cognify/llm/content_to_cog_layers.py b/cognitive_architecture/modules/cognify/llm/content_to_cog_layers.py new file mode 100644 index 000000000..0a1d6a548 --- /dev/null +++ b/cognitive_architecture/modules/cognify/llm/content_to_cog_layers.py @@ -0,0 +1,15 @@ +from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client + + +async def content_to_cog_layers(memory_name: str, payload: list): + llm_client = get_llm_client() + + # data_points = list() + # for point in map(create_data_point, payload): + # data_points.append(await point) + + return await llm_client.acreate_structured_output(memory_name, payload, model="text-embedding-ada-002") + + + + diff --git a/cognitive_architecture/modules/cognify/llm/content_to_propositions.py b/cognitive_architecture/modules/cognify/llm/content_to_propositions.py new file mode 100644 index 000000000..cc94340c3 --- /dev/null +++ b/cognitive_architecture/modules/cognify/llm/content_to_propositions.py @@ -0,0 +1,9 @@ +""" Content to Propositions""" +from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client + + +async def generate_graph(memory_name: str, payload: str): + doc_path = "cognitive_architecture/infrastructure/llm/prompts/generate_graph_prompt.txt" + llm_client = get_llm_client() + return await llm_client.generate_graph(memory_name, doc_path=doc_path,payload= payload) + diff --git a/cognitive_architecture/modules/users/memory/create_information_points.py b/cognitive_architecture/modules/users/memory/create_information_points.py index 6dfe12413..abd3866e2 100644 --- a/cognitive_architecture/modules/users/memory/create_information_points.py +++ b/cognitive_architecture/modules/users/memory/create_information_points.py @@ -2,7 +2,7 @@ import uuid from typing import List from qdrant_client.models import PointStruct from cognitive_architecture.infrastructure.databases.vector.get_vector_database import get_vector_database -from 
cognitive_architecture.openai_tools import async_get_embedding_with_backoff +from cognitive_architecture.infrastructure.llm.openai.openai_tools import async_get_embedding_with_backoff async def create_information_points(memory_name: str, payload: List[str]): vector_db = get_vector_database() diff --git a/cognitive_architecture/shared/data_models.py b/cognitive_architecture/shared/data_models.py index c95000bd5..8ed374908 100644 --- a/cognitive_architecture/shared/data_models.py +++ b/cognitive_architecture/shared/data_models.py @@ -1,5 +1,6 @@ """Data models for the cognitive architecture.""" -from typing import Optional, List +from enum import Enum +from typing import Optional, List, Union from pydantic import BaseModel, Field @@ -39,3 +40,129 @@ class MemorySummary(BaseModel): """ Memory summary. """ nodes: List[Node] = Field(..., default_factory=list) edges: List[Edge] = Field(..., default_factory=list) + + + +class TextSubclass(str, Enum): + ARTICLES = "Articles, essays, and reports" + BOOKS = "Books and manuscripts" + NEWS_STORIES = "News stories and blog posts" + RESEARCH_PAPERS = "Research papers and academic publications" + SOCIAL_MEDIA = "Social media posts and comments" + WEBSITE_CONTENT = "Website content and product descriptions" + PERSONAL_NARRATIVES = "Personal narratives and stories" + SPREADSHEETS = "Spreadsheets and tables" + FORMS = "Forms and surveys" + DATABASES = "Databases and CSV files" + SOURCE_CODE = "Source code in various programming languages" + SHELL_SCRIPTS = "Shell commands and scripts" + MARKUP_LANGUAGES = "Markup languages (HTML, XML)" + STYLESHEETS = "Stylesheets (CSS) and configuration files (YAML, JSON, INI)" + CHAT_TRANSCRIPTS = "Chat transcripts and messaging history" + CUSTOMER_SERVICE_LOGS = "Customer service logs and interactions" + CONVERSATIONAL_AI = "Conversational AI training data" + TEXTBOOK_CONTENT = "Textbook content and lecture notes" + EXAM_QUESTIONS = "Exam questions and academic exercises" + E_LEARNING_MATERIALS = 
"E-learning course materials" + POETRY = "Poetry and prose" + SCRIPTS = "Scripts for plays, movies, and television" + SONG_LYRICS = "Song lyrics" + MANUALS = "Manuals and user guides" + TECH_SPECS = "Technical specifications and API documentation" + HELPDESK_ARTICLES = "Helpdesk articles and FAQs" + LEGAL_CONTRACTS = "Contracts and agreements" + LAWS = "Laws, regulations, and legal case documents" + POLICY_DOCUMENTS = "Policy documents and compliance materials" + CLINICAL_TRIALS = "Clinical trial reports" + PATIENT_RECORDS = "Patient records and case notes" + SCIENTIFIC_ARTICLES = "Scientific journal articles" + FINANCIAL_REPORTS = "Financial reports and statements" + BUSINESS_PLANS = "Business plans and proposals" + MARKET_RESEARCH = "Market research and analysis reports" + AD_COPIES = "Ad copies and marketing slogans" + PRODUCT_CATALOGS = "Product catalogs and brochures" + PRESS_RELEASES = "Press releases and promotional content" + PROFESSIONAL_EMAILS = "Professional and formal correspondence" + PERSONAL_EMAILS = "Personal emails and letters" + IMAGE_CAPTIONS = "Image and video captions" + ANNOTATIONS = "Annotations and metadata for various media" + VOCAB_LISTS = "Vocabulary lists and grammar rules" + LANGUAGE_EXERCISES = "Language exercises and quizzes" + +class AudioSubclass(str, Enum): + MUSIC_TRACKS = "Music tracks and albums" + PODCASTS = "Podcasts and radio broadcasts" + AUDIOBOOKS = "Audiobooks and audio guides" + INTERVIEWS = "Recorded interviews and speeches" + SOUND_EFFECTS = "Sound effects and ambient sounds" + +class ImageSubclass(str, Enum): + PHOTOGRAPHS = "Photographs and digital images" + ILLUSTRATIONS = "Illustrations, diagrams, and charts" + INFOGRAPHICS = "Infographics and visual data representations" + ARTWORK = "Artwork and paintings" + SCREENSHOTS = "Screenshots and graphical user interfaces" + +class VideoSubclass(str, Enum): + MOVIES = "Movies and short films" + DOCUMENTARIES = "Documentaries and educational videos" + TUTORIALS = "Video 
tutorials and how-to guides" + ANIMATED_FEATURES = "Animated features and cartoons" + LIVE_EVENTS = "Live event recordings and sports broadcasts" + +class MultimediaSubclass(str, Enum): + WEB_CONTENT = "Interactive web content and games" + VR_EXPERIENCES = "Virtual reality (VR) and augmented reality (AR) experiences" + MIXED_MEDIA = "Mixed media presentations and slide decks" + E_LEARNING_MODULES = "E-learning modules with integrated multimedia" + DIGITAL_EXHIBITIONS = "Digital exhibitions and virtual tours" + +class Model3DSubclass(str, Enum): + ARCHITECTURAL_RENDERINGS = "Architectural renderings and building plans" + PRODUCT_MODELS = "Product design models and prototypes" + ANIMATIONS = "3D animations and character models" + SCIENTIFIC_VISUALIZATIONS = "Scientific simulations and visualizations" + VR_OBJECTS = "Virtual objects for AR/VR applications" + +class ProceduralSubclass(str, Enum): + TUTORIALS_GUIDES = "Tutorials and step-by-step guides" + WORKFLOW_DESCRIPTIONS = "Workflow and process descriptions" + SIMULATIONS = "Simulation and training exercises" + RECIPES = "Recipes and crafting instructions" +class ContentType(BaseModel): + """Base class for different types of content.""" + type: str + +class TextContent(ContentType): + type = "TEXT" + subclass: List[TextSubclass] + +class AudioContent(ContentType): + type = "AUDIO" + subclass: List[AudioSubclass] + +class ImageContent(ContentType): + type = "IMAGE" + subclass: List[ImageSubclass] + +class VideoContent(ContentType): + type = "VIDEO" + subclass: List[VideoSubclass] + +class MultimediaContent(ContentType): + type = "MULTIMEDIA" + subclass: List[MultimediaSubclass] + +class Model3DContent(ContentType): + type = "3D_MODEL" + subclass: List[Model3DSubclass] + +class ProceduralContent(ContentType): + type = "PROCEDURAL" + subclass: List[ProceduralSubclass] + +class SinglePrediction(BaseModel): + """Class for a single class label prediction.""" + + label: Union[TextContent, AudioContent, ImageContent, 
VideoContent, MultimediaContent, Model3DContent, ProceduralContent] + diff --git a/cognitive_architecture/utils.py b/cognitive_architecture/utils.py index 80654059c..668d678a1 100644 --- a/cognitive_architecture/utils.py +++ b/cognitive_architecture/utils.py @@ -4,6 +4,7 @@ import os import random import string import uuid +from pathlib import Path from graphviz import Digraph from sqlalchemy import or_ @@ -284,3 +285,13 @@ async def get_memory_name_by_doc_id(session: AsyncSession, docs_id: str): return None +def read_query_prompt(filename: str) -> str: + """Read a query prompt from a file.""" + file_path = Path(filename) + try: + return file_path.read_text() + except FileNotFoundError: + logging.error(f"File not found: {file_path.absolute()}") + except Exception as e: + logging.error(f"An error of type {type(e).__name__} occurred while reading file: {file_path.absolute()}. Error message: {e}") + return None \ No newline at end of file