Added graph interface, added neo4j + networkx structure and updates to the notebook
This commit is contained in:
parent
5426f68d2c
commit
2433e4ed93
21 changed files with 618 additions and 21 deletions
|
|
@ -4022,7 +4022,13 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n"
|
||||
"\n",
|
||||
"#pre filtering\n",
|
||||
"# each semantic layer -> make categories, dimensions, on semantic layer given on the LLM\n",
|
||||
"# weights need to be used topk and cutoff\n",
|
||||
"# entry through entities\n",
|
||||
"# combine unstructured and structured\n",
|
||||
"# address / entrypoint node/ "
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
|
|||
|
|
@ -1,4 +1,3 @@
|
|||
import logging
|
||||
import os
|
||||
|
||||
from neo4j import AsyncSession
|
||||
|
|
@ -6,32 +5,23 @@ from neo4j.exceptions import Neo4jError
|
|||
|
||||
print(os.getcwd())
|
||||
|
||||
import networkx as nx
|
||||
|
||||
from langchain.graphs import Neo4jGraph
|
||||
import os
|
||||
|
||||
import openai
|
||||
import instructor
|
||||
from openai import OpenAI
|
||||
from openai import AsyncOpenAI
|
||||
import pickle
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
# Adds response_model to ChatCompletion
|
||||
# Allows the return of Pydantic model rather than raw JSON
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import List, Dict, Optional
|
||||
from ...utils import (
|
||||
format_dict,
|
||||
append_uuid_to_variable_names,
|
||||
create_edge_variable_mapping,
|
||||
create_node_variable_mapping,
|
||||
get_unsumarized_vector_db_namespace,
|
||||
)
|
||||
from ...llm.queries import generate_summary, generate_graph
|
||||
from cognitive_architecture.infrastructure.llm.openai.queries import generate_summary, generate_graph
|
||||
import logging
|
||||
from neo4j import AsyncGraphDatabase
|
||||
from contextlib import asynccontextmanager
|
||||
|
|
@ -45,11 +35,8 @@ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
|
|||
from ...config import Config
|
||||
|
||||
from ...shared.data_models import (
|
||||
Node,
|
||||
Edge,
|
||||
KnowledgeGraph,
|
||||
GraphQLQuery,
|
||||
MemorySummary,
|
||||
)
|
||||
|
||||
config = Config()
|
||||
|
|
|
|||
0
cognitive_architecture/infrastructure/__init__.py
Normal file
0
cognitive_architecture/infrastructure/__init__.py
Normal file
0
cognitive_architecture/infrastructure/llm/__init__.py
Normal file
0
cognitive_architecture/infrastructure/llm/__init__.py
Normal file
10
cognitive_architecture/infrastructure/llm/get_llm_client.py
Normal file
10
cognitive_architecture/infrastructure/llm/get_llm_client.py
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
"""Get the LLM client."""
|
||||
from cognitive_architecture.config import Config
|
||||
from .openai.adapter import OpenAIAdapter
|
||||
|
||||
config = Config()
|
||||
config.load()
|
||||
|
||||
def get_llm_client():
    """Return the LLM client configured for this deployment (OpenAI-backed)."""
    adapter = OpenAIAdapter(config.openai_key, config.model)
    return adapter
|
||||
35
cognitive_architecture/infrastructure/llm/llm_interface.py
Normal file
35
cognitive_architecture/infrastructure/llm/llm_interface.py
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
""" LLM Interface """
|
||||
|
||||
from typing import List, Type, Protocol
|
||||
from abc import abstractmethod
|
||||
from pydantic import BaseModel
|
||||
class LLMInterface(Protocol):
    """Structural interface every LLM provider adapter must satisfy.

    Implementations provide rate-limited (backoff) embedding calls, sync and
    async, plus structured (pydantic-validated) chat output.

    NOTE(review): the concrete OpenAIAdapter adds a ``model`` parameter to
    ``acreate_structured_output`` that is not declared here — confirm and
    align the two signatures.
    """

    @abstractmethod
    async def async_get_embedding_with_backoff(self, text, model="text-embedding-ada-002"):
        """Asynchronously embed ``text``; implementations retry on rate limits."""
        raise NotImplementedError

    @abstractmethod
    def get_embedding_with_backoff(self, text: str, model: str = "text-embedding-ada-002"):
        """Synchronously embed ``text``; implementations retry on rate limits."""
        raise NotImplementedError

    @abstractmethod
    async def async_get_batch_embeddings_with_backoff(self, texts: List[str], models: List[str]):
        """Embed several texts concurrently; ``texts`` and ``models`` are paired."""
        raise NotImplementedError

    # """ Get completions """
    # async def acompletions_with_backoff(self, **kwargs):
    #     raise NotImplementedError
    #
    """ Structured output """
    @abstractmethod
    async def acreate_structured_output(self,
                                        text_input: str,
                                        system_prompt_path: str,
                                        response_model: Type[BaseModel]) -> BaseModel:
        """Return a completion parsed into ``response_model`` (pydantic)."""
        raise NotImplementedError
|
||||
197
cognitive_architecture/infrastructure/llm/openai/adapter.py
Normal file
197
cognitive_architecture/infrastructure/llm/openai/adapter.py
Normal file
|
|
@ -0,0 +1,197 @@
|
|||
"""Adapter for OpenAI's GPT-3, GPT=4 API."""
|
||||
import os
|
||||
import time
|
||||
import random
|
||||
import asyncio
|
||||
from typing import List, Type
|
||||
import openai
|
||||
import instructor
|
||||
from openai import OpenAI,AsyncOpenAI
|
||||
from pydantic import BaseModel
|
||||
from cognitive_architecture.config import Config
|
||||
from cognitive_architecture.utils import read_query_prompt
|
||||
from ..llm_interface import LLMInterface
|
||||
|
||||
#
|
||||
# config = Config()
|
||||
# config.load()
|
||||
|
||||
# aclient = instructor.apatch(AsyncOpenAI())
|
||||
# OPENAI_API_KEY = config.openai_key
|
||||
|
||||
class OpenAIAdapter(LLMInterface):
    """Adapter for OpenAI's GPT-3 / GPT-4 chat-completion and embedding APIs."""

    def __init__(self, api_key: str, model: str):
        """Store the API key and build an instructor-patched async client.

        :param api_key: OpenAI API key (also set module-wide on ``openai``).
        :param model: default model name for structured-output calls.
        """
        openai.api_key = api_key
        # instructor.apatch adds `response_model=` (pydantic validation)
        # support to the async client's chat.completions.create.
        self.aclient = instructor.apatch(AsyncOpenAI())
        self.model = model

    @staticmethod
    def retry_with_exponential_backoff(
        func,
        initial_delay: float = 1,
        exponential_base: float = 2,
        jitter: bool = True,
        max_retries: int = 20,
        errors: tuple = (openai.RateLimitError,),
    ):
        """Decorator: retry a sync function with exponential backoff on ``errors``.

        NOTE: applied as a bare decorator inside this class body, which
        requires Python >= 3.10 (callable staticmethod objects).
        """

        def wrapper(*args, **kwargs):
            """Sync wrapper: retries ``func`` until success or max_retries."""
            num_retries = 0
            delay = initial_delay

            # Loop until a successful response, a non-retryable error,
            # or max_retries is hit.
            while True:
                try:
                    return func(*args, **kwargs)
                except errors:
                    num_retries += 1
                    if num_retries > max_retries:
                        raise Exception(
                            f"Maximum number of retries ({max_retries}) exceeded."
                        )
                    # Exponential growth with optional jitter to avoid
                    # synchronized retries across workers.
                    delay *= exponential_base * (1 + jitter * random.random())
                    time.sleep(delay)

        return wrapper

    @staticmethod
    def aretry_with_exponential_backoff(
        func,
        initial_delay: float = 1,
        exponential_base: float = 2,
        jitter: bool = True,
        max_retries: int = 20,
        errors: tuple = (openai.RateLimitError,),
    ):
        """Decorator: retry an async function with exponential backoff on ``errors``.

        FIX: this factory must be a regular (non-async) function. The original
        ``async def`` version, when applied as a decorator, produced a
        coroutine object instead of a wrapper function, making the decorated
        methods un-callable.
        """

        async def wrapper(*args, **kwargs):
            """Async wrapper: awaits ``func``, sleeping between retries."""
            num_retries = 0
            delay = initial_delay

            while True:
                try:
                    return await func(*args, **kwargs)
                except errors as e:
                    print(f"acreate (backoff): caught error: {e}")
                    num_retries += 1
                    if num_retries > max_retries:
                        raise Exception(
                            f"Maximum number of retries ({max_retries}) exceeded."
                        )
                    delay *= exponential_base * (1 + jitter * random.random())
                    await asyncio.sleep(delay)

        return wrapper

    @retry_with_exponential_backoff
    def completions_with_backoff(self, **kwargs):
        """Wrapper around chat.completions.create w/ backoff."""
        return openai.chat.completions.create(**kwargs)

    @aretry_with_exponential_backoff
    async def acompletions_with_backoff(self, **kwargs):
        """Wrapper around async chat completions w/ backoff.

        FIX: openai>=1.0 has no ``chat.completions.acreate``; use an
        AsyncOpenAI client and await its ``create`` instead.
        """
        client = openai.AsyncOpenAI(
            api_key=os.environ.get("OPENAI_API_KEY"),
        )
        return await client.chat.completions.create(**kwargs)

    @aretry_with_exponential_backoff
    async def acreate_embedding_with_backoff(self, **kwargs):
        """Wrapper around embeddings.create (async) w/ backoff."""
        client = openai.AsyncOpenAI(
            # This is the default and can be omitted
            api_key=os.environ.get("OPENAI_API_KEY"),
        )
        return await client.embeddings.create(**kwargs)

    async def async_get_embedding_with_backoff(self, text, model="text-embedding-ada-002"):
        """Embed ``text`` asynchronously; defaults + rate-limit handling included."""
        text = text.replace("\n", " ")
        response = await self.acreate_embedding_with_backoff(input=[text], model=model)
        embedding = response.data[0].embedding
        return embedding

    @retry_with_exponential_backoff
    def create_embedding_with_backoff(self, **kwargs):
        """Wrapper around embeddings.create w/ backoff."""
        return openai.embeddings.create(**kwargs)

    def get_embedding_with_backoff(self, text: str, model: str = "text-embedding-ada-002"):
        """Embed ``text`` synchronously; defaults + rate-limit handling included.

        FIX: renamed from the misspelled ``get_embedding_with_backoffself``,
        which also broke conformance with LLMInterface.

        :param text: text to embed (newlines are flattened to spaces)
        :param model: embedding model name
        """
        text = text.replace("\n", " ")
        response = self.create_embedding_with_backoff(input=[text], model=model)
        embedding = response.data[0].embedding
        return embedding

    # Backward-compatible alias for the original misspelled method name.
    get_embedding_with_backoffself = get_embedding_with_backoff

    async def async_get_batch_embeddings_with_backoff(self, texts: List[str], models: List[str]):
        """Embed several texts in parallel; ``texts`` and ``models`` are zipped pairwise."""
        # Create a generator of coroutines, one per (text, model) pair.
        coroutines = (self.async_get_embedding_with_backoff(text, model)
                      for text, model in zip(texts, models))

        # Run the coroutines in parallel and gather the results.
        embeddings = await asyncio.gather(*coroutines)

        return embeddings

    async def acreate_structured_output(self, text_input: str, system_prompt_path: str,
                                        response_model: Type[BaseModel],
                                        model: str = None) -> BaseModel:
        """Generate a pydantic-validated response from a user query.

        FIX: awaits the client call (the original returned the un-awaited
        coroutine to the caller) and lets ``model`` default to the adapter's
        configured model, which remains backward compatible with callers that
        pass it explicitly.
        """
        system_prompt = read_query_prompt(system_prompt_path)
        return await self.aclient.chat.completions.create(
            model=model or self.model,
            messages=[
                {
                    "role": "user",
                    "content": f"""Use the given format to
                    extract information from the following input: {text_input}. """,
                },
                {"role": "system", "content": system_prompt},
            ],
            response_model=response_model,
        )
|
||||
|
|
@ -158,7 +158,7 @@ def get_embedding_with_backoff(text:str, model:str="text-embedding-ada-002"):
|
|||
|
||||
|
||||
|
||||
async def async_get_multiple_embeddings_with_backoff(texts: List[str], models: List[str]) :
|
||||
async def async_get_batch_embeddings_with_backoff(texts: List[str], models: List[str]) :
|
||||
"""To get multiple text embeddings in parallel, import/call this function
|
||||
It specifies defaults + handles rate-limiting + is async"""
|
||||
# Create a generator of coroutines
|
||||
|
|
@ -3,8 +3,8 @@ import os
|
|||
import instructor
|
||||
from openai import OpenAI
|
||||
import logging
|
||||
from ..shared.data_models import KnowledgeGraph, MemorySummary
|
||||
from ..config import Config
|
||||
from cognitive_architecture.shared.data_models import KnowledgeGraph, MemorySummary
|
||||
from cognitive_architecture.config import Config
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,175 @@
|
|||
{
|
||||
"Natural Language Text": {
|
||||
"type": "TEXT",
|
||||
"subclass": [
|
||||
"Articles, essays, and reports",
|
||||
"Books and manuscripts",
|
||||
"News stories and blog posts",
|
||||
"Research papers and academic publications",
|
||||
"Social media posts and comments",
|
||||
"Website content and product descriptions",
|
||||
"Personal narratives and stories"
|
||||
]
|
||||
},
|
||||
"Structured Documents": {
|
||||
"type": "TEXT",
|
||||
"subclass": [
|
||||
"Spreadsheets and tables",
|
||||
"Forms and surveys",
|
||||
"Databases and CSV files"
|
||||
]
|
||||
},
|
||||
"Code and Scripts": {
|
||||
"type": "TEXT",
|
||||
"subclass": [
|
||||
"Source code in various programming languages",
|
||||
"Shell commands and scripts",
|
||||
"Markup languages (HTML, XML)",
|
||||
"Stylesheets (CSS) and configuration files (YAML, JSON, INI)"
|
||||
]
|
||||
},
|
||||
"Conversational Data": {
|
||||
"type": "TEXT",
|
||||
"subclass": [
|
||||
"Chat transcripts and messaging history",
|
||||
"Customer service logs and interactions",
|
||||
"Conversational AI training data"
|
||||
]
|
||||
},
|
||||
"Educational Content": {
|
||||
"type": "TEXT",
|
||||
"subclass": [
|
||||
"Textbook content and lecture notes",
|
||||
"Exam questions and academic exercises",
|
||||
"E-learning course materials"
|
||||
]
|
||||
},
|
||||
"Creative Writing": {
|
||||
"type": "TEXT",
|
||||
"subclass": [
|
||||
"Poetry and prose",
|
||||
"Scripts for plays, movies, and television",
|
||||
"Song lyrics"
|
||||
]
|
||||
},
|
||||
"Technical Documentation": {
|
||||
"type": "TEXT",
|
||||
"subclass": [
|
||||
"Manuals and user guides",
|
||||
"Technical specifications and API documentation",
|
||||
"Helpdesk articles and FAQs"
|
||||
]
|
||||
},
|
||||
"Legal and Regulatory Documents": {
|
||||
"type": "TEXT",
|
||||
"subclass": [
|
||||
"Contracts and agreements",
|
||||
"Laws, regulations, and legal case documents",
|
||||
"Policy documents and compliance materials"
|
||||
]
|
||||
},
|
||||
"Medical and Scientific Texts": {
|
||||
"type": "TEXT",
|
||||
"subclass": [
|
||||
"Clinical trial reports",
|
||||
"Patient records and case notes",
|
||||
"Scientific journal articles"
|
||||
]
|
||||
},
|
||||
"Financial and Business Documents": {
|
||||
"type": "TEXT",
|
||||
"subclass": [
|
||||
"Financial reports and statements",
|
||||
"Business plans and proposals",
|
||||
"Market research and analysis reports"
|
||||
]
|
||||
},
|
||||
"Advertising and Marketing Materials": {
|
||||
"type": "TEXT",
|
||||
"subclass": [
|
||||
"Ad copies and marketing slogans",
|
||||
"Product catalogs and brochures",
|
||||
"Press releases and promotional content"
|
||||
]
|
||||
},
|
||||
"Emails and Correspondence": {
|
||||
"type": "TEXT",
|
||||
"subclass": [
|
||||
"Professional and formal correspondence",
|
||||
"Personal emails and letters"
|
||||
]
|
||||
},
|
||||
"Metadata and Annotations": {
|
||||
"type": "TEXT",
|
||||
"subclass": [
|
||||
"Image and video captions",
|
||||
"Annotations and metadata for various media"
|
||||
]
|
||||
},
|
||||
"Language Learning Materials": {
|
||||
"type": "TEXT",
|
||||
"subclass": [
|
||||
"Vocabulary lists and grammar rules",
|
||||
"Language exercises and quizzes"
|
||||
]
|
||||
},
|
||||
"Audio Content": {
|
||||
"type": "AUDIO",
|
||||
"subclass": [
|
||||
"Music tracks and albums",
|
||||
"Podcasts and radio broadcasts",
|
||||
"Audiobooks and audio guides",
|
||||
"Recorded interviews and speeches",
|
||||
"Sound effects and ambient sounds"
|
||||
]
|
||||
},
|
||||
"Image Content": {
|
||||
"type": "IMAGE",
|
||||
"subclass": [
|
||||
"Photographs and digital images",
|
||||
"Illustrations, diagrams, and charts",
|
||||
"Infographics and visual data representations",
|
||||
"Artwork and paintings",
|
||||
"Screenshots and graphical user interfaces"
|
||||
]
|
||||
},
|
||||
"Video Content": {
|
||||
"type": "VIDEO",
|
||||
"subclass": [
|
||||
"Movies and short films",
|
||||
"Documentaries and educational videos",
|
||||
"Video tutorials and how-to guides",
|
||||
"Animated features and cartoons",
|
||||
"Live event recordings and sports broadcasts"
|
||||
]
|
||||
},
|
||||
"Multimedia Content": {
|
||||
"type": "MULTIMEDIA",
|
||||
"subclass": [
|
||||
"Interactive web content and games",
|
||||
"Virtual reality (VR) and augmented reality (AR) experiences",
|
||||
"Mixed media presentations and slide decks",
|
||||
"E-learning modules with integrated multimedia",
|
||||
"Digital exhibitions and virtual tours"
|
||||
]
|
||||
},
|
||||
"3D Models and CAD Content": {
|
||||
"type": "3D_MODEL",
|
||||
"subclass": [
|
||||
"Architectural renderings and building plans",
|
||||
"Product design models and prototypes",
|
||||
"3D animations and character models",
|
||||
"Scientific simulations and visualizations",
|
||||
"Virtual objects for AR/VR environments"
|
||||
]
|
||||
},
|
||||
"Procedural Content": {
|
||||
"type": "PROCEDURAL",
|
||||
"subclass": [
|
||||
"Tutorials and step-by-step guides",
|
||||
"Workflow and process descriptions",
|
||||
"Simulation and training exercises",
|
||||
"Recipes and crafting instructions"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
You are tasked with analyzing {{data_type}} files, especially in a multilayer network context for tasks such as analysis, categorization, and feature extraction; various layers can be incorporated to capture the depth and breadth of information contained within the {{data_type}}.
|
||||
These layers can help in understanding the content, context, and characteristics of the {{data_type}}
|
||||
Your objective is to extract meaningful layers of information that will contribute to constructing a detailed multilayer network or knowledge graph.
|
||||
Approach this task by considering the unique characteristics and inherent properties of the data at hand.
|
||||
VERY IMPORTANT: The context you are working in is {required_layers.dict()['name']} and specific domain you are extracting data on is {{layer_name}}
|
||||
Guidelines for Layer Extraction:
|
||||
Take into account: The content type that in this case is: {{layer_name}} should play a major role in how you decompose into layers.
|
||||
Based on your analysis, define and describe the layers you've identified, explaining their relevance and contribution to understanding the dataset. Your independent identification of layers will enable a nuanced and multifaceted representation of the data, enhancing applications in knowledge discovery, content analysis, and information retrieval.
|
||||
|
|
@ -1,8 +1,10 @@
|
|||
You are a top-tier algorithm
|
||||
designed for extracting information in structured formats to build a knowledge graph.
|
||||
- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.
|
||||
- **Edges** represent relationships between concepts. They're akin to Wikipedia links.
|
||||
- The aim is to achieve simplicity and clarity in the
|
||||
knowledge graph, making it accessible for a vast audience.
|
||||
YOU ARE ONLY EXTRACTING DATA FOR COGNITIVE LAYER {{layer}}
|
||||
## 2. Labeling Nodes
|
||||
- **Consistency**: Ensure you use basic or elementary types for node labels.
|
||||
- For example, when you identify an entity representing a person,
|
||||
|
|
@ -31,4 +33,4 @@ always use the most complete identifier for that entity throughout the knowledge
|
|||
Remember, the knowledge graph should be coherent and easily understandable,
|
||||
so maintaining consistency in entity references is crucial.
|
||||
## 5. Strict Compliance
|
||||
Adhere to the rules strictly. Non-compliance will result in termination
|
||||
Adhere to the rules strictly. Non-compliance will result in termination
|
||||
1
cognitive_architecture/modules/cognify/__init__.py
Normal file
1
cognitive_architecture/modules/cognify/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
|||
from .create_vector_memory import create_vector_memory
|
||||
1
cognitive_architecture/modules/cognify/llm/__init__.py
Normal file
1
cognitive_architecture/modules/cognify/llm/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
|||
from .create_vector_memory import create_vector_memory
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client
|
||||
|
||||
|
||||
async def content_to_cog_layers(memory_name: str, payload: list):
    """Derive cognitive layers from content via the configured LLM client.

    NOTE(review): `acreate_structured_output` expects
    (text_input, system_prompt_path, response_model[, model]); here ``payload``
    (a list) is passed where the prompt path belongs and no response model is
    supplied, and "text-embedding-ada-002" is an embedding model, not a chat
    model — this call looks mid-refactor; confirm the intended arguments.
    """
    llm_client = get_llm_client()

    # data_points = list()
    # for point in map(create_data_point, payload):
    #     data_points.append(await point)

    return await llm_client.acreate_structured_output(memory_name, payload, model="text-embedding-ada-002")
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client
|
||||
|
||||
|
||||
async def content_to_cog_layers(memory_name: str, payload: list):
    """Derive cognitive layers from content via the configured LLM client.

    NOTE(review): `acreate_structured_output` expects
    (text_input, system_prompt_path, response_model[, model]); here ``payload``
    (a list) is passed where the prompt path belongs and no response model is
    supplied, and "text-embedding-ada-002" is an embedding model, not a chat
    model — this call looks mid-refactor; confirm the intended arguments.
    It also duplicates the sibling module's function of the same name.
    """
    llm_client = get_llm_client()

    # data_points = list()
    # for point in map(create_data_point, payload):
    #     data_points.append(await point)

    return await llm_client.acreate_structured_output(memory_name, payload, model="text-embedding-ada-002")
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
""" Content to Propositions"""
|
||||
from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client
|
||||
|
||||
|
||||
async def generate_graph(memory_name: str, payload: str):
    """Build a knowledge graph from ``payload`` using the configured LLM client.

    NOTE(review): OpenAIAdapter (the client returned by get_llm_client) does
    not define a ``generate_graph`` method in this changeset — confirm where
    that method is implemented before relying on this call.
    """
    # Prompt template used to instruct the LLM to emit graph nodes/edges.
    doc_path = "cognitive_architecture/infrastructure/llm/prompts/generate_graph_prompt.txt"
    llm_client = get_llm_client()
    return await llm_client.generate_graph(memory_name, doc_path=doc_path,payload= payload)
|
||||
|
||||
|
|
@ -2,7 +2,7 @@ import uuid
|
|||
from typing import List
|
||||
from qdrant_client.models import PointStruct
|
||||
from cognitive_architecture.infrastructure.databases.vector.get_vector_database import get_vector_database
|
||||
from cognitive_architecture.openai_tools import async_get_embedding_with_backoff
|
||||
from cognitive_architecture.infrastructure.llm.openai.openai_tools import async_get_embedding_with_backoff
|
||||
|
||||
async def create_information_points(memory_name: str, payload: List[str]):
|
||||
vector_db = get_vector_database()
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
"""Data models for the cognitive architecture."""
|
||||
from typing import Optional, List
|
||||
from enum import Enum
|
||||
from typing import Optional, List, Union
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
|
|
@ -39,3 +40,129 @@ class MemorySummary(BaseModel):
|
|||
""" Memory summary. """
|
||||
nodes: List[Node] = Field(..., default_factory=list)
|
||||
edges: List[Edge] = Field(..., default_factory=list)
|
||||
|
||||
|
||||
|
||||
class TextSubclass(str, Enum):
    """Fine-grained categories of textual content.

    Values mirror the "TEXT" subclass strings in the content-type JSON config.
    """
    ARTICLES = "Articles, essays, and reports"
    BOOKS = "Books and manuscripts"
    NEWS_STORIES = "News stories and blog posts"
    RESEARCH_PAPERS = "Research papers and academic publications"
    SOCIAL_MEDIA = "Social media posts and comments"
    WEBSITE_CONTENT = "Website content and product descriptions"
    PERSONAL_NARRATIVES = "Personal narratives and stories"
    SPREADSHEETS = "Spreadsheets and tables"
    FORMS = "Forms and surveys"
    DATABASES = "Databases and CSV files"
    SOURCE_CODE = "Source code in various programming languages"
    SHELL_SCRIPTS = "Shell commands and scripts"
    MARKUP_LANGUAGES = "Markup languages (HTML, XML)"
    STYLESHEETS = "Stylesheets (CSS) and configuration files (YAML, JSON, INI)"
    CHAT_TRANSCRIPTS = "Chat transcripts and messaging history"
    CUSTOMER_SERVICE_LOGS = "Customer service logs and interactions"
    CONVERSATIONAL_AI = "Conversational AI training data"
    TEXTBOOK_CONTENT = "Textbook content and lecture notes"
    EXAM_QUESTIONS = "Exam questions and academic exercises"
    E_LEARNING_MATERIALS = "E-learning course materials"
    POETRY = "Poetry and prose"
    SCRIPTS = "Scripts for plays, movies, and television"
    SONG_LYRICS = "Song lyrics"
    MANUALS = "Manuals and user guides"
    TECH_SPECS = "Technical specifications and API documentation"
    HELPDESK_ARTICLES = "Helpdesk articles and FAQs"
    LEGAL_CONTRACTS = "Contracts and agreements"
    LAWS = "Laws, regulations, and legal case documents"
    POLICY_DOCUMENTS = "Policy documents and compliance materials"
    CLINICAL_TRIALS = "Clinical trial reports"
    PATIENT_RECORDS = "Patient records and case notes"
    SCIENTIFIC_ARTICLES = "Scientific journal articles"
    FINANCIAL_REPORTS = "Financial reports and statements"
    BUSINESS_PLANS = "Business plans and proposals"
    MARKET_RESEARCH = "Market research and analysis reports"
    AD_COPIES = "Ad copies and marketing slogans"
    PRODUCT_CATALOGS = "Product catalogs and brochures"
    PRESS_RELEASES = "Press releases and promotional content"
    PROFESSIONAL_EMAILS = "Professional and formal correspondence"
    PERSONAL_EMAILS = "Personal emails and letters"
    IMAGE_CAPTIONS = "Image and video captions"
    ANNOTATIONS = "Annotations and metadata for various media"
    VOCAB_LISTS = "Vocabulary lists and grammar rules"
    LANGUAGE_EXERCISES = "Language exercises and quizzes"

class AudioSubclass(str, Enum):
    """Categories of audio content ("AUDIO" type in the JSON config)."""
    MUSIC_TRACKS = "Music tracks and albums"
    PODCASTS = "Podcasts and radio broadcasts"
    AUDIOBOOKS = "Audiobooks and audio guides"
    INTERVIEWS = "Recorded interviews and speeches"
    SOUND_EFFECTS = "Sound effects and ambient sounds"

class ImageSubclass(str, Enum):
    """Categories of image content ("IMAGE" type in the JSON config)."""
    PHOTOGRAPHS = "Photographs and digital images"
    ILLUSTRATIONS = "Illustrations, diagrams, and charts"
    INFOGRAPHICS = "Infographics and visual data representations"
    ARTWORK = "Artwork and paintings"
    SCREENSHOTS = "Screenshots and graphical user interfaces"

class VideoSubclass(str, Enum):
    """Categories of video content ("VIDEO" type in the JSON config)."""
    MOVIES = "Movies and short films"
    DOCUMENTARIES = "Documentaries and educational videos"
    TUTORIALS = "Video tutorials and how-to guides"
    ANIMATED_FEATURES = "Animated features and cartoons"
    LIVE_EVENTS = "Live event recordings and sports broadcasts"

class MultimediaSubclass(str, Enum):
    """Categories of multimedia content ("MULTIMEDIA" type in the JSON config)."""
    WEB_CONTENT = "Interactive web content and games"
    VR_EXPERIENCES = "Virtual reality (VR) and augmented reality (AR) experiences"
    MIXED_MEDIA = "Mixed media presentations and slide decks"
    E_LEARNING_MODULES = "E-learning modules with integrated multimedia"
    DIGITAL_EXHIBITIONS = "Digital exhibitions and virtual tours"

class Model3DSubclass(str, Enum):
    """Categories of 3D/CAD content ("3D_MODEL" type in the JSON config)."""
    ARCHITECTURAL_RENDERINGS = "Architectural renderings and building plans"
    PRODUCT_MODELS = "Product design models and prototypes"
    ANIMATIONS = "3D animations and character models"
    SCIENTIFIC_VISUALIZATIONS = "Scientific simulations and visualizations"
    # NOTE(review): the JSON config uses "Virtual objects for AR/VR
    # environments" — confirm which string is canonical.
    VR_OBJECTS = "Virtual objects for AR/VR applications"

class ProceduralSubclass(str, Enum):
    """Categories of procedural content ("PROCEDURAL" type in the JSON config)."""
    TUTORIALS_GUIDES = "Tutorials and step-by-step guides"
    WORKFLOW_DESCRIPTIONS = "Workflow and process descriptions"
    SIMULATIONS = "Simulation and training exercises"
    RECIPES = "Recipes and crafting instructions"
|
||||
class ContentType(BaseModel):
    """Base class for different types of content."""
    # Discriminator naming the content category (e.g. "TEXT", "AUDIO").
    type: str

class TextContent(ContentType):
    """Textual content and its subclass labels."""
    # Annotated so pydantic treats this as a field with a default
    # (required for pydantic v2; harmless in v1).
    type: str = "TEXT"
    subclass: List[TextSubclass]

class AudioContent(ContentType):
    """Audio content and its subclass labels."""
    type: str = "AUDIO"
    subclass: List[AudioSubclass]

class ImageContent(ContentType):
    """Image content and its subclass labels."""
    type: str = "IMAGE"
    subclass: List[ImageSubclass]

class VideoContent(ContentType):
    """Video content and its subclass labels."""
    type: str = "VIDEO"
    subclass: List[VideoSubclass]

class MultimediaContent(ContentType):
    """Multimedia content and its subclass labels."""
    type: str = "MULTIMEDIA"
    subclass: List[MultimediaSubclass]

class Model3DContent(ContentType):
    """3D/CAD content and its subclass labels."""
    type: str = "3D_MODEL"
    subclass: List[Model3DSubclass]

class ProceduralContent(ContentType):
    """Procedural content and its subclass labels."""
    type: str = "PROCEDURAL"
    subclass: List[ProceduralSubclass]

class SinglePrediction(BaseModel):
    """Class for a single class label prediction."""

    # Union over every content family; the populated variant identifies the
    # predicted content type.
    label: Union[TextContent, AudioContent, ImageContent, VideoContent, MultimediaContent, Model3DContent, ProceduralContent]
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import os
|
|||
import random
|
||||
import string
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
from graphviz import Digraph
|
||||
from sqlalchemy import or_
|
||||
|
|
@ -284,3 +285,13 @@ async def get_memory_name_by_doc_id(session: AsyncSession, docs_id: str):
|
|||
return None
|
||||
|
||||
|
||||
def read_query_prompt(filename: str) -> str:
    """Read a query prompt from a file.

    :param filename: path to the prompt file.
    :return: the file's text, or ``None`` when the file is missing or
        unreadable (errors are logged rather than raised, preserving the
        original best-effort contract).
    """
    file_path = Path(filename)
    try:
        # Explicit encoding: prompt files are stored as UTF-8, and the
        # platform default locale encoding must not change how they parse.
        return file_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        logging.error("File not found: %s", file_path.absolute())
    except Exception as e:
        # Lazy %-style args avoid building the message unless it is emitted.
        logging.error(
            "An error of type %s occurred while reading file: %s. Error message: %s",
            type(e).__name__, file_path.absolute(), e,
        )
    return None
|
||||
Loading…
Add table
Reference in a new issue