Added graph interface, added neo4j + networkx structure and updates to the notebook
This commit is contained in:
parent
5426f68d2c
commit
2433e4ed93
21 changed files with 618 additions and 21 deletions
|
|
@ -4022,7 +4022,13 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"\n"
|
"\n",
|
||||||
|
"#pre filtering\n",
|
||||||
|
"# each semantic layer -> make categories, dimensions, on semantic layer given on the LLM\n",
|
||||||
|
"# weights need to be used topk and cutoff\n",
|
||||||
|
"# entry through entities\n",
|
||||||
|
"# combine unstructured and structured\n",
|
||||||
|
"# address / entrypoint node/ "
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,3 @@
|
||||||
import logging
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from neo4j import AsyncSession
|
from neo4j import AsyncSession
|
||||||
|
|
@ -6,32 +5,23 @@ from neo4j.exceptions import Neo4jError
|
||||||
|
|
||||||
print(os.getcwd())
|
print(os.getcwd())
|
||||||
|
|
||||||
import networkx as nx
|
|
||||||
|
|
||||||
from langchain.graphs import Neo4jGraph
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
import openai
|
|
||||||
import instructor
|
import instructor
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
from openai import AsyncOpenAI
|
|
||||||
import pickle
|
|
||||||
|
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
# Adds response_model to ChatCompletion
|
# Adds response_model to ChatCompletion
|
||||||
# Allows the return of Pydantic model rather than raw JSON
|
# Allows the return of Pydantic model rather than raw JSON
|
||||||
|
|
||||||
from pydantic import BaseModel, Field
|
|
||||||
from typing import List, Dict, Optional
|
|
||||||
from ...utils import (
|
from ...utils import (
|
||||||
format_dict,
|
format_dict,
|
||||||
append_uuid_to_variable_names,
|
append_uuid_to_variable_names,
|
||||||
create_edge_variable_mapping,
|
create_edge_variable_mapping,
|
||||||
create_node_variable_mapping,
|
create_node_variable_mapping,
|
||||||
get_unsumarized_vector_db_namespace,
|
|
||||||
)
|
)
|
||||||
from ...llm.queries import generate_summary, generate_graph
|
from cognitive_architecture.infrastructure.llm.openai.queries import generate_summary, generate_graph
|
||||||
import logging
|
import logging
|
||||||
from neo4j import AsyncGraphDatabase
|
from neo4j import AsyncGraphDatabase
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
|
|
@ -45,11 +35,8 @@ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
|
||||||
from ...config import Config
|
from ...config import Config
|
||||||
|
|
||||||
from ...shared.data_models import (
|
from ...shared.data_models import (
|
||||||
Node,
|
|
||||||
Edge,
|
|
||||||
KnowledgeGraph,
|
KnowledgeGraph,
|
||||||
GraphQLQuery,
|
GraphQLQuery,
|
||||||
MemorySummary,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
config = Config()
|
config = Config()
|
||||||
|
|
|
||||||
0
cognitive_architecture/infrastructure/__init__.py
Normal file
0
cognitive_architecture/infrastructure/__init__.py
Normal file
0
cognitive_architecture/infrastructure/llm/__init__.py
Normal file
0
cognitive_architecture/infrastructure/llm/__init__.py
Normal file
10
cognitive_architecture/infrastructure/llm/get_llm_client.py
Normal file
10
cognitive_architecture/infrastructure/llm/get_llm_client.py
Normal file
|
|
@ -0,0 +1,10 @@
|
||||||
|
"""Get the LLM client."""
|
||||||
|
from cognitive_architecture.config import Config
|
||||||
|
from .openai.adapter import OpenAIAdapter
|
||||||
|
|
||||||
|
# Module-level configuration, created and loaded once when this module is imported.
config = Config()
config.load()


def get_llm_client():
    """Build an OpenAIAdapter from the key and model held by the module-level config."""
    api_key, model_name = config.openai_key, config.model
    return OpenAIAdapter(api_key, model_name)
|
||||||
35
cognitive_architecture/infrastructure/llm/llm_interface.py
Normal file
35
cognitive_architecture/infrastructure/llm/llm_interface.py
Normal file
|
|
@ -0,0 +1,35 @@
|
||||||
|
""" LLM Interface """
|
||||||
|
|
||||||
|
from typing import List, Type, Protocol
|
||||||
|
from abc import abstractmethod
|
||||||
|
from pydantic import BaseModel
|
||||||
|
class LLMInterface(Protocol):
    """Interface that every LLM adapter is expected to implement.

    All methods are abstract: a class that explicitly inherits from this
    protocol must override each of them before it can be instantiated.
    """

    @abstractmethod
    async def async_get_embedding_with_backoff(self, text: str, model: str = "text-embedding-ada-002"):
        """Return the embedding of ``text`` (async variant).

        :param text: text to embed
        :param model: name of the embedding model
        """
        raise NotImplementedError

    @abstractmethod
    def get_embedding_with_backoff(self, text: str, model: str = "text-embedding-ada-002"):
        """Return the embedding of ``text`` (blocking variant).

        :param text: text to embed
        :param model: name of the embedding model
        """
        raise NotImplementedError

    @abstractmethod
    async def async_get_batch_embeddings_with_backoff(self, texts: List[str], models: List[str]):
        """Return embeddings for several texts in parallel.

        :param texts: texts to embed
        :param models: embedding model name for each text
        """
        raise NotImplementedError

    # TODO(review): an `acompletions_with_backoff(**kwargs)` hook was sketched here
    # but left commented out; decide whether completions belong in this interface.

    # Structured output
    @abstractmethod
    async def acreate_structured_output(self,
                                        text_input: str,
                                        system_prompt_path: str,
                                        response_model: Type[BaseModel]) -> BaseModel:
        """Return the model's answer to ``text_input`` parsed into ``response_model``.

        :param text_input: user input to extract information from
        :param system_prompt_path: path of the file holding the system prompt
        :param response_model: pydantic model describing the expected output
        """
        raise NotImplementedError
|
||||||
197
cognitive_architecture/infrastructure/llm/openai/adapter.py
Normal file
197
cognitive_architecture/infrastructure/llm/openai/adapter.py
Normal file
|
|
@ -0,0 +1,197 @@
|
||||||
|
"""Adapter for OpenAI's GPT-3, GPT=4 API."""
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
import random
|
||||||
|
import asyncio
|
||||||
|
from typing import List, Type
|
||||||
|
import openai
|
||||||
|
import instructor
|
||||||
|
from openai import OpenAI,AsyncOpenAI
|
||||||
|
from pydantic import BaseModel
|
||||||
|
from cognitive_architecture.config import Config
|
||||||
|
from cognitive_architecture.utils import read_query_prompt
|
||||||
|
from ..llm_interface import LLMInterface
|
||||||
|
|
||||||
|
#
|
||||||
|
# config = Config()
|
||||||
|
# config.load()
|
||||||
|
|
||||||
|
# aclient = instructor.apatch(AsyncOpenAI())
|
||||||
|
# OPENAI_API_KEY = config.openai_key
|
||||||
|
|
||||||
|
class OpenAIAdapter(LLMInterface):
    """Adapter for OpenAI's GPT-3 / GPT-4 API implementing ``LLMInterface``.

    Blocking helpers use the module-level ``openai`` client; async helpers use
    the instructor-patched ``AsyncOpenAI`` client stored in ``self.aclient``.
    """

    def __init__(self, api_key: str, model: str):
        """Configure the OpenAI clients.

        :param api_key: OpenAI API key; when empty the async client falls back
            to the ``OPENAI_API_KEY`` environment variable
        :param model: default chat model used by ``acreate_structured_output``
        """
        openai.api_key = api_key
        # Hand the key to the async client explicitly instead of relying on
        # the environment only; `or None` keeps the environment fallback.
        self.aclient = instructor.apatch(AsyncOpenAI(api_key=api_key or None))
        self.model = model

    # The two decorators below are defined as plain functions so they can be
    # applied inside this class body on every Python version; they are turned
    # into static methods at the end of the class.
    def retry_with_exponential_backoff(
        func,
        initial_delay: float = 1,
        exponential_base: float = 2,
        jitter: bool = True,
        max_retries: int = 20,
        errors: tuple = (openai.RateLimitError,),
    ):
        """Decorate a blocking callable so it is retried with exponential backoff.

        :param func: callable to wrap
        :param initial_delay: base delay in seconds
        :param exponential_base: factor the delay is multiplied by on each retry
        :param jitter: when true, scale each multiplication by a random factor in [1, 2)
        :param max_retries: number of retries after which an ``Exception`` is raised
        :param errors: exception types that trigger a retry; anything else propagates
        """
        import functools

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            """Call ``func`` until it succeeds or the retry budget is spent."""
            num_retries = 0
            delay = initial_delay

            while True:
                try:
                    return func(*args, **kwargs)
                except errors as error:
                    num_retries += 1
                    if num_retries > max_retries:
                        raise Exception(
                            f"Maximum number of retries ({max_retries}) exceeded."
                        ) from error

                    # Grow the delay first, then wait.
                    delay *= exponential_base * (1 + jitter * random.random())
                    time.sleep(delay)

        return wrapper

    def aretry_with_exponential_backoff(
        func,
        initial_delay: float = 1,
        exponential_base: float = 2,
        jitter: bool = True,
        max_retries: int = 20,
        errors: tuple = (openai.RateLimitError,),
    ):
        """Decorate a coroutine function so it is retried with exponential backoff.

        This is a regular (non-async) function on purpose: a decorator has to
        return the wrapper itself, not a coroutine that would produce it.

        :param func: coroutine function to wrap
        :param initial_delay: base delay in seconds
        :param exponential_base: factor the delay is multiplied by on each retry
        :param jitter: when true, scale each multiplication by a random factor in [1, 2)
        :param max_retries: number of retries after which an ``Exception`` is raised
        :param errors: exception types that trigger a retry; anything else propagates
        """
        import functools

        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            """Await ``func`` until it succeeds or the retry budget is spent."""
            num_retries = 0
            delay = initial_delay

            while True:
                try:
                    return await func(*args, **kwargs)
                except errors as error:
                    print(f"acreate (backoff): caught error: {error}")
                    num_retries += 1
                    if num_retries > max_retries:
                        raise Exception(
                            f"Maximum number of retries ({max_retries}) exceeded."
                        ) from error

                    # Grow the delay first, then wait without blocking the event loop.
                    delay *= exponential_base * (1 + jitter * random.random())
                    await asyncio.sleep(delay)

        return wrapper

    @retry_with_exponential_backoff
    def completions_with_backoff(self, **kwargs):
        """Blocking chat completion with backoff; ``kwargs`` go to the API call unchanged."""
        return openai.chat.completions.create(**kwargs)

    @aretry_with_exponential_backoff
    async def acompletions_with_backoff(self, **kwargs):
        """Async chat completion with backoff; ``kwargs`` go to the API call unchanged."""
        # The 1.x client exposes async calls on AsyncOpenAI, not as `acreate`.
        return await self.aclient.chat.completions.create(**kwargs)

    @aretry_with_exponential_backoff
    async def acreate_embedding_with_backoff(self, **kwargs):
        """Async embedding request with backoff; ``kwargs`` go to the API call unchanged."""
        return await self.aclient.embeddings.create(**kwargs)

    async def async_get_embedding_with_backoff(self, text, model="text-embedding-ada-002"):
        """Return the embedding of ``text`` (async, rate-limit aware).

        :param text: text to embed; newlines are replaced by spaces first
        :param model: name of the embedding model
        """
        text = text.replace("\n", " ")
        response = await self.acreate_embedding_with_backoff(input=[text], model=model)
        return response.data[0].embedding

    @retry_with_exponential_backoff
    def create_embedding_with_backoff(self, **kwargs):
        """Blocking embedding request with backoff; ``kwargs`` go to the API call unchanged."""
        return openai.embeddings.create(**kwargs)

    def get_embedding_with_backoff(self, text: str, model: str = "text-embedding-ada-002"):
        """Return the embedding of ``text`` (blocking, rate-limit aware).

        :param text: text to embed; newlines are replaced by spaces first
        :param model: name of the embedding model
        """
        text = text.replace("\n", " ")
        response = self.create_embedding_with_backoff(input=[text], model=model)
        return response.data[0].embedding

    # Backward-compatible alias for the original, misspelled method name.
    get_embedding_with_backoffself = get_embedding_with_backoff

    async def async_get_batch_embeddings_with_backoff(self, texts: List[str], models: List[str]):
        """Return embeddings for several texts, requested concurrently.

        :param texts: texts to embed
        :param models: embedding model name for each text, paired positionally
            with ``texts`` (pairing stops at the shorter of the two lists)
        """
        coroutines = (
            self.async_get_embedding_with_backoff(text, model)
            for text, model in zip(texts, models)
        )
        return await asyncio.gather(*coroutines)

    async def acreate_structured_output(
        self,
        text_input: str,
        system_prompt_path: str,
        response_model: Type[BaseModel],
        model: str = None,
    ) -> BaseModel:
        """Generate a response to ``text_input`` parsed into ``response_model``.

        :param text_input: user input to extract information from
        :param system_prompt_path: path of the file holding the system prompt
        :param response_model: pydantic model describing the expected output
        :param model: chat model to use; defaults to the model given to the constructor
        """
        system_prompt = read_query_prompt(system_prompt_path)

        # Await here so callers receive the parsed result, not a coroutine.
        return await self.aclient.chat.completions.create(
            model=model or self.model,
            messages=[
                {
                    "role": "user",
                    "content": (
                        "Use the given format to\n"
                        f"extract information from the following input: {text_input}. "
                    ),
                },
                {"role": "system", "content": system_prompt},
            ],
            response_model=response_model,
        )

    # Expose the decorators as static methods, as in the original public API.
    retry_with_exponential_backoff = staticmethod(retry_with_exponential_backoff)
    aretry_with_exponential_backoff = staticmethod(aretry_with_exponential_backoff)
|
||||||
|
|
@ -158,7 +158,7 @@ def get_embedding_with_backoff(text:str, model:str="text-embedding-ada-002"):
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
async def async_get_multiple_embeddings_with_backoff(texts: List[str], models: List[str]) :
|
async def async_get_batch_embeddings_with_backoff(texts: List[str], models: List[str]) :
|
||||||
"""To get multiple text embeddings in parallel, import/call this function
|
"""To get multiple text embeddings in parallel, import/call this function
|
||||||
It specifies defaults + handles rate-limiting + is async"""
|
It specifies defaults + handles rate-limiting + is async"""
|
||||||
# Create a generator of coroutines
|
# Create a generator of coroutines
|
||||||
|
|
@ -3,8 +3,8 @@ import os
|
||||||
import instructor
|
import instructor
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
import logging
|
import logging
|
||||||
from ..shared.data_models import KnowledgeGraph, MemorySummary
|
from cognitive_architecture.shared.data_models import KnowledgeGraph, MemorySummary
|
||||||
from ..config import Config
|
from cognitive_architecture.config import Config
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1,175 @@
|
||||||
|
{
|
||||||
|
"Natural Language Text": {
|
||||||
|
"type": "TEXT",
|
||||||
|
"subclass": [
|
||||||
|
"Articles, essays, and reports",
|
||||||
|
"Books and manuscripts",
|
||||||
|
"News stories and blog posts",
|
||||||
|
"Research papers and academic publications",
|
||||||
|
"Social media posts and comments",
|
||||||
|
"Website content and product descriptions",
|
||||||
|
"Personal narratives and stories"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"Structured Documents": {
|
||||||
|
"type": "TEXT",
|
||||||
|
"subclass": [
|
||||||
|
"Spreadsheets and tables",
|
||||||
|
"Forms and surveys",
|
||||||
|
"Databases and CSV files"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"Code and Scripts": {
|
||||||
|
"type": "TEXT",
|
||||||
|
"subclass": [
|
||||||
|
"Source code in various programming languages",
|
||||||
|
"Shell commands and scripts",
|
||||||
|
"Markup languages (HTML, XML)",
|
||||||
|
"Stylesheets (CSS) and configuration files (YAML, JSON, INI)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"Conversational Data": {
|
||||||
|
"type": "TEXT",
|
||||||
|
"subclass": [
|
||||||
|
"Chat transcripts and messaging history",
|
||||||
|
"Customer service logs and interactions",
|
||||||
|
"Conversational AI training data"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"Educational Content": {
|
||||||
|
"type": "TEXT",
|
||||||
|
"subclass": [
|
||||||
|
"Textbook content and lecture notes",
|
||||||
|
"Exam questions and academic exercises",
|
||||||
|
"E-learning course materials"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"Creative Writing": {
|
||||||
|
"type": "TEXT",
|
||||||
|
"subclass": [
|
||||||
|
"Poetry and prose",
|
||||||
|
"Scripts for plays, movies, and television",
|
||||||
|
"Song lyrics"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"Technical Documentation": {
|
||||||
|
"type": "TEXT",
|
||||||
|
"subclass": [
|
||||||
|
"Manuals and user guides",
|
||||||
|
"Technical specifications and API documentation",
|
||||||
|
"Helpdesk articles and FAQs"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"Legal and Regulatory Documents": {
|
||||||
|
"type": "TEXT",
|
||||||
|
"subclass": [
|
||||||
|
"Contracts and agreements",
|
||||||
|
"Laws, regulations, and legal case documents",
|
||||||
|
"Policy documents and compliance materials"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"Medical and Scientific Texts": {
|
||||||
|
"type": "TEXT",
|
||||||
|
"subclass": [
|
||||||
|
"Clinical trial reports",
|
||||||
|
"Patient records and case notes",
|
||||||
|
"Scientific journal articles"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"Financial and Business Documents": {
|
||||||
|
"type": "TEXT",
|
||||||
|
"subclass": [
|
||||||
|
"Financial reports and statements",
|
||||||
|
"Business plans and proposals",
|
||||||
|
"Market research and analysis reports"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"Advertising and Marketing Materials": {
|
||||||
|
"type": "TEXT",
|
||||||
|
"subclass": [
|
||||||
|
"Ad copies and marketing slogans",
|
||||||
|
"Product catalogs and brochures",
|
||||||
|
"Press releases and promotional content"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"Emails and Correspondence": {
|
||||||
|
"type": "TEXT",
|
||||||
|
"subclass": [
|
||||||
|
"Professional and formal correspondence",
|
||||||
|
"Personal emails and letters"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"Metadata and Annotations": {
|
||||||
|
"type": "TEXT",
|
||||||
|
"subclass": [
|
||||||
|
"Image and video captions",
|
||||||
|
"Annotations and metadata for various media"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"Language Learning Materials": {
|
||||||
|
"type": "TEXT",
|
||||||
|
"subclass": [
|
||||||
|
"Vocabulary lists and grammar rules",
|
||||||
|
"Language exercises and quizzes"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"Audio Content": {
|
||||||
|
"type": "AUDIO",
|
||||||
|
"subclass": [
|
||||||
|
"Music tracks and albums",
|
||||||
|
"Podcasts and radio broadcasts",
|
||||||
|
"Audiobooks and audio guides",
|
||||||
|
"Recorded interviews and speeches",
|
||||||
|
"Sound effects and ambient sounds"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"Image Content": {
|
||||||
|
"type": "IMAGE",
|
||||||
|
"subclass": [
|
||||||
|
"Photographs and digital images",
|
||||||
|
"Illustrations, diagrams, and charts",
|
||||||
|
"Infographics and visual data representations",
|
||||||
|
"Artwork and paintings",
|
||||||
|
"Screenshots and graphical user interfaces"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"Video Content": {
|
||||||
|
"type": "VIDEO",
|
||||||
|
"subclass": [
|
||||||
|
"Movies and short films",
|
||||||
|
"Documentaries and educational videos",
|
||||||
|
"Video tutorials and how-to guides",
|
||||||
|
"Animated features and cartoons",
|
||||||
|
"Live event recordings and sports broadcasts"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"Multimedia Content": {
|
||||||
|
"type": "MULTIMEDIA",
|
||||||
|
"subclass": [
|
||||||
|
"Interactive web content and games",
|
||||||
|
"Virtual reality (VR) and augmented reality (AR) experiences",
|
||||||
|
"Mixed media presentations and slide decks",
|
||||||
|
"E-learning modules with integrated multimedia",
|
||||||
|
"Digital exhibitions and virtual tours"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"3D Models and CAD Content": {
|
||||||
|
"type": "3D_MODEL",
|
||||||
|
"subclass": [
|
||||||
|
"Architectural renderings and building plans",
|
||||||
|
"Product design models and prototypes",
|
||||||
|
"3D animations and character models",
|
||||||
|
"Scientific simulations and visualizations",
|
||||||
|
"Virtual objects for AR/VR environments"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"Procedural Content": {
|
||||||
|
"type": "PROCEDURAL",
|
||||||
|
"subclass": [
|
||||||
|
"Tutorials and step-by-step guides",
|
||||||
|
"Workflow and process descriptions",
|
||||||
|
"Simulation and training exercises",
|
||||||
|
"Recipes and crafting instructions"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,8 @@
|
||||||
|
You are tasked with analyzing {{data_type}} files, especially in a multilayer network context. For tasks such as analysis, categorization, and feature extraction, various layers can be incorporated to capture the depth and breadth of information contained within the {{data_type}}.
|
||||||
|
These layers can help in understanding the content, context, and characteristics of the {{data_type}}
|
||||||
|
Your objective is to extract meaningful layers of information that will contribute to constructing a detailed multilayer network or knowledge graph.
|
||||||
|
Approach this task by considering the unique characteristics and inherent properties of the data at hand.
|
||||||
|
VERY IMPORTANT: The context you are working in is {required_layers.dict()['name']} and specific domain you are extracting data on is {{layer_name}}
|
||||||
|
Guidelines for Layer Extraction:
|
||||||
|
Take into account: The content type that in this case is: {{layer_name}} should play a major role in how you decompose into layers.
|
||||||
|
Based on your analysis, define and describe the layers you've identified, explaining their relevance and contribution to understanding the dataset. Your independent identification of layers will enable a nuanced and multifaceted representation of the data, enhancing applications in knowledge discovery, content analysis, and information retrieval.
|
||||||
|
|
@ -1,8 +1,10 @@
|
||||||
You are a top-tier algorithm
|
You are a top-tier algorithm
|
||||||
designed for extracting information in structured formats to build a knowledge graph.
|
designed for extracting information in structured formats to build a knowledge graph.
|
||||||
- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.
|
- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.
|
||||||
|
- **Edges** represent relationships between concepts. They're akin to Wikipedia links.
|
||||||
- The aim is to achieve simplicity and clarity in the
|
- The aim is to achieve simplicity and clarity in the
|
||||||
knowledge graph, making it accessible for a vast audience.
|
knowledge graph, making it accessible for a vast audience.
|
||||||
|
YOU ARE ONLY EXTRACTING DATA FOR COGNITIVE LAYER {{layer}}
|
||||||
## 2. Labeling Nodes
|
## 2. Labeling Nodes
|
||||||
- **Consistency**: Ensure you use basic or elementary types for node labels.
|
- **Consistency**: Ensure you use basic or elementary types for node labels.
|
||||||
- For example, when you identify an entity representing a person,
|
- For example, when you identify an entity representing a person,
|
||||||
|
|
@ -31,4 +33,4 @@ always use the most complete identifier for that entity throughout the knowledge
|
||||||
Remember, the knowledge graph should be coherent and easily understandable,
|
Remember, the knowledge graph should be coherent and easily understandable,
|
||||||
so maintaining consistency in entity references is crucial.
|
so maintaining consistency in entity references is crucial.
|
||||||
## 5. Strict Compliance
|
## 5. Strict Compliance
|
||||||
Adhere to the rules strictly. Non-compliance will result in termination
|
Adhere to the rules strictly. Non-compliance will result in termination.
|
||||||
1
cognitive_architecture/modules/cognify/__init__.py
Normal file
1
cognitive_architecture/modules/cognify/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
from .create_vector_memory import create_vector_memory
|
||||||
1
cognitive_architecture/modules/cognify/llm/__init__.py
Normal file
1
cognitive_architecture/modules/cognify/llm/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
from .create_vector_memory import create_vector_memory
|
||||||
|
|
@ -0,0 +1,13 @@
|
||||||
|
from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client
|
||||||
|
|
||||||
|
|
||||||
|
async def content_to_cog_layers(memory_name: str, payload: list):
    """Request a structured output from the LLM client for the given memory and payload."""
    # NOTE(review): OpenAIAdapter.acreate_structured_output takes
    # (text_input, system_prompt_path, response_model, model); this call passes
    # no response_model and names an embedding model — confirm the intended arguments.
    client = get_llm_client()
    structured_output = await client.acreate_structured_output(
        memory_name,
        payload,
        model="text-embedding-ada-002",
    )
    return structured_output
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1,15 @@
|
||||||
|
from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client
|
||||||
|
|
||||||
|
|
||||||
|
async def content_to_cog_layers(memory_name: str, payload: list):
    """Request a structured output from the LLM client for the given memory and payload."""
    # NOTE(review): OpenAIAdapter.acreate_structured_output takes
    # (text_input, system_prompt_path, response_model, model); this call passes
    # no response_model and names an embedding model — confirm the intended arguments.
    client = get_llm_client()
    structured_output = await client.acreate_structured_output(
        memory_name,
        payload,
        model="text-embedding-ada-002",
    )
    return structured_output
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1,9 @@
|
||||||
|
""" Content to Propositions"""
|
||||||
|
from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client
|
||||||
|
|
||||||
|
|
||||||
|
async def generate_graph(memory_name: str, payload: str):
    """Ask the LLM client to generate a graph for ``payload`` using the graph prompt file."""
    prompt_path = "cognitive_architecture/infrastructure/llm/prompts/generate_graph_prompt.txt"
    client = get_llm_client()
    # NOTE(review): no `generate_graph` method is visible on the adapter returned
    # by get_llm_client — confirm it exists before relying on this call.
    graph = await client.generate_graph(memory_name, doc_path=prompt_path, payload=payload)
    return graph
|
||||||
|
|
||||||
|
|
@ -2,7 +2,7 @@ import uuid
|
||||||
from typing import List
|
from typing import List
|
||||||
from qdrant_client.models import PointStruct
|
from qdrant_client.models import PointStruct
|
||||||
from cognitive_architecture.infrastructure.databases.vector.get_vector_database import get_vector_database
|
from cognitive_architecture.infrastructure.databases.vector.get_vector_database import get_vector_database
|
||||||
from cognitive_architecture.openai_tools import async_get_embedding_with_backoff
|
from cognitive_architecture.infrastructure.llm.openai.openai_tools import async_get_embedding_with_backoff
|
||||||
|
|
||||||
async def create_information_points(memory_name: str, payload: List[str]):
|
async def create_information_points(memory_name: str, payload: List[str]):
|
||||||
vector_db = get_vector_database()
|
vector_db = get_vector_database()
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
"""Data models for the cognitive architecture."""
|
"""Data models for the cognitive architecture."""
|
||||||
from typing import Optional, List
|
from enum import Enum
|
||||||
|
from typing import Optional, List, Union
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -39,3 +40,129 @@ class MemorySummary(BaseModel):
|
||||||
""" Memory summary. """
|
""" Memory summary. """
|
||||||
nodes: List[Node] = Field(..., default_factory=list)
|
nodes: List[Node] = Field(..., default_factory=list)
|
||||||
edges: List[Edge] = Field(..., default_factory=list)
|
edges: List[Edge] = Field(..., default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class TextSubclass(str, Enum):
    """Closed set of labels for textual content; each member's value is its display string."""

    # Natural language text
    ARTICLES = "Articles, essays, and reports"
    BOOKS = "Books and manuscripts"
    NEWS_STORIES = "News stories and blog posts"
    RESEARCH_PAPERS = "Research papers and academic publications"
    SOCIAL_MEDIA = "Social media posts and comments"
    WEBSITE_CONTENT = "Website content and product descriptions"
    PERSONAL_NARRATIVES = "Personal narratives and stories"

    # Structured documents
    SPREADSHEETS = "Spreadsheets and tables"
    FORMS = "Forms and surveys"
    DATABASES = "Databases and CSV files"

    # Code and scripts
    SOURCE_CODE = "Source code in various programming languages"
    SHELL_SCRIPTS = "Shell commands and scripts"
    MARKUP_LANGUAGES = "Markup languages (HTML, XML)"
    STYLESHEETS = "Stylesheets (CSS) and configuration files (YAML, JSON, INI)"

    # Conversational data
    CHAT_TRANSCRIPTS = "Chat transcripts and messaging history"
    CUSTOMER_SERVICE_LOGS = "Customer service logs and interactions"
    CONVERSATIONAL_AI = "Conversational AI training data"

    # Educational content
    TEXTBOOK_CONTENT = "Textbook content and lecture notes"
    EXAM_QUESTIONS = "Exam questions and academic exercises"
    E_LEARNING_MATERIALS = "E-learning course materials"

    # Creative writing
    POETRY = "Poetry and prose"
    SCRIPTS = "Scripts for plays, movies, and television"
    SONG_LYRICS = "Song lyrics"

    # Technical documentation
    MANUALS = "Manuals and user guides"
    TECH_SPECS = "Technical specifications and API documentation"
    HELPDESK_ARTICLES = "Helpdesk articles and FAQs"

    # Legal and regulatory documents
    LEGAL_CONTRACTS = "Contracts and agreements"
    LAWS = "Laws, regulations, and legal case documents"
    POLICY_DOCUMENTS = "Policy documents and compliance materials"

    # Medical and scientific texts
    CLINICAL_TRIALS = "Clinical trial reports"
    PATIENT_RECORDS = "Patient records and case notes"
    SCIENTIFIC_ARTICLES = "Scientific journal articles"

    # Financial and business documents
    FINANCIAL_REPORTS = "Financial reports and statements"
    BUSINESS_PLANS = "Business plans and proposals"
    MARKET_RESEARCH = "Market research and analysis reports"

    # Advertising and marketing materials
    AD_COPIES = "Ad copies and marketing slogans"
    PRODUCT_CATALOGS = "Product catalogs and brochures"
    PRESS_RELEASES = "Press releases and promotional content"

    # Emails and correspondence
    PROFESSIONAL_EMAILS = "Professional and formal correspondence"
    PERSONAL_EMAILS = "Personal emails and letters"

    # Metadata and annotations
    IMAGE_CAPTIONS = "Image and video captions"
    ANNOTATIONS = "Annotations and metadata for various media"

    # Language learning materials
    VOCAB_LISTS = "Vocabulary lists and grammar rules"
    LANGUAGE_EXERCISES = "Language exercises and quizzes"
|
||||||
|
|
||||||
|
class AudioSubclass(str, Enum):
    """Closed set of labels for audio content; each member's value is its display string."""

    MUSIC_TRACKS = "Music tracks and albums"
    PODCASTS = "Podcasts and radio broadcasts"
    AUDIOBOOKS = "Audiobooks and audio guides"
    INTERVIEWS = "Recorded interviews and speeches"
    SOUND_EFFECTS = "Sound effects and ambient sounds"
|
||||||
|
|
||||||
|
class ImageSubclass(str, Enum):
    """Closed set of labels for image content; each member's value is its display string."""

    PHOTOGRAPHS = "Photographs and digital images"
    ILLUSTRATIONS = "Illustrations, diagrams, and charts"
    INFOGRAPHICS = "Infographics and visual data representations"
    ARTWORK = "Artwork and paintings"
    SCREENSHOTS = "Screenshots and graphical user interfaces"
|
||||||
|
|
||||||
|
class VideoSubclass(str, Enum):
    """Closed set of labels for video content; each member's value is its display string."""

    MOVIES = "Movies and short films"
    DOCUMENTARIES = "Documentaries and educational videos"
    TUTORIALS = "Video tutorials and how-to guides"
    ANIMATED_FEATURES = "Animated features and cartoons"
    LIVE_EVENTS = "Live event recordings and sports broadcasts"
|
||||||
|
|
||||||
|
class MultimediaSubclass(str, Enum):
    """Closed set of labels for multimedia content; each member's value is its display string."""

    WEB_CONTENT = "Interactive web content and games"
    VR_EXPERIENCES = "Virtual reality (VR) and augmented reality (AR) experiences"
    MIXED_MEDIA = "Mixed media presentations and slide decks"
    E_LEARNING_MODULES = "E-learning modules with integrated multimedia"
    DIGITAL_EXHIBITIONS = "Digital exhibitions and virtual tours"
|
||||||
|
|
||||||
|
class Model3DSubclass(str, Enum):
    """Closed set of labels for 3D-model content; each member's value is its display string."""

    ARCHITECTURAL_RENDERINGS = "Architectural renderings and building plans"
    PRODUCT_MODELS = "Product design models and prototypes"
    ANIMATIONS = "3D animations and character models"
    SCIENTIFIC_VISUALIZATIONS = "Scientific simulations and visualizations"
    # NOTE(review): the content-type taxonomy JSON added in the same change spells
    # this label "Virtual objects for AR/VR environments" — confirm which wording is intended.
    VR_OBJECTS = "Virtual objects for AR/VR applications"
|
||||||
|
|
||||||
|
class ProceduralSubclass(str, Enum):
    """Closed set of labels for procedural content; each member's value is its display string."""

    TUTORIALS_GUIDES = "Tutorials and step-by-step guides"
    WORKFLOW_DESCRIPTIONS = "Workflow and process descriptions"
    SIMULATIONS = "Simulation and training exercises"
    RECIPES = "Recipes and crafting instructions"
|
||||||
|
class ContentType(BaseModel):
    """Common parent of the content models; carries the content ``type`` tag."""

    type: str
|
||||||
|
|
||||||
|
class TextContent(ContentType):
    """Content tagged as ``TEXT`` together with its text subclasses."""

    # The override carries an annotation: pydantic v2 rejects a base-class field
    # that is overridden by a non-annotated attribute.
    type: str = "TEXT"
    subclass: List[TextSubclass]
|
||||||
|
|
||||||
|
class AudioContent(ContentType):
    """Content tagged as ``AUDIO`` together with its audio subclasses."""

    # The override carries an annotation: pydantic v2 rejects a base-class field
    # that is overridden by a non-annotated attribute.
    type: str = "AUDIO"
    subclass: List[AudioSubclass]
|
||||||
|
|
||||||
|
class ImageContent(ContentType):
    """Content tagged as ``IMAGE`` together with its image subclasses."""

    # The override carries an annotation: pydantic v2 rejects a base-class field
    # that is overridden by a non-annotated attribute.
    type: str = "IMAGE"
    subclass: List[ImageSubclass]
|
||||||
|
|
||||||
|
class VideoContent(ContentType):
    """Content tagged as ``VIDEO`` together with its video subclasses."""

    # The override carries an annotation: pydantic v2 rejects a base-class field
    # that is overridden by a non-annotated attribute.
    type: str = "VIDEO"
    subclass: List[VideoSubclass]
|
||||||
|
|
||||||
|
class MultimediaContent(ContentType):
    """Content tagged as ``MULTIMEDIA`` together with its multimedia subclasses."""

    # The override carries an annotation: pydantic v2 rejects a base-class field
    # that is overridden by a non-annotated attribute.
    type: str = "MULTIMEDIA"
    subclass: List[MultimediaSubclass]
|
||||||
|
|
||||||
|
class Model3DContent(ContentType):
    """Content tagged as ``3D_MODEL`` together with its 3D-model subclasses."""

    # The override carries an annotation: pydantic v2 rejects a base-class field
    # that is overridden by a non-annotated attribute.
    type: str = "3D_MODEL"
    subclass: List[Model3DSubclass]
|
||||||
|
|
||||||
|
class ProceduralContent(ContentType):
    """Content tagged as ``PROCEDURAL`` together with its procedural subclasses."""

    # The override carries an annotation: pydantic v2 rejects a base-class field
    # that is overridden by a non-annotated attribute.
    type: str = "PROCEDURAL"
    subclass: List[ProceduralSubclass]
|
||||||
|
|
||||||
|
class SinglePrediction(BaseModel):
    """A single predicted class label, expressed as one of the content models."""

    label: Union[
        TextContent,
        AudioContent,
        ImageContent,
        VideoContent,
        MultimediaContent,
        Model3DContent,
        ProceduralContent,
    ]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ import os
|
||||||
import random
|
import random
|
||||||
import string
|
import string
|
||||||
import uuid
|
import uuid
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
from graphviz import Digraph
|
from graphviz import Digraph
|
||||||
from sqlalchemy import or_
|
from sqlalchemy import or_
|
||||||
|
|
@ -284,3 +285,13 @@ async def get_memory_name_by_doc_id(session: AsyncSession, docs_id: str):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def read_query_prompt(filename: str) -> str:
    """Read a query prompt from a file.

    :param filename: path of the prompt file
    :return: the file's text, or ``None`` when the file is missing or cannot be
        read (the failure is logged, not raised)
    """
    # NOTE(review): the return annotation is narrower than the actual contract
    # (``None`` on failure); widen it once ``Optional`` is available in this module.
    file_path = Path(filename)
    try:
        # Decode as UTF-8 explicitly so the result does not vary with the
        # platform's default encoding.
        return file_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        logging.error(f"File not found: {file_path.absolute()}")
    except Exception as e:
        logging.error(f"An error of type {type(e).__name__} occurred while reading file: {file_path.absolute()}. Error message: {e}")
    return None
|
||||||
Loading…
Add table
Reference in a new issue