Revert the lvl2

This commit is contained in:
parent 1a284ec127
commit 317223d866

21 changed files with 234 additions and 1050 deletions
@@ -1,7 +1,6 @@
from langchain.document_loaders import PyPDFLoader
import sys, os
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from shared.chunk_strategy import ChunkStrategy

from level_2.shared.chunk_strategy import ChunkStrategy
import re
def chunk_data(chunk_strategy=None, source_data=None, chunk_size=None, chunk_overlap=None):
@@ -1,59 +0,0 @@
import os
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from contextlib import contextmanager
from sqlalchemy.exc import OperationalError
from time import sleep
import sys
from dotenv import load_dotenv
load_dotenv()

# this is needed to import classes from other modules
script_dir = os.path.dirname(os.path.abspath(__file__))
# Get the parent directory of your script and add it to sys.path
parent_dir = os.path.dirname(script_dir)
sys.path.append(parent_dir)


# in seconds
MAX_RETRIES = 3
RETRY_DELAY = 5

username = os.getenv('POSTGRES_USER')
password = os.getenv('POSTGRES_PASSWORD')
database_name = os.getenv('POSTGRES_DB')
host = os.getenv('POSTGRES_HOST')


SQLALCHEMY_DATABASE_URL = f"postgresql://{username}:{password}@{host}:5432/{database_name}"

engine = create_engine(
    SQLALCHEMY_DATABASE_URL,
    pool_recycle=3600,  # recycle connections after 1 hour
    pool_pre_ping=True  # test the connection for liveness upon each checkout
)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

Base = declarative_base()

@contextmanager
def get_db():
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()

def safe_db_operation(db_op, *args, **kwargs):
    for attempt in range(MAX_RETRIES):
        with get_db() as db:
            try:
                return db_op(db, *args, **kwargs)
            except OperationalError as e:
                db.rollback()
                if "server closed the connection unexpectedly" in str(e) and attempt < MAX_RETRIES - 1:
                    sleep(RETRY_DELAY)
                else:
                    raise
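Usage sketch (illustrative only, not part of this commit): the removed safe_db_operation helper wraps any callable whose first argument is a SQLAlchemy session and retries it when Postgres drops the connection. The count_users function below is hypothetical; User is the ORM model defined in the models package of this repository.

from models.user import User  # assumption: User model as defined elsewhere in this diff

def count_users(db):
    # Hypothetical query function passed to the removed helper.
    return db.query(User).count()

total = safe_db_operation(count_users)  # retried up to MAX_RETRIES times on OperationalError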
@@ -14,18 +14,9 @@ services:
    ports:
      - 8000:8000
      - 443:443
  postgres:
    image: postgres
    container_name: postgres
    environment:
      - POSTGRES_HOST_AUTH_METHOD= trust
      - POSTGRES_USER=bla
      - POSTGRES_PASSWORD=bla
      - POSTGRES_DB=bubu
    networks:
      - promethai_mem_backend
    ports:
      - "5432:5432"


networks:
  promethai_mem_backend:
    name: promethai_mem_backend
@@ -1,7 +1,6 @@
#!/bin/bash
export ENVIRONMENT
python fetch_secret.py
python scripts/create_database.py

# Start Gunicorn
gunicorn -w 2 -k uvicorn.workers.UvicornWorker -t 120 --bind=0.0.0.0:8000 --bind=0.0.0.0:443 --log-level debug api:app
@@ -80,6 +80,30 @@ from vectordb.basevectordb import BaseMemory
from modulators.modulators import DifferentiableLayer


class SemanticMemory(BaseMemory):
    def __init__(
        self,
        user_id: str,
        memory_id: Optional[str],
        index_name: Optional[str],
        db_type: str = "weaviate",
    ):
        super().__init__(
            user_id, memory_id, index_name, db_type, namespace="SEMANTICMEMORY")


class EpisodicMemory(BaseMemory):
    def __init__(
        self,
        user_id: str,
        memory_id: Optional[str],
        index_name: Optional[str],
        db_type: str = "weaviate",
    ):
        super().__init__(
            user_id, memory_id, index_name, db_type, namespace="EPISODICMEMORY"
        )


class EpisodicBuffer(BaseMemory):
    def __init__(
@@ -795,93 +819,37 @@ class EpisodicBuffer(BaseMemory):
        return result_parsing.json()


# class LongTermMemory:
# def __init__(
# self,
# user_id: str = "676",
# memory_id: Optional[str] = None,
# index_name: Optional[str] = None,
# db_type: str = "weaviate",
# ):
# self.user_id = user_id
# self.memory_id = memory_id
# self.ltm_memory_id = str(uuid.uuid4())
# self.index_name = index_name
# self.db_type = db_type
# self.semantic_memory = SemanticMemory(user_id, memory_id, index_name, db_type)
# self.episodic_memory = EpisodicMemory(user_id, memory_id, index_name, db_type)

#
# class ShortTermMemory:
# def __init__(
# self,
# user_id: str = "676",
# memory_id: Optional[str] = None,
# index_name: Optional[str] = None,
# db_type: str = "weaviate",
# ):
# self.user_id = user_id
# self.memory_id = memory_id
# self.stm_memory_id = str(uuid.uuid4())
# self.index_name = index_name
# self.db_type = db_type
# self.episodic_buffer = EpisodicBuffer(user_id, memory_id, index_name, db_type)
#
# class PythonClass:
# def __init__(self, name):
# self.name = name
# self.attributes = set()  # Using set to store unique attribute names
# self.methods = set()  # Using set to store unique method names
# self.inheritance = None
# self.associations = []
#
# def add_method(self, method_name):
# self.methods.add(method_name)
#
# def add_attribute(self, attribute_name):
# self.attributes.add(attribute_name)
# async def call_method(self, method_name, *args, **kwargs):
# if method_name in self.methods:
# method = getattr(self, method_name, None)
# if method:
# return await method(*args, **kwargs)
# raise AttributeError(f"{self.name} object has no attribute {method_name}")
#
# def get_attribute(self, attribute_name):
# return self.attributes.get(attribute_name)
class LongTermMemory:
    def __init__(
        self,
        user_id: str = "676",
        memory_id: Optional[str] = None,
        index_name: Optional[str] = None,
        db_type: str = "weaviate",
    ):
        self.user_id = user_id
        self.memory_id = memory_id
        self.ltm_memory_id = str(uuid.uuid4())
        self.index_name = index_name
        self.db_type = db_type
        self.semantic_memory = SemanticMemory(user_id, memory_id, index_name, db_type)
        self.episodic_memory = EpisodicMemory(user_id, memory_id, index_name, db_type)


class DynamicBaseMemory(BaseMemory):
    def __init__(self, name, user_id, memory_id, index_name, db_type, namespace):
        super().__init__(user_id, memory_id, index_name, db_type, namespace)
        self.name = name
        self.attributes = set()
        self.methods = set()
        self.inheritance = None
        self.associations = []

    def add_method(self, method_name):
        self.methods.add(method_name)

    def add_attribute(self, attribute_name):
        self.attributes.add(attribute_name)

    def get_attribute(self, attribute_name):
        return attribute_name in self.attributes

    def add_association(self, associated_memory):
        if associated_memory not in self.associations:
            self.associations.append(associated_memory)
            # Optionally, establish a bidirectional association
            associated_memory.associations.append(self)

class Attribute:
    def __init__(self, name):
        self.name = name

class Method:
    def __init__(self, name):
        self.name = name
class ShortTermMemory:
    def __init__(
        self,
        user_id: str = "676",
        memory_id: Optional[str] = None,
        index_name: Optional[str] = None,
        db_type: str = "weaviate",
    ):
        self.user_id = user_id
        self.memory_id = memory_id
        self.stm_memory_id = str(uuid.uuid4())
        self.index_name = index_name
        self.db_type = db_type
        self.episodic_buffer = EpisodicBuffer(user_id, memory_id, index_name, db_type)


class Memory:
@@ -889,17 +857,15 @@ class Memory:
    OPENAI_TEMPERATURE = float(os.getenv("OPENAI_TEMPERATURE", 0.0))
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")

    def __init__(self, user_id: str = "676", index_name: str = None,
                 knowledge_source: str = None, knowledge_type: str = None,
                 db_type: str = "weaviate", namespace: str = None, ) -> None:
        self.memory_class = DynamicBaseMemory('Memory', user_id, str(uuid.uuid4()), index_name, db_type, namespace)
        self.semantic_memory_class = DynamicBaseMemory('SemanticMemory', user_id, str(uuid.uuid4()), index_name,
                                                       db_type, namespace)
        self.episodic_memory_class = DynamicBaseMemory('EpisodicMemory', user_id, str(uuid.uuid4()), index_name,
                                                       db_type, namespace)
        self.episodic_buffer_class = DynamicBaseMemory('EpisodicBuffer', user_id, str(uuid.uuid4()), index_name,
                                                       db_type, namespace)

    def __init__(
        self,
        user_id: str = "676",
        index_name: str = None,
        knowledge_source: str = None,
        knowledge_type: str = None,
        db_type: str = "weaviate",
        namespace: str = None,
    ) -> None:
        self.user_id = user_id
        self.index_name = index_name
        self.db_type = db_type
@ -909,45 +875,150 @@ class Memory:
|
|||
self.long_term_memory = None
|
||||
self.short_term_memory = None
|
||||
self.namespace = namespace
|
||||
load_dotenv()
|
||||
|
||||
attributes_list = ['user_id', 'index_name', 'db_type', 'knowledge_source', 'knowledge_type', 'memory_id',
|
||||
'long_term_memory', 'short_term_memory', 'namespace']
|
||||
for attr in attributes_list:
|
||||
self.memory_class.add_attribute(attr)
|
||||
# Asynchronous factory function for creating LongTermMemory
|
||||
async def async_create_long_term_memory(
|
||||
self, user_id, memory_id, index_name, db_type
|
||||
):
|
||||
# Perform asynchronous initialization steps if needed
|
||||
return LongTermMemory(
|
||||
user_id=self.user_id,
|
||||
memory_id=self.memory_id,
|
||||
index_name=self.index_name,
|
||||
db_type=self.db_type,
|
||||
)
|
||||
|
||||
methods_list = ['async_create_long_term_memory', 'async_init', 'add_memories', "fetch_memories", 'async_create_short_term_memory',
|
||||
'_create_buffer_context', '_get_task_list', '_run_main_buffer',
|
||||
'_available_operations', '_provide_feedback']
|
||||
for class_instance in [self.memory_class, self.semantic_memory_class, self.episodic_memory_class, self.episodic_buffer_class]:
|
||||
for method in methods_list:
|
||||
class_instance.add_method(method)
|
||||
async def async_init(self):
|
||||
# Asynchronous initialization of LongTermMemory and ShortTermMemory
|
||||
self.long_term_memory = await self.async_create_long_term_memory(
|
||||
user_id=self.user_id,
|
||||
memory_id=self.memory_id,
|
||||
index_name=self.index_name,
|
||||
db_type=self.db_type,
|
||||
)
|
||||
self.short_term_memory = await self.async_create_short_term_memory(
|
||||
user_id=self.user_id,
|
||||
memory_id=self.memory_id,
|
||||
index_name=self.index_name,
|
||||
db_type=self.db_type,
|
||||
)
|
||||
|
||||
async def dynamic_method_call(self, dynamic_base_memory_instance, method_name: str, *args, **kwargs):
|
||||
if method_name in dynamic_base_memory_instance.methods:
|
||||
method = getattr(dynamic_base_memory_instance, method_name, None)
|
||||
if method:
|
||||
return await method(*args, **kwargs)
|
||||
raise AttributeError(f"{dynamic_base_memory_instance.name} object has no attribute {method_name}")
|
||||
async def async_create_short_term_memory(
|
||||
self, user_id, memory_id, index_name, db_type
|
||||
):
|
||||
# Perform asynchronous initialization steps if needed
|
||||
return ShortTermMemory(
|
||||
user_id=self.user_id,
|
||||
memory_id=self.memory_id,
|
||||
index_name=self.index_name,
|
||||
db_type=self.db_type,
|
||||
)
|
||||
|
||||
def add_dynamic_memory_class(self, class_name: str, namespace: str):
|
||||
new_memory_class = DynamicBaseMemory(class_name, self.user_id, str(uuid.uuid4()), self.index_name,
|
||||
self.db_type, namespace)
|
||||
setattr(self, f"{class_name.lower()}_class", new_memory_class)
|
||||
return new_memory_class
|
||||
async def _add_semantic_memory(
|
||||
self, observation: str, loader_settings: dict = None, params: dict = None
|
||||
):
|
||||
return await self.long_term_memory.semantic_memory.add_memories(
|
||||
observation=observation,
|
||||
loader_settings=loader_settings,
|
||||
params=params,
|
||||
)
|
||||
|
||||
def add_attribute_to_class(self, class_instance, attribute_name: str):
|
||||
class_instance.add_attribute(attribute_name)
|
||||
async def _fetch_semantic_memory(self, observation, params):
|
||||
return await self.long_term_memory.semantic_memory.fetch_memories(
|
||||
observation=observation, params=params
|
||||
)
|
||||
|
||||
def add_method_to_class(self, class_instance, method_name: str):
|
||||
class_instance.add_method(method_name)
|
||||
async def _delete_semantic_memory(self, params: str = None):
|
||||
return await self.long_term_memory.semantic_memory.delete_memories(
|
||||
params=params
|
||||
)
|
||||
|
||||
async def _add_episodic_memory(
|
||||
self, observation: str, loader_settings: dict = None, params: dict = None
|
||||
):
|
||||
return await self.long_term_memory.episodic_memory.add_memories(
|
||||
observation=observation, loader_settings=loader_settings, params=params
|
||||
)
|
||||
|
||||
async def _fetch_episodic_memory(self, observation, params: str = None):
|
||||
return await self.long_term_memory.episodic_memory.fetch_memories(
|
||||
observation=observation, params=params
|
||||
)
|
||||
|
||||
async def _delete_episodic_memory(self, params: str = None):
|
||||
return await self.long_term_memory.episodic_memory.delete_memories(
|
||||
params=params
|
||||
)
|
||||
|
||||
|
||||
async def _add_buffer_memory(
|
||||
self,
|
||||
user_input: str,
|
||||
namespace: str = None,
|
||||
loader_settings: dict = None,
|
||||
params: dict = None,
|
||||
):
|
||||
return await self.short_term_memory.episodic_buffer.add_memories(
|
||||
observation=user_input, loader_settings=loader_settings, params=params
|
||||
)
|
||||
|
||||
async def _fetch_buffer_memory(self, user_input: str):
|
||||
return await self.short_term_memory.episodic_buffer.fetch_memories(
|
||||
observation=user_input
|
||||
)
|
||||
|
||||
async def _delete_buffer_memory(self, params: str = None):
|
||||
return await self.short_term_memory.episodic_buffer.delete_memories(
|
||||
params=params
|
||||
)
|
||||
|
||||
async def _create_buffer_context(
|
||||
self,
|
||||
user_input: str,
|
||||
params: dict = None,
|
||||
attention_modulators: dict = None,
|
||||
):
|
||||
return await self.short_term_memory.episodic_buffer.buffer_context(
|
||||
user_input=user_input,
|
||||
params=params,
|
||||
attention_modulators=attention_modulators,
|
||||
)
|
||||
async def _get_task_list(
|
||||
self,
|
||||
user_input: str,
|
||||
params: str = None,
|
||||
attention_modulators: dict = None,
|
||||
):
|
||||
return await self.short_term_memory.episodic_buffer.get_task_list(
|
||||
user_input=user_input,
|
||||
params=params,
|
||||
attention_modulators=attention_modulators,
|
||||
)
|
||||
async def _run_main_buffer(
|
||||
self,
|
||||
user_input: str,
|
||||
params: dict = None,
|
||||
attention_modulators: dict = None,
|
||||
):
|
||||
return await self.short_term_memory.episodic_buffer.main_buffer(
|
||||
user_input=user_input,
|
||||
params=params,
|
||||
attention_modulators=attention_modulators,
|
||||
)
|
||||
|
||||
async def _available_operations(self):
|
||||
return await self.long_term_memory.episodic_buffer.available_operations()
|
||||
|
||||
async def _provide_feedback(self, score:str =None, params: dict = None, attention_modulators: dict = None):
|
||||
return await self.short_term_memory.episodic_buffer.provide_feedback(score=score, params=params, attention_modulators=attention_modulators)
|
||||
|
||||
|
||||
async def main():
|
||||
|
||||
# if you want to run the script as a standalone script, do so with the examples below
|
||||
# memory = Memory(user_id="TestUser")
|
||||
# await memory.async_init()
|
||||
memory = Memory(user_id="TestUser")
|
||||
await memory.async_init()
|
||||
params = {
|
||||
"version": "1.0",
|
||||
"agreement_id": "AG123456",
|
||||
|
|
@@ -966,18 +1037,8 @@ async def main():
        "source": "url",
        "path": "https://www.ibiblio.org/ebooks/London/Call%20of%20Wild.pdf"
    }
    # memory_instance = Memory(namespace='SEMANTICMEMORY')
    # sss = await memory_instance.dynamic_method_call(memory_instance.semantic_memory_class, 'fetch_memories', observation='some_observation')
    memory_instance = Memory(namespace='PROCEDURALMEMORY')
    procedural_memory_class = memory_instance.add_dynamic_memory_class('ProceduralMemory', 'PROCEDURALMEMORY')
    memory_instance.add_method_to_class(procedural_memory_class, 'add_memories')

    sss = await memory_instance.dynamic_method_call(memory_instance.proceduralmemory_class, 'add_memories',
                                                    observation='some_observation', params=params)

    print(sss)
    # load_jack_london = await memory._add_semantic_memory(observation = "bla", loader_settings=loader_settings, params=params)
    # print(load_jack_london)
    load_jack_london = await memory._add_semantic_memory(observation = "bla", loader_settings=loader_settings, params=params)
    print(load_jack_london)

    modulator = {"relevance": 0.1, "frequency": 0.1}
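Run sketch (illustrative only, not part of this commit): the main() coroutine shown in the hunk above would be driven with asyncio, consistent with the __main__ guard that appears later in the same file.

import asyncio

if __name__ == "__main__":
    # Drives the async Memory workflow defined in main() above.
    asyncio.run(main())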
@ -1000,161 +1061,7 @@ async def main():
|
|||
# print(run_main_buffer)
|
||||
# del_semantic = await memory._delete_semantic_memory()
|
||||
# print(del_semantic)
|
||||
# def __init__(
|
||||
# self,
|
||||
# user_id: str = "676",
|
||||
# index_name: str = None,
|
||||
# knowledge_source: str = None,
|
||||
# knowledge_type: str = None,
|
||||
# db_type: str = "weaviate",
|
||||
# namespace: str = None,
|
||||
# ) -> None:
|
||||
# self.user_id = user_id
|
||||
# self.index_name = index_name
|
||||
# self.db_type = db_type
|
||||
# self.knowledge_source = knowledge_source
|
||||
# self.knowledge_type = knowledge_type
|
||||
# self.memory_id = str(uuid.uuid4())
|
||||
# self.long_term_memory = None
|
||||
# self.short_term_memory = None
|
||||
# self.namespace = namespace
|
||||
# load_dotenv()
|
||||
|
||||
# Asynchronous factory function for creating LongTermMemory
|
||||
# async def async_create_long_term_memory(
|
||||
# self, user_id, memory_id, index_name, db_type
|
||||
# ):
|
||||
# # Perform asynchronous initialization steps if needed
|
||||
# return LongTermMemory(
|
||||
# user_id=self.user_id,
|
||||
# memory_id=self.memory_id,
|
||||
# index_name=self.index_name,
|
||||
# db_type=self.db_type,
|
||||
# )
|
||||
#
|
||||
# async def async_init(self):
|
||||
# # Asynchronous initialization of LongTermMemory and ShortTermMemory
|
||||
# self.long_term_memory = await self.async_create_long_term_memory(
|
||||
# user_id=self.user_id,
|
||||
# memory_id=self.memory_id,
|
||||
# index_name=self.index_name,
|
||||
# db_type=self.db_type,
|
||||
# )
|
||||
# self.short_term_memory = await self.async_create_short_term_memory(
|
||||
# user_id=self.user_id,
|
||||
# memory_id=self.memory_id,
|
||||
# index_name=self.index_name,
|
||||
# db_type=self.db_type,
|
||||
# )
|
||||
#
|
||||
# async def async_create_short_term_memory(
|
||||
# self, user_id, memory_id, index_name, db_type
|
||||
# ):
|
||||
# # Perform asynchronous initialization steps if needed
|
||||
# return ShortTermMemory(
|
||||
# user_id=self.user_id,
|
||||
# memory_id=self.memory_id,
|
||||
# index_name=self.index_name,
|
||||
# db_type=self.db_type,
|
||||
# )
|
||||
#
|
||||
# async def _add_semantic_memory(
|
||||
# self, observation: str, loader_settings: dict = None, params: dict = None
|
||||
# ):
|
||||
# return await self.long_term_memory.semantic_memory.add_memories(
|
||||
# observation=observation,
|
||||
# loader_settings=loader_settings,
|
||||
# params=params,
|
||||
# )
|
||||
#
|
||||
# async def _fetch_semantic_memory(self, observation, params):
|
||||
# return await self.long_term_memory.semantic_memory.fetch_memories(
|
||||
# observation=observation, params=params
|
||||
# )
|
||||
#
|
||||
# async def _delete_semantic_memory(self, params: str = None):
|
||||
# return await self.long_term_memory.semantic_memory.delete_memories(
|
||||
# params=params
|
||||
# )
|
||||
#
|
||||
# async def _add_episodic_memory(
|
||||
# self, observation: str, loader_settings: dict = None, params: dict = None
|
||||
# ):
|
||||
# return await self.long_term_memory.episodic_memory.add_memories(
|
||||
# observation=observation, loader_settings=loader_settings, params=params
|
||||
# )
|
||||
#
|
||||
# async def _fetch_episodic_memory(self, observation, params: str = None):
|
||||
# return await self.long_term_memory.episodic_memory.fetch_memories(
|
||||
# observation=observation, params=params
|
||||
# )
|
||||
#
|
||||
# async def _delete_episodic_memory(self, params: str = None):
|
||||
# return await self.long_term_memory.episodic_memory.delete_memories(
|
||||
# params=params
|
||||
# )
|
||||
#
|
||||
#
|
||||
# async def _add_buffer_memory(
|
||||
# self,
|
||||
# user_input: str,
|
||||
# namespace: str = None,
|
||||
# loader_settings: dict = None,
|
||||
# params: dict = None,
|
||||
# ):
|
||||
# return await self.short_term_memory.episodic_buffer.add_memories(
|
||||
# observation=user_input, loader_settings=loader_settings, params=params
|
||||
# )
|
||||
#
|
||||
# async def _fetch_buffer_memory(self, user_input: str):
|
||||
# return await self.short_term_memory.episodic_buffer.fetch_memories(
|
||||
# observation=user_input
|
||||
# )
|
||||
#
|
||||
# async def _delete_buffer_memory(self, params: str = None):
|
||||
# return await self.short_term_memory.episodic_buffer.delete_memories(
|
||||
# params=params
|
||||
# )
|
||||
#
|
||||
# async def _create_buffer_context(
|
||||
# self,
|
||||
# user_input: str,
|
||||
# params: dict = None,
|
||||
# attention_modulators: dict = None,
|
||||
# ):
|
||||
# return await self.short_term_memory.episodic_buffer.buffer_context(
|
||||
# user_input=user_input,
|
||||
# params=params,
|
||||
# attention_modulators=attention_modulators,
|
||||
# )
|
||||
# async def _get_task_list(
|
||||
# self,
|
||||
# user_input: str,
|
||||
# params: str = None,
|
||||
# attention_modulators: dict = None,
|
||||
# ):
|
||||
# return await self.short_term_memory.episodic_buffer.get_task_list(
|
||||
# user_input=user_input,
|
||||
# params=params,
|
||||
# attention_modulators=attention_modulators,
|
||||
# )
|
||||
# async def _run_main_buffer(
|
||||
# self,
|
||||
# user_input: str,
|
||||
# params: dict = None,
|
||||
# attention_modulators: dict = None,
|
||||
# ):
|
||||
# return await self.short_term_memory.episodic_buffer.main_buffer(
|
||||
# user_input=user_input,
|
||||
# params=params,
|
||||
# attention_modulators=attention_modulators,
|
||||
# )
|
||||
#
|
||||
# async def _available_operations(self):
|
||||
# return await self.long_term_memory.episodic_buffer.available_operations()
|
||||
#
|
||||
# async def _provide_feedback(self, score:str =None, params: dict = None, attention_modulators: dict = None):
|
||||
# return await self.short_term_memory.episodic_buffer.provide_feedback(score=score, params=params, attention_modulators=attention_modulators)
|
||||
|
||||
if __name__ == "__main__":
|
||||
import asyncio
|
||||
|
|
|
|||
|
|
@@ -1,10 +1,8 @@
import os
from io import BytesIO
import sys, os
import fitz
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from chunkers.chunkers import chunk_data
import fitz
from level_2.chunkers.chunkers import chunk_data
from langchain.document_loaders import PyPDFLoader

import requests
@@ -1,22 +0,0 @@
# memory.py
from datetime import datetime
from sqlalchemy import Column, Integer, String, DateTime, ForeignKey
from sqlalchemy.orm import relationship
import os
import sys
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from database.database import Base

class Memory(Base):
    __tablename__ = 'memories'

    id = Column(Integer, primary_key=True)
    user_id = Column(Integer, ForeignKey('users.id'), index=True)
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    user = relationship("User", back_populates="memories")
    metadatas = relationship("MetaDatas", back_populates="memory", cascade="all, delete-orphan")

    def __repr__(self):
        return f"<Memory(id={self.id}, user_id={self.user_id}, created_at={self.created_at}, updated_at={self.updated_at})>"
@@ -1,17 +0,0 @@
from sqlalchemy import create_engine, Column, Integer, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

import os
import sys
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from database.database import Base


class MemoryAssociation(Base):
    __tablename__ = 'memory_associations'

    id = Column(Integer, primary_key=True)
    user_id = Column(String)
    source_memory_id = Column(String)
    target_memory_id = Column(String)
@@ -1,24 +0,0 @@
# metadata.py
from datetime import datetime
from sqlalchemy import Column, Integer, String, DateTime, ForeignKey
from sqlalchemy.orm import relationship
import os
import sys
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from database.database import Base


class MetaDatas(Base):
    __tablename__ = 'metadata'

    id = Column(Integer, primary_key=True)
    version = Column(String, nullable=False)
    field = Column(String, nullable=False)
    memory_id = Column(Integer, ForeignKey('memories.id'), index=True)
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    memory = relationship("Memory", back_populates="metadata")

    def __repr__(self):
        return f"<MetaData(id={self.id}, version={self.version}, field={self.field}, memory_id={self.memory_id}, created_at={self.created_at}, updated_at={self.updated_at})>"
@@ -1,22 +0,0 @@
# operation.py
from datetime import datetime
from sqlalchemy import Column, Integer, String, DateTime, ForeignKey
from sqlalchemy.orm import relationship
import os
import sys
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from database.database import Base


class Operation(Base):
    __tablename__ = 'operations'

    id = Column(Integer, primary_key=True)
    session_id = Column(Integer, ForeignKey('sessions.id'), index=True)
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    session = relationship("Session", back_populates="operations")

    def __repr__(self):
        return f"<Operation(id={self.id}, session_id={self.session_id}, created_at={self.created_at}, updated_at={self.updated_at})>"
@@ -1,23 +0,0 @@
# session.py
from datetime import datetime
from sqlalchemy import Column, Integer, String, DateTime, ForeignKey
from sqlalchemy.orm import relationship
import os
import sys
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from database.database import Base


class Session(Base):
    __tablename__ = 'sessions'

    id = Column(Integer, primary_key=True)
    user_id = Column(Integer, ForeignKey('users.id'), index=True)
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    user = relationship("User", back_populates="sessions")
    operations = relationship("Operation", back_populates="session", cascade="all, delete-orphan")

    def __repr__(self):
        return f"<Session(id={self.id}, user_id={self.user_id}, created_at={self.created_at}, updated_at={self.updated_at})>"
@@ -1,22 +0,0 @@
# test_output.py
from datetime import datetime
from sqlalchemy import Column, Integer, String, DateTime, ForeignKey
from sqlalchemy.orm import relationship
import os
import sys
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from database.database import Base


class TestOutput(Base):
    __tablename__ = 'test_outputs'

    id = Column(Integer, primary_key=True)
    test_set_id = Column(Integer, ForeignKey('test_sets.id'), index=True)
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    test_set = relationship("TestSet", back_populates="test_outputs")

    def __repr__(self):
        return f"<TestOutput(id={self.id}, test_set_id={self.test_set_id}, created_at={self.created_at}, updated_at={self.updated_at})>"
@@ -1,23 +0,0 @@
# test_set.py
from datetime import datetime
from sqlalchemy import Column, Integer, String, DateTime, ForeignKey
from sqlalchemy.orm import relationship
import os
import sys
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from database.database import Base


class TestSet(Base):
    __tablename__ = 'test_sets'

    id = Column(Integer, primary_key=True)
    user_id = Column(Integer, ForeignKey('users.id'), index=True)
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    user = relationship("User", back_populates="test_sets")
    test_outputs = relationship("TestOutput", back_populates="test_set", cascade="all, delete-orphan")

    def __repr__(self):
        return f"<TestSet(id={self.id}, user_id={self.user_id}, created_at={self.created_at}, updated_at={self.updated_at})>"
@@ -1,25 +0,0 @@
# user.py
from datetime import datetime
from sqlalchemy import Column, Integer, String, DateTime
from sqlalchemy.orm import relationship
from sqlalchemy.ext.declarative import declarative_base
import os
import sys
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from database.database import Base


class User(Base):
    __tablename__ = 'users'

    id = Column(Integer, primary_key=True)
    name = Column(String, nullable=False, unique=True, index=True)
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    memories = relationship("Memory", back_populates="user", cascade="all, delete-orphan")
    sessions = relationship("Session", back_populates="user", cascade="all, delete-orphan")
    test_sets = relationship("TestSet", back_populates="user", cascade="all, delete-orphan")

    def __repr__(self):
        return f"<User(id={self.id}, name={self.name}, created_at={self.created_at}, updated_at={self.updated_at})>"
@@ -1,46 +1,5 @@
import numpy as np
# Make sure to install the following packages: dlt, langchain, duckdb, python-dotenv, openai, weaviate-client
import json
from enum import Enum
from io import BytesIO
from typing import Dict, List, Union, Any
import logging

logging.basicConfig(level=logging.INFO)
import marvin
import requests
from deep_translator import GoogleTranslator
from dotenv import load_dotenv
from langchain.agents import initialize_agent, AgentType
from langchain.document_loaders import PyPDFLoader
from langchain.output_parsers import PydanticOutputParser
from langchain.retrievers import WeaviateHybridSearchRetriever
from langchain.tools import tool
from marvin import ai_classifier
from pydantic import parse_obj_as
from weaviate.gql.get import HybridFusion
import numpy as np
load_dotenv()
from langchain import OpenAI
from langchain.chat_models import ChatOpenAI
from typing import Optional, Dict, List, Union

import tracemalloc

tracemalloc.start()

import os
from datetime import datetime
from langchain import PromptTemplate
from langchain.chains.openai_functions import create_structured_output_chain
from langchain.prompts import HumanMessagePromptTemplate, ChatPromptTemplate
from langchain.embeddings.openai import OpenAIEmbeddings
from pydantic import BaseModel, Field
from dotenv import load_dotenv
from langchain.schema import Document, SystemMessage, HumanMessage
import uuid
import humanize
import weaviate

class DifferentiableLayer:
    def __init__(self, attention_modulators: dict):
@ -71,169 +30,3 @@ class DifferentiableLayer:
|
|||
async def get_weights(self):
|
||||
return self.weights
|
||||
|
||||
|
||||
|
||||
async def _summarizer(self, text: str, document:str, max_tokens: int = 1200):
|
||||
"""Summarize text using OpenAI API, to reduce amount of code for modulators contributing to context"""
|
||||
class Summaries(BaseModel):
|
||||
"""Schema for documentGroups"""
|
||||
summary: str = Field(
|
||||
...,
|
||||
description="Summarized document")
|
||||
class SummaryContextList(BaseModel):
|
||||
"""Buffer raw context processed by the buffer"""
|
||||
|
||||
summaries: List[Summaries] = Field(..., description="List of summaries")
|
||||
observation: str = Field(..., description="The original user query")
|
||||
|
||||
parser = PydanticOutputParser(pydantic_object=SummaryContextList)
|
||||
prompt = PromptTemplate(
|
||||
template=" \n{format_instructions}\nSummarize the observation briefly based on the user query, observation is: {query}\n. The document is: {document}",
|
||||
input_variables=["query", "document"],
|
||||
partial_variables={"format_instructions": parser.get_format_instructions()},
|
||||
)
|
||||
|
||||
_input = prompt.format_prompt(query=text, document=document)
|
||||
document_context_result = self.llm_base(_input.to_string())
|
||||
document_context_result_parsed = parser.parse(document_context_result)
|
||||
document_context_result_parsed = json.loads(document_context_result_parsed.json())
|
||||
document_summary = document_context_result_parsed["summaries"][0]["summary"]
|
||||
|
||||
return document_summary
|
||||
|
||||
async def memory_route(self, text_time_diff: str):
|
||||
@ai_classifier
|
||||
class MemoryRoute(Enum):
|
||||
"""Represents classifer for freshness of memories"""
|
||||
|
||||
data_uploaded_now = "1"
|
||||
data_uploaded_very_recently = "0.9"
|
||||
data_uploaded_recently = "0.7"
|
||||
data_uploaded_more_than_a_month_ago = "0.5"
|
||||
data_uploaded_more_than_three_months_ago = "0.3"
|
||||
data_uploaded_more_than_six_months_ago = "0.1"
|
||||
|
||||
namespace = MemoryRoute(str(text_time_diff))
|
||||
|
||||
return namespace
|
||||
|
||||
async def freshness(self, observation: str, namespace: str = None, memory=None) -> list[str]:
|
||||
"""Freshness - Score between 0 and 1 on how often was the information updated in episodic or semantic memory in the past"""
|
||||
logging.info("Starting with Freshness")
|
||||
|
||||
lookup_value = await self.fetch_memories(
|
||||
observation=observation, namespace=namespace
|
||||
)
|
||||
unix_t = lookup_value["data"]["Get"]["EPISODICMEMORY"][0]["_additional"][
|
||||
"lastUpdateTimeUnix"
|
||||
]
|
||||
|
||||
# Convert Unix timestamp to datetime
|
||||
last_update_datetime = datetime.fromtimestamp(int(unix_t) / 1000)
|
||||
time_difference = datetime.now() - last_update_datetime
|
||||
time_difference_text = humanize.naturaltime(time_difference)
|
||||
namespace_ = await self.memory_route(str(time_difference_text))
|
||||
return [namespace_.value, lookup_value]
|
||||
|
||||
async def frequency(self, observation: str, namespace: str, memory) -> list[str]:
|
||||
"""Frequency - Score between 0 and 1 on how often was the information processed in episodic memory in the past
|
||||
Counts the number of times a memory was accessed in the past and divides it by the total number of memories in the episodic memory
|
||||
"""
|
||||
logging.info("Starting with Frequency")
|
||||
weaviate_client = self.init_client(namespace=namespace)
|
||||
|
||||
result_output = await self.fetch_memories(
|
||||
observation=observation, params=None, namespace=namespace
|
||||
)
|
||||
number_of_relevant_events = len(result_output["data"]["Get"]["EPISODICMEMORY"])
|
||||
number_of_total_events = (
|
||||
weaviate_client.query.aggregate(namespace).with_meta_count().do()
|
||||
)
|
||||
frequency = float(number_of_relevant_events) / float(
|
||||
number_of_total_events["data"]["Aggregate"]["EPISODICMEMORY"][0]["meta"][
|
||||
"count"
|
||||
]
|
||||
)
|
||||
summary = await self._summarizer(text=observation, document=result_output["data"]["Get"]["EPISODICMEMORY"][0])
|
||||
logging.info("Frequency summary is %s", str(summary))
|
||||
return [str(frequency), summary]
|
||||
|
||||
async def repetition(self, observation: str, namespace: str, memory) -> list[str]:
|
||||
"""Repetition - Score between 0 and 1 based on how often and at what intervals a memory has been revisited.
|
||||
Accounts for the spacing effect, where memories accessed at increasing intervals are given higher scores.
|
||||
# TO DO -> add metadata column to make sure that the access is not equal to update, and run update vector function each time a memory is accessed
|
||||
"""
|
||||
logging.info("Starting with Repetition")
|
||||
|
||||
result_output = await self.fetch_memories(
|
||||
observation=observation, params=None, namespace=namespace
|
||||
)
|
||||
|
||||
access_times = result_output["data"]["Get"]["EPISODICMEMORY"][0]["_additional"]["lastUpdateTimeUnix"]
|
||||
# Calculate repetition score based on access times
|
||||
if not access_times or len(access_times) == 1:
|
||||
return ["0", result_output["data"]["Get"]["EPISODICMEMORY"][0]]
|
||||
|
||||
# Sort access times
|
||||
access_times = sorted(access_times)
|
||||
# Calculate intervals between consecutive accesses
|
||||
intervals = [access_times[i + 1] - access_times[i] for i in range(len(access_times) - 1)]
|
||||
# A simple scoring mechanism: Longer intervals get higher scores, as they indicate spaced repetition
|
||||
repetition_score = sum([1.0 / (interval + 1) for interval in intervals]) / len(intervals)
|
||||
summary = await self._summarizer(text = observation, document=result_output["data"]["Get"]["EPISODICMEMORY"][0])
|
||||
logging.info("Repetition is %s", str(repetition_score))
|
||||
logging.info("Repetition summary is %s", str(summary))
|
||||
return [str(repetition_score), summary]
|
||||
|
||||
async def relevance(self, observation: str, namespace: str, memory) -> list[str]:
|
||||
"""
|
||||
Fetches the fusion relevance score for a given observation from the episodic memory.
|
||||
Learn more about fusion scores here on Weaviate docs: https://weaviate.io/blog/hybrid-search-fusion-algorithms
|
||||
Parameters:
|
||||
- observation: The user's query or observation.
|
||||
- namespace: The namespace for the data.
|
||||
|
||||
Returns:
|
||||
- The relevance score between 0 and 1.
|
||||
"""
|
||||
logging.info("Starting with Relevance")
|
||||
score = memory["_additional"]["score"]
|
||||
logging.info("Relevance is %s", str(score))
|
||||
return [score, "fusion score"]
|
||||
|
||||
async def saliency(self, observation: str, namespace=None, memory=None) -> list[str]:
|
||||
"""Determines saliency by scoring the set of retrieved documents against each other and trying to determine saliency
|
||||
"""
|
||||
logging.info("Starting with Saliency")
|
||||
class SaliencyRawList(BaseModel):
|
||||
"""Schema for documentGroups"""
|
||||
summary: str = Field(
|
||||
...,
|
||||
description="Summarized document")
|
||||
saliency_score: str = Field(
|
||||
None, description="The score between 0 and 1")
|
||||
class SailencyContextList(BaseModel):
|
||||
"""Buffer raw context processed by the buffer"""
|
||||
|
||||
docs: List[SaliencyRawList] = Field(..., description="List of docs")
|
||||
observation: str = Field(..., description="The original user query")
|
||||
|
||||
parser = PydanticOutputParser(pydantic_object=SailencyContextList)
|
||||
prompt = PromptTemplate(
|
||||
template="Determine saliency of documents compared to the other documents retrieved \n{format_instructions}\nSummarize the observation briefly based on the user query, observation is: {query}\n",
|
||||
input_variables=["query"],
|
||||
partial_variables={"format_instructions": parser.get_format_instructions()},
|
||||
)
|
||||
|
||||
_input = prompt.format_prompt(query=observation)
|
||||
document_context_result = self.llm_base(_input.to_string())
|
||||
document_context_result_parsed = parser.parse(document_context_result)
|
||||
document_context_result_parsed = json.loads(document_context_result_parsed.json())
|
||||
saliency_score = document_context_result_parsed["docs"][0]["saliency_score"]
|
||||
saliency_values = document_context_result_parsed["docs"][0]["summary"]
|
||||
|
||||
logging.info("Saliency is %s", str(saliency_score))
|
||||
logging.info("Saliency summary is %s", str(saliency_values))
|
||||
|
||||
return [saliency_score, saliency_values]
|
||||
|
||||
|
|
|
|||
level_2/poetry.lock (generated): 26 changes

@@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand.
# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.

[[package]]
name = "aiohttp"
@ -2377,26 +2377,6 @@ files = [
|
|||
{file = "protobuf-3.20.3.tar.gz", hash = "sha256:2e3427429c9cffebf259491be0af70189607f365c2f41c7c3764af6f337105f2"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "psycopg2"
|
||||
version = "2.9.8"
|
||||
description = "psycopg2 - Python-PostgreSQL Database Adapter"
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
files = [
|
||||
{file = "psycopg2-2.9.8-cp310-cp310-win32.whl", hash = "sha256:2f8594f92bbb5d8b59ffec04e2686c416401e2d4297de1193f8e75235937e71d"},
|
||||
{file = "psycopg2-2.9.8-cp310-cp310-win_amd64.whl", hash = "sha256:f9ecbf504c4eaff90139d5c9b95d47275f2b2651e14eba56392b4041fbf4c2b3"},
|
||||
{file = "psycopg2-2.9.8-cp311-cp311-win32.whl", hash = "sha256:65f81e72136d8b9ac8abf5206938d60f50da424149a43b6073f1546063c0565e"},
|
||||
{file = "psycopg2-2.9.8-cp311-cp311-win_amd64.whl", hash = "sha256:f7e62095d749359b7854143843f27edd7dccfcd3e1d833b880562aa5702d92b0"},
|
||||
{file = "psycopg2-2.9.8-cp37-cp37m-win32.whl", hash = "sha256:81b21424023a290a40884c7f8b0093ba6465b59bd785c18f757e76945f65594c"},
|
||||
{file = "psycopg2-2.9.8-cp37-cp37m-win_amd64.whl", hash = "sha256:67c2f32f3aba79afb15799575e77ee2db6b46b8acf943c21d34d02d4e1041d50"},
|
||||
{file = "psycopg2-2.9.8-cp38-cp38-win32.whl", hash = "sha256:287a64ef168ef7fb9f382964705ff664b342bfff47e7242bf0a04ef203269dd5"},
|
||||
{file = "psycopg2-2.9.8-cp38-cp38-win_amd64.whl", hash = "sha256:dcde3cad4920e29e74bf4e76c072649764914facb2069e6b7fa1ddbebcd49e9f"},
|
||||
{file = "psycopg2-2.9.8-cp39-cp39-win32.whl", hash = "sha256:d4ad050ea50a16731d219c3a85e8f2debf49415a070f0b8331ccc96c81700d9b"},
|
||||
{file = "psycopg2-2.9.8-cp39-cp39-win_amd64.whl", hash = "sha256:d39bb3959788b2c9d7bf5ff762e29f436172b241cd7b47529baac77746fd7918"},
|
||||
{file = "psycopg2-2.9.8.tar.gz", hash = "sha256:3da6488042a53b50933244085f3f91803f1b7271f970f3e5536efa69314f6a49"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ptyprocess"
|
||||
version = "0.7.0"
|
||||
|
|
@@ -3625,7 +3605,7 @@ files = [
]

[package.dependencies]
greenlet = {version = "!=0.4.17", markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\""}
greenlet = {version = "!=0.4.17", markers = "platform_machine == \"win32\" or platform_machine == \"WIN32\" or platform_machine == \"AMD64\" or platform_machine == \"amd64\" or platform_machine == \"x86_64\" or platform_machine == \"ppc64le\" or platform_machine == \"aarch64\""}
typing-extensions = ">=4.2.0"

[package.extras]
@@ -4538,4 +4518,4 @@ multidict = ">=4.0"
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
content-hash = "37e192953f55c48139ec58d83cb0cc7c6724b56c0c4e191d4322c97aed6f079f"
content-hash = "bc306ab25967437b68ef5216af4b68bf6bfdf5cb966bb6493cc3ad91e8888110"
@@ -42,7 +42,6 @@ deep-translator = "^1.11.4"
humanize = "^4.8.0"
deepeval = "^0.10.12"
pymupdf = "^1.23.3"
psycopg2 = "^2.9.8"
@@ -6,17 +6,17 @@ class DocumentMetadataSchemaV1(Schema):
    ltm_memory_id = fields.Str(required=True)
    st_memory_id = fields.Str(required=True)
    buffer_id = fields.Str(required=True)
    version = fields.Str(load_default="")
    agreement_id = fields.Str(load_default="")
    privacy_policy = fields.Str(load_default="")
    terms_of_service = fields.Str(load_default="")
    format = fields.Str(load_default="")
    schema_version = fields.Str(load_default="")
    checksum = fields.Str(load_default="")
    owner = fields.Str(load_default="")
    license = fields.Str(load_default="")
    validity_start = fields.Str(load_default="")
    validity_end = fields.Str(load_default="")
    version = fields.Str(missing="")
    agreement_id = fields.Str(missing="")
    privacy_policy = fields.Str(missing="")
    terms_of_service = fields.Str(missing="")
    format = fields.Str(missing="")
    schema_version = fields.Str(missing="")
    checksum = fields.Str(missing="")
    owner = fields.Str(missing="")
    license = fields.Str(missing="")
    validity_start = fields.Str(missing="")
    validity_end = fields.Str(missing="")

class DocumentMetadataSchemaV2(Schema):
    user_id = fields.Str(required=True)
@@ -24,18 +24,18 @@ class DocumentMetadataSchemaV2(Schema):
    ltm_memory_id = fields.Str(required=True)
    st_memory_id = fields.Str(required=True)
    buffer_id = fields.Str(required=True)
    version = fields.Str(load_default="")
    agreement_id = fields.Str(load_default="")
    privacy_policy = fields.Str(load_default="")
    terms_of_service = fields.Str(load_default="")
    format = fields.Str(load_default="")
    schema_version = fields.Str(load_default="")
    checksum = fields.Str(load_default="")
    owner = fields.Str(load_default="")
    license = fields.Str(load_default="")
    validity_start = fields.Str(load_default="")
    validity_end = fields.Str(load_default="")
    random = fields.Str(load_default="")
    version = fields.Str(missing="")
    agreement_id = fields.Str(missing="")
    privacy_policy = fields.Str(missing="")
    terms_of_service = fields.Str(missing="")
    format = fields.Str(missing="")
    schema_version = fields.Str(missing="")
    checksum = fields.Str(missing="")
    owner = fields.Str(missing="")
    license = fields.Str(missing="")
    validity_start = fields.Str(missing="")
    validity_end = fields.Str(missing="")
    random = fields.Str(missing="")

class DocumentSchema(Schema):
    metadata = fields.Nested(DocumentMetadataSchemaV1, required=True)
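Note on the two hunks above: the revert swaps fields.Str(load_default="") back to fields.Str(missing=""). In marshmallow, missing= is the older name that load_default= replaced (deprecated in 3.13); both declare the value used when the field is absent during load. Illustrative comparison only, with a hypothetical schema name:

from marshmallow import Schema, fields

class ExampleSchema(Schema):
    version = fields.Str(missing="")          # pre-3.13 spelling, as restored by this revert
    # version = fields.Str(load_default="")   # current spelling, as removed by this revert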
@ -1,180 +0,0 @@
|
|||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"ticketType": {
|
||||
"type": "string",
|
||||
"enum": ["online ticket", "ICE ticket"]
|
||||
},
|
||||
"departureDate": {
|
||||
"type": "string",
|
||||
"format": "date"
|
||||
},
|
||||
"priceType": {
|
||||
"type": "string",
|
||||
"enum": ["Flex price (single journey)"]
|
||||
},
|
||||
"class": {
|
||||
"type": "integer",
|
||||
"enum": [1]
|
||||
},
|
||||
"adult": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"quantity": {
|
||||
"type": "integer"
|
||||
},
|
||||
"BC50": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"required": ["quantity", "BC50"]
|
||||
},
|
||||
"journey": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"from": {
|
||||
"type": "string"
|
||||
},
|
||||
"to": {
|
||||
"type": "string"
|
||||
},
|
||||
"via": {
|
||||
"type": "string"
|
||||
},
|
||||
"train": {
|
||||
"type": "string",
|
||||
"enum": ["ICE"]
|
||||
}
|
||||
},
|
||||
"required": ["from", "to", "via", "train"]
|
||||
},
|
||||
"refundPolicy": {
|
||||
"type": "string"
|
||||
},
|
||||
"payment": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"quantity": {
|
||||
"type": "integer"
|
||||
},
|
||||
"price": {
|
||||
"type": "number"
|
||||
},
|
||||
"vat19": {
|
||||
"type": "number"
|
||||
},
|
||||
"vat7": {
|
||||
"type": "number"
|
||||
}
|
||||
},
|
||||
"required": ["name", "quantity", "price", "vat19", "vat7"]
|
||||
}
|
||||
},
|
||||
"total": {
|
||||
"type": "number"
|
||||
},
|
||||
"method": {
|
||||
"type": "string",
|
||||
"enum": ["credit card"]
|
||||
},
|
||||
"transactionDetails": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"amount": {
|
||||
"type": "number"
|
||||
},
|
||||
"VUNumber": {
|
||||
"type": "integer"
|
||||
},
|
||||
"transactionNumber": {
|
||||
"type": "integer"
|
||||
},
|
||||
"date": {
|
||||
"type": "string",
|
||||
"format": "date"
|
||||
},
|
||||
"genNumber": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": ["amount", "VUNumber", "transactionNumber", "date", "genNumber"]
|
||||
}
|
||||
},
|
||||
"required": ["items", "total", "method", "transactionDetails"]
|
||||
},
|
||||
"bookingDetails": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"bookingDate": {
|
||||
"type": "string",
|
||||
"format": "date-time"
|
||||
},
|
||||
"bookingAddress": {
|
||||
"type": "string"
|
||||
},
|
||||
"taxNumber": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": ["bookingDate", "bookingAddress", "taxNumber"]
|
||||
},
|
||||
"journeyDetails": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"validFrom": {
|
||||
"type": "string",
|
||||
"format": "date"
|
||||
},
|
||||
"passengerName": {
|
||||
"type": "string"
|
||||
},
|
||||
"orderNumber": {
|
||||
"type": "string"
|
||||
},
|
||||
"stops": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"stop": {
|
||||
"type": "string"
|
||||
},
|
||||
"date": {
|
||||
"type": "string",
|
||||
"format": "date"
|
||||
},
|
||||
"time": {
|
||||
"type": "string",
|
||||
"format": "time"
|
||||
},
|
||||
"track": {
|
||||
"type": "integer"
|
||||
},
|
||||
"product": {
|
||||
"type": "string"
|
||||
},
|
||||
"reservation": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": ["stop", "date", "time", "track", "product", "reservation"]
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": ["validFrom", "passengerName", "orderNumber", "stops"]
|
||||
},
|
||||
"usageNotes": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": ["ticketType", "departureDate", "priceType", "class", "adult", "journey", "refundPolicy", "payment", "bookingDetails", "journeyDetails", "usageNotes"]
|
||||
}
|
||||
|
|
@@ -1,65 +0,0 @@
import sys
import os

# this is needed to import classes from other modules
script_dir = os.path.dirname(os.path.abspath(__file__))
# Get the parent directory of your script and add it to sys.path
parent_dir = os.path.dirname(script_dir)
sys.path.append(parent_dir)

from database.database import Base, engine
import models.memory
import models.metadatas
import models.operation
import models.sessions
import models.test_output
import models.test_set
import models.user
from sqlalchemy import create_engine, text
import psycopg2
from dotenv import load_dotenv
load_dotenv()


def create_admin_engine(username, password, host):
    admin_url = f"postgresql://{username}:{password}@{host}:5432/bubu"
    return create_engine(admin_url)


def database_exists(username, password, host, db_name):
    engine = create_admin_engine(username, password, host)
    connection = engine.connect()
    query = text(f"SELECT 1 FROM pg_database WHERE datname='{db_name}'")
    result = connection.execute(query).fetchone()
    connection.close()
    engine.dispose()
    return result is not None


def create_database(username, password, host, db_name):
    engine = create_admin_engine(username, password, host)
    connection = engine.raw_connection()
    connection.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
    cursor = connection.cursor()
    cursor.execute(f"CREATE DATABASE {db_name}")
    cursor.close()
    connection.close()
    engine.dispose()


def create_tables():
    Base.metadata.create_all(bind=engine)


if __name__ == "__main__":
    username = os.getenv('POSTGRES_USER')
    password = os.getenv('POSTGRES_PASSWORD')
    database_name = os.getenv('POSTGRES_DB')
    host = os.getenv('POSTGRES_HOST')

    if not database_exists(username, password, host, database_name):
        print(f"Database {database_name} does not exist. Creating...")
        create_database(username, password, host, database_name)
        print(f"Database {database_name} created successfully.")

    create_tables()
@@ -5,11 +5,11 @@ from io import BytesIO

import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from marshmallow import Schema, fields
from loaders.loaders import _document_loader
# Add the parent directory to sys.path

from marshmallow import Schema, fields
from level_2.loaders.loaders import _document_loader
# Add the parent directory to sys.path
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

logging.basicConfig(level=logging.INFO)
import marvin
@@ -23,7 +23,7 @@ import os
from datetime import datetime
from langchain.embeddings.openai import OpenAIEmbeddings
from dotenv import load_dotenv
from schema.semantic.semantic_schema import DocumentSchema, SCHEMA_VERSIONS, DocumentMetadataSchemaV1
from level_2.schema.semantic.semantic_schema import DocumentSchema, SCHEMA_VERSIONS, DocumentMetadataSchemaV1
from langchain.schema import Document
import weaviate