diff --git a/.gitignore b/.gitignore index 6769e21d9..4d1265323 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ + +.env + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] @@ -157,4 +160,5 @@ cython_debug/ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ \ No newline at end of file +#.idea/ + diff --git a/level_2/Readme.md b/level_2/Readme.md index 5b7115177..2b1d36675 100644 --- a/level_2/Readme.md +++ b/level_2/Readme.md @@ -24,21 +24,43 @@ Initial code lets you do three operations: ## Usage -The fast API endpoint accepts prompts and PDF files and returns a JSON object with the generated text. +The FastAPI endpoint accepts prompts and stores data with the help of the Memory Manager. + +The types of memory are: Episodic, Semantic, and Buffer. + +### Endpoint Overview +The Memory API provides the following endpoints: + +- /[memory_type]/add-memory (POST) +- /[memory_type]/fetch-memory (POST) +- /[memory_type]/delete-memory (POST) +- /available-buffer-actions (GET) +- /run-buffer (POST) +- /buffer/create-context (POST) + +Here is a payload example: -```curl - -X POST - -F "prompt=The quick brown fox" - -F "file=@/path/to/file.pdf" - http://localhost:8000/upload/ ``` - { "payload": { "user_id": "681", "session_id": "471", "model_speed": "slow", - "prompt": "Temperature=Cold;Food Type=Ice Cream", - "pdf_url": "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf" + "prompt": "I want ", + "pdf_url": "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf", + "params": { + "version": "1.0", + "agreement_id": "AG123456", + "privacy_policy": "https://example.com/privacy", + "terms_of_service": "https://example.com/terms", + "format": "json", + "schema_version": "1.1", + "checksum": "a1b2c3d4e5f6", + "owner": "John Doe", + "license": "MIT", + "validity_start": "2023-08-01", + "validity_end": "2024-07-31" + } } -} \ No newline at end of file +} +``` \ No newline at end of file diff --git a/level_2/api.py b/level_2/api.py index d2cc74e6c..2e5a8e9fe 100644 --- a/level_2/api.py +++ b/level_2/api.py @@ -1,3 +1,5 @@ +from io import BytesIO + from langchain.document_loaders import PyPDFLoader from level_2_pdf_vectorstore__dlt_contracts import Memory @@ -27,7 +29,7 @@ from dotenv import load_dotenv load_dotenv() - +OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") app = FastAPI(debug=True) @@ -63,82 +65,73 @@ def health_check(): #curl -X POST -H "Content-Type: application/json" -d '{"data": "YourPayload"}' -F "files=@/path/to/your/pdf/file.pdf" http://127.0.0.1:8000/upload/ -from fastapi import FastAPI, UploadFile, File -import requests -import os -import json - -app = FastAPI() - - -from io import BytesIO - class Payload(BaseModel): payload: Dict[str, Any] -@app.post("/upload/", response_model=dict) -async def upload_pdf_and_payload( - payload: Payload, - # files: List[UploadFile] = File(...), -): - try: - # Process the payload - decoded_payload = payload.payload - # except: - # pass - # - # return JSONResponse(content={"response": decoded_payload}, status_code=200) - - # Download the remote PDF if URL is provided - if 'pdf_url' in decoded_payload: - pdf_response = requests.get(decoded_payload['pdf_url']) - pdf_content = pdf_response.content - - logging.info("Downloaded PDF from URL") - - # Create an in-memory file-like object for
the PDF content - pdf_stream = BytesIO(pdf_content) - - contents = pdf_stream.read() - - tmp_location = os.path.join('/tmp', "tmp.pdf") - with open(tmp_location, 'wb') as tmp_file: - tmp_file.write(contents) - - logging.info("Wrote PDF from URL") - - # Process the PDF using PyPDFLoader - loader = PyPDFLoader(tmp_location) - pages = loader.load_and_split() - logging.info(" PDF split into pages") - Memory_ = Memory(index_name="my-agent", user_id='555' ) - await Memory_.async_init() - Memory_._add_episodic_memory(user_input="I want to get a schema for my data", content =pages) - - - # Run the buffer - response = Memory_._run_buffer(user_input="I want to get a schema for my data") - return JSONResponse(content={"response": response}, status_code=200) - - #to do: add the user id to the payload - #to do add the raw pdf to payload - # bb = await Memory_._run_buffer(user_input=decoded_payload['prompt']) - # print(bb) - - - except Exception as e: - - return {"error": str(e)} - # Here you can perform your processing on the PDF contents - # results.append({"filename": file.filename, "size": len(contents)}) - - # Append the in-memory file to the files list - # files.append(UploadFile(pdf_stream, filename="downloaded.pdf")) - +# @app.post("/upload/", response_model=dict) +# async def upload_pdf_and_payload( +# payload: Payload, +# # files: List[UploadFile] = File(...), +# ): +# try: +# # Process the payload +# decoded_payload = payload.payload +# # except: +# # pass +# # +# # return JSONResponse(content={"response": decoded_payload}, status_code=200) +# +# # Download the remote PDF if URL is provided +# if 'pdf_url' in decoded_payload: +# pdf_response = requests.get(decoded_payload['pdf_url']) +# pdf_content = pdf_response.content +# +# logging.info("Downloaded PDF from URL") +# +# # Create an in-memory file-like object for the PDF content +# pdf_stream = BytesIO(pdf_content) +# +# contents = pdf_stream.read() +# +# tmp_location = os.path.join('/tmp', "tmp.pdf") +# with open(tmp_location, 'wb') as tmp_file: +# tmp_file.write(contents) +# +# logging.info("Wrote PDF from URL") +# +# # Process the PDF using PyPDFLoader +# loader = PyPDFLoader(tmp_location) +# pages = loader.load_and_split() +# logging.info(" PDF split into pages") +# Memory_ = Memory(index_name="my-agent", user_id='555' ) +# await Memory_.async_init() +# Memory_._add_episodic_memory(user_input="I want to get a schema for my data", content =pages) +# +# +# # Run the buffer +# response = Memory_._run_buffer(user_input="I want to get a schema for my data") +# return JSONResponse(content={"response": response}, status_code=200) +# +# #to do: add the user id to the payload +# #to do add the raw pdf to payload +# # bb = await Memory_._run_buffer(user_input=decoded_payload['prompt']) +# # print(bb) +# +# +# except Exception as e: +# +# return {"error": str(e)} +# # Here you can perform your processing on the PDF contents +# # results.append({"filename": file.filename, "size": len(contents)}) +# +# # Append the in-memory file to the files list +# # files.append(UploadFile(pdf_stream, filename="downloaded.pdf")) +# def memory_factory(memory_type): + load_dotenv() class Payload(BaseModel): payload: Dict[str, Any] @app.post("/{memory_type}/add-memory", response_model=dict) @@ -148,23 +141,47 @@ def memory_factory(memory_type): ): try: + logging.info(" Init PDF processing") + decoded_payload = payload.payload - Memory_ = Memory( user_id='555') + if 'pdf_url' in decoded_payload: + pdf_response = requests.get(decoded_payload['pdf_url']) + pdf_content = 
pdf_response.content - await Memory_.async_init() + logging.info("Downloaded PDF from URL") - memory_class = getattr(Memory_, f"_add_{memory_type}_memory", None) - output= memory_class(observation=decoded_payload['prompt']) - return JSONResponse(content={"response": output}, status_code=200) + # Create an in-memory file-like object for the PDF content + pdf_stream = BytesIO(pdf_content) + + contents = pdf_stream.read() + + tmp_location = os.path.join('/tmp', "tmp.pdf") + with open(tmp_location, 'wb') as tmp_file: + tmp_file.write(contents) + + logging.info("Wrote PDF from URL") + + # Process the PDF using PyPDFLoader + loader = PyPDFLoader(tmp_location) + # pages = loader.load_and_split() + logging.info(" PDF split into pages") + + Memory_ = Memory(user_id=decoded_payload['user_id']) + + await Memory_.async_init() + + memory_class = getattr(Memory_, f"_add_{memory_type}_memory", None) + output = await memory_class(observation=str(loader), params=decoded_payload['params']) + return JSONResponse(content={"response": output}, status_code=200) except Exception as e: return JSONResponse(content={"response": {"error": str(e)}}, status_code=503) @app.post("/{memory_type}/fetch-memory", response_model=dict) - async def add_memory( + async def fetch_memory( payload: Payload, # files: List[UploadFile] = File(...), ): @@ -172,7 +189,7 @@ def memory_factory(memory_type): decoded_payload = payload.payload - Memory_ = Memory(user_id='555') + Memory_ = Memory(user_id=decoded_payload['user_id']) await Memory_.async_init() @@ -185,7 +202,7 @@ def memory_factory(memory_type): return JSONResponse(content={"response": {"error": str(e)}}, status_code=503) @app.post("/{memory_type}/delete-memory", response_model=dict) - async def add_memory( + async def delete_memory( payload: Payload, # files: List[UploadFile] = File(...), ): @@ -193,7 +210,7 @@ def memory_factory(memory_type): decoded_payload = payload.payload - Memory_ = Memory(user_id='555') + Memory_ = Memory(user_id=decoded_payload['user_id']) await Memory_.async_init() @@ -210,6 +227,71 @@ for memory_type in memory_list: memory_factory(memory_type) + +@app.get("/available-buffer-actions", response_model=dict) +async def available_buffer_actions( + payload: Payload, + # files: List[UploadFile] = File(...), +): + try: + + decoded_payload = payload.payload + + Memory_ = Memory(user_id=decoded_payload['user_id']) + + await Memory_.async_init() + + # memory_class = getattr(Memory_, f"_delete_{memory_type}_memory", None) + output = await Memory_._available_operations() + return JSONResponse(content={"response": output}, status_code=200) + + except Exception as e: + + return JSONResponse(content={"response": {"error": str(e)}}, status_code=503) + +@app.post("/run-buffer", response_model=dict) +async def run_buffer( + payload: Payload, + # files: List[UploadFile] = File(...), +): + try: + + decoded_payload = payload.payload + + Memory_ = Memory(user_id=decoded_payload['user_id']) + + await Memory_.async_init() + + # memory_class = getattr(Memory_, f"_delete_{memory_type}_memory", None) + output = await Memory_._run_buffer(user_input=decoded_payload['prompt'], params=decoded_payload['params']) + return JSONResponse(content={"response": output}, status_code=200) + + except Exception as e: + + return JSONResponse(content={"response": {"error": str(e)}}, status_code=503) + +@app.post("/buffer/create-context", response_model=dict) +async def create_buffer_context( + payload: Payload, + # files: List[UploadFile] = File(...), +): + try: + + 
decoded_payload = payload.payload + + Memory_ = Memory(user_id=decoded_payload['user_id']) + + await Memory_.async_init() + + # memory_class = getattr(Memory_, f"_delete_{memory_type}_memory", None) + output = await Memory_._create_buffer_context(user_input=decoded_payload['prompt'], params=decoded_payload['params']) + return JSONResponse(content={"response": output}, status_code=200) + + except Exception as e: + + return JSONResponse(content={"response": {"error": str(e)}}, status_code=503) + + # # # Process each uploaded PDF file # results = [] diff --git a/level_2/level_2_pdf_vectorstore__dlt_contracts.py b/level_2/level_2_pdf_vectorstore__dlt_contracts.py index 1f8b1cdf9..85598728c 100644 --- a/level_2/level_2_pdf_vectorstore__dlt_contracts.py +++ b/level_2/level_2_pdf_vectorstore__dlt_contracts.py @@ -1,4 +1,4 @@ -#Make sure to install the following packages: dlt, langchain, duckdb, python-dotenv, openai, weaviate-client +# Make sure to install the following packages: dlt, langchain, duckdb, python-dotenv, openai, weaviate-client import dlt from langchain import PromptTemplate, LLMChain @@ -13,9 +13,10 @@ import asyncio from typing import Any, Dict, List, Coroutine from deep_translator import (GoogleTranslator) from langchain.chat_models import ChatOpenAI +from langchain.output_parsers import PydanticOutputParser from langchain.schema import LLMResult, HumanMessage from langchain.callbacks.base import AsyncCallbackHandler, BaseCallbackHandler - +from pydantic import BaseModel, Field, parse_obj_as from langchain.memory import VectorStoreRetrieverMemory from marvin import ai_classifier from enum import Enum @@ -29,16 +30,17 @@ from langchain.tools import tool from langchain.vectorstores import Weaviate import uuid from dotenv import load_dotenv + load_dotenv() from pathlib import Path from langchain import OpenAI, LLMMathChain from langchain.chat_models import ChatOpenAI from langchain.prompts import ChatPromptTemplate +import uuid +from typing import Optional -import os - from datetime import datetime import os from datetime import datetime @@ -56,12 +58,19 @@ from langchain.schema import Document, SystemMessage, HumanMessage from langchain.vectorstores import Weaviate import weaviate import uuid +import humanize +import pinecone +import weaviate load_dotenv() +OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") + class MyCustomSyncHandler(BaseCallbackHandler): def on_llm_new_token(self, token: str, **kwargs) -> None: print(f"Sync handler being called in a `thread_pool_executor`: token: {token}") + + class MyCustomAsyncHandler(AsyncCallbackHandler): """Async callback handler that can be used to handle callbacks from langchain.""" @@ -80,58 +89,67 @@ class MyCustomAsyncHandler(AsyncCallbackHandler): await asyncio.sleep(0.3) print("Hi! I just woke up. 
Your llm is ending") + + + +# Assuming OpenAIEmbeddings and other necessary imports are available + +# Default Values +LTM_MEMORY_ID_DEFAULT = '00000' +ST_MEMORY_ID_DEFAULT = '0000' +BUFFER_ID_DEFAULT = '0000' + + +class VectorDBFactory: + def create_vector_db(self, user_id: str, index_name: str, memory_id: str, + ltm_memory_id: str = LTM_MEMORY_ID_DEFAULT, + st_memory_id: str = ST_MEMORY_ID_DEFAULT, + buffer_id: str = BUFFER_ID_DEFAULT, + db_type: str = "pinecone", + namespace: str = None): + db_map = { + "pinecone": PineconeVectorDB, + "weaviate": WeaviateVectorDB + } + + if db_type in db_map: + return db_map[db_type](user_id, index_name, memory_id, ltm_memory_id, st_memory_id, buffer_id, namespace) + + raise ValueError(f"Unsupported database type: {db_type}") + + class VectorDB: - OPENAI_TEMPERATURE = float(os.getenv("OPENAI_TEMPERATURE", 0.0)) OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") - def __init__(self, user_id: str, index_name: str, memory_id:str, ltm_memory_id:str='00000', st_memory_id:str='0000', buffer_id:str='0000', db_type: str = "pinecone", namespace:str = None): + def __init__(self, user_id: str, index_name: str, memory_id: str, + ltm_memory_id: str = LTM_MEMORY_ID_DEFAULT, + st_memory_id: str = ST_MEMORY_ID_DEFAULT, + buffer_id: str = BUFFER_ID_DEFAULT, namespace: str = None): self.user_id = user_id self.index_name = index_name - self.db_type = db_type - self.namespace=namespace + self.namespace = namespace self.memory_id = memory_id self.ltm_memory_id = ltm_memory_id self.st_memory_id = st_memory_id self.buffer_id = buffer_id - # if self.db_type == "pinecone": - # self.vectorstore = self.init_pinecone(self.index_name) - if self.db_type == "weaviate": - self.init_weaviate(namespace=self.namespace) - else: - raise ValueError(f"Unsupported database type: {db_type}") - if self.db_type == "weaviate": - self.init_weaviate_client(namespace=self.namespace) - else: - raise ValueError(f"Unsupported VectorDB client type: {db_type}") - load_dotenv() + + +class PineconeVectorDB(VectorDB): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.init_pinecone(self.index_name) def init_pinecone(self, index_name): - load_dotenv() - PINECONE_API_KEY = os.getenv("PINECONE_API_KEY", "") - PINECONE_API_ENV = os.getenv("PINECONE_API_ENV", "") - pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_API_ENV) - pinecone.Index(index_name) - vectorstore: Pinecone = Pinecone.from_existing_index( + # Pinecone initialization logic + pass - index_name=self.index_name, - embedding=OpenAIEmbeddings(), - namespace='RESULT' - ) - return vectorstore - def init_weaviate_client(self, namespace:str): - embeddings = OpenAIEmbeddings() - auth_config = weaviate.auth.AuthApiKey(api_key=os.environ.get('WEAVIATE_API_KEY')) - client = weaviate.Client( - url=os.environ.get('WEAVIATE_URL'), - auth_client_secret=auth_config, +class WeaviateVectorDB(VectorDB): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.init_weaviate(self.namespace) - additional_headers={ - "X-OpenAI-Api-Key": os.environ.get('OPENAI_API_KEY') - } - ) - return client - - def init_weaviate(self, namespace:str): + def init_weaviate(self, namespace: str): + # Weaviate initialization logic embeddings = OpenAIEmbeddings() auth_config = weaviate.auth.AuthApiKey(api_key=os.environ.get('WEAVIATE_API_KEY')) client = weaviate.Client( @@ -150,99 +168,131 @@ class VectorDB: embedding=embeddings, create_schema_if_missing=True, ) - return retriever + return retriever # If this is part of the 
initialization, call it here. - async def add_memories(self, observation: str, page: str = "", source: str = ""): - if self.db_type == "pinecone": - # Update Pinecone memories here - vectorstore: Pinecone = Pinecone.from_existing_index( - index_name=self.index_name, embedding=OpenAIEmbeddings(), namespace=self.namespace - ) + def init_weaviate_client(self, namespace: str): + # Weaviate client initialization logic + auth_config = weaviate.auth.AuthApiKey(api_key=os.environ.get('WEAVIATE_API_KEY')) + client = weaviate.Client( + url=os.environ.get('WEAVIATE_URL'), + auth_client_secret=auth_config, + additional_headers={ + "X-OpenAI-Api-Key": os.environ.get('OPENAI_API_KEY') + } + ) + return client + async def add_memories(self, observation: str, params: dict = None): - retriever = vectorstore.as_retriever() - retriever.add_documents( - [ - Document( - page_content=observation, - metadata={ - "inserted_at": datetime.now(), - "text": observation, - "user_id": self.user_id, - "page": page, - "source": source - }, - namespace=self.namespace, - ) - ] - ) - elif self.db_type == "weaviate": - # Update Weaviate memories here - print(self.namespace) - retriever = self.init_weaviate( self.namespace) + # Update Weaviate memories here + print(self.namespace) + retriever = self.init_weaviate(self.namespace) + return retriever.add_documents([ + Document( + metadata={ + "text": observation, + "user_id": str(self.user_id), + "memory_id": str(self.memory_id), + "ltm_memory_id": str(self.ltm_memory_id), + "st_memory_id": str(self.st_memory_id), + "buffer_id": str(self.buffer_id), + "version": params.get('version', None) or "", + "agreement_id": params.get('agreement_id', None) or "", + "privacy_policy": params.get('privacy_policy', None) or "", + "terms_of_service": params.get('terms_of_service', None) or "", + "format": params.get('format', None) or "", + "schema_version": params.get('schema_version', None) or "", + "checksum": params.get('checksum', None) or "", + "owner": params.get('owner', None) or "", + "license": params.get('license', None) or "", + "validity_start": params.get('validity_start', None) or "", + "validity_end": params.get('validity_end', None) or "" - return retriever.add_documents([ - Document( - metadata={ - "text": observation, - "user_id": str(self.user_id), - "memory_id": str(self.memory_id), - "ltm_memory_id": str(self.ltm_memory_id), - "st_memory_id": str(self.st_memory_id), - "buffer_id": str(self.buffer_id), + # **source_metadata, + }, + page_content=observation, + )] + ) - # **source_metadata, - }, - page_content=observation, - )] - ) # def get_pinecone_vectorstore(self, namespace: str) -> pinecone.VectorStore: # return Pinecone.from_existing_index( # index_name=self.index, embedding=OpenAIEmbeddings(), namespace=namespace # ) - async def fetch_memories(self, observation: str, namespace:str, params = None): - if self.db_type == "pinecone": - # Fetch Pinecone memories here - pass - elif self.db_type == "weaviate": - # Fetch Weaviate memories here - """ - Get documents from weaviate. + async def fetch_memories(self, observation: str, namespace: str, params: dict = None): - Args a json containing: - query (str): The query string. - path (list): The path for filtering, e.g., ['year']. - operator (str): The operator for filtering, e.g., 'Equal'. - valueText (str): The value for filtering, e.g., '2017*'. + # Fetch Weaviate memories here + """ + Get documents from weaviate. 
- Example: - get_from_weaviate(query="some query", path=['year'], operator='Equal', valueText='2017*') - """ - client = self.init_weaviate_client(self.namespace) + Args a json containing: + query (str): The query string. + path (list): The path for filtering, e.g., ['year']. + operator (str): The operator for filtering, e.g., 'Equal'. + valueText (str): The value for filtering, e.g., '2017*'. - print(self.namespace) - print(str(datetime.now())) - print(observation) - if namespace is None: - namespace= self.namespace + Example: + get_from_weaviate(query="some query", path=['year'], operator='Equal', valueText='2017*') + """ + client = self.init_weaviate_client(self.namespace) - params_user_id= { - "path": ["user_id"], - "operator": "Like", - "valueText": self.user_id - } + print(self.namespace) + print(str(datetime.now())) + print(observation) + if namespace is None: + namespace = self.namespace - if params: - query_output = client.query.get(namespace, ["text","user_id", "memory_id", "ltm_memory_id", "st_memory_id", "buffer_id"]).with_where(params).with_additional(['id','creationTimeUnix','lastUpdateTimeUnix']).with_where(params_user_id).do() - return query_output - else: - query_output = client.query.get(namespace, ["text","user_id", "memory_id", "ltm_memory_id","st_memory_id", "buffer_id"]).with_additional(['id','creationTimeUnix','lastUpdateTimeUnix']).with_where(params_user_id).do() - return query_output + params_user_id = { + "path": ["user_id"], + "operator": "Like", + "valueText": self.user_id + } - async def delete_memories(self, params:dict = None): + if params: + query_output = client.query.get(namespace, ["text" + , "user_id" + , "memory_id" + , "ltm_memory_id" + , "st_memory_id" + , "buffer_id" + , "version", + "agreement_id", + "privacy_policy", + "terms_of_service", + "format", + "schema_version", + "checksum", + "owner", + "license", + "validity_start", + "validity_end"]).with_where(params).with_additional( + ['id', 'creationTimeUnix', 'lastUpdateTimeUnix', "score"]).with_where(params_user_id).do() + return query_output + else: + query_output = client.query.get(namespace, ["text", + "user_id", + "memory_id", + "ltm_memory_id", + "st_memory_id", + "buffer_id", + "version", + "agreement_id", + "privacy_policy", + "terms_of_service", + "format", + "schema_version", + "checksum", + "owner", + "license", + "validity_start", + "validity_end" + ]).with_additional( + ['id', 'creationTimeUnix', 'lastUpdateTimeUnix', "score"]).with_where(params_user_id).do() + return query_output + + async def delete_memories(self, params: dict = None): client = self.init_weaviate_client(self.namespace) if params: where_filter = { @@ -256,8 +306,8 @@ class VectorDB: where=where_filter, ) else: - #Delete all objects - + # Delete all objects + print("HERE IS THE USER ID", self.user_id) return client.batch.delete_objects( class_name=self.namespace, where={ @@ -267,8 +317,9 @@ class VectorDB: } ) - def update_memories(self, observation, namespace:str,params:dict = None): + def update_memories(self, observation, namespace: str, params: dict = None): client = self.init_weaviate_client(self.namespace) + client.data_object.update( data_object={ "text": observation, @@ -277,6 +328,17 @@ class VectorDB: "ltm_memory_id": str(self.ltm_memory_id), "st_memory_id": str(self.st_memory_id), "buffer_id": str(self.buffer_id), + "version": params.get('version', None) or "", + "agreement_id": params.get('agreement_id', None) or "", + "privacy_policy": params.get('privacy_policy', None) or "", + "terms_of_service": 
params.get('terms_of_service', None) or "", + "format": params.get('format', None) or "", + "schema_version": params.get('schema_version', None) or "", + "checksum": params.get('checksum', None) or "", + "owner": params.get('owner', None) or "", + "license": params.get('license', None) or "", + "validity_start": params.get('validity_start', None) or "", + "validity_end": params.get('validity_end', None) or "" # **source_metadata, @@ -287,451 +349,412 @@ class VectorDB: ) return - - -class SemanticMemory: - def __init__(self, user_id: str, memory_id:str, ltm_memory_id:str, index_name: str, db_type:str="weaviate", namespace:str="SEMANTICMEMORY"): - # Add any semantic memory-related attributes or setup here - self.user_id=user_id +class BaseMemory: + def __init__(self, user_id: str, memory_id: Optional[str], index_name: Optional[str], db_type: str, namespace: str): + self.user_id = user_id + self.memory_id = memory_id self.index_name = index_name self.namespace = namespace - self.semantic_memory_id = str(uuid.uuid4()) - self.memory_id = memory_id - self.ltm_memory_id = ltm_memory_id - self.vector_db = VectorDB(user_id=user_id, memory_id= self.memory_id, ltm_memory_id = self.ltm_memory_id, index_name=index_name, db_type=db_type, namespace=self.namespace) + self.memory_type_id = str(uuid.uuid4()) self.db_type = db_type + factory = VectorDBFactory() + self.vector_db = factory.create_vector_db(self.user_id, self.index_name, self.memory_id, db_type=self.db_type, + namespace=self.namespace) - - - async def _add_memories(self, semantic_memory: str="None") ->list[str]: - """Update semantic memory for the user""" - + async def add_memories(self, observation: Optional[str] = None, params: Optional[dict] = None): if self.db_type == "weaviate": - out = await self.vector_db.add_memories( observation = semantic_memory) - return out - - elif self.db_type == "pinecone": - pass - - - async def _fetch_memories(self, observation: str,params:str=None) -> Coroutine[Any, Any, Any]: - """Fetch related characteristics, preferences or dislikes for a user.""" - # self.init_pinecone(index_name=self.index) + return await self.vector_db.add_memories(observation=observation, params=params) + # Add other db_type conditions if necessary + async def fetch_memories(self, observation: str, params: Optional[str] = None): if self.db_type == "weaviate": - return await self.vector_db.fetch_memories(observation, params) - elif self.db_type == "pinecone": - pass - - async def _delete_memories(self,params:str=None) -> Coroutine[Any, Any, Any]: - """Fetch related characteristics, preferences or dislikes for a user.""" - # self.init_pinecone(index_name=self.index) - + async def delete_memories(self, params: Optional[str] = None): if self.db_type == "weaviate": + return await self.vector_db.delete_memories(params) - return await self.vector_db.delete_memories( params=params) - - elif self.db_type == "pinecone": - pass - -class EpisodicMemory: - def __init__(self, user_id: str, memory_id:str, ltm_memory_id:str, index_name: str, db_type:str="weaviate", namespace:str="EPISODICMEMORY"): - # Add any semantic memory-related attributes or setup here - self.user_id=user_id - self.index_name = index_name - self.namespace = namespace - self.episodic_memory_id = str(uuid.uuid4()) - self.memory_id = memory_id - self.ltm_memory_id = ltm_memory_id - self.vector_db = VectorDB(user_id=user_id, memory_id= self.memory_id, ltm_memory_id = self.ltm_memory_id, index_name=index_name, db_type=db_type, namespace=self.namespace) - self.db_type = db_type + # 
Additional methods for specific Memory can be added here - - async def _add_memories(self ,observation:str=None) -> list[str]: - """Update semantic memory for the user""" - - if self.db_type == "weaviate": - return await self.vector_db.add_memories( observation = observation) - - elif self.db_type == "pinecone": - pass +class SemanticMemory(BaseMemory): + def __init__(self, user_id: str, memory_id: Optional[str], index_name: Optional[str], db_type: str = "weaviate"): + super().__init__(user_id, memory_id, index_name, db_type, namespace="SEMANTICMEMORY") - def _fetch_memories(self, observation: str,params:str=None) -> Coroutine[Any, Any, Any]: - """Fetch related characteristics, preferences or dislikes for a user.""" - # self.init_pinecone(index_name=self.index) - - if self.db_type == "weaviate": - - return self.vector_db.fetch_memories(observation, params) - - elif self.db_type == "pinecone": - pass - async def _delete_memories(self, params:str=None) -> Coroutine[Any, Any, Any]: - """Fetch related characteristics, preferences or dislikes for a user.""" - # self.init_pinecone(index_name=self.index) - - if self.db_type == "weaviate": - - return await self.vector_db.delete_memories( params=params) - - elif self.db_type == "pinecone": - pass - -class LongTermMemory: - def __init__(self, user_id: str = "676", memory_id:str=None, index_name: str = None, namespace:str=None, db_type:str="weaviate"): - self.user_id = user_id - self.memory_id = memory_id - self.ltm_memory_id = str(uuid.uuid4()) - self.index_name = index_name - self.namespace = namespace - self.db_type = db_type - # self.episodic_memory = EpisodicMemory() - self.semantic_memory = SemanticMemory(user_id = self.user_id, memory_id=self.memory_id, ltm_memory_id = self.ltm_memory_id, index_name=self.index_name, db_type=self.db_type) - self.episodic_memory = EpisodicMemory(user_id=self.user_id, memory_id=self.memory_id, - ltm_memory_id=self.ltm_memory_id, index_name=self.index_name, - db_type=self.db_type) +class EpisodicMemory(BaseMemory): + def __init__(self, user_id: str, memory_id: Optional[str], index_name: Optional[str], db_type: str = "weaviate"): + super().__init__(user_id, memory_id, index_name, db_type, namespace="EPISODICMEMORY") -class ShortTermMemory: - def __init__(self, user_id: str = "676", memory_id:str=None, index_name: str = None, namespace:str=None, db_type:str="weaviate"): - # Add any short-term memory-related attributes or setup here - self.user_id = user_id - self.memory_id = memory_id - self.namespace = namespace - self.db_type = db_type - self.stm_memory_id = str(uuid.uuid4()) - self.index_name = index_name - self.episodic_buffer = EpisodicBuffer(user_id=self.user_id, memory_id=self.memory_id, index_name=self.index_name, db_type=self.db_type) +class EpisodicBuffer(BaseMemory): + def __init__(self, user_id: str, memory_id: Optional[str], index_name: Optional[str], db_type: str = "weaviate"): + super().__init__(user_id, memory_id, index_name, db_type, namespace="BUFFERMEMORY") - - -class EpisodicBuffer: - def __init__(self, user_id: str = "676", memory_id:str=None, index_name: str = None, namespace:str='EPISODICBUFFER', db_type:str="weaviate"): - # Add any short-term memory-related attributes or setup here - self.user_id = user_id - self.memory_id = memory_id - self.namespace = namespace - self.db_type = db_type self.st_memory_id = "blah" - self.index_name = index_name - self.llm= ChatOpenAI( + self.llm = ChatOpenAI( temperature=0.0, max_tokens=1200, openai_api_key=os.environ.get('OPENAI_API_KEY'), 
model_name="gpt-4-0613", callbacks=[MyCustomSyncHandler(), MyCustomAsyncHandler()], ) + self.llm_base = OpenAI( + temperature=0.0, + max_tokens=1200, + openai_api_key=os.environ.get('OPENAI_API_KEY'), + model_name="gpt-4-0613" + ) - # self.vector_db = VectorDB(user_id=user_id, memory_id= self.memory_id, st_memory_id = self.st_memory_id, index_name=index_name, db_type=db_type, namespace=self.namespace) - - - def _compute_weights(self, context: str): - """Computes the weights for the buffer""" - pass - - def _temporal_weighting(self, context: str): - """Computes the temporal weighting for the buffer""" - pass - - # async def infer_schema_from_text(self, text: str): - # """Infer schema from text""" - # - # prompt_ = """ You are a json schema master. Create a JSON schema based on the following data and don't write anything else: {prompt} """ - # - # complete_query = PromptTemplate( - # input_variables=["prompt"], - # template=prompt_, - # ) - # - # chain = LLMChain( - # llm=self.llm, prompt=complete_query, verbose=True - # ) - # chain_result = chain.run(prompt=text).strip() - # - # json_data = json.dumps(chain_result) - # return json_data - - async def _fetch_memories(self, observation: str,namespace:str) -> str: - vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id, - index_name=self.index_name, db_type=self.db_type, namespace=namespace) - - - query = await vector_db.fetch_memories(observation=observation) - return query - - async def _add_memories(self, observation: str,namespace:str): - vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id, - index_name=self.index_name, db_type=self.db_type, namespace=namespace) - - - query = await vector_db.add_memories(observation) - return query - - async def _delete_memories(self, params:str=None) -> Coroutine[Any, Any, Any]: - """Fetch related characteristics, preferences or dislikes for a user.""" - # self.init_pinecone(index_name=self.index) - - if self.db_type == "weaviate": - - return await self.vector_db.delete_memories( params=params) - - elif self.db_type == "pinecone": - pass - - async def freshness(self, observation: str,namespace:str) -> str: - """Freshness - Score between 1 and 5 on how often was the information processed in episodic memory in the past""" + async def freshness(self, observation: str, namespace: str = None) -> list[str]: + """Freshness - Score between 1 and 5 on how often was the information updated in episodic or semantic memory in the past""" memory = Memory(user_id=self.user_id) await memory.async_init() - # gg = await memory._run_buffer(user_input= "bla", content = "blablabla ") - # print(gg) + lookup_value = await memory._fetch_episodic_memory(observation=observation) + unix_t = lookup_value["data"]["Get"]["EPISODICMEMORY"][0]["_additional"]["lastUpdateTimeUnix"] - ggur = await memory._fetch_episodic_memory(observation="bla bla bla") - print(ggur) - # @ai_classifier - # class MemoryRoute(Enum): - # """Represents classifer for semantic fetching of memories""" - # - # storage_of_documents_and_knowledge_to_memory = "SEMANTICMEMORY" - # raw_information_currently_processed_in_short_term_memory = "EPISODICBUFFER" - # raw_information_kept_in_short_term_memory = "SHORTTERMMEMORY" - # long_term_recollections_of_past_events_and_emotions = "EPISODICMEMORY" - # raw_information_to_store_as_events = "EVENTBUFFER" - # - # namespace= MemoryRoute(observation) + # Convert Unix timestamp to datetime + last_update_datetime = 
datetime.fromtimestamp(int(unix_t) / 1000) + time_difference = datetime.now() - last_update_datetime + time_difference_text = humanize.naturaltime(time_difference) + marvin.settings.openai.api_key = os.environ.get('OPENAI_API_KEY') - return ggur + @ai_classifier + class MemoryRoute(Enum): + """Represents classifier for freshness of memories""" - async def encoding(self, document: str, namespace: str = "EPISODICBUFFER") -> None: + data_uploaded_now = "0" + data_uploaded_very_recently = "1" + data_uploaded_recently = "2" + data_uploaded_more_than_a_month_ago = "3" + data_uploaded_more_than_three_months_ago = "4" + data_uploaded_more_than_six_months_ago = "5" + + namespace = MemoryRoute(str(time_difference_text)) + return [namespace.value, lookup_value] + + async def frequency(self, observation: str, namespace: str) -> list[str]: + """Frequency - Score between 1 and 5 on how often was the information processed in episodic memory in the past + Counts the number of times a memory was accessed in the past and divides it by the total number of memories in the episodic memory """ + client = self.vector_db.init_weaviate_client(self.namespace) + + memory = Memory(user_id=self.user_id) + await memory.async_init() + + result_output = await memory._fetch_episodic_memory(observation=observation) + number_of_relevant_events = len(result_output["data"]["Get"]["EPISODICMEMORY"]) + # unwrap the aggregate count from the Weaviate response before dividing + number_of_total_events = client.query.aggregate(self.namespace).with_meta_count().do()["data"]["Aggregate"][self.namespace][0]["meta"]["count"] + frequency = float(number_of_relevant_events) / float(number_of_total_events) + return [str(frequency), result_output["data"]["Get"]["EPISODICMEMORY"][0]] + + async def relevance(self, observation: str) -> list[str]: + """Relevance - Score between 1 and 5 on how often was the final information relevant to the user in the past. + Stored in the episodic memory, mainly to show how well a buffer did the job + Starts at 1, gets updated based on the user feedback """ + + return ["5", "memory"] + + async def saliency(self, observation: str) -> list[str]: + """Determines saliency by finding relevance between user input and document schema values.
+ After finding document schema value relevant for the user, it forms a new query based on the schema value and the user input """ + + return ["5", "memory"] + + # @ai_classifier + # class MemoryRoute(Enum): + # """Represents classifer for freshness of memories""" + # + # data_uploaded_now = "0" + # data_uploaded_very_recently = "1" + # data_uploaded_recently = "2" + # data_uploaded_more_than_a_month_ago = "3" + # data_uploaded_more_than_three_months_ago = "4" + # data_uploaded_more_than_six_months_ago = "5" + # + # namespace= MemoryRoute(observation) + + # return ggur + + async def encoding(self, document: str, namespace: str = "EPISODICBUFFER", params: dict = None) -> list[str]: """Encoding for the buffer, stores raw data in the buffer Note, this is not comp-sci encoding, but rather encoding in the sense of storing the content in the buffer""" - vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id, - index_name=self.index_name, db_type=self.db_type, namespace=namespace) - - query = await vector_db.add_memories(document) + query = await self.add_memories(document, params=params) return query - async def main_buffer(self, user_input=None, content=None): - """AI buffer to convert unstructured data to structured data""" - # Here we define the user prompt and the structure of the output we desire - # prompt = output[0].page_content + async def available_operations(self) -> list[str]: + """Determines what operations are available for the user to process PDFs""" + + return ["translate", "structure", "load to database", "load to semantic memory", "load to episodic memory", + "load to buffer"] + + async def buffer_context(self, user_input=None, content=None, params=None): + """Generates the context to be used for the buffer and passed to the agent""" + # we get a list of available operations for our buffer to consider + # these operations are what we can do with the data, in the context of PDFs (load, translate, structure, etc) + list_of_operations = await self.available_operations() + + try: + # we delete all memories in the episodic buffer, so we can start fresh + await self.delete_memories() + except Exception: + # in case there are no memories, we pass + pass + + # we just filter the data here to make sure input is clean + prompt_filter = ChatPromptTemplate.from_template( + "Filter and remove unnecessary information that is not relevant in the user query, keep it as original as possible: {query}") + chain_filter = prompt_filter | self.llm + output = await chain_filter.ainvoke({"query": user_input}) + + # this part is mostly unfinished but the idea is to apply different algorithms to the data to fetch the most relevant information from the vector stores + context = [] + if params: + + if "freshness" in params: + params.get('freshness', None) # get the value of freshness + freshness = await self.freshness(observation=str(output)) + context.append(freshness) + + elif "frequency" in params: + params.get('frequency', None) + frequency = await self.frequency(observation=str(output), namespace=self.namespace) + print("frequency", frequency) + context.append(frequency) + + # fix this so it actually filters + else: + # defaults to semantic search if we don't want to apply algorithms on the vectordb data + memory = Memory(user_id=self.user_id) + await memory.async_init() + + lookup_value_episodic = await memory._fetch_episodic_memory(observation=str(output)) + lookup_value_semantic = await memory._fetch_semantic_memory(observation=str(output), params=None) + lookup_value_buffer = await
self.fetch_memories(observation=str(output)) + + context.append(lookup_value_buffer) + context.append(lookup_value_semantic) + context.append(lookup_value_episodic) + + # copy the context over into the buffer + # do i need to do it for the episodic + raw data, might make sense + print("HERE IS THE CONTEXT", context) + + class BufferRawContextTerms(BaseModel): + """Schema for documentGroups""" + semantic_search_term: str = Field(..., + description="The search term to use to get relevant input based on user query") + document_description: str = Field(None, description="The short summary of what the document is about") + document_relevance: str = Field(None, + description="The relevance of the document for the task on the scale from 1 to 5") + + class BufferRawContextList(BaseModel): + """Buffer raw context processed by the buffer""" + docs: List[BufferRawContextTerms] = Field(..., description="List of docs") + user_query: str = Field(..., description="The original user query") + + parser = PydanticOutputParser(pydantic_object=BufferRawContextList) + + prompt = PromptTemplate( + template="Summarize and create semantic search queries and relevant document summaries for the user query.\n{format_instructions}\nOriginal query is: {query}\n Retrieved context is: {context}", + input_variables=["query", "context"], + partial_variables={"format_instructions": parser.get_format_instructions()}, + ) + + _input = prompt.format_prompt(query=user_input, context=context) + + document_context_result = self.llm_base(_input.to_string()) + + document_context_result_parsed = parser.parse(document_context_result) + + print("HERE ARE THE DOCS PARSED AND STRUCTURED", document_context_result_parsed) + + class Task(BaseModel): + """Schema for an individual task.""" + task_order: str = Field(..., description="The order at which the task needs to be performed") + task_name: str = Field(None, description="The task that needs to be performed") + operation: str = Field(None, description="The operation to be performed") + original_query: str = Field(None, description="Original user query provided") + + class TaskList(BaseModel): + """Schema for the record containing a list of tasks.""" + tasks: List[Task] = Field(..., description="List of tasks") + + prompt_filter_chunk = f"The raw context data is {str(document_context_result_parsed)} Based on available operations {list_of_operations} determine only the relevant list of steps and operations sequentially based {output}" + # chain_filter_chunk = prompt_filter_chunk | self.llm.bind(function_call={"TaskList": "tasks"}, functions=TaskList) + # output_chunk = await chain_filter_chunk.ainvoke({"query": output, "list_of_operations": list_of_operations}) + prompt_msgs = [ + SystemMessage( + content="You are a world class algorithm for decomposing prompts into steps and operations and choosing relevant ones" + ), + HumanMessage(content="Decompose based on the following prompt:"), + HumanMessagePromptTemplate.from_template("{input}"), + HumanMessage(content="Tips: Make sure to answer in the correct format"), + HumanMessage(content="Tips: Only choose actions that are relevant to the user query and ignore others") + + ] + prompt_ = ChatPromptTemplate(messages=prompt_msgs) + chain = create_structured_output_chain(TaskList, self.llm, prompt_, verbose=True) + from langchain.callbacks import get_openai_callback + with get_openai_callback() as cb: + output = await chain.arun(input=prompt_filter_chunk, verbose=True) + print(cb) + # output = json.dumps(output) + my_object = 
parse_obj_as(TaskList, output) + print("HERE IS THE OUTPUT", my_object.json()) + + data = json.loads(my_object.json()) + + # Extract the list of tasks + tasks_list = data["tasks"] + return tasks_list + + async def main_buffer(self, user_input=None, content=None, params=None): + + """AI buffer to understand user PDF query, prioritize memory info and process it based on available operations""" + + memory = Memory(user_id=self.user_id) + await memory.async_init() + tasks_list = await self.buffer_context(user_input=user_input, content=content, params=params) + result_tasks = [] + + for task in tasks_list: + class PromptWrapper(BaseModel): + observation: str = Field( + description="observation we want to fetch from vectordb" + ) + + @tool("convert_to_structured", args_schema=PromptWrapper, return_direct=True) + def convert_to_structured(observation=None, json_schema=None): + """Convert unstructured data to structured data""" + BASE_DIR = os.getcwd() + json_path = os.path.join(BASE_DIR, "schema_registry", "ticket_schema.json") + + def load_json_or_infer_schema(file_path, document_path): + """Load JSON schema from file or infer schema from text""" + + # Attempt to load the JSON file + with open(file_path, 'r') as file: + json_schema = json.load(file) + return json_schema + + json_schema = load_json_or_infer_schema(json_path, None) + + def run_open_ai_mapper(observation=None, json_schema=None): + """Convert unstructured data to structured data""" + + prompt_msgs = [ + SystemMessage( + content="You are a world class algorithm converting unstructured data into structured data." + ), + HumanMessage(content="Convert unstructured data to structured data:"), + HumanMessagePromptTemplate.from_template("{input}"), + HumanMessage(content="Tips: Make sure to answer in the correct format"), + ] + prompt_ = ChatPromptTemplate(messages=prompt_msgs) + chain_funct = create_structured_output_chain(json_schema, prompt=prompt_, llm=self.llm, + verbose=True) + output = chain_funct.run(input=observation, llm=self.llm) + return output + + result = run_open_ai_mapper(observation, json_schema) + return result + + class TranslateText(BaseModel): + observation: str = Field( + description="observation we want to translate" + ) + + @tool("translate_to_de", args_schema=TranslateText, return_direct=True) + def translate_to_de(observation, args_schema=TranslateText): + """Translate to English""" + out = GoogleTranslator(source='auto', target='de').translate(text=observation) + return out + + agent = initialize_agent( + llm=self.llm, + tools=[translate_to_de, convert_to_structured], + agent=AgentType.OPENAI_FUNCTIONS, + + verbose=True, + ) + print("HERE IS THE TASK", task) + output = agent.run(input=task) + print(output) + result_tasks.append(task) + result_tasks.append(output) + + print("HERE IS THE RESULT TASKS", str(result_tasks)) + + await self.encoding(str(result_tasks), self.namespace, params=params) + + buffer_result = await self.fetch_memories(observation=str(output)) + + print("HERE IS THE RESULT TASKS", str(buffer_result)) + + class EpisodicTask(BaseModel): + """Schema for an individual task.""" + task_order: str = Field(..., description="The order at which the task needs to be performed") + task_name: str = Field(None, description="The task that needs to be performed") + operation: str = Field(None, description="The operation to be performed") + operation_result: str = Field(None, description="The result of the operation") + + class EpisodicList(BaseModel): + """Schema for the record containing a list of tasks.""" + 
tasks: List[EpisodicTask] = Field(..., description="List of tasks") + start_date: str = Field(..., description="The order at which the task needs to be performed") + end_date: str = Field(..., description="The order at which the task needs to be performed") + user_query: str = Field(..., description="The order at which the task needs to be performed") + + parser = PydanticOutputParser(pydantic_object=EpisodicList) + + prompt = PromptTemplate( + template="Format the result.\n{format_instructions}\nOriginal query is: {query}\n Steps are: {steps}, buffer is: {buffer}", + input_variables=["query", "steps", "buffer"], + partial_variables={"format_instructions": parser.get_format_instructions()}, + ) + + _input = prompt.format_prompt(query=user_input, steps=str(tasks_list), buffer=buffer_result) + + # return "a few things to do like load episodic memory in a structured format" + + output = self.llm_base(_input.to_string()) + result_parsing = parser.parse(output) + print("here is the parsing result", result_parsing) + lookup_value = await memory._add_episodic_memory(observation=str(output), params=params) + await self.delete_memories() + return lookup_value - if content is not None: +class LongTermMemory: + def __init__(self, user_id: str = "676", memory_id: Optional[str] = None, index_name: Optional[str] = None, + db_type: str = "weaviate"): + self.user_id = user_id + self.memory_id = memory_id + self.ltm_memory_id = str(uuid.uuid4()) + self.index_name = index_name + self.db_type = db_type + self.semantic_memory = SemanticMemory(user_id, memory_id, index_name, db_type) + self.episodic_memory = EpisodicMemory(user_id, memory_id, index_name, db_type) - #We need to encode the content. Note, this is not comp-sci encoding, but rather encoding in the sense of storing the content in the buffer - output_translated = GoogleTranslator(source='auto', target='en').translate(text=content) - await self.encoding(output_translated) - freshness_score =await self.freshness(output_translated, namespace="EPISODICBUFFER") - print(freshness_score) - # shows how much the data is relevant for the user, provided by the user in a separate step, starts at 0 - user_relevance_score ="0" - # similarity score between the user input and the content already available in the buffer + +class ShortTermMemory: + def __init__(self, user_id: str = "676", memory_id: Optional[str] = None, index_name: Optional[str] = None, + db_type: str = "weaviate"): + self.user_id = user_id + self.memory_id = memory_id + self.stm_memory_id = str(uuid.uuid4()) + self.index_name = index_name + self.db_type = db_type + self.episodic_buffer = EpisodicBuffer(user_id, memory_id, index_name, db_type) - # prompt_filter = ChatPromptTemplate.from_template("Filter and remove uneccessary information that is not relevant in the user query {query}") - # chain_filter = prompt_filter | self.llm - # output = await chain_filter.ainvoke({"query": user_input}) - # print(output) - - if content is None: - # Sensory and Linguistic Processing - prompt_filter = ChatPromptTemplate.from_template("Filter and remove uneccessary information that is not relevant in the user query {query}") - chain_filter = prompt_filter | self.llm - output = await chain_filter.ainvoke({"query": user_input}) - translation = GoogleTranslator(source='auto', target='en').translate(text=output.content) - - - def top_down_processing(): - """Top-down processing""" - pass - - def bottom_up_processing(): - """Bottom-up processing""" - pass - - - def interactive_processing(): - """interactive processing""" 
- pass - - - - - - working_memory_activation = "bla" - - - prompt_chunk = ChatPromptTemplate.from_template("Can you break down the instruction 'Structure a PDF and load it into duckdb' into smaller tasks or actions? Return only tasks or actions. Be brief") - chain_chunk = prompt_chunk | self.llm - output_chunks = await chain_chunk.ainvoke({"query": output.content}) - - - print(output_chunks.content) - - - - - - # vectorstore = Weaviate.from_documents(documents, embeddings, client=client, by_text=False) - # retriever = WeaviateHybridSearchRetriever( - # client=client, - # index_name="EVENTBUFFER", - # text_key="text", - # attributes=[], - # embedding=embeddings, - # create_schema_if_missing=True, - # ) - - # vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id, - # index_name=self.index_name, db_type=self.db_type, namespace="EVENTBUFFER") - - # query = vector_db. - - - # retriever = vectorstore.as_retriever(search_kwargs=dict(k=1)) - # memory = VectorStoreRetrieverMemory(retriever=retriever) - # class PromptWrapper(BaseModel): - # observation: str = Field( - # description="observation we want to fetch from vectordb" - # ) - # # , - # # json_schema: str = Field(description="json schema we want to infer") - # @tool("convert_to_structured", args_schema=PromptWrapper, return_direct=True) - # def convert_to_structured( observation=None, json_schema=None): - # """Convert unstructured data to structured data""" - # BASE_DIR = os.getcwd() - # json_path = os.path.join(BASE_DIR, "schema_registry", "ticket_schema.json") - # - # def load_json_or_infer_schema(file_path, document_path): - # """Load JSON schema from file or infer schema from text""" - # - # # Attempt to load the JSON file - # with open(file_path, 'r') as file: - # json_schema = json.load(file) - # return json_schema - # - # json_schema =load_json_or_infer_schema(json_path, None) - # def run_open_ai_mapper(observation=None, json_schema=None): - # """Convert unstructured data to structured data""" - # - # prompt_msgs = [ - # SystemMessage( - # content="You are a world class algorithm converting unstructured data into structured data." 
- # ), - # HumanMessage(content="Convert unstructured data to structured data:"), - # HumanMessagePromptTemplate.from_template("{input}"), - # HumanMessage(content="Tips: Make sure to answer in the correct format"), - # ] - # prompt_ = ChatPromptTemplate(messages=prompt_msgs) - # chain_funct = create_structured_output_chain(json_schema, prompt=prompt_, llm=self.llm, verbose=True) - # output = chain_funct.run(input=observation, llm=self.llm) - # yield output - # pipeline = dlt.pipeline(pipeline_name="train_ticket", destination='duckdb', dataset_name='train_ticket_data') - # info = pipeline.run(data=run_open_ai_mapper(prompt, json_schema)) - # return print(info) - # - # - # class GoalWrapper(BaseModel): - # observation: str = Field( - # description="observation we want to fetch from vectordb" - # ) - # - # @tool("fetch_memory_wrapper", args_schema=GoalWrapper, return_direct=True) - # def fetch_memory_wrapper(observation, args_schema=GoalWrapper): - # """Fetches data from the VectorDB and returns it as a python dictionary.""" - # print("HELLO, HERE IS THE OBSERVATION: ", observation) - # - # marvin.settings.openai.api_key = os.environ.get('OPENAI_API_KEY') - # @ai_classifier - # class MemoryRoute(Enum): - # """Represents distinct routes for different memory types.""" - # - # storage_of_documents_and_knowledge_to_memory = "SEMANTICMEMORY" - # raw_information_currently_processed_in_short_term_memory = "EPISODICBUFFER" - # raw_information_kept_in_short_term_memory = "SHORTTERMMEMORY" - # long_term_recollections_of_past_events_and_emotions = "EPISODICMEMORY" - # raw_information_to_store_as_events = "EVENTBUFFER" - # - # namespace= MemoryRoute(observation) - # vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id, - # index_name=self.index_name, db_type=self.db_type, namespace=namespace.value) - # - # - # query = vector_db.fetch_memories(observation) - # - # return query - # - # class UpdatePreferences(BaseModel): - # observation: str = Field( - # description="observation we want to fetch from vectordb" - # ) - # - # @tool("add_memories_wrapper", args_schema=UpdatePreferences, return_direct=True) - # def add_memories_wrapper(observation, args_schema=UpdatePreferences): - # """Updates user preferences in the VectorDB.""" - # @ai_classifier - # class MemoryRoute(Enum): - # """Represents distinct routes for different memory types.""" - # - # storage_of_documents_and_knowledge_to_memory = "SEMANTICMEMORY" - # raw_information_currently_processed_in_short_term_memory = "EPISODICBUFFER" - # raw_information_kept_in_short_term_memory = "SHORTTERMMEMORY" - # long_term_recollections_of_past_events_and_emotions = "EPISODICMEMORY" - # raw_information_to_store_as_events = "EVENTBUFFER" - # - # namespace= MemoryRoute(observation) - # print("HELLO, HERE IS THE OBSERVATION 2: ") - # vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id, - # index_name=self.index_name, db_type=self.db_type, namespace=namespace.value) - # return vector_db.add_memories(observation) - # - # agent = initialize_agent( - # llm=self.llm, - # tools=[convert_to_structured,fetch_memory_wrapper, add_memories_wrapper], - # agent=AgentType.OPENAI_FUNCTIONS, - # - # verbose=True, - # ) - # - # prompt = """ - # Based on all the history and information of this user, decide based on user query query: {query} which of the following tasks needs to be done: - # 1. Memory retrieval , 2. Memory update, 3. 
Convert data to structured If the query is not any of these, then classify it as 'Other' - # Return the result in format: 'Result_type': 'Goal', "Original_query": "Original query" - # """ - # - # # template = Template(prompt) - # # output = template.render(query=user_input) - # # complete_query = output - # complete_query = PromptTemplate( - # input_variables=[ "query"], template=prompt - # ) - # summary_chain = LLMChain( - # llm=self.llm, prompt=complete_query, verbose=True - # ) - # from langchain.chains import SimpleSequentialChain - # - # overall_chain = SimpleSequentialChain( - # chains=[summary_chain, agent], verbose=True - # ) - # output = overall_chain.run(user_input) - # return output - - - - -#DEFINE STM -#DEFINE LTM class Memory: load_dotenv() @@ -739,7 +762,7 @@ class Memory: OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") def __init__(self, user_id: str = "676", index_name: str = None, knowledge_source: str = None, - knowledge_type: str = None, db_type:str="weaviate", namespace:str=None) -> None: + knowledge_type: str = None, db_type: str = "weaviate", namespace: str = None) -> None: self.user_id = user_id self.index_name = index_name self.db_type = db_type @@ -752,110 +775,147 @@ class Memory: load_dotenv() # Asynchronous factory function for creating LongTermMemory - async def async_create_long_term_memory(self,user_id, memory_id, index_name, namespace, db_type): + async def async_create_long_term_memory(self, user_id, memory_id, index_name, db_type): # Perform asynchronous initialization steps if needed return LongTermMemory( - user_id=user_id, memory_id=memory_id, index_name=index_name, - namespace=namespace, db_type=db_type - ) - async def async_init(self): - # Asynchronous initialization of LongTermMemory and ShortTermMemory - self.long_term_memory = await self.async_create_long_term_memory( user_id=self.user_id, memory_id=self.memory_id, index_name=self.index_name, - namespace=self.namespace, db_type=self.db_type - ) - - async def async_create_short_term_memory(self, user_id, memory_id, index_name, db_type): - # Perform asynchronous initialization steps if needed - return ShortTermMemory( - user_id=user_id, memory_id=memory_id, index_name=index_name, db_type=db_type + db_type=self.db_type ) async def async_init(self): # Asynchronous initialization of LongTermMemory and ShortTermMemory self.long_term_memory = await self.async_create_long_term_memory( user_id=self.user_id, memory_id=self.memory_id, index_name=self.index_name, - namespace=self.namespace, db_type=self.db_type + db_type=self.db_type ) self.short_term_memory = await self.async_create_short_term_memory( user_id=self.user_id, memory_id=self.memory_id, index_name=self.index_name, db_type=self.db_type ) + + async def async_create_short_term_memory(self, user_id, memory_id, index_name, db_type): + # Perform asynchronous initialization steps if needed + return ShortTermMemory( + user_id=self.user_id, memory_id=self.memory_id, index_name=self.index_name, db_type=self.db_type + ) + + # self.short_term_memory = await ShortTermMemory.async_init( # user_id=self.user_id, memory_id=self.memory_id, index_name=self.index_name, db_type=self.db_type # ) - async def _add_semantic_memory(self, semantic_memory:str): - return await self.long_term_memory.semantic_memory._add_memories( - semantic_memory=semantic_memory + async def _add_semantic_memory(self, semantic_memory: str, params: dict = None): + return await self.long_term_memory.semantic_memory.add_memories( + semantic_memory=semantic_memory, params=params ) async def 
_fetch_semantic_memory(self, observation, params): - return await self.long_term_memory.semantic_memory._fetch_memories( + return await self.long_term_memory.semantic_memory.fetch_memories( observation=observation, params=params - - ) - async def _delete_semantic_memory(self, params:str=None): - return await self.long_term_memory.semantic_memory._delete_memories( + + async def _delete_semantic_memory(self, params: str = None): + return await self.long_term_memory.semantic_memory.delete_memories( params=params ) - async def _add_episodic_memory(self, observation:str): - return await self.long_term_memory.episodic_memory._add_memories( - observation=observation + async def _add_episodic_memory(self, observation: str, params: dict = None): + return await self.long_term_memory.episodic_memory.add_memories( + observation=observation, params=params ) - async def _fetch_episodic_memory(self, observation, params:str=None): - return await self.long_term_memory.episodic_memory._fetch_memories( + async def _fetch_episodic_memory(self, observation, params: str = None): + return await self.long_term_memory.episodic_memory.fetch_memories( observation=observation, params=params ) - async def _delete_episodic_memory(self, params:str=None): - return await self.long_term_memory.episodic_memory._delete_memories( - params=params + async def _delete_episodic_memory(self, params: str = None): + return await self.long_term_memory.episodic_memory.delete_memories( + params=params ) - async def _run_buffer(self, user_input:str, content:str=None): - return await self.short_term_memory.episodic_buffer.main_buffer(user_input=user_input, content=content) + async def _run_buffer(self, user_input: str, content: str = None, params:str=None): + return await self.short_term_memory.episodic_buffer.main_buffer(user_input=user_input, content=content, params=params) + async def _add_buffer_memory(self, user_input: str, namespace: str = None, params: dict = None): + return await self.short_term_memory.episodic_buffer.add_memories(observation=user_input, + params=params) + async def _fetch_buffer_memory(self, user_input: str, namespace: str = None): + return await self.short_term_memory.episodic_buffer.fetch_memories(observation=user_input) - async def _add_buffer_memory(self, user_input: str, namespace: str = None ): - return await self.short_term_memory.episodic_buffer._add_memories(observation=user_input, namespace=namespace) - - async def _fetch_buffer_memory(self, user_input: str, namespace: str = None ): - return await self.short_term_memory.episodic_buffer._fetch_memories(observation=user_input, namespace=namespace) - - async def _delete_buffer_memory(self, params:str=None): - return await self.long_term_memory.episodic_buffer._delete_memories( - params=params + async def _delete_buffer_memory(self, params: str = None): + return await self.short_term_memory.episodic_buffer.delete_memories( + params=params ) + async def _create_buffer_context(self, user_input: str, content: str = None, params:str=None): + return await self.short_term_memory.episodic_buffer.buffer_context( + user_input=user_input, content=content, params=params + ) + async def _available_operations(self): + return await self.long_term_memory.episodic_buffer.available_operations() + async def main(): memory = Memory(user_id="123") await memory.async_init() + params = { + "version": "1.0", + "agreement_id": "AG123456", + "privacy_policy": "https://example.com/privacy", + "terms_of_service": "https://example.com/terms", + "format": "json", + "schema_version": 
"1.1", + "checksum": "a1b2c3d4e5f6", + "owner": "John Doe", + "license": "MIT", + "validity_start": "2023-08-01", + "validity_end": "2024-07-31" + } - # gg = await memory._run_buffer(user_input= "bla", content = "blablabla ") - # print(gg) - - gg = await memory._delete_episodic_memory() + gg = await memory._run_buffer(user_input="i NEED TRANSLATION TO GERMAN ", content="i NEED TRANSLATION TO GERMAN ", params=params) print(gg) - # ggur = await memory._add_episodic_memory(observation = "bla bla bla") + # gg = await memory._fetch_buffer_memory(user_input="i TO GERMAN ") + # print(gg) + + episodic = """{ + "start_date": "2023-08-23", + "end_date": "2023-08-30", + "user_query": "How can I plan a healthy diet?", + "action_steps": [ + { + "step_number": 1, + "description": "Research and gather information about basic principles of a healthy diet." + }, + { + "step_number": 2, + "description": "Create a weekly meal plan that includes a variety of nutritious foods." + }, + { + "step_number": 3, + "description": "Prepare and cook meals according to your meal plan. Include fruits, vegetables, lean proteins, and whole grains." + } + ] + }""" + # + # ggur = await memory._delete_buffer_memory() # print(ggur) - # ggur = await memory._fetch_episodic_memory(observation = "bla bla bla") + # ggur = await memory._add_buffer_memory(user_input = episodic, params=params) # print(ggur) - # fff = await memory._fetch_memories_buffer(user_input = "bla bla bla", namespace="Test") - # print(fff) + + # fff = await memory._fetch_episodic_memory(observation = "healthy diet") + # print(len(fff["data"]["Get"]["EPISODICMEMORY"])) + if __name__ == "__main__": import asyncio + asyncio.run(main()) # bb = agent._update_semantic_memory(semantic_memory="Users core summary") diff --git a/level_2/poetry.lock b/level_2/poetry.lock index 7b95c721a..8233ad038 100644 --- a/level_2/poetry.lock +++ b/level_2/poetry.lock @@ -261,17 +261,17 @@ numpy = {version = ">=1.19.0", markers = "python_version >= \"3.9\""} [[package]] name = "boto3" -version = "1.28.30" +version = "1.28.32" description = "The AWS SDK for Python" optional = false python-versions = ">= 3.7" files = [ - {file = "boto3-1.28.30-py3-none-any.whl", hash = "sha256:e095ede98d3680e65966ab71f273b7d86938f5d853773ef96f4cb646277c2a4b"}, - {file = "boto3-1.28.30.tar.gz", hash = "sha256:2b509a959966a572f15db5768a18066ce1f53022ac53fca9421c620219fa3998"}, + {file = "boto3-1.28.32-py3-none-any.whl", hash = "sha256:ed787f250ce2562c7744395bdf32b5a7bc9184126ef50a75e97bcb66043dccf3"}, + {file = "boto3-1.28.32.tar.gz", hash = "sha256:b505faa126db84e226f6f8d242a798fae30a725f0cac8a76c6aca9ace4e8eb28"}, ] [package.dependencies] -botocore = ">=1.31.30,<1.32.0" +botocore = ">=1.31.32,<1.32.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.6.0,<0.7.0" @@ -280,13 +280,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.31.30" +version = "1.31.32" description = "Low-level, data-driven core of boto 3." 
optional = false python-versions = ">= 3.7" files = [ - {file = "botocore-1.31.30-py3-none-any.whl", hash = "sha256:269f20dcadd8dfd0c26d0e6fbceb84814ff6638ff3aafcc5324b9fb9949a7051"}, - {file = "botocore-1.31.30.tar.gz", hash = "sha256:3cf6a9d7621b897c9ff23cd02113826141b3dd3d7e90273b661efc4dc05f84e2"}, + {file = "botocore-1.31.32-py3-none-any.whl", hash = "sha256:8992ac186988c4b4cc168e8e479e9472da1442b193c1bf7c9dcd1877ec62d23c"}, + {file = "botocore-1.31.32.tar.gz", hash = "sha256:7a07d8dc8cc47bf23af39409ada81f388eb78233e1bb2cde0c415756da753664"}, ] [package.dependencies] @@ -1053,13 +1053,13 @@ requests = ">=2.20.0,<3.0" [[package]] name = "gptcache" -version = "0.1.39.1" +version = "0.1.40" description = "GPTCache, a powerful caching library that can be used to speed up and lower the cost of chat applications that rely on the LLM service. GPTCache works as a memcache for AIGC applications, similar to how Redis works for traditional applications." optional = false python-versions = ">=3.8.1" files = [ - {file = "gptcache-0.1.39.1-py3-none-any.whl", hash = "sha256:81355f7878e12a820dccb017f8a45ea44b73178dac07108c56db664a476a4a07"}, - {file = "gptcache-0.1.39.1.tar.gz", hash = "sha256:a9c629fdeaa94b78a6cfe707a5f9a3a52b361655a3f01327709ca00c78a500eb"}, + {file = "gptcache-0.1.40-py3-none-any.whl", hash = "sha256:ba323e5e46b100fa7663b5f4d164cc2aee60f343184ed03ec2d2bb95e9f47c50"}, + {file = "gptcache-0.1.40.tar.gz", hash = "sha256:5fe4bcf3a45946177cb845b3e1ec01159f10622600e1384b9de0c7c6065d10d5"}, ] [package.dependencies] @@ -1362,39 +1362,38 @@ files = [ [[package]] name = "langchain" -version = "0.0.250" +version = "0.0.271" description = "Building applications with LLMs through composability" optional = false python-versions = ">=3.8.1,<4.0" files = [ - {file = "langchain-0.0.250-py3-none-any.whl", hash = "sha256:65b3520f507e848edd88a35a70700971bbbf822fda65f621ccf44a3bb36ad03a"}, - {file = "langchain-0.0.250.tar.gz", hash = "sha256:1b5775d6a472f633bb06e794f58cb6ff5d1eeb2da603b64a6a15013f8f61ee3f"}, + {file = "langchain-0.0.271-py3-none-any.whl", hash = "sha256:3ca68c9cf04edb42ce9225adc65ee739e5e00ed55d08aeb06a47391f3c59018c"}, + {file = "langchain-0.0.271.tar.gz", hash = "sha256:f79d19405b755608216d1850de4a945a2bceb35c5ca8e4f7a4f9e29a366b097e"}, ] [package.dependencies] aiohttp = ">=3.8.3,<4.0.0" async-timeout = {version = ">=4.0.0,<5.0.0", markers = "python_version < \"3.11\""} dataclasses-json = ">=0.5.7,<0.6.0" -langsmith = ">=0.0.11,<0.1.0" +langsmith = ">=0.0.21,<0.1.0" numexpr = ">=2.8.4,<3.0.0" numpy = ">=1,<2" -openapi-schema-pydantic = ">=1.2,<2.0" -pydantic = ">=1,<2" -PyYAML = ">=5.4.1" +pydantic = ">=1,<3" +PyYAML = ">=5.3" requests = ">=2,<3" SQLAlchemy = ">=1.4,<3" tenacity = ">=8.1.0,<9.0.0" [package.extras] -all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "anthropic (>=0.3,<0.4)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.6.8,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", 
"google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jina (>=3.14,<4.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "libdeeplake (>=0.0.60,<0.0.61)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=0.11.0,<0.12.0)", "momento (>=1.5.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<3.0.0)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "octoai-sdk (>=0.1.1,<0.2.0)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "spacy (>=3,<4)", "steamship (>=2.16.9,<3.0.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.4.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)", "xinference (>=0.0.6,<0.0.7)"] +all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.6.8,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "libdeeplake (>=0.0.60,<0.0.61)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=1.2.4,<2.0.0)", "momento (>=1.5.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<3.0.0)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "tensorflow-text 
(>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.4.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"] azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0b6)", "openai (>=0,<1)"] clarifai = ["clarifai (>=9.1.0)"] cohere = ["cohere (>=4,<5)"] docarray = ["docarray[hnswlib] (>=0.32.0,<0.33.0)"] embeddings = ["sentence-transformers (>=2,<3)"] -extended-testing = ["atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.0.7,<0.0.8)", "chardet (>=5.1.0,<6.0.0)", "esprima (>=4.0.1,<5.0.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "openai (>=0,<1)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "tqdm (>=4.48.0)", "xinference (>=0.0.6,<0.0.7)", "zep-python (>=0.32)"] +extended-testing = ["amazon-textract-caller (<2)", "atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.0.7,<0.0.8)", "chardet (>=5.1.0,<6.0.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "openai (>=0,<1)", "openapi-schema-pydantic (>=1.2,<2.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "tqdm (>=4.48.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"] javascript = ["esprima (>=4.0.1,<5.0.0)"] -llms = ["anthropic (>=0.3,<0.4)", "clarifai (>=9.1.0)", "cohere (>=4,<5)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (>=0,<1)", "openllm (>=0.1.19)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)", "xinference (>=0.0.6,<0.0.7)"] +llms = ["clarifai (>=9.1.0)", "cohere (>=4,<5)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)"] openai = ["openai (>=0,<1)", "tiktoken (>=0.3.2,<0.4.0)"] qdrant = ["qdrant-client 
(>=1.3.1,<2.0.0)"] text-helpers = ["chardet (>=5.1.0,<6.0.0)"] @@ -1415,13 +1414,13 @@ data = ["language-data (>=1.1,<2.0)"] [[package]] name = "langsmith" -version = "0.0.25" +version = "0.0.26" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." optional = false python-versions = ">=3.8.1,<4.0" files = [ - {file = "langsmith-0.0.25-py3-none-any.whl", hash = "sha256:d595435ad21fa6077550d7c85472935d1e8241afa042c1e29287d2c95c3ed151"}, - {file = "langsmith-0.0.25.tar.gz", hash = "sha256:e728c398fc1adaa0ed8abeb21f6a92d7fb19fe3ab49d3911c22b03dfe25935d6"}, + {file = "langsmith-0.0.26-py3-none-any.whl", hash = "sha256:61c1d4582104d96edde04e1eea1dae347645b691c44489a5871341a2a1a2a1eb"}, + {file = "langsmith-0.0.26.tar.gz", hash = "sha256:80a4ef1b663a24a460d25b9986ab2010c5d06b6061c65be473abafc0647d191a"}, ] [package.dependencies] @@ -1875,20 +1874,6 @@ dev = ["black (>=21.6b0,<22.0)", "pytest (==6.*)", "pytest-asyncio", "pytest-moc embeddings = ["matplotlib", "numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "plotly", "scikit-learn (>=1.0.2)", "scipy", "tenacity (>=8.0.1)"] wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "wandb"] -[[package]] -name = "openapi-schema-pydantic" -version = "1.2.4" -description = "OpenAPI (v3) specification schema as pydantic class" -optional = false -python-versions = ">=3.6.1" -files = [ - {file = "openapi-schema-pydantic-1.2.4.tar.gz", hash = "sha256:3e22cf58b74a69f752cc7e5f1537f6e44164282db2700cbbcd3bb99ddd065196"}, - {file = "openapi_schema_pydantic-1.2.4-py3-none-any.whl", hash = "sha256:a932ecc5dcbb308950282088956e94dea069c9823c84e507d64f6b622222098c"}, -] - -[package.dependencies] -pydantic = ">=1.8.2" - [[package]] name = "orjson" version = "3.9.5" @@ -3561,13 +3546,13 @@ colorama = {version = ">=0.4.6", markers = "sys_platform == \"win32\" and python [[package]] name = "weaviate-client" -version = "3.22.1" +version = "3.23.0" description = "A python native Weaviate client" optional = false python-versions = ">=3.8" files = [ - {file = "weaviate-client-3.22.1.tar.gz", hash = "sha256:aff61bd3f5d74df20a62328443e3aa9c860d5330fdfb19c4d8ddc44cb604032f"}, - {file = "weaviate_client-3.22.1-py3-none-any.whl", hash = "sha256:01843a4899a227300e570409e77628e9d1b28476313f94943c37aee3f75112e1"}, + {file = "weaviate-client-3.23.0.tar.gz", hash = "sha256:3ffd7f1460c9e32755d84d4f5fc63dfc0bd990dbe2c3dc20d5c68119d467680e"}, + {file = "weaviate_client-3.23.0-py3-none-any.whl", hash = "sha256:3d3bb75c1d96b2b71e213c5eb885ae3e3f42e4304955383c467d100187d9ff8e"}, ] [package.dependencies] @@ -3581,13 +3566,13 @@ grpc = ["grpcio", "grpcio-tools"] [[package]] name = "wheel" -version = "0.41.1" +version = "0.41.2" description = "A built-package format for Python" optional = false python-versions = ">=3.7" files = [ - {file = "wheel-0.41.1-py3-none-any.whl", hash = "sha256:473219bd4cbedc62cea0cb309089b593e47c15c4a2531015f94e4e3b9a0f6981"}, - {file = "wheel-0.41.1.tar.gz", hash = "sha256:12b911f083e876e10c595779709f8a88a59f45aacc646492a67fe9ef796c1b47"}, + {file = "wheel-0.41.2-py3-none-any.whl", hash = "sha256:75909db2664838d015e3d9139004ee16711748a52c8f336b52882266540215d8"}, + {file = "wheel-0.41.2.tar.gz", hash = "sha256:0c5ac5ff2afb79ac23ab82bab027a0be7b5dbcf2e54dc50efe4bf507de1f7985"}, ] [package.extras] @@ -3795,4 +3780,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = 
"13258c777467d93ab73021225322da670e42513cedec6252a40aacf74822ea68" +content-hash = "5629225437c5aec01f9f862d46d6d1e68abde4c42a0c1ad709df875883171991" diff --git a/level_2/pyproject.toml b/level_2/pyproject.toml index c8b2c95ee..5a252ba37 100644 --- a/level_2/pyproject.toml +++ b/level_2/pyproject.toml @@ -8,7 +8,7 @@ readme = "README.md" [tool.poetry.dependencies] python = "^3.10" #langchain = {git = "https://github.com/topoteretes/langchain.git" , tag = "v0.0.209"} -langchain = "v0.0.250" +langchain = "v0.0.271" nltk = "3.8.1" openai = "0.27.8" @@ -39,6 +39,7 @@ dlt = { version ="^0.3.8", extras = ["duckdb"]} weaviate-client = "^3.22.1" python-multipart = "^0.0.6" deep-translator = "^1.11.4" +humanize = "^4.8.0" diff --git a/level_2/tests/crud_test.py b/level_2/tests/crud_test.py new file mode 100644 index 000000000..403273481 --- /dev/null +++ b/level_2/tests/crud_test.py @@ -0,0 +1,76 @@ +import unittest +import asyncio + +import sys +sys.path.append("..") # Adds higher directory to python modules path. + +from level_2.level_2_pdf_vectorstore__dlt_contracts import Memory +class TestMemory(unittest.TestCase): + + def setUp(self): + self.loop = asyncio.get_event_loop() + self.memory = Memory(user_id="123") + self.loop.run_until_complete(self.memory.async_init()) + + def test_add_fetch_delete_semantic_memory(self): + async def semantic_workflow(): + params = {"sample_param": "value"} + sample_memory = "sample semantic memory" + + # Add + await self.memory._add_semantic_memory(sample_memory, params=params) + # Fetch + fetched = await self.memory._fetch_semantic_memory(sample_memory, params) + fetched_text = fetched['data']['Get']['EPISODICMEMORY'][0]['text'] + self.assertIn(sample_memory, fetched_text) # Replace this with the appropriate validation + # Delete + await self.memory._delete_semantic_memory() + # Verify Deletion + after_delete = await self.memory._fetch_semantic_memory(sample_memory, params) + self.assertNotIn(sample_memory, after_delete) # Replace with the appropriate validation + + self.loop.run_until_complete(semantic_workflow()) + + def test_add_fetch_delete_episodic_memory(self): + async def episodic_workflow(): + params = {"sample_param": "value"} + sample_memory = """{ + "sample_key": "sample_value" + }""" + + # Add + await self.memory._add_episodic_memory(observation=sample_memory, params=params) + # Fetch + fetched = await self.memory._fetch_episodic_memory(sample_memory) + fetched_text = fetched['data']['Get']['EPISODICMEMORY'][0]['text'] + self.assertIn(sample_memory, fetched_text) # Replace this with the appropriate validation + # Delete + await self.memory._delete_episodic_memory() + # Verify Deletion + after_delete = await self.memory._fetch_episodic_memory(sample_memory) + self.assertNotIn(sample_memory, after_delete) # Replace with the appropriate validation + + self.loop.run_until_complete(episodic_workflow()) + + # def test_add_fetch_delete_buffer_memory(self): + # async def buffer_workflow(): + # params = {"sample_param": "value"} + # user_input = "sample buffer input" + # namespace = "sample_namespace" + # + # # Add + # await self.memory._add_buffer_memory(user_input=user_input, namespace=namespace, params=params) + # # Fetch + # fetched = await self.memory._fetch_buffer_memory(user_input, namespace) + # self.assertIn(user_input, fetched) # Replace this with the appropriate validation + # # Delete + # await self.memory._delete_buffer_memory() + # # Verify Deletion + # after_delete = await self.memory._fetch_buffer_memory(user_input, namespace) + # 
self.assertNotIn(user_input, after_delete) # Replace with the appropriate validation + # + # self.loop.run_until_complete(buffer_workflow()) + + +if __name__ == '__main__': + unittest.main()
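
The CRUD tests above drive the async `Memory` coroutines through a manually managed event loop (`asyncio.get_event_loop()` plus `run_until_complete`). A minimal alternative sketch, assuming Python 3.8+ and the same `Memory` interface from `level_2.level_2_pdf_vectorstore__dlt_contracts`; the response shape of the fetch calls depends on the configured vector store, so the assertion here is deliberately loose:

```
# Sketch only, not part of the diff: the semantic-memory round trip written
# with unittest.IsolatedAsyncioTestCase, which owns the event loop and lets
# each test await the Memory coroutines directly.
import unittest

from level_2.level_2_pdf_vectorstore__dlt_contracts import Memory


class TestMemoryAsync(unittest.IsolatedAsyncioTestCase):
    async def asyncSetUp(self):
        # async_init() wires up the long-term and short-term memory stores.
        self.memory = Memory(user_id="123")
        await self.memory.async_init()

    async def test_semantic_round_trip(self):
        params = {"sample_param": "value"}
        sample = "sample semantic memory"

        await self.memory._add_semantic_memory(sample, params=params)

        fetched = await self.memory._fetch_semantic_memory(sample, params)
        # The exact response structure depends on the backing store
        # (Weaviate by default), so only assert that something came back.
        self.assertTrue(fetched)

        await self.memory._delete_semantic_memory()


if __name__ == "__main__":
    unittest.main()
```

Letting the test runner manage the loop also sidesteps the fact that calling `asyncio.get_event_loop()` outside a running loop is deprecated on newer Python versions.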
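
The diff also adds `_create_buffer_context` and `_available_operations` to `Memory`, but `main()` only exercises `_run_buffer`. A minimal usage sketch, assuming the same `Memory` class and a trimmed version of the `params` contract dict from `main()` (field values are illustrative, not prescribed). Note that `_available_operations` as added resolves through `long_term_memory.episodic_buffer`, while the other buffer helpers go through `short_term_memory`, so that call may need the attribute path adjusted before it runs:

```
# Sketch only, not part of the diff: exercising the new buffer helpers.
import asyncio

from level_2.level_2_pdf_vectorstore__dlt_contracts import Memory


async def demo_buffer_context():
    memory = Memory(user_id="123")
    await memory.async_init()

    # Trimmed contract metadata; main() above shows the full set of fields.
    params = {"version": "1.0", "format": "json"}

    # List the operations the buffer can run, then build a context for a
    # user query before handing the same query to _run_buffer.
    actions = await memory._available_operations()
    print(actions)

    context = await memory._create_buffer_context(
        user_input="I want a schema for my data",
        content=None,
        params=params,
    )
    print(context)

    response = await memory._run_buffer(
        user_input="I want a schema for my data",
        content=None,
        params=params,
    )
    print(response)


if __name__ == "__main__":
    asyncio.run(demo_buffer_context())
```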