added level 2 initial code
This commit is contained in:
parent
e8fc6c8952
commit
81b6cc1920
15 changed files with 5063 additions and 0 deletions
11
level_2/.env
Normal file
11
level_2/.env
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
# SECURITY(review): real-looking credentials are committed in this .env — revoke and rotate every key below, and remove .env from version control (keep only .env.template).
OPENAI_API_KEY = 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
|
||||||
|
PINECONE_API_KEY = 4e0982ad-57d5-41ac-bce5-d1fd2c2da273
|
||||||
|
PINECONE_API_ENV = us-west1-gcp
|
||||||
|
REPLICATE_API_TOKEN = 4e0982ad-57d5-41ac-bce5-d1fd2c2da273
|
||||||
|
GPLACES_API_KEY = AIzaSyAfuT9tBy6wC3phZR1Tl5acknNA_TU2mKE
|
||||||
|
REDIS_HOST=redis
|
||||||
|
SERPAPI_API_KEY=17bb94b76b0d7cf3fb1c36d8376e0fc4c3ed761e862b05ef154e116d73c39da5
|
||||||
|
ZAPIER_NLA_API_KEY=sk-ak-GtXls7Y5JcPOSbWw7SZDzSvtAF
|
||||||
|
LOCAL_DEV = True
|
||||||
|
WEAVIATE_API_KEY =shCCL5EVpOKxIdZhMRH090lFqDb5aE1XgUTP
|
||||||
|
WEAVIATE_URL = https://new-test-cluster-i49dzudl.weaviate.network
|
||||||
3
level_2/.env.template
Normal file
3
level_2/.env.template
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
OPENAI_API_KEY=sk
|
||||||
|
WEAVIATE_URL =
|
||||||
|
WEAVIATE_API_KEY =
|
||||||
36
level_2/Dockerfile
Normal file
36
level_2/Dockerfile
Normal file
|
|
@ -0,0 +1,36 @@
|
||||||
|
|
||||||
|
FROM python:3.11-slim
|
||||||
|
|
||||||
|
# Set build argument
|
||||||
|
ARG API_ENABLED
|
||||||
|
|
||||||
|
# Set environment variable based on the build argument
|
||||||
|
ENV API_ENABLED=${API_ENABLED} \
|
||||||
|
PIP_NO_CACHE_DIR=true
|
||||||
|
ENV PATH="${PATH}:/root/.poetry/bin"
|
||||||
|
RUN pip install poetry
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
COPY pyproject.toml poetry.lock /app/
|
||||||
|
|
||||||
|
# Install the dependencies
|
||||||
|
RUN poetry config virtualenvs.create false && \
|
||||||
|
poetry install --no-root --no-dev
|
||||||
|
|
||||||
|
RUN apt-get update -q && \
|
||||||
|
apt-get install curl zip jq netcat-traditional -y -q
|
||||||
|
RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
|
||||||
|
unzip -qq awscliv2.zip && ./aws/install && \
|
||||||
|
apt-get clean && \
|
||||||
|
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
||||||
|
|
||||||
|
|
||||||
|
#RUN playwright install
|
||||||
|
#RUN playwright install-deps
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
COPY . /app
|
||||||
|
COPY entrypoint.sh /app/entrypoint.sh
|
||||||
|
RUN chmod +x /app/entrypoint.sh
|
||||||
|
|
||||||
|
ENTRYPOINT ["/app/entrypoint.sh"]
|
||||||
44
level_2/Readme.md
Normal file
44
level_2/Readme.md
Normal file
|
|
@ -0,0 +1,44 @@
|
||||||
|
## PromethAI Memory Manager
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
### Description
|
||||||
|
|
||||||
|
|
||||||
|
Initial code lets you do three operations:
|
||||||
|
|
||||||
|
1. Add to memory
|
||||||
|
2. Retrieve from memory
|
||||||
|
3. Structure the data to schema and load to duckdb
|
||||||
|
|
||||||
|
## How to use
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
```docker compose build promethai_mem ```
|
||||||
|
|
||||||
|
## Run
|
||||||
|
|
||||||
|
```docker compose up promethai_mem ```
|
||||||
|
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
The fast API endpoint accepts prompts and PDF files and returns a JSON object with the generated text.
|
||||||
|
|
||||||
|
```curl
|
||||||
|
-X POST
|
||||||
|
-F "prompt=The quick brown fox"
|
||||||
|
-F "file=@/path/to/file.pdf"
|
||||||
|
http://localhost:8000/upload/
|
||||||
|
```
|
||||||
|
|
||||||
|
{
|
||||||
|
"payload": {
|
||||||
|
"user_id": "681",
|
||||||
|
"session_id": "471",
|
||||||
|
"model_speed": "slow",
|
||||||
|
"prompt": "Temperature=Cold;Food Type=Ice Cream",
|
||||||
|
"pdf_url": "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
|
||||||
|
}
|
||||||
|
}
|
||||||
230
level_2/api.py
Normal file
230
level_2/api.py
Normal file
|
|
@ -0,0 +1,230 @@
|
||||||
|
from langchain.document_loaders import PyPDFLoader
|
||||||
|
|
||||||
|
from level_2_pdf_vectorstore__dlt_contracts import Memory
|
||||||
|
from fastapi import FastAPI
|
||||||
|
from fastapi.responses import JSONResponse
|
||||||
|
from pydantic import BaseModel
|
||||||
|
from typing import Dict, Any
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import uvicorn
|
||||||
|
from fastapi import Request
|
||||||
|
import yaml
|
||||||
|
from fastapi import HTTPException
|
||||||
|
from fastapi import FastAPI, UploadFile, File
|
||||||
|
from typing import List
|
||||||
|
import requests
|
||||||
|
# Set up logging
|
||||||
|
logging.basicConfig(
|
||||||
|
level=logging.INFO, # Set the logging level (e.g., DEBUG, INFO, WARNING, ERROR, CRITICAL)
|
||||||
|
format="%(asctime)s [%(levelname)s] %(message)s", # Set the log message format
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
|
||||||
|
app = FastAPI(debug=True)
|
||||||
|
|
||||||
|
|
||||||
|
from fastapi import Depends
|
||||||
|
|
||||||
|
|
||||||
|
class ImageResponse(BaseModel):
    """Simple success/message response envelope."""

    success: bool
    message: str
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/")
async def root():
    """Root endpoint that returns a welcome message."""
    greeting = {"message": "Hello, World, I am alive!"}
    return greeting
|
||||||
|
|
||||||
|
@app.get("/health")
def health_check():
    """Health check endpoint that returns the server status."""
    status_payload = {"status": "OK"}
    return status_payload
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#curl -X POST -H "Content-Type: application/json" -d '{"data": "YourPayload"}' -F "files=@/path/to/your/pdf/file.pdf" http://127.0.0.1:8000/upload/

# These re-imports duplicate the ones at the top of the file; kept (harmless)
# pending a wider cleanup.
from fastapi import FastAPI, UploadFile, File
import requests
import os
import json

# FIX(review): a second `app = FastAPI()` previously re-bound the module-level
# app here. That detached the "/" and "/health" routes registered above (they
# lived on the first instance, while gunicorn serves `api:app`, the rebound
# one) and silently dropped debug mode. Keep the single FastAPI instance
# created at the top of the file.

from io import BytesIO
|
||||||
|
|
||||||
|
|
||||||
|
class Payload(BaseModel):
    """Request body wrapper: an arbitrary JSON object under the `payload` key."""

    payload: Dict[str, Any]
|
||||||
|
|
||||||
|
@app.post("/upload/", response_model=dict)
async def upload_pdf_and_payload(
    payload: Payload,
    # files: List[UploadFile] = File(...),
):
    """Accept a JSON payload, download and split the referenced PDF, and run
    the memory buffer over it.

    Expects `payload.payload` to contain a `pdf_url` key pointing at a remote
    PDF.  Other keys (user_id, session_id, prompt, model_speed, ...) are sent
    by callers but not read here yet — see TODOs below.

    Returns:
        {"response": ...} on success, or {"error": ...} on failure (error dict
        in a 200 body is kept to preserve the existing client contract).
    """
    try:
        decoded_payload = payload.payload

        # FIX(review): a missing 'pdf_url' previously fell through to the
        # return statement below with `response` unbound, surfacing as a
        # confusing NameError string.  Fail fast with a clear message instead.
        if 'pdf_url' not in decoded_payload:
            return JSONResponse(
                content={"error": "payload must include a 'pdf_url' key"},
                status_code=400,
            )

        # Download the remote PDF (bounded so a dead host can't hang a worker).
        pdf_response = requests.get(decoded_payload['pdf_url'], timeout=60)
        pdf_content = pdf_response.content
        logging.info("Downloaded PDF from URL")

        # Create an in-memory file-like object for the PDF content, then
        # persist it so PyPDFLoader can read from a path.
        pdf_stream = BytesIO(pdf_content)
        contents = pdf_stream.read()

        tmp_location = os.path.join('/tmp', "tmp.pdf")
        with open(tmp_location, 'wb') as tmp_file:
            tmp_file.write(contents)
        logging.info("Wrote PDF from URL")

        # Process the PDF using PyPDFLoader.
        loader = PyPDFLoader(tmp_location)
        pages = loader.load_and_split()
        logging.info(" PDF split into pages")

        Memory_ = Memory(index_name="my-agent", user_id='555')
        await Memory_.async_init()

        # First pass feeds the page content into the buffer ...
        Memory_._run_buffer(user_input="I want to get a schema for my data", content=pages)
        # ... second pass queries it; only this result is returned.
        # NOTE(review): async_init is awaited but _run_buffer is not — if
        # _run_buffer is also a coroutine these calls silently do nothing;
        # confirm against the Memory implementation.
        response = Memory_._run_buffer(user_input="I want to get a schema for my data")
        return JSONResponse(content={"response": response}, status_code=200)

        # TODO: add the user id to the payload
        # TODO: add the raw pdf to the payload
    except Exception as e:
        return {"error": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
# @app.post("/clear-cache", response_model=dict)
|
||||||
|
# async def clear_cache(request_data: Payload) -> dict:
|
||||||
|
# """
|
||||||
|
# Endpoint to clear the cache.
|
||||||
|
#
|
||||||
|
# Parameters:
|
||||||
|
# request_data (Payload): The request data containing the user and session IDs.
|
||||||
|
#
|
||||||
|
# Returns:
|
||||||
|
# dict: A dictionary with a message indicating the cache was cleared.
|
||||||
|
# """
|
||||||
|
# json_payload = request_data.payload
|
||||||
|
# agent = Agent()
|
||||||
|
# agent.set_user_session(json_payload["user_id"], json_payload["session_id"])
|
||||||
|
# try:
|
||||||
|
# agent.clear_cache()
|
||||||
|
# return JSONResponse(content={"response": "Cache cleared"}, status_code=200)
|
||||||
|
# except Exception as e:
|
||||||
|
# raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
#
|
||||||
|
# @app.post("/correct-prompt-grammar", response_model=dict)
|
||||||
|
# async def prompt_to_correct_grammar(request_data: Payload) -> dict:
|
||||||
|
# json_payload = request_data.payload
|
||||||
|
# agent = Agent()
|
||||||
|
# agent.set_user_session(json_payload["user_id"], json_payload["session_id"])
|
||||||
|
# logging.info("Correcting grammar %s", json_payload["prompt_source"])
|
||||||
|
#
|
||||||
|
# output = agent.prompt_correction(json_payload["prompt_source"], model_speed= json_payload["model_speed"])
|
||||||
|
# return JSONResponse(content={"response": {"result": json.loads(output)}})
|
||||||
|
|
||||||
|
|
||||||
|
# @app.post("/action-add-zapier-calendar-action", response_model=dict,dependencies=[Depends(auth)])
|
||||||
|
# async def action_add_zapier_calendar_action(
|
||||||
|
# request: Request, request_data: Payload
|
||||||
|
# ) -> dict:
|
||||||
|
# json_payload = request_data.payload
|
||||||
|
# agent = Agent()
|
||||||
|
# agent.set_user_session(json_payload["user_id"], json_payload["session_id"])
|
||||||
|
# # Extract the bearer token from the header
|
||||||
|
# auth_header = request.headers.get("Authorization")
|
||||||
|
# if auth_header:
|
||||||
|
# bearer_token = auth_header.replace("Bearer ", "")
|
||||||
|
# else:
|
||||||
|
# bearer_token = None
|
||||||
|
# outcome = agent.add_zapier_calendar_action(
|
||||||
|
# prompt_base=json_payload["prompt_base"],
|
||||||
|
# token=bearer_token,
|
||||||
|
# model_speed=json_payload["model_speed"],
|
||||||
|
# )
|
||||||
|
# return JSONResponse(content={"response": outcome})
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def start_api_server(host: str = "0.0.0.0", port: int = 8000):
    """Run the FastAPI app under uvicorn, logging any startup failure.

    Parameters:
        host (str): Interface to bind.
        port (int): TCP port to bind.
    """
    try:
        logger.info(f"Starting server at {host}:{port}")
        uvicorn.run(app, host=host, port=port)
    except Exception as exc:
        # Swallowed on purpose: log the failure and return control to the
        # caller, which may attempt cleanup or recovery.
        logger.exception(f"Failed to start server: {exc}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
start_api_server()
|
||||||
22
level_2/docker-compose.yml
Normal file
22
level_2/docker-compose.yml
Normal file
|
|
@ -0,0 +1,22 @@
|
||||||
|
version: "3.9"
|
||||||
|
|
||||||
|
services:
|
||||||
|
promethai_mem:
|
||||||
|
networks:
|
||||||
|
- promethai_mem_backend
|
||||||
|
build:
|
||||||
|
context: ./
|
||||||
|
volumes:
|
||||||
|
- "./:/app"
|
||||||
|
environment:
|
||||||
|
- HOST=0.0.0.0
|
||||||
|
profiles: ["exclude-from-up"] # Use `docker-compose run teenage-agi` to get an attached container
|
||||||
|
ports:
|
||||||
|
- 8000:8000
|
||||||
|
- 443:443
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
networks:
|
||||||
|
promethai_mem_backend:
|
||||||
|
name: promethai_mem_backend
|
||||||
6
level_2/entrypoint.sh
Executable file
6
level_2/entrypoint.sh
Executable file
|
|
@ -0,0 +1,6 @@
|
||||||
|
#!/bin/bash
|
||||||
|
export ENVIRONMENT
|
||||||
|
#python fetch_secret.py
|
||||||
|
|
||||||
|
# Start Gunicorn
|
||||||
|
gunicorn -w 2 -k uvicorn.workers.UvicornWorker -t 120 --bind=0.0.0.0:8000 --bind=0.0.0.0:443 --log-level debug api:app
|
||||||
711
level_2/level_2_pdf_vectorstore__dlt_contracts.py
Normal file
711
level_2/level_2_pdf_vectorstore__dlt_contracts.py
Normal file
|
|
@ -0,0 +1,711 @@
|
||||||
|
#Make sure to install the following packages: dlt, langchain, duckdb, python-dotenv, openai, weaviate-client
|
||||||
|
|
||||||
|
import dlt
|
||||||
|
from langchain import PromptTemplate, LLMChain
|
||||||
|
from langchain.agents import initialize_agent, AgentType
|
||||||
|
from langchain.chains.openai_functions import create_structured_output_chain
|
||||||
|
from langchain.chat_models import ChatOpenAI
|
||||||
|
from langchain.document_loaders import PyPDFLoader
|
||||||
|
import weaviate
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
import asyncio
|
||||||
|
from typing import Any, Dict, List
|
||||||
|
from deep_translator import (GoogleTranslator)
|
||||||
|
from langchain.chat_models import ChatOpenAI
|
||||||
|
from langchain.schema import LLMResult, HumanMessage
|
||||||
|
from langchain.callbacks.base import AsyncCallbackHandler, BaseCallbackHandler
|
||||||
|
|
||||||
|
from langchain.memory import VectorStoreRetrieverMemory
|
||||||
|
from marvin import ai_classifier
|
||||||
|
from enum import Enum
|
||||||
|
import marvin
|
||||||
|
import asyncio
|
||||||
|
from langchain.embeddings import OpenAIEmbeddings
|
||||||
|
from langchain.prompts import HumanMessagePromptTemplate, ChatPromptTemplate
|
||||||
|
from langchain.retrievers import WeaviateHybridSearchRetriever
|
||||||
|
from langchain.schema import Document, SystemMessage, HumanMessage, LLMResult
|
||||||
|
from langchain.tools import tool
|
||||||
|
from langchain.vectorstores import Weaviate
|
||||||
|
import uuid
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
load_dotenv()
|
||||||
|
from pathlib import Path
|
||||||
|
from langchain import OpenAI, LLMMathChain
|
||||||
|
from langchain.chat_models import ChatOpenAI
|
||||||
|
from langchain.prompts import ChatPromptTemplate
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
import os
|
||||||
|
from datetime import datetime
|
||||||
|
from jinja2 import Template
|
||||||
|
from langchain import PromptTemplate, LLMChain
|
||||||
|
from langchain.chains.openai_functions import create_structured_output_chain
|
||||||
|
from langchain.prompts import HumanMessagePromptTemplate, ChatPromptTemplate
|
||||||
|
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||||
|
import pinecone
|
||||||
|
from langchain.vectorstores import Pinecone
|
||||||
|
from langchain.embeddings.openai import OpenAIEmbeddings
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from langchain.schema import Document, SystemMessage, HumanMessage
|
||||||
|
from langchain.vectorstores import Weaviate
|
||||||
|
import weaviate
|
||||||
|
import uuid
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
|
||||||
|
class MyCustomSyncHandler(BaseCallbackHandler):
    """Synchronous LangChain callback that echoes each streamed token."""

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        # Invoked once per token while the LLM streams its answer.
        print(f"Sync handler being called in a `thread_pool_executor`: token: {token}")
|
||||||
|
class MyCustomAsyncHandler(AsyncCallbackHandler):
    """Async callback handler that can be used to handle callbacks from langchain."""

    async def on_llm_start(
        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> None:
        """Run when chain starts running."""
        print("zzzz....")
        await asyncio.sleep(0.3)
        # FIX(review): dropped the unused `class_name = serialized["name"]`
        # lookup — the value was never read and the bare indexing raised
        # KeyError whenever "name" was absent from the serialized payload.
        print("Hi! I just woke up. Your llm is starting")

    async def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        """Run when chain ends running."""
        print("zzzz....")
        await asyncio.sleep(0.3)
        print("Hi! I just woke up. Your llm is ending")
|
||||||
|
|
||||||
|
class VectorDB:
    """Thin wrapper over a vector store (Weaviate today; Pinecone scaffolding
    kept for later) that tags stored documents with the caller's memory
    hierarchy ids (memory / ltm / stm / buffer).
    """

    OPENAI_TEMPERATURE = float(os.getenv("OPENAI_TEMPERATURE", 0.0))
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")

    def __init__(self, user_id: str, index_name: str, memory_id: str, ltm_memory_id: str = '00000', st_memory_id: str = '0000', buffer_id: str = '0000', db_type: str = "pinecone", namespace: str = None):
        self.user_id = user_id
        self.index_name = index_name
        self.db_type = db_type
        self.namespace = namespace
        self.memory_id = memory_id
        self.ltm_memory_id = ltm_memory_id
        self.st_memory_id = st_memory_id
        self.buffer_id = buffer_id
        # NOTE(review): despite db_type defaulting to "pinecone", only
        # "weaviate" is actually wired up — any other value raises here.
        # The pinecone branch is preserved as scaffolding:
        # if self.db_type == "pinecone":
        #     self.vectorstore = self.init_pinecone(self.index_name)
        if self.db_type == "weaviate":
            # Return values are deliberately discarded: these calls act as an
            # early connectivity / credentials check; retrievers and clients
            # are rebuilt per operation below.
            self.init_weaviate(namespace=self.namespace)
            self.init_weaviate_client(namespace=self.namespace)
        else:
            raise ValueError(f"Unsupported database type: {db_type}")
        load_dotenv()

    def init_pinecone(self, index_name):
        """Connect to an existing Pinecone index and return a vectorstore over it."""
        load_dotenv()
        PINECONE_API_KEY = os.getenv("PINECONE_API_KEY", "")
        PINECONE_API_ENV = os.getenv("PINECONE_API_ENV", "")
        pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_API_ENV)
        pinecone.Index(index_name)
        vectorstore: Pinecone = Pinecone.from_existing_index(
            index_name=self.index_name,
            embedding=OpenAIEmbeddings(),
            namespace='RESULT'
        )
        return vectorstore

    def init_weaviate_client(self, namespace: str):
        """Build and return a raw Weaviate client from WEAVIATE_URL /
        WEAVIATE_API_KEY env vars (the OpenAI key is forwarded for
        server-side vectorization)."""
        auth_config = weaviate.auth.AuthApiKey(api_key=os.environ.get('WEAVIATE_API_KEY'))
        client = weaviate.Client(
            url=os.environ.get('WEAVIATE_URL'),
            auth_client_secret=auth_config,
            additional_headers={
                "X-OpenAI-Api-Key": os.environ.get('OPENAI_API_KEY')
            }
        )
        return client

    def init_weaviate(self, namespace: str):
        """Return a hybrid-search retriever over the given Weaviate class."""
        embeddings = OpenAIEmbeddings()
        # Reuse the shared client factory instead of duplicating its setup.
        client = self.init_weaviate_client(namespace=namespace)
        retriever = WeaviateHybridSearchRetriever(
            client=client,
            index_name=namespace,
            text_key="text",
            attributes=[],
            embedding=embeddings,
            create_schema_if_missing=True,
        )
        return retriever

    async def add_memories(self, observation: str, page: str = "", source: str = ""):
        """Store `observation` (plus provenance metadata) in the vector store.

        NOTE(review): declared async but the underlying store calls are
        blocking — the coroutine does synchronous work when awaited.
        """
        if self.db_type == "pinecone":
            vectorstore: Pinecone = Pinecone.from_existing_index(
                index_name=self.index_name, embedding=OpenAIEmbeddings(), namespace=self.namespace
            )
            retriever = vectorstore.as_retriever()
            retriever.add_documents(
                [
                    Document(
                        page_content=observation,
                        metadata={
                            "inserted_at": datetime.now(),
                            "text": observation,
                            "user_id": self.user_id,
                            "page": page,
                            "source": source
                        },
                        namespace=self.namespace,
                    )
                ]
            )
        elif self.db_type == "weaviate":
            print(self.namespace)
            retriever = self.init_weaviate(self.namespace)
            # Tag the document with the full memory-id hierarchy so it can be
            # filtered later per user / memory / buffer.
            return retriever.add_documents([
                Document(
                    metadata={
                        "inserted_at": str(datetime.now()),
                        "text": observation,
                        "user_id": str(self.user_id),
                        "memory_id": str(self.memory_id),
                        "ltm_memory_id": str(self.ltm_memory_id),
                        "st_memory_id": str(self.st_memory_id),
                        "buffer_id": str(self.buffer_id),
                        "last_accessed_at": str(datetime.now()),
                        # **source_metadata,
                    },
                    page_content=observation,
                )]
            )

    async def fetch_memories(self, observation: str, params=None):
        """Fetch documents relevant to `observation` from the vector store.

        Args a json containing:
            query (str): The query string.
            path (list): The path for filtering, e.g., ['year'].
            operator (str): The operator for filtering, e.g., 'Equal'.
            valueText (str): The value for filtering, e.g., '2017*'.

        Example:
            fetch_memories(query="some query", params={...})

        NOTE(review): `params` is currently accepted but not applied — the
        where_filter is commented out below.
        """
        if self.db_type == "pinecone":
            # Fetch Pinecone memories here
            pass
        elif self.db_type == "weaviate":
            retriever = self.init_weaviate(self.namespace)

            print(self.namespace)
            print(str(datetime.now()))
            print(observation)

            output = retriever.get_relevant_documents(
                observation
                # ,
                # score=True
                # ,
                # where_filter=params
            )
            print(output)
            return output

    def delete_memories(self, params: None):
        """Batch-delete objects in this namespace matching the given where-filter."""
        # FIX(review): the client was previously constructed with
        # url=os.environ.get('WEAVIATE_API_KEY') — the API key used as the
        # URL — so every delete failed to connect.  Route through the shared
        # factory, which reads WEAVIATE_URL correctly.
        client = self.init_weaviate_client(namespace=self.namespace)
        client.batch.delete_objects(
            class_name=self.namespace,
            # Same `where` filter as in the GraphQL API
            where=params,
        )

    def update_memories(self):
        """Placeholder — updating stored memories is not implemented yet."""
        pass
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class SemanticMemory:
|
||||||
|
def __init__(self, user_id: str, memory_id:str, ltm_memory_id:str, index_name: str, db_type:str="weaviate", namespace:str="SEMANTICMEMORY"):
|
||||||
|
# Add any semantic memory-related attributes or setup here
|
||||||
|
self.user_id=user_id
|
||||||
|
self.index_name = index_name
|
||||||
|
self.namespace = namespace
|
||||||
|
self.semantic_memory_id = str(uuid.uuid4())
|
||||||
|
self.memory_id = memory_id
|
||||||
|
self.ltm_memory_id = ltm_memory_id
|
||||||
|
self.vector_db = VectorDB(user_id=user_id, memory_id= self.memory_id, ltm_memory_id = self.ltm_memory_id, index_name=index_name, db_type=db_type, namespace=self.namespace)
|
||||||
|
self.db_type = db_type
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def _update_memories(self ,memory_id:str="None", semantic_memory: str="None") -> None:
|
||||||
|
"""Update semantic memory for the user"""
|
||||||
|
|
||||||
|
if self.db_type == "weaviate":
|
||||||
|
self.vector_db.add_memories( observation = semantic_memory)
|
||||||
|
|
||||||
|
elif self.db_type == "pinecone":
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def _fetch_memories(self, observation: str,params) -> dict[str, str] | str:
|
||||||
|
"""Fetch related characteristics, preferences or dislikes for a user."""
|
||||||
|
# self.init_pinecone(index_name=self.index)
|
||||||
|
|
||||||
|
if self.db_type == "weaviate":
|
||||||
|
|
||||||
|
return self.vector_db.fetch_memories(observation, params)
|
||||||
|
|
||||||
|
elif self.db_type == "pinecone":
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class LongTermMemory:
    """Long-term memory: owns a SemanticMemory (episodic memory is planned)."""

    def __init__(self, user_id: str = "676", memory_id: str = None, index_name: str = None, namespace: str = None, db_type: str = "weaviate"):
        self.user_id = user_id
        self.memory_id = memory_id
        # Fresh id for this LTM instance, propagated to child stores.
        self.ltm_memory_id = str(uuid.uuid4())
        self.index_name = index_name
        self.namespace = namespace
        self.db_type = db_type
        # self.episodic_memory = EpisodicMemory()
        self.semantic_memory = SemanticMemory(
            user_id=self.user_id,
            memory_id=self.memory_id,
            ltm_memory_id=self.ltm_memory_id,
            index_name=self.index_name,
            db_type=self.db_type,
        )
|
||||||
|
|
||||||
|
class ShortTermMemory:
    """Short-term memory: owns an EpisodicBuffer for in-flight content."""

    def __init__(self, user_id: str = "676", memory_id: str = None, index_name: str = None, namespace: str = None, db_type: str = "weaviate"):
        self.user_id = user_id
        self.memory_id = memory_id
        self.namespace = namespace
        self.db_type = db_type
        # Fresh id for this STM instance.
        self.stm_memory_id = str(uuid.uuid4())
        self.index_name = index_name
        self.episodic_buffer = EpisodicBuffer(
            user_id=self.user_id,
            memory_id=self.memory_id,
            index_name=self.index_name,
            db_type=self.db_type,
        )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class EpisodicBuffer:
|
||||||
|
def __init__(self, user_id: str = "676", memory_id: str = None, index_name: str = None, namespace: str = 'EPISODICBUFFER', db_type: str = "weaviate"):
    """Set up buffer identifiers and the GPT-4 chat model used for filtering."""
    self.user_id = user_id
    self.memory_id = memory_id
    self.namespace = namespace
    self.db_type = db_type
    # NOTE(review): placeholder value — the sibling memory classes use
    # uuid4 here; confirm whether this should too.
    self.st_memory_id = "blah"
    self.index_name = index_name
    self.llm = ChatOpenAI(
        temperature=0.0,
        max_tokens=1200,
        openai_api_key=os.environ.get('OPENAI_API_KEY'),
        model_name="gpt-4-0613",
        callbacks=[MyCustomSyncHandler(), MyCustomAsyncHandler()],
    )
    # self.vector_db = VectorDB(user_id=user_id, memory_id= self.memory_id, st_memory_id = self.st_memory_id, index_name=index_name, db_type=db_type, namespace=self.namespace)
|
||||||
|
|
||||||
|
|
||||||
|
def _compute_weights(self, context: str):
|
||||||
|
"""Computes the weights for the buffer"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
def _temporal_weighting(self, context: str):
|
||||||
|
"""Computes the temporal weighting for the buffer"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
# async def infer_schema_from_text(self, text: str):
|
||||||
|
# """Infer schema from text"""
|
||||||
|
#
|
||||||
|
# prompt_ = """ You are a json schema master. Create a JSON schema based on the following data and don't write anything else: {prompt} """
|
||||||
|
#
|
||||||
|
# complete_query = PromptTemplate(
|
||||||
|
# input_variables=["prompt"],
|
||||||
|
# template=prompt_,
|
||||||
|
# )
|
||||||
|
#
|
||||||
|
# chain = LLMChain(
|
||||||
|
# llm=self.llm, prompt=complete_query, verbose=True
|
||||||
|
# )
|
||||||
|
# chain_result = chain.run(prompt=text).strip()
|
||||||
|
#
|
||||||
|
# json_data = json.dumps(chain_result)
|
||||||
|
# return json_data
|
||||||
|
|
||||||
|
async def _fetch_memories(self, observation: str, namespace: str) -> str:
    """Look up `observation` in the given namespace of this user's vector store."""
    store = VectorDB(
        user_id=self.user_id,
        memory_id=self.memory_id,
        st_memory_id=self.st_memory_id,
        index_name=self.index_name,
        db_type=self.db_type,
        namespace=namespace,
    )
    return await store.fetch_memories(observation=observation)
|
||||||
|
|
||||||
|
async def _add_memories(self, observation: str, namespace: str):
    """Persist `observation` into the given namespace of this user's vector store."""
    store = VectorDB(
        user_id=self.user_id,
        memory_id=self.memory_id,
        st_memory_id=self.st_memory_id,
        index_name=self.index_name,
        db_type=self.db_type,
        namespace=namespace,
    )
    return await store.add_memories(observation)
|
||||||
|
|
||||||
|
|
||||||
|
def encoding(self, document: str, namespace: str = "EPISODICBUFFER") -> None:
    """Encoding for the buffer, stores raw data in the buffer.

    Note, this is not comp-sci encoding, but rather encoding in the sense of
    storing the content in the buffer.
    """
    store = VectorDB(
        user_id=self.user_id,
        memory_id=self.memory_id,
        st_memory_id=self.st_memory_id,
        index_name=self.index_name,
        db_type=self.db_type,
        namespace=namespace,
    )
    # NOTE(review): add_memories is async — called without await from this
    # sync method it returns an un-run coroutine; confirm intended behavior.
    result = store.add_memories(document)
    return result
|
||||||
|
|
||||||
|
async def main_buffer(self, user_input=None, content=None):
    """AI buffer to convert unstructured data to structured data.

    :param user_input: the user's query, run through a filtering LLM chain.
    :param content: optional raw content; when given it is first stored
        ("encoded") in the episodic buffer before filtering.
    """
    # fix: removed several hundred lines of commented-out scratch code
    # (agent/tool wiring, schema mapping, memory routing) that obscured the
    # two live paths below; live calls and prompt strings are unchanged.
    if content is not None:
        # We need to encode the content. Note, this is not comp-sci encoding,
        # but rather encoding in the sense of storing the content in the buffer.
        self.encoding(content)
        prompt_filter = ChatPromptTemplate.from_template("Filter and remove uneccessary information that is not relevant in the user query {query}")
        chain_filter = prompt_filter | self.llm
        output = await chain_filter.ainvoke({"query": user_input})
        print(output)
    if content is None:
        # Sensory and Linguistic Processing
        prompt_filter = ChatPromptTemplate.from_template("Filter and remove uneccessary information that is not relevant in the user query {query}")
        chain_filter = prompt_filter | self.llm
        output = await chain_filter.ainvoke({"query": user_input})
        # NOTE(review): the translation result is computed (a network call)
        # but never used downstream — confirm whether it can be removed.
        translation = GoogleTranslator(source='auto', target='en').translate(text=output.content)

    # Placeholder processing strategies — currently unimplemented stubs.
    def top_down_processing():
        """Top-down processing"""

    def bottom_up_processing():
        """Bottom-up processing"""
        pass

    def interactive_processing():
        """interactive processing"""
        pass

    # Placeholder value; working-memory activation is not yet modelled.
    working_memory_activation = "bla"

    # NOTE(review): the instruction below is hard-coded rather than derived
    # from ``user_input`` — presumably a prototype shortcut; confirm intent.
    prompt_chunk = ChatPromptTemplate.from_template("Can you break down the instruction 'Structure a PDF and load it into duckdb' into smaller tasks or actions? Return only tasks or actions. Be brief")
    chain_chunk = prompt_chunk | self.llm
    output_chunks = await chain_chunk.ainvoke({"query": output.content})
    print(output_chunks.content)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: define the short-term memory (STM) component
# TODO: define the long-term memory (LTM) component
||||||
|
class Memory:
    """Facade over a user's long-term and short-term memory stores.

    Construction is cheap and synchronous; the underlying stores are built
    asynchronously via :meth:`async_init`, which must be awaited before any
    of the ``_*`` helper methods are used.
    """

    load_dotenv()

    # Runtime configuration pulled from the environment at class-definition time.
    OPENAI_TEMPERATURE = float(os.getenv("OPENAI_TEMPERATURE", 0.0))
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")

    def __init__(self, user_id: str = "676", index_name: str = None, knowledge_source: str = None,
                 knowledge_type: str = None, db_type: str = "weaviate", namespace: str = None) -> None:
        self.user_id = user_id
        self.index_name = index_name
        self.db_type = db_type
        self.knowledge_source = knowledge_source
        self.knowledge_type = knowledge_type
        # Fresh identifier per Memory instance, shared by both stores.
        self.memory_id = str(uuid.uuid4())
        # Populated by async_init(); None until then.
        self.long_term_memory = None
        self.short_term_memory = None
        self.namespace = namespace
        # NOTE(review): load_dotenv() already ran in the class body above;
        # this second call is redundant but harmless, kept for behavior.
        load_dotenv()

    # Asynchronous factory function for creating LongTermMemory.
    async def async_create_long_term_memory(self, user_id, memory_id, index_name, namespace, db_type):
        # Perform asynchronous initialization steps if needed.
        return LongTermMemory(
            user_id=user_id, memory_id=memory_id, index_name=index_name,
            namespace=namespace, db_type=db_type
        )

    # Asynchronous factory function for creating ShortTermMemory.
    async def async_create_short_term_memory(self, user_id, memory_id, index_name, db_type):
        # Perform asynchronous initialization steps if needed.
        return ShortTermMemory(
            user_id=user_id, memory_id=memory_id, index_name=index_name, db_type=db_type
        )

    async def async_init(self):
        """Asynchronously build both memory stores.

        fix: the original defined ``async_init`` twice; the first definition
        (which built only the long-term store) was dead code silently
        shadowed by this one, so it has been removed.
        """
        self.long_term_memory = await self.async_create_long_term_memory(
            user_id=self.user_id, memory_id=self.memory_id, index_name=self.index_name,
            namespace=self.namespace, db_type=self.db_type
        )
        self.short_term_memory = await self.async_create_short_term_memory(
            user_id=self.user_id, memory_id=self.memory_id, index_name=self.index_name,
            db_type=self.db_type
        )

    def _update_semantic_memory(self, semantic_memory: str):
        # Delegate to the long-term semantic store.
        return self.long_term_memory.semantic_memory._update_memories(
            memory_id=self.memory_id,
            semantic_memory=semantic_memory
        )

    def _fetch_semantic_memory(self, observation, params):
        # Delegate to the long-term semantic store.
        return self.long_term_memory.semantic_memory._fetch_memories(
            observation=observation, params=params
        )

    async def _run_buffer(self, user_input: str):
        # Run the episodic buffer's main processing loop on the query.
        return await self.short_term_memory.episodic_buffer.main_buffer(user_input=user_input)

    async def _add_memories_buffer(self, user_input: str, namespace: str = None):
        # Store the observation in the episodic buffer under *namespace*.
        return await self.short_term_memory.episodic_buffer._add_memories(observation=user_input, namespace=namespace)

    async def _fetch_memories_buffer(self, user_input: str, namespace: str = None):
        # Retrieve matching observations from the episodic buffer.
        return await self.short_term_memory.episodic_buffer._fetch_memories(observation=user_input, namespace=namespace)
|
async def main():
    """Smoke-test driver: add a memory, then fetch it back."""
    memory = Memory(user_id="123")
    await memory.async_init()

    added = await memory._add_memories_buffer(user_input="bla bla bla", namespace="test")
    print(added)
    # fix: the original added under namespace "test" but fetched from "Test",
    # so the fetch could miss what was just written; use the same namespace.
    fetched = await memory._fetch_memories_buffer(user_input="bla bla bla", namespace="test")
    print(fetched)
||||||
|
if __name__ == "__main__":
    # Run the async smoke test on a fresh event loop.
    from asyncio import run

    run(main())
|
||||||
|
|
||||||
|
# bb = agent._update_semantic_memory(semantic_memory="Users core summary")
|
||||||
|
# bb = agent._fetch_semantic_memory(observation= "Users core summary", params = {
|
||||||
|
# "path": ["inserted_at"],
|
||||||
|
# "operator": "Equal",
|
||||||
|
# "valueText": "*2023*"
|
||||||
|
# })
|
||||||
|
# buffer = agent._run_buffer(user_input="I want to get a schema for my data")
|
||||||
|
# print(bb)
|
||||||
|
# rrr = {
|
||||||
|
# "path": ["year"],
|
||||||
|
# "operator": "Equal",
|
||||||
|
# "valueText": "2017*"
|
||||||
|
# }
|
||||||
|
|
||||||
BIN
level_2/personal_receipts/2017/de/public_transport/118NP8.pdf
Normal file
BIN
level_2/personal_receipts/2017/de/public_transport/118NP8.pdf
Normal file
Binary file not shown.
BIN
level_2/personal_receipts/2017/de/public_transport/3ZCCCW.pdf
Normal file
BIN
level_2/personal_receipts/2017/de/public_transport/3ZCCCW.pdf
Normal file
Binary file not shown.
BIN
level_2/personal_receipts/2017/de/public_transport/4GBEC9.pdf
Normal file
BIN
level_2/personal_receipts/2017/de/public_transport/4GBEC9.pdf
Normal file
Binary file not shown.
BIN
level_2/personal_receipts/2017/de/public_transport/96W2GF.pdf
Normal file
BIN
level_2/personal_receipts/2017/de/public_transport/96W2GF.pdf
Normal file
Binary file not shown.
3772
level_2/poetry.lock
generated
Normal file
3772
level_2/poetry.lock
generated
Normal file
File diff suppressed because it is too large
Load diff
48
level_2/pyproject.toml
Normal file
48
level_2/pyproject.toml
Normal file
|
|
@ -0,0 +1,48 @@
|
||||||
|
[tool.poetry]
|
||||||
|
name = "PromethAI_memory"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = "PromethAI memory manager"
|
||||||
|
authors = ["Vasilije Markovic"]
|
||||||
|
readme = "README.md"
|
||||||
|
|
||||||
|
[tool.poetry.dependencies]
|
||||||
|
python = "^3.10"
|
||||||
|
#langchain = {git = "https://github.com/topoteretes/langchain.git" , tag = "v0.0.209"}
|
||||||
|
langchain = "v0.0.250"
|
||||||
|
|
||||||
|
nltk = "3.8.1"
|
||||||
|
openai = "0.27.8"
|
||||||
|
pinecone-client = "2.2.2"
|
||||||
|
python-dotenv = "1.0.0"
|
||||||
|
pyyaml = "6.0"
|
||||||
|
fastapi = "0.98.0"
|
||||||
|
uvicorn = "0.22.0"
|
||||||
|
googlemaps = "4.10.0"
|
||||||
|
jinja2 = "3.1.2"
|
||||||
|
replicate = "^0.8.4"
|
||||||
|
pexpect = "^4.8.0"
|
||||||
|
selenium = "^4.9.0"
|
||||||
|
playwright = "^1.32.1"
|
||||||
|
pytest-playwright = "^0.3.3"
|
||||||
|
boto3 = "^1.26.125"
|
||||||
|
gptcache = "^0.1.22"
|
||||||
|
redis = "^4.5.5"
|
||||||
|
gunicorn = "^20.1.0"
|
||||||
|
tiktoken = "^0.4.0"
|
||||||
|
google-search-results = "^2.4.2"
|
||||||
|
spacy = "^3.5.3"
|
||||||
|
python-jose = "^3.3.0"
|
||||||
|
pypdf = "^3.12.0"
|
||||||
|
fastjsonschema = "^2.18.0"
|
||||||
|
marvin = "^1.3.0"
|
||||||
|
dlt = { version ="^0.3.8" , extras = ["duckdb"]}
|
||||||
|
weaviate-client = "^3.22.1"
|
||||||
|
python-multipart = "^0.0.6"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
[build-system]
|
||||||
|
requires = ["poetry-core"]
|
||||||
|
build-backend = "poetry.core.masonry.api"
|
||||||
180
level_2/schema_registry/ticket_schema.json
Normal file
180
level_2/schema_registry/ticket_schema.json
Normal file
|
|
@ -0,0 +1,180 @@
|
||||||
|
{
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"ticketType": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["online ticket", "ICE ticket"]
|
||||||
|
},
|
||||||
|
"departureDate": {
|
||||||
|
"type": "string",
|
||||||
|
"format": "date"
|
||||||
|
},
|
||||||
|
"priceType": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["Flex price (single journey)"]
|
||||||
|
},
|
||||||
|
"class": {
|
||||||
|
"type": "integer",
|
||||||
|
"enum": [1]
|
||||||
|
},
|
||||||
|
"adult": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"quantity": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"BC50": {
|
||||||
|
"type": "integer"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["quantity", "BC50"]
|
||||||
|
},
|
||||||
|
"journey": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"from": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"to": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"via": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"train": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["ICE"]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["from", "to", "via", "train"]
|
||||||
|
},
|
||||||
|
"refundPolicy": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"payment": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"items": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"name": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"quantity": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"price": {
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"vat19": {
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"vat7": {
|
||||||
|
"type": "number"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["name", "quantity", "price", "vat19", "vat7"]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"total": {
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"method": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["credit card"]
|
||||||
|
},
|
||||||
|
"transactionDetails": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"amount": {
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"VUNumber": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"transactionNumber": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"date": {
|
||||||
|
"type": "string",
|
||||||
|
"format": "date"
|
||||||
|
},
|
||||||
|
"genNumber": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["amount", "VUNumber", "transactionNumber", "date", "genNumber"]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["items", "total", "method", "transactionDetails"]
|
||||||
|
},
|
||||||
|
"bookingDetails": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"bookingDate": {
|
||||||
|
"type": "string",
|
||||||
|
"format": "date-time"
|
||||||
|
},
|
||||||
|
"bookingAddress": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"taxNumber": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["bookingDate", "bookingAddress", "taxNumber"]
|
||||||
|
},
|
||||||
|
"journeyDetails": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"validFrom": {
|
||||||
|
"type": "string",
|
||||||
|
"format": "date"
|
||||||
|
},
|
||||||
|
"passengerName": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"orderNumber": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"stops": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"stop": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"date": {
|
||||||
|
"type": "string",
|
||||||
|
"format": "date"
|
||||||
|
},
|
||||||
|
"time": {
|
||||||
|
"type": "string",
|
||||||
|
"format": "time"
|
||||||
|
},
|
||||||
|
"track": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"product": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"reservation": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["stop", "date", "time", "track", "product", "reservation"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["validFrom", "passengerName", "orderNumber", "stops"]
|
||||||
|
},
|
||||||
|
"usageNotes": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["ticketType", "departureDate", "priceType", "class", "adult", "journey", "refundPolicy", "payment", "bookingDetails", "journeyDetails", "usageNotes"]
|
||||||
|
}
|
||||||
Loading…
Add table
Reference in a new issue