Add endpoint for buffer

This commit is contained in:
Vasilije 2023-08-24 16:01:51 +02:00
parent b4baa9d1fe
commit deab564373
2 changed files with 20 additions and 288 deletions

View file

@ -249,6 +249,26 @@ async def available_buffer_actions(
return JSONResponse(content={"response": {"error": str(e)}}, status_code=503)
@app.post("/run-buffer", response_model=dict)
async def available_buffer_actions(
    payload: Payload,
    # files: List[UploadFile] = File(...),
):
    """Run the episodic-buffer pipeline for a user.

    Expects ``payload.payload`` to be a dict carrying ``user_id``,
    ``prompt`` and ``params``. Returns ``{"response": <buffer output>}``
    with status 200, or ``{"response": {"error": ...}}`` with status 503
    if anything in the pipeline raises.
    """
    # NOTE(review): this handler reuses the Python name of the handler
    # registered just above (see the surrounding hunk). FastAPI registers
    # both routes at decoration time, but the module-level name is
    # shadowed — consider renaming this function to `run_buffer`.
    try:
        decoded_payload = payload.payload
        # PEP 8: snake_case for locals (was `Memory_`).
        memory = Memory(user_id=decoded_payload['user_id'])
        await memory.async_init()
        # NOTE(review): `_run_buffer` is a private member of Memory and,
        # given `async_init` above, may itself be a coroutine — confirm
        # whether this call needs to be awaited.
        output = memory._run_buffer(
            user_input=decoded_payload['prompt'],
            params=decoded_payload['params'],
        )
        return JSONResponse(content={"response": output}, status_code=200)
    except Exception as e:
        # Best-effort service boundary: surface any failure to the client
        # as a 503 payload rather than an unhandled server error.
        return JSONResponse(content={"response": {"error": str(e)}}, status_code=503)
#
# # Process each uploaded PDF file

View file

@ -900,294 +900,6 @@ class EpisodicBuffer:
# for files where user input is provided and they are directly processed
#
# based on the semantic search , raw memory data will be fetched. also, episodic memory will be fetched
# they will be written down as a "context" for the user
# i get scores for the episodic memory and the semantic memory
# i get only the data with the highest score
# i use that data to form the context
# file_upload
#
# for files where user input is provided and they are directly processed
#
# #
#
# if content is not None:
#
# # operations -> translate, structure, load to db
#
# list_of_operations = ["translate", "structure", "load to db"]
#
# prompt_filter = ChatPromptTemplate.from_template(
#         "Filter and remove unnecessary information that is not relevant in the user query {query}")
# chain_filter = prompt_filter | self.llm
# output = await chain_filter.ainvoke({"query": user_input})
#
# class Task(BaseModel):
# """Schema for an individual task."""
# task_order: str = Field(..., description="The order at which the task needs to be performed")
# task_name: str = Field(None, description="The task that needs to be performed")
# operation: str = Field(None, description="The operation to be performed")
#
# class TaskList(BaseModel):
# """Schema for the record containing a list of tasks."""
# tasks: List[Task] = Field(..., description="List of tasks")
#
# prompt_filter_chunk = f" Based on available operations {list_of_operations} determine only the relevant list of steps and operations sequentially based {output}"
# # chain_filter_chunk = prompt_filter_chunk | self.llm.bind(function_call={"TaskList": "tasks"}, functions=TaskList)
# # output_chunk = await chain_filter_chunk.ainvoke({"query": output, "list_of_operations": list_of_operations})
# prompt_msgs = [
# SystemMessage(
# content="You are a world class algorithm for decomposing prompts into steps and operations and choosing relevant ones"
# ),
# HumanMessage(content="Decompose based on the following prompt:"),
# HumanMessagePromptTemplate.from_template("{input}"),
# HumanMessage(content="Tips: Make sure to answer in the correct format"),
# HumanMessage(content="Tips: Only choose actions that are relevant to the user query and ignore others")
#
# ]
# prompt_ = ChatPromptTemplate(messages=prompt_msgs)
# chain = create_structured_output_chain(TaskList, self.llm, prompt_, verbose=True)
# from langchain.callbacks import get_openai_callback
# with get_openai_callback() as cb:
# output = await chain.arun(input=prompt_filter_chunk, verbose=True)
# print(cb)
# # output = json.dumps(output)
# my_object = parse_obj_as(TaskList, output)
# print("HERE IS THE OUTPUT", my_object.json())
#
# data = json.loads(my_object.json())
#
# # Extract the list of tasks
# tasks_list = data["tasks"]
#
# for task in tasks_list:
# class TranslateText(BaseModel):
# observation: str = Field(
# description="observation we want to translate"
# )
#
# @tool("translate_to_en", args_schema=TranslateText, return_direct=True)
# def translate_to_en(observation, args_schema=TranslateText):
# """Translate to English"""
# out = GoogleTranslator(source='auto', target='en').translate(text=observation)
# return out
#
# agent = initialize_agent(
# llm=self.llm,
# tools=[translate_to_en],
# agent=AgentType.OPENAI_FUNCTIONS,
#
# verbose=True,
# )
#
# agent.run(task)
#
# # We need to encode the content. Note, this is not comp-sci encoding, but rather encoding in the sense of storing the content in the buffer
# # output_translated = GoogleTranslator(source='auto', target='en').translate(text=content)
# # await self.encoding(output_translated)
# # freshness_score =await self.freshness(output_translated, namespace="EPISODICBUFFER")
# # print(freshness_score)
# # shows how much the data is relevant for the user, provided by the user in a separate step, starts at 0
# user_relevance_score = "0"
# # similarity score between the user input and the content already available in the buffer
#
# # write this to episodic memory
#
#             # prompt_filter = ChatPromptTemplate.from_template("Filter and remove unnecessary information that is not relevant in the user query {query}")
# # chain_filter = prompt_filter | self.llm
# # output = await chain_filter.ainvoke({"query": user_input})
#
# # print(output)
#
# if content is None:
# # Sensory and Linguistic Processing
# prompt_filter = ChatPromptTemplate.from_template(
#                 "Filter and remove unnecessary information that is not relevant in the user query {query}")
# chain_filter = prompt_filter | self.llm
# output = await chain_filter.ainvoke({"query": user_input})
# translation = GoogleTranslator(source='auto', target='en').translate(text=output.content)
#
# def top_down_processing():
# """Top-down processing"""
# pass
#
# def bottom_up_processing():
# """Bottom-up processing"""
# pass
#
# def interactive_processing():
# """interactive processing"""
# pass
#
# working_memory_activation = "bla"
#
# prompt_chunk = ChatPromptTemplate.from_template(
# "Can you break down the instruction 'Structure a PDF and load it into duckdb' into smaller tasks or actions? Return only tasks or actions. Be brief")
# chain_chunk = prompt_chunk | self.llm
# output_chunks = await chain_chunk.ainvoke({"query": output.content})
#
# print(output_chunks.content)
# vectorstore = Weaviate.from_documents(documents, embeddings, client=client, by_text=False)
# retriever = WeaviateHybridSearchRetriever(
# client=client,
# index_name="EVENTBUFFER",
# text_key="text",
# attributes=[],
# embedding=embeddings,
# create_schema_if_missing=True,
# )
# vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id,
# index_name=self.index_name, db_type=self.db_type, namespace="EVENTBUFFER")
# query = vector_db.
# retriever = vectorstore.as_retriever(search_kwargs=dict(k=1))
# memory = VectorStoreRetrieverMemory(retriever=retriever)
# class PromptWrapper(BaseModel):
# observation: str = Field(
# description="observation we want to fetch from vectordb"
# )
# # ,
# # json_schema: str = Field(description="json schema we want to infer")
# @tool("convert_to_structured", args_schema=PromptWrapper, return_direct=True)
# def convert_to_structured( observation=None, json_schema=None):
# """Convert unstructured data to structured data"""
# BASE_DIR = os.getcwd()
# json_path = os.path.join(BASE_DIR, "schema_registry", "ticket_schema.json")
#
# def load_json_or_infer_schema(file_path, document_path):
# """Load JSON schema from file or infer schema from text"""
#
# # Attempt to load the JSON file
# with open(file_path, 'r') as file:
# json_schema = json.load(file)
# return json_schema
#
# json_schema =load_json_or_infer_schema(json_path, None)
# def run_open_ai_mapper(observation=None, json_schema=None):
# """Convert unstructured data to structured data"""
#
# prompt_msgs = [
# SystemMessage(
# content="You are a world class algorithm converting unstructured data into structured data."
# ),
# HumanMessage(content="Convert unstructured data to structured data:"),
# HumanMessagePromptTemplate.from_template("{input}"),
# HumanMessage(content="Tips: Make sure to answer in the correct format"),
# ]
# prompt_ = ChatPromptTemplate(messages=prompt_msgs)
# chain_funct = create_structured_output_chain(json_schema, prompt=prompt_, llm=self.llm, verbose=True)
# output = chain_funct.run(input=observation, llm=self.llm)
# yield output
# pipeline = dlt.pipeline(pipeline_name="train_ticket", destination='duckdb', dataset_name='train_ticket_data')
# info = pipeline.run(data=run_open_ai_mapper(prompt, json_schema))
# return print(info)
#
#
# class GoalWrapper(BaseModel):
# observation: str = Field(
# description="observation we want to fetch from vectordb"
# )
#
# @tool("fetch_memory_wrapper", args_schema=GoalWrapper, return_direct=True)
# def fetch_memory_wrapper(observation, args_schema=GoalWrapper):
# """Fetches data from the VectorDB and returns it as a python dictionary."""
# print("HELLO, HERE IS THE OBSERVATION: ", observation)
#
# marvin.settings.openai.api_key = os.environ.get('OPENAI_API_KEY')
# @ai_classifier
# class MemoryRoute(Enum):
# """Represents distinct routes for different memory types."""
#
# storage_of_documents_and_knowledge_to_memory = "SEMANTICMEMORY"
# raw_information_currently_processed_in_short_term_memory = "EPISODICBUFFER"
# raw_information_kept_in_short_term_memory = "SHORTTERMMEMORY"
# long_term_recollections_of_past_events_and_emotions = "EPISODICMEMORY"
# raw_information_to_store_as_events = "EVENTBUFFER"
#
# namespace= MemoryRoute(observation)
# vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id,
# index_name=self.index_name, db_type=self.db_type, namespace=namespace.value)
#
#
# query = vector_db.fetch_memories(observation)
#
# return query
#
# class UpdatePreferences(BaseModel):
# observation: str = Field(
# description="observation we want to fetch from vectordb"
# )
#
# @tool("add_memories_wrapper", args_schema=UpdatePreferences, return_direct=True)
# def add_memories_wrapper(observation, args_schema=UpdatePreferences):
# """Updates user preferences in the VectorDB."""
# @ai_classifier
# class MemoryRoute(Enum):
# """Represents distinct routes for different memory types."""
#
# storage_of_documents_and_knowledge_to_memory = "SEMANTICMEMORY"
# raw_information_currently_processed_in_short_term_memory = "EPISODICBUFFER"
# raw_information_kept_in_short_term_memory = "SHORTTERMMEMORY"
# long_term_recollections_of_past_events_and_emotions = "EPISODICMEMORY"
# raw_information_to_store_as_events = "EVENTBUFFER"
#
# namespace= MemoryRoute(observation)
# print("HELLO, HERE IS THE OBSERVATION 2: ")
# vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id,
# index_name=self.index_name, db_type=self.db_type, namespace=namespace.value)
# return vector_db.add_memories(observation)
#
# agent = initialize_agent(
# llm=self.llm,
# tools=[convert_to_structured,fetch_memory_wrapper, add_memories_wrapper],
# agent=AgentType.OPENAI_FUNCTIONS,
#
# verbose=True,
# )
#
# prompt = """
# Based on all the history and information of this user, decide based on user query query: {query} which of the following tasks needs to be done:
# 1. Memory retrieval , 2. Memory update, 3. Convert data to structured If the query is not any of these, then classify it as 'Other'
# Return the result in format: 'Result_type': 'Goal', "Original_query": "Original query"
# """
#
# # template = Template(prompt)
# # output = template.render(query=user_input)
# # complete_query = output
# complete_query = PromptTemplate(
# input_variables=[ "query"], template=prompt
# )
# summary_chain = LLMChain(
# llm=self.llm, prompt=complete_query, verbose=True
# )
# from langchain.chains import SimpleSequentialChain
#
# overall_chain = SimpleSequentialChain(
# chains=[summary_chain, agent], verbose=True
# )
# output = overall_chain.run(user_input)
# return output
# DEFINE STM
# DEFINE LTM
class Memory:
load_dotenv()
OPENAI_TEMPERATURE = float(os.getenv("OPENAI_TEMPERATURE", 0.0))