Added fix

Vasilije 2023-08-23 19:59:12 +02:00
parent 020570f57f
commit 697c49a670
3 changed files with 90 additions and 48 deletions


@@ -13,6 +13,7 @@ import asyncio
from typing import Any, Dict, List, Coroutine
from deep_translator import (GoogleTranslator)
from langchain.chat_models import ChatOpenAI
+from langchain.output_parsers import PydanticOutputParser
from langchain.schema import LLMResult, HumanMessage
from langchain.callbacks.base import AsyncCallbackHandler, BaseCallbackHandler
from pydantic import BaseModel, Field, parse_obj_as
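For context, the GoogleTranslator import above backs the translate_to_en tool referenced later in this diff. A minimal sketch of the deep_translator call it presumably wraps (the tool body itself is not part of this hunk, and translate_to_english is a hypothetical name):

from deep_translator import GoogleTranslator

def translate_to_english(text: str) -> str:
    # source="auto" lets deep_translator detect the input language
    return GoogleTranslator(source="auto", target="en").translate(text)

print(translate_to_english("Wie geht es dir?"))  # expected: "How are you?"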
@@ -512,14 +513,6 @@ class EpisodicBuffer:
# self.vector_db = VectorDB(user_id=user_id, memory_id= self.memory_id, st_memory_id = self.st_memory_id, index_name=index_name, db_type=db_type, namespace=self.namespace)
-    def _compute_weights(self, context: str):
-        """Computes the weights for the buffer"""
-        pass
-
-    def _temporal_weighting(self, context: str):
-        """Computes the temporal weighting for the buffer"""
-        pass
# async def infer_schema_from_text(self, text: str):
# """Infer schema from text"""
#
@@ -739,7 +732,46 @@ class EpisodicBuffer:
# Extract the list of tasks
tasks_list = data["tasks"]
result_tasks = []
for task in tasks_list:
+class PromptWrapper(BaseModel):
+    observation: str = Field(
+        description="observation we want to fetch from vectordb"
+    )
+
+@tool("convert_to_structured", args_schema=PromptWrapper, return_direct=True)
+def convert_to_structured(observation=None, json_schema=None):
+    """Convert unstructured data to structured data"""
+    BASE_DIR = os.getcwd()
+    json_path = os.path.join(BASE_DIR, "schema_registry", "ticket_schema.json")
+
+    def load_json_or_infer_schema(file_path, document_path):
+        """Load JSON schema from file or infer schema from text"""
+        # Attempt to load the JSON file
+        with open(file_path, 'r') as file:
+            json_schema = json.load(file)
+        return json_schema
+
+    json_schema = load_json_or_infer_schema(json_path, None)
+
+    def run_open_ai_mapper(observation=None, json_schema=None):
+        """Convert unstructured data to structured data"""
+        prompt_msgs = [
+            SystemMessage(
+                content="You are a world class algorithm converting unstructured data into structured data."
+            ),
+            HumanMessage(content="Convert unstructured data to structured data:"),
+            HumanMessagePromptTemplate.from_template("{input}"),
+            HumanMessage(content="Tips: Make sure to answer in the correct format"),
+        ]
+        prompt_ = ChatPromptTemplate(messages=prompt_msgs)
+        chain_funct = create_structured_output_chain(json_schema, prompt=prompt_, llm=self.llm, verbose=True)
+        output = chain_funct.run(input=observation, llm=self.llm)
+        return output
+
+    result = run_open_ai_mapper(observation, json_schema)
+    return result
class TranslateText(BaseModel):
    observation: str = Field(
        description="observation we want to translate"
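Pulled out of the class for clarity, the structured-output pattern behind the new convert_to_structured tool looks roughly like the sketch below. This is not the repository's code: the inline ticket_schema dict is a hypothetical stand-in for schema_registry/ticket_schema.json, and llm stands for the ChatOpenAI instance the buffer holds as self.llm.

from langchain.chains.openai_functions import create_structured_output_chain
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.schema import HumanMessage, SystemMessage

# hypothetical stand-in for schema_registry/ticket_schema.json
ticket_schema = {
    "title": "Ticket",
    "type": "object",
    "properties": {
        "title": {"type": "string"},
        "priority": {"type": "string"},
    },
    "required": ["title"],
}

llm = ChatOpenAI(temperature=0)
prompt = ChatPromptTemplate(messages=[
    SystemMessage(content="You are a world class algorithm converting unstructured data into structured data."),
    HumanMessagePromptTemplate.from_template("{input}"),
    HumanMessage(content="Tips: Make sure to answer in the correct format"),
])
chain = create_structured_output_chain(ticket_schema, llm=llm, prompt=prompt, verbose=True)
print(chain.run(input="Customer cannot log in since Tuesday, fairly urgent."))  # dict shaped by ticket_schema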
@@ -753,7 +785,7 @@ class EpisodicBuffer:
agent = initialize_agent(
    llm=self.llm,
-    tools=[translate_to_en],
+    tools=[translate_to_en, convert_to_structured],
    agent=AgentType.OPENAI_FUNCTIONS,
    verbose=True,
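Condensed out of the diff, this hunk simply registers the new tool on the stock OPENAI_FUNCTIONS agent; the model then chooses translate_to_en or convert_to_structured per task string. A standalone sketch, assuming llm and the two @tool functions from above:

from langchain.agents import AgentType, initialize_agent

agent = initialize_agent(
    llm=llm,
    tools=[translate_to_en, convert_to_structured],
    agent=AgentType.OPENAI_FUNCTIONS,
    verbose=True,
)
print(agent.run(input="Translate 'Wie geht es dir?' to English"))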
@@ -761,25 +793,50 @@ class EpisodicBuffer:
print("HERE IS THE TASK", task)
output = agent.run(input=task)
print(output)
await self.encoding(output)
result_tasks.append(task)
result_tasks.append(output)
await self.encoding(str(result_tasks), self.namespace, params=params)
buffer_result = await self._fetch_memories(observation=str(output), namespace=self.namespace)
# format all task outputs as JSON via an OpenAI function call;
# function_call must name the function, and functions expects a list of schemas
prompt_filter = ChatPromptTemplate.from_template(
    "Format and collect all outputs from the tasks presented here {tasks} and their results {results}")
chain_filter_chunk = prompt_filter | self.llm.bind(function_call={"name": "TaskList"}, functions=[TaskList])
output = await chain_filter_chunk.ainvoke({"tasks": str(tasks_list), "results": str(result_tasks)})
print("HERE IS THE OUTPUT", output)
class EpisodicTask(BaseModel):
    """Schema for an individual task."""
    task_order: str = Field(..., description="The order at which the task needs to be performed")
    task_name: str = Field(None, description="The task that needs to be performed")
    operation: str = Field(None, description="The operation to be performed")

class EpisodicList(BaseModel):
    """Schema for the record containing a list of tasks."""
    tasks: List[EpisodicTask] = Field(..., description="List of tasks")
    start_date: str = Field(..., description="The start date of the episodic record")
    end_date: str = Field(..., description="The end date of the episodic record")
    user_query: str = Field(..., description="The original user query")
memory = Memory(user_id=self.user_id)
await memory.async_init()
parser = PydanticOutputParser(pydantic_object=EpisodicList)
lookup_value = await memory._add_episodic_memory(observation=str(output), params={})
prompt = PromptTemplate(
    template="Format the result.\n{format_instructions}\nOriginal query is: {query}\n Steps are: {steps}, buffer is: {buffer}",
    input_variables=["query", "steps", "buffer"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)
_input = prompt.format_prompt(query=user_input, steps=str(tasks_list), buffer=buffer_result)
return "a few things to do like load episodic memory in a structured format"
# output = self.llm(_input.to_string())
#
# parser.parse(output)
# memory = Memory(user_id=self.user_id)
# await memory.async_init()
#
# lookup_value = await memory._add_episodic_memory(observation=str(output), params=params)
# return lookup_value
# load to buffer once this is done
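The commented-out lines above hint at how this flow is meant to finish; spelled out, the PydanticOutputParser round trip would look roughly like this sketch, where llm, user_input, tasks_list and buffer_result come from the surrounding method:

parser = PydanticOutputParser(pydantic_object=EpisodicList)
prompt = PromptTemplate(
    template="Format the result.\n{format_instructions}\nOriginal query is: {query}\n Steps are: {steps}, buffer is: {buffer}",
    input_variables=["query", "steps", "buffer"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)
raw = llm.predict(prompt.format(query=user_input, steps=str(tasks_list), buffer=buffer_result))
episodic_list = parser.parse(raw)  # validated EpisodicList instance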
@@ -1163,8 +1220,8 @@ class Memory:
params=params
)
-async def _run_buffer(self, user_input: str, content: str = None):
-    return await self.short_term_memory.episodic_buffer.main_buffer(user_input=user_input, content=content)
+async def _run_buffer(self, user_input: str, content: str = None, params: dict = None):
+    return await self.short_term_memory.episodic_buffer.main_buffer(user_input=user_input, content=content, params=params)

async def _add_buffer_memory(self, user_input: str, namespace: str = None, params: dict = None):
    return await self.short_term_memory.episodic_buffer._add_memories(observation=user_input, namespace=namespace,
@@ -1197,7 +1254,7 @@ async def main():
"validity_end": "2024-07-31"
}
-gg = await memory._run_buffer(user_input="i NEED TRANSLATION TO GERMAN ", content="i NEED TRANSLATION TO GERMAN ")
+gg = await memory._run_buffer(user_input="i NEED TRANSLATION TO GERMAN ", content="i NEED TRANSLATION TO GERMAN ", params=params)
print(gg)
# gg = await memory._delete_buffer_memory()

level_2/poetry.lock (generated)

@@ -1362,39 +1362,38 @@ files = [
[[package]]
name = "langchain"
version = "0.0.250"
version = "0.0.271"
description = "Building applications with LLMs through composability"
optional = false
python-versions = ">=3.8.1,<4.0"
files = [
{file = "langchain-0.0.250-py3-none-any.whl", hash = "sha256:65b3520f507e848edd88a35a70700971bbbf822fda65f621ccf44a3bb36ad03a"},
{file = "langchain-0.0.250.tar.gz", hash = "sha256:1b5775d6a472f633bb06e794f58cb6ff5d1eeb2da603b64a6a15013f8f61ee3f"},
{file = "langchain-0.0.271-py3-none-any.whl", hash = "sha256:3ca68c9cf04edb42ce9225adc65ee739e5e00ed55d08aeb06a47391f3c59018c"},
{file = "langchain-0.0.271.tar.gz", hash = "sha256:f79d19405b755608216d1850de4a945a2bceb35c5ca8e4f7a4f9e29a366b097e"},
]
[package.dependencies]
aiohttp = ">=3.8.3,<4.0.0"
async-timeout = {version = ">=4.0.0,<5.0.0", markers = "python_version < \"3.11\""}
dataclasses-json = ">=0.5.7,<0.6.0"
langsmith = ">=0.0.11,<0.1.0"
langsmith = ">=0.0.21,<0.1.0"
numexpr = ">=2.8.4,<3.0.0"
numpy = ">=1,<2"
-openapi-schema-pydantic = ">=1.2,<2.0"
-pydantic = ">=1,<2"
-PyYAML = ">=5.4.1"
+pydantic = ">=1,<3"
+PyYAML = ">=5.3"
requests = ">=2,<3"
SQLAlchemy = ">=1.4,<3"
tenacity = ">=8.1.0,<9.0.0"
[package.extras]
all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "anthropic (>=0.3,<0.4)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.6.8,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jina (>=3.14,<4.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "libdeeplake (>=0.0.60,<0.0.61)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=0.11.0,<0.12.0)", "momento (>=1.5.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<3.0.0)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "octoai-sdk (>=0.1.1,<0.2.0)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "spacy (>=3,<4)", "steamship (>=2.16.9,<3.0.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.4.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)", "xinference (>=0.0.6,<0.0.7)"]
all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.6.8,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "libdeeplake (>=0.0.60,<0.0.61)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=1.2.4,<2.0.0)", "momento (>=1.5.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<3.0.0)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.4.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"]
azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0b6)", "openai (>=0,<1)"]
clarifai = ["clarifai (>=9.1.0)"]
cohere = ["cohere (>=4,<5)"]
docarray = ["docarray[hnswlib] (>=0.32.0,<0.33.0)"]
embeddings = ["sentence-transformers (>=2,<3)"]
extended-testing = ["atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.0.7,<0.0.8)", "chardet (>=5.1.0,<6.0.0)", "esprima (>=4.0.1,<5.0.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "openai (>=0,<1)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "tqdm (>=4.48.0)", "xinference (>=0.0.6,<0.0.7)", "zep-python (>=0.32)"]
extended-testing = ["amazon-textract-caller (<2)", "atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.0.7,<0.0.8)", "chardet (>=5.1.0,<6.0.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "openai (>=0,<1)", "openapi-schema-pydantic (>=1.2,<2.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "tqdm (>=4.48.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"]
javascript = ["esprima (>=4.0.1,<5.0.0)"]
llms = ["anthropic (>=0.3,<0.4)", "clarifai (>=9.1.0)", "cohere (>=4,<5)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (>=0,<1)", "openllm (>=0.1.19)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)", "xinference (>=0.0.6,<0.0.7)"]
llms = ["clarifai (>=9.1.0)", "cohere (>=4,<5)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)"]
openai = ["openai (>=0,<1)", "tiktoken (>=0.3.2,<0.4.0)"]
qdrant = ["qdrant-client (>=1.3.1,<2.0.0)"]
text-helpers = ["chardet (>=5.1.0,<6.0.0)"]
@@ -1875,20 +1874,6 @@ dev = ["black (>=21.6b0,<22.0)", "pytest (==6.*)", "pytest-asyncio", "pytest-moc
embeddings = ["matplotlib", "numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "plotly", "scikit-learn (>=1.0.2)", "scipy", "tenacity (>=8.0.1)"]
wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "wandb"]
-[[package]]
-name = "openapi-schema-pydantic"
-version = "1.2.4"
-description = "OpenAPI (v3) specification schema as pydantic class"
-optional = false
-python-versions = ">=3.6.1"
-files = [
-    {file = "openapi-schema-pydantic-1.2.4.tar.gz", hash = "sha256:3e22cf58b74a69f752cc7e5f1537f6e44164282db2700cbbcd3bb99ddd065196"},
-    {file = "openapi_schema_pydantic-1.2.4-py3-none-any.whl", hash = "sha256:a932ecc5dcbb308950282088956e94dea069c9823c84e507d64f6b622222098c"},
-]
-
-[package.dependencies]
-pydantic = ">=1.8.2"
[[package]]
name = "orjson"
version = "3.9.5"
@@ -3795,4 +3780,4 @@ multidict = ">=4.0"
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
content-hash = "788ca51ba313eac5f1dbcadfd7f91109b8b7a7734a1f16e10c8fb2e3b435a606"
content-hash = "5629225437c5aec01f9f862d46d6d1e68abde4c42a0c1ad709df875883171991"


@@ -8,7 +8,7 @@ readme = "README.md"
[tool.poetry.dependencies]
python = "^3.10"
#langchain = {git = "https://github.com/topoteretes/langchain.git" , tag = "v0.0.209"}
langchain = "v0.0.250"
langchain = "v0.0.271"
nltk = "3.8.1"
openai = "0.27.8"