diff --git a/level_2/api.py b/level_2/api.py index 8ddc33686..ab89ac217 100644 --- a/level_2/api.py +++ b/level_2/api.py @@ -216,6 +216,35 @@ async def create_context( except Exception as e: return JSONResponse(content={"response": {"error": str(e)}}, status_code=503) + +@app.post("/buffer/provide-feedback", response_model=dict) +async def provide_feedback( + payload: Payload, + # files: List[UploadFile] = File(...), +): + try: + decoded_payload = payload.payload + + Memory_ = Memory(user_id=decoded_payload["user_id"]) + + await Memory_.async_init() + + # memory_class = getattr(Memory_, f"_delete_{memory_type}_memory", None) + if decoded_payload["total_score"] is None: + + output = await Memory_._provide_feedback( + user_input=decoded_payload["prompt"], params=decoded_payload["params"], attention_modulators=None, total_score=decoded_payload["total_score"] + ) + return JSONResponse(content={"response": output}, status_code=200) + else: + output = await Memory_._provide_feedback( + user_input=decoded_payload["prompt"], params=decoded_payload["params"], attention_modulators=decoded_payload["attention_modulators"], total_score=None + ) + return JSONResponse(content={"response": output}, status_code=200) + + + except Exception as e: + return JSONResponse(content={"response": {"error": str(e)}}, status_code=503) def start_api_server(host: str = "0.0.0.0", port: int = 8000): """ Start the API server using uvicorn. diff --git a/level_2/auth/auth.py b/level_2/auth/auth.py new file mode 100644 index 000000000..7c8a19005 --- /dev/null +++ b/level_2/auth/auth.py @@ -0,0 +1,35 @@ +import os + +import requests +from dotenv import load_dotenv +from fastapi import Depends, HTTPException +from starlette.status import HTTP_403_FORBIDDEN + +from auth.cognito.JWTBearer import JWKS, JWTBearer, JWTAuthorizationCredentials + +load_dotenv() # Automatically load environment variables from a '.env' file. + +# jwks = JWKS.parse_obj( +# requests.get( +# f"https://cognito-idp.{os.environ.get('eu-west-1:46372257029')}.amazonaws.com/" +# f"{os.environ.get('eu-west-1_3VUqKzMgj')}/.well-known/jwks.json" +# ).json() +# ) +# Construct the Cognito User Pool URL using the correct syntax +region = "eu-west-1" +user_pool_id = "eu-west-1_viUyNCqKp" +cognito_url = f"https://cognito-idp.{region}.amazonaws.com/{user_pool_id}/.well-known/jwks.json" + +# Fetch the JWKS using the updated URL +jwks = JWKS.parse_obj(requests.get(cognito_url).json()) + +auth = JWTBearer(jwks) + + +async def get_current_user( + credentials: JWTAuthorizationCredentials = Depends(auth) +) -> str: + try: + return credentials.claims["username"] + except KeyError: + HTTPException(status_code=HTTP_403_FORBIDDEN, detail="Username missing") diff --git a/level_2/auth/auth_utils.py b/level_2/auth/auth_utils.py new file mode 100644 index 000000000..41a1b09c6 --- /dev/null +++ b/level_2/auth/auth_utils.py @@ -0,0 +1,62 @@ + +from cognito.JWTBearer import JWKS, JWTBearer, JWTAuthorizationCredentials + +import requests + +region = "eu-west-1" +user_pool_id = "" #needed +cognito_url = f"https://cognito-idp.{region}.amazonaws.com/{user_pool_id}/.well-known/jwks.json" + +# Fetch the JWKS using the updated URL +jwks = JWKS.parse_obj(requests.get(cognito_url).json()) +print(jwks) + +auth = JWTBearer(jwks) + + +import requests + +# Set the Cognito authentication endpoint URL + +auth = JWTBearer(jwks) + +# Set the user credentials + +username = "" #needed +password = "" #needed + +# Create the authentication payload +payload = { + "username": username, + "password": password +} + +# Set the Cognito authentication endpoint URL +# Set the Cognito token endpoint URL +token_endpoint = f"https://your-cognito-domain.auth.{region}.amazoncognito.com/oauth2/token" + +# Set the client credentials +client_id = "" #needed +client_secret = "" + +import boto3 +def authenticate_and_get_token(username: str, password: str, + user_pool_id: str, app_client_id: str) -> None: + client = boto3.client('cognito-idp') + + resp = client.admin_initiate_auth( + UserPoolId=user_pool_id, + ClientId=app_client_id, + AuthFlow='ADMIN_NO_SRP_AUTH', + AuthParameters={ + "USERNAME": username, + "PASSWORD": password + } + ) + + print("Log in success") + print("Access token:", resp['AuthenticationResult']['AccessToken']) + print("ID token:", resp['AuthenticationResult']['IdToken']) + + +authenticate_and_get_token(username, password, user_pool_id, client_id) \ No newline at end of file diff --git a/level_2/auth/cognito/JWTBearer.py b/level_2/auth/cognito/JWTBearer.py new file mode 100644 index 000000000..aa5f6a81d --- /dev/null +++ b/level_2/auth/cognito/JWTBearer.py @@ -0,0 +1,72 @@ +from typing import Dict, Optional, List + +from fastapi import HTTPException +from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials +from jose import jwt, jwk, JWTError +from jose.utils import base64url_decode +from pydantic import BaseModel +from starlette.requests import Request +from starlette.status import HTTP_403_FORBIDDEN + +JWK = Dict[str, str] + + +class JWKS(BaseModel): + keys: List[JWK] + + +class JWTAuthorizationCredentials(BaseModel): + jwt_token: str + header: Dict[str, str] + claims: Dict[str, str] + signature: str + message: str + + +class JWTBearer(HTTPBearer): + def __init__(self, jwks: JWKS, auto_error: bool = True): + super().__init__(auto_error=auto_error) + + self.kid_to_jwk = {jwk["kid"]: jwk for jwk in jwks.keys} + + def verify_jwk_token(self, jwt_credentials: JWTAuthorizationCredentials) -> bool: + try: + public_key = self.kid_to_jwk[jwt_credentials.header["kid"]] + except KeyError: + raise HTTPException( + status_code=HTTP_403_FORBIDDEN, detail="JWK public key not found" + ) + + key = jwk.construct(public_key) + decoded_signature = base64url_decode(jwt_credentials.signature.encode()) + + return key.verify(jwt_credentials.message.encode(), decoded_signature) + + async def __call__(self, request: Request) -> Optional[JWTAuthorizationCredentials]: + credentials: HTTPAuthorizationCredentials = await super().__call__(request) + + if credentials: + if not credentials.scheme == "Bearer": + raise HTTPException( + status_code=HTTP_403_FORBIDDEN, detail="Wrong authentication method" + ) + + jwt_token = credentials.credentials + + message, signature = jwt_token.rsplit(".", 1) + + try: + jwt_credentials = JWTAuthorizationCredentials( + jwt_token=jwt_token, + header=jwt.get_unverified_header(jwt_token), + claims=jwt.get_unverified_claims(jwt_token), + signature=signature, + message=message, + ) + except JWTError: + raise HTTPException(status_code=HTTP_403_FORBIDDEN, detail="JWK invalid") + + if not self.verify_jwk_token(jwt_credentials): + raise HTTPException(status_code=HTTP_403_FORBIDDEN, detail="JWK invalid") + + return jwt_credentials \ No newline at end of file diff --git a/level_2/level_2_pdf_vectorstore__dlt_contracts.py b/level_2/level_2_pdf_vectorstore__dlt_contracts.py index 42892c2bb..9799d651f 100644 --- a/level_2/level_2_pdf_vectorstore__dlt_contracts.py +++ b/level_2/level_2_pdf_vectorstore__dlt_contracts.py @@ -2,8 +2,10 @@ import json from enum import Enum from io import BytesIO -from typing import Dict, List, Union +from typing import Dict, List, Union, Any +import logging +logging.basicConfig(level=logging.INFO) import marvin import requests from deep_translator import GoogleTranslator @@ -15,11 +17,12 @@ from langchain.retrievers import WeaviateHybridSearchRetriever from langchain.tools import tool from marvin import ai_classifier from pydantic import parse_obj_as - +from weaviate.gql.get import HybridFusion +import numpy as np load_dotenv() from langchain import OpenAI from langchain.chat_models import ChatOpenAI -from typing import Optional +from typing import Optional, Dict, List, Union import tracemalloc @@ -71,407 +74,10 @@ marvin.settings.openai.api_key = os.environ.get("OPENAI_API_KEY") # Assuming OpenAIEmbeddings and other necessary imports are available -# Default Values -LTM_MEMORY_ID_DEFAULT = "00000" -ST_MEMORY_ID_DEFAULT = "0000" -BUFFER_ID_DEFAULT = "0000" +from vectordb.basevectordb import BaseMemory -class VectorDBFactory: - def create_vector_db( - self, - user_id: str, - index_name: str, - memory_id: str, - ltm_memory_id: str = LTM_MEMORY_ID_DEFAULT, - st_memory_id: str = ST_MEMORY_ID_DEFAULT, - buffer_id: str = BUFFER_ID_DEFAULT, - db_type: str = "pinecone", - namespace: str = None, - ): - db_map = {"pinecone": PineconeVectorDB, "weaviate": WeaviateVectorDB} - - if db_type in db_map: - return db_map[db_type]( - user_id, - index_name, - memory_id, - ltm_memory_id, - st_memory_id, - buffer_id, - namespace, - ) - - raise ValueError(f"Unsupported database type: {db_type}") - - -class VectorDB: - OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") - - def __init__( - self, - user_id: str, - index_name: str, - memory_id: str, - ltm_memory_id: str = LTM_MEMORY_ID_DEFAULT, - st_memory_id: str = ST_MEMORY_ID_DEFAULT, - buffer_id: str = BUFFER_ID_DEFAULT, - namespace: str = None, - ): - self.user_id = user_id - self.index_name = index_name - self.namespace = namespace - self.memory_id = memory_id - self.ltm_memory_id = ltm_memory_id - self.st_memory_id = st_memory_id - self.buffer_id = buffer_id - - -class PineconeVectorDB(VectorDB): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.init_pinecone(self.index_name) - - def init_pinecone(self, index_name): - # Pinecone initialization logic - pass - - -class WeaviateVectorDB(VectorDB): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.init_weaviate(self.namespace) - - def init_weaviate(self, namespace: str): - # Weaviate initialization logic - embeddings = OpenAIEmbeddings() - auth_config = weaviate.auth.AuthApiKey( - api_key=os.environ.get("WEAVIATE_API_KEY") - ) - client = weaviate.Client( - url=os.environ.get("WEAVIATE_URL"), - auth_client_secret=auth_config, - additional_headers={"X-OpenAI-Api-Key": os.environ.get("OPENAI_API_KEY")}, - ) - retriever = WeaviateHybridSearchRetriever( - client=client, - index_name=namespace, - text_key="text", - attributes=[], - embedding=embeddings, - create_schema_if_missing=True, - ) - return retriever # If this is part of the initialization, call it here. - - def init_weaviate_client(self, namespace: str): - # Weaviate client initialization logic - auth_config = weaviate.auth.AuthApiKey( - api_key=os.environ.get("WEAVIATE_API_KEY") - ) - client = weaviate.Client( - url=os.environ.get("WEAVIATE_URL"), - auth_client_secret=auth_config, - additional_headers={"X-OpenAI-Api-Key": os.environ.get("OPENAI_API_KEY")}, - ) - return client - - def _document_loader(self, observation: str, loader_settings: dict): - # Create an in-memory file-like object for the PDF content - - if loader_settings.get("format") == "PDF": - - if loader_settings.get("source") == "url": - pdf_response = requests.get(loader_settings["path"]) - pdf_stream = BytesIO(pdf_response.content) - contents = pdf_stream.read() - tmp_location = os.path.join("/tmp", "tmp.pdf") - with open(tmp_location, "wb") as tmp_file: - tmp_file.write(contents) - - # Process the PDF using PyPDFLoader - loader = PyPDFLoader(tmp_location) - # adapt this for different chunking strategies - pages = loader.load_and_split() - return pages - - if loader_settings.get("source") == "file": - # Process the PDF using PyPDFLoader - # might need adapting for different loaders + OCR - # need to test the path - loader = PyPDFLoader(loader_settings["path"]) - pages = loader.load_and_split() - - return pages - else: - # Process the text by just loading the base text - return observation - - - async def add_memories( - self, observation: str, loader_settings: dict = None, params: dict = None ,namespace:str=None - ): - # Update Weaviate memories here - print(self.namespace) - if namespace is None: - namespace = self.namespace - retriever = self.init_weaviate(namespace) - - def _stuct(observation, params): - """Utility function to not repeat metadata structure""" - # needs smarter solution, like dynamic generation of metadata - return [ - Document( - metadata={ - # "text": observation, - "user_id": str(self.user_id), - "memory_id": str(self.memory_id), - "ltm_memory_id": str(self.ltm_memory_id), - "st_memory_id": str(self.st_memory_id), - "buffer_id": str(self.buffer_id), - "version": params.get("version", None) or "", - "agreement_id": params.get("agreement_id", None) or "", - "privacy_policy": params.get("privacy_policy", None) or "", - "terms_of_service": params.get("terms_of_service", None) or "", - "format": params.get("format", None) or "", - "schema_version": params.get("schema_version", None) or "", - "checksum": params.get("checksum", None) or "", - "owner": params.get("owner", None) or "", - "license": params.get("license", None) or "", - "validity_start": params.get("validity_start", None) or "", - "validity_end": params.get("validity_end", None) or "" - # **source_metadata, - }, - page_content=observation, - ) - ] - - if loader_settings: - # Load the document - document = self._document_loader(observation, loader_settings) - print("DOC LENGTH", len(document)) - for doc in document: - document_to_load = _stuct(doc.page_content, params) - retriever.add_documents( - document_to_load - ) - - return retriever.add_documents( - _stuct(observation, params) - ) - - async def fetch_memories( - self, observation: str, namespace: str, params: dict = None - ): - # Fetch Weaviate memories here - """ - Get documents from weaviate. - - Args a json containing: - query (str): The query string. - path (list): The path for filtering, e.g., ['year']. - operator (str): The operator for filtering, e.g., 'Equal'. - valueText (str): The value for filtering, e.g., '2017*'. - - Example: - get_from_weaviate(query="some query", path=['year'], operator='Equal', valueText='2017*') - """ - client = self.init_weaviate_client(self.namespace) - - print(self.namespace) - print(str(datetime.now())) - print(observation) - if namespace is None: - namespace = self.namespace - - params_user_id = { - "path": ["user_id"], - "operator": "Like", - "valueText": self.user_id, - } - - if params: - query_output = ( - client.query.get( - namespace, - [ - # "text", - "user_id", - "memory_id", - "ltm_memory_id", - "st_memory_id", - "buffer_id", - "version", - "agreement_id", - "privacy_policy", - "terms_of_service", - "format", - "schema_version", - "checksum", - "owner", - "license", - "validity_start", - "validity_end", - ], - ) - .with_where(params) - .with_near_text({"concepts": [observation]}) - .with_additional( - ["id", "creationTimeUnix", "lastUpdateTimeUnix", "score",'distance'] - ) - .with_where(params_user_id) - .do() - ) - return query_output - else: - query_output = ( - client.query.get( - namespace, - - [ - "text", - "user_id", - "memory_id", - "ltm_memory_id", - "st_memory_id", - "buffer_id", - "version", - "agreement_id", - "privacy_policy", - "terms_of_service", - "format", - "schema_version", - "checksum", - "owner", - "license", - "validity_start", - "validity_end", - ], - ) - .with_additional( - ["id", "creationTimeUnix", "lastUpdateTimeUnix", "score", 'distance'] - ) - .with_hybrid( - query=observation, - ) - .with_autocut(1) - .with_where(params_user_id) - .do() - ) - return query_output - - async def delete_memories(self, params: dict = None): - client = self.init_weaviate_client(self.namespace) - if params: - where_filter = { - "path": ["id"], - "operator": "Equal", - "valueText": params.get("id", None), - } - return client.batch.delete_objects( - class_name=self.namespace, - # Same `where` filter as in the GraphQL API - where=where_filter, - ) - else: - # Delete all objects - print("HERE IS THE USER ID", self.user_id) - return client.batch.delete_objects( - class_name=self.namespace, - where={ - "path": ["user_id"], - "operator": "Equal", - "valueText": self.user_id, - }, - ) - - def update_memories(self, observation, namespace: str, params: dict = None): - client = self.init_weaviate_client(self.namespace) - - client.data_object.update( - data_object={ - # "text": observation, - "user_id": str(self.user_id), - "memory_id": str(self.memory_id), - "ltm_memory_id": str(self.ltm_memory_id), - "st_memory_id": str(self.st_memory_id), - "buffer_id": str(self.buffer_id), - "version": params.get("version", None) or "", - "agreement_id": params.get("agreement_id", None) or "", - "privacy_policy": params.get("privacy_policy", None) or "", - "terms_of_service": params.get("terms_of_service", None) or "", - "format": params.get("format", None) or "", - "schema_version": params.get("schema_version", None) or "", - "checksum": params.get("checksum", None) or "", - "owner": params.get("owner", None) or "", - "license": params.get("license", None) or "", - "validity_start": params.get("validity_start", None) or "", - "validity_end": params.get("validity_end", None) or "" - # **source_metadata, - }, - class_name="Test", - uuid=params.get("id", None), - consistency_level=weaviate.data.replication.ConsistencyLevel.ALL, # default QUORUM - ) - return - - -class BaseMemory: - def __init__( - self, - user_id: str, - memory_id: Optional[str], - index_name: Optional[str], - db_type: str, - namespace: str, - ): - self.user_id = user_id - self.memory_id = memory_id - self.index_name = index_name - self.namespace = namespace - self.memory_type_id = str(uuid.uuid4()) - self.db_type = db_type - factory = VectorDBFactory() - self.vector_db = factory.create_vector_db( - self.user_id, - self.index_name, - self.memory_id, - db_type=self.db_type, - namespace=self.namespace, - ) - - def init_client(self, namespace: str): - if self.db_type == "weaviate": - return self.vector_db.init_weaviate_client(namespace) - - async def add_memories( - self, - observation: Optional[str] = None, - loader_settings: dict = None, - params: Optional[dict] = None, - namespace: Optional[str] = None, - ): - if self.db_type == "weaviate": - return await self.vector_db.add_memories( - observation=observation, loader_settings=loader_settings, params=params, namespace=namespace - ) - # Add other db_type conditions if necessary - - async def fetch_memories( - self, - observation: str, - params: Optional[str] = None, - namespace: Optional[str] = None, - ): - if self.db_type == "weaviate": - return await self.vector_db.fetch_memories( - observation=observation, params=params, namespace=namespace - ) - - async def delete_memories(self, params: Optional[str] = None): - if self.db_type == "weaviate": - return await self.vector_db.delete_memories(params) - - # Additional methods for specific Memory can be added here +from modulators.modulators import DifferentiableLayer class SemanticMemory(BaseMemory): @@ -483,8 +89,7 @@ class SemanticMemory(BaseMemory): db_type: str = "weaviate", ): super().__init__( - user_id, memory_id, index_name, db_type, namespace="SEMANTICMEMORY" - ) + user_id, memory_id, index_name, db_type, namespace="SEMANTICMEMORY") class EpisodicMemory(BaseMemory): @@ -512,7 +117,7 @@ class EpisodicBuffer(BaseMemory): user_id, memory_id, index_name, db_type, namespace="BUFFERMEMORY" ) - self.st_memory_id = "blah" + self.st_memory_id = str( uuid.uuid4()) self.llm = ChatOpenAI( temperature=0.0, max_tokens=1200, @@ -527,6 +132,34 @@ class EpisodicBuffer(BaseMemory): model_name="gpt-4-0613", ) + async def _summarizer(self, text: str, document:str, max_tokens: int = 1200): + """Summarize text using OpenAI API, to reduce amount of code for modulators contributing to context""" + class Summaries(BaseModel): + """Schema for documentGroups""" + summary: str = Field( + ..., + description="Summarized document") + class SummaryContextList(BaseModel): + """Buffer raw context processed by the buffer""" + + summaries: List[Summaries] = Field(..., description="List of summaries") + observation: str = Field(..., description="The original user query") + + parser = PydanticOutputParser(pydantic_object=SummaryContextList) + prompt = PromptTemplate( + template=" \n{format_instructions}\nSummarize the observation briefly based on the user query, observation is: {query}\n. The document is: {document}", + input_variables=["query", "document"], + partial_variables={"format_instructions": parser.get_format_instructions()}, + ) + + _input = prompt.format_prompt(query=text, document=document) + document_context_result = self.llm_base(_input.to_string()) + document_context_result_parsed = parser.parse(document_context_result) + document_context_result_parsed = json.loads(document_context_result_parsed.json()) + document_summary = document_context_result_parsed["summaries"][0]["summary"] + + return document_summary + async def memory_route(self, text_time_diff: str): @ai_classifier class MemoryRoute(Enum): @@ -543,8 +176,9 @@ class EpisodicBuffer(BaseMemory): return namespace - async def freshness(self, observation: str, namespace: str = None) -> list[str]: + async def freshness(self, observation: str, namespace: str = None, memory=None) -> list[str]: """Freshness - Score between 0 and 1 on how often was the information updated in episodic or semantic memory in the past""" + logging.info("Starting with Freshness") lookup_value = await self.fetch_memories( observation=observation, namespace=namespace @@ -557,13 +191,14 @@ class EpisodicBuffer(BaseMemory): last_update_datetime = datetime.fromtimestamp(int(unix_t) / 1000) time_difference = datetime.now() - last_update_datetime time_difference_text = humanize.naturaltime(time_difference) - namespace = await self.memory_route(str(time_difference_text)) - return [namespace.value, lookup_value] + namespace_ = await self.memory_route(str(time_difference_text)) + return [namespace_.value, lookup_value] - async def frequency(self, observation: str, namespace: str) -> list[str]: + async def frequency(self, observation: str, namespace: str, memory) -> list[str]: """Frequency - Score between 0 and 1 on how often was the information processed in episodic memory in the past Counts the number of times a memory was accessed in the past and divides it by the total number of memories in the episodic memory """ + logging.info("Starting with Frequency") weaviate_client = self.init_client(namespace=namespace) result_output = await self.fetch_memories( @@ -578,23 +213,62 @@ class EpisodicBuffer(BaseMemory): "count" ] ) - return [str(frequency), result_output["data"]["Get"]["EPISODICMEMORY"][0]] + summary = await self._summarizer(text=observation, document=result_output["data"]["Get"]["EPISODICMEMORY"][0]) + logging.info("Frequency summary is %s", str(summary)) + return [str(frequency), summary] - async def relevance(self, observation: str, namespace: str) -> list[str]: - """Relevance - Score between 0 and 1 on how often was the final information relevant to the user in the past. - Stored in the episodic memory, mainly to show how well a buffer did the job - Starts at 0, gets updated based on the user feedback""" + async def repetition(self, observation: str, namespace: str, memory) -> list[str]: + """Repetition - Score between 0 and 1 based on how often and at what intervals a memory has been revisited. + Accounts for the spacing effect, where memories accessed at increasing intervals are given higher scores. + # TO DO -> add metadata column to make sure that the access is not equal to update, and run update vector function each time a memory is accessed + """ + logging.info("Starting with Repetition") - return ["0", "memory"] + result_output = await self.fetch_memories( + observation=observation, params=None, namespace=namespace + ) - async def saliency(self, observation: str, namespace=None) -> list[str]: + access_times = result_output["data"]["Get"]["EPISODICMEMORY"][0]["_additional"]["lastUpdateTimeUnix"] + # Calculate repetition score based on access times + if not access_times or len(access_times) == 1: + return ["0", result_output["data"]["Get"]["EPISODICMEMORY"][0]] + + # Sort access times + access_times = sorted(access_times) + # Calculate intervals between consecutive accesses + intervals = [access_times[i + 1] - access_times[i] for i in range(len(access_times) - 1)] + # A simple scoring mechanism: Longer intervals get higher scores, as they indicate spaced repetition + repetition_score = sum([1.0 / (interval + 1) for interval in intervals]) / len(intervals) + summary = await self._summarizer(text = observation, document=result_output["data"]["Get"]["EPISODICMEMORY"][0]) + logging.info("Repetition is %s", str(repetition_score)) + logging.info("Repetition summary is %s", str(summary)) + return [str(repetition_score), summary] + + async def relevance(self, observation: str, namespace: str, memory) -> list[str]: + """ + Fetches the fusion relevance score for a given observation from the episodic memory. + Learn more about fusion scores here on Weaviate docs: https://weaviate.io/blog/hybrid-search-fusion-algorithms + Parameters: + - observation: The user's query or observation. + - namespace: The namespace for the data. + + Returns: + - The relevance score between 0 and 1. + """ + logging.info("Starting with Relevance") + score = memory["_additional"]["score"] + logging.info("Relevance is %s", str(score)) + return [score, "fusion score"] + + async def saliency(self, observation: str, namespace=None, memory=None) -> list[str]: """Determines saliency by scoring the set of retrieved documents against each other and trying to determine saliency """ + logging.info("Starting with Saliency") class SaliencyRawList(BaseModel): """Schema for documentGroups""" - original_document: str = Field( + summary: str = Field( ..., - description="The original document retrieved from the database") + description="Summarized document") saliency_score: str = Field( None, description="The score between 0 and 1") class SailencyContextList(BaseModel): @@ -605,7 +279,7 @@ class EpisodicBuffer(BaseMemory): parser = PydanticOutputParser(pydantic_object=SailencyContextList) prompt = PromptTemplate( - template="Determine saliency of documents compared to the other documents retrieved \n{format_instructions}\nOriginal observation is: {query}\n", + template="Determine saliency of documents compared to the other documents retrieved \n{format_instructions}\nSummarize the observation briefly based on the user query, observation is: {query}\n", input_variables=["query"], partial_variables={"format_instructions": parser.get_format_instructions()}, ) @@ -613,16 +287,37 @@ class EpisodicBuffer(BaseMemory): _input = prompt.format_prompt(query=observation) document_context_result = self.llm_base(_input.to_string()) document_context_result_parsed = parser.parse(document_context_result) - return document_context_result_parsed.json() + document_context_result_parsed = json.loads(document_context_result_parsed.json()) + saliency_score = document_context_result_parsed["docs"][0]["saliency_score"] + saliency_values = document_context_result_parsed["docs"][0]["summary"] + + logging.info("Saliency is %s", str(saliency_score)) + logging.info("Saliency summary is %s", str(saliency_values)) + + return [saliency_score, saliency_values] + + # Example Usage + # attention_modulators = {"freshness": 0.8, "frequency": 0.7, "relevance": 0.9, "saliency": 0.85} + # diff_layer = DifferentiableLayer(attention_modulators) + # + # # Sample batch feedback + # feedbacks = [0.75, 0.8, 0.9] + # + # # Adjust weights based on batch feedback + # diff_layer.adjust_weights(feedbacks) + # + # print(diff_layer.get_weights()) + async def handle_modulator( self, modulator_name: str, attention_modulators: Dict[str, float], observation: str, namespace: Optional[str] = None, + memory: Optional[Dict[str, Any]] = None, ) -> Optional[List[Union[str, float]]]: """ Handle the given modulator based on the observation and namespace. @@ -638,22 +333,25 @@ class EpisodicBuffer(BaseMemory): """ modulator_value = attention_modulators.get(modulator_name, 0.0) modulator_functions = { - "freshness": lambda obs, ns: self.freshness(observation=obs, namespace=ns), - "frequency": lambda obs, ns: self.frequency(observation=obs, namespace=ns), - "relevance": lambda obs, ns: self.relevance(observation=obs, namespace=ns), - "saliency": lambda obs, ns: self.saliency(observation=obs, namespace=ns), + "freshness": lambda obs, ns, mem: self.freshness(observation=obs, namespace=ns, memory=mem), + "frequency": lambda obs, ns, mem: self.frequency(observation=obs, namespace=ns, memory=mem), + "relevance": lambda obs, ns, mem: self.relevance(observation=obs, namespace=ns, memory=mem), + "saliency": lambda obs, ns, mem: self.saliency(observation=obs, namespace=ns, memory=mem), } result_func = modulator_functions.get(modulator_name) if not result_func: return None - result = await result_func(observation, namespace) + result = await result_func(observation, namespace, memory) if not result: return None try: - if float(modulator_value) >= float(result[0]): + logging.info("Modulator %s", modulator_name) + logging.info("Modulator value %s", modulator_value) + logging.info("Result %s", result[0]) + if float(result[0]) >= float(modulator_value): return result except ValueError: pass @@ -679,12 +377,6 @@ class EpisodicBuffer(BaseMemory): attention_modulators: dict = None, ): """Generates the context to be used for the buffer and passed to the agent""" - try: - # we delete all memories in the episodic buffer, so we can start fresh - await self.delete_memories() - except: - # in case there are no memories, we pass - pass # we just filter the data here to make sure input is clean prompt_filter = ChatPromptTemplate.from_template( """Filter and remove uneccessary information that is not relevant in the query to @@ -695,45 +387,160 @@ class EpisodicBuffer(BaseMemory): # this part is partially done but the idea is to apply different attention modulators # to the data to fetch the most relevant information from the vector stores - context = [] - if attention_modulators: - from typing import Optional, Dict, List, Union - - lookup_value_semantic = await self.fetch_memories( - observation=str(output), namespace="SEMANTICMEMORY" - ) - context = [] - for memory in lookup_value_semantic["data"]["Get"]["SEMANTICMEMORY"]: - # extract memory id, and pass it to fetch function as a parameter - modulators = list(attention_modulators.keys()) - for modulator in modulators: - result = await self.handle_modulator( - modulator, - attention_modulators, - str(output), - namespace="EPISODICMEMORY", - ) - if result: - context.append(result) - context.append(memory) - else: - # defaults to semantic search if we don't want to apply algorithms on the vectordb data - lookup_value_episodic = await self.fetch_memories( - observation=str(output), namespace="EPISODICMEMORY" - ) - lookup_value_semantic = await self.fetch_memories( - observation=str(output), namespace="SEMANTICMEMORY" - ) - lookup_value_buffer = await self.fetch_memories(observation=str(output)) - - context.append(lookup_value_buffer) - context.append(lookup_value_semantic) - context.append(lookup_value_episodic) - class BufferModulators(BaseModel): + """Value of buffer modulators""" frequency: str = Field(..., description="Frequency score of the document") saliency: str = Field(..., description="Saliency score of the document") relevance: str = Field(..., description="Relevance score of the document") + description:str = Field(..., description="Latest buffer modulators") + direction: str= Field(..., description="Increase or a decrease of the modulator") + + parser = PydanticOutputParser(pydantic_object=BufferModulators) + + prompt = PromptTemplate( + template="""Structure the buffer modulators to be used for the buffer. \n + {format_instructions} \nOriginal observation is: + {query}\n """, + input_variables=["query"], + partial_variables={"format_instructions": parser.get_format_instructions()}, + ) + + # check if modulators exist, initialize the modulators if needed + if attention_modulators is None: + # try: + logging.info("Starting with attention mods") + attention_modulators = await self.fetch_memories(observation="Attention modulators", + namespace="BUFFERMEMORY") + + logging.info("Attention modulators exist %s", str(attention_modulators)) + lookup_value_episodic = await self.fetch_memories( + observation=str(output), namespace="EPISODICMEMORY" + ) + # lookup_value_episodic= lookup_value_episodic["data"]["Get"]["EPISODICMEMORY"][0]["text"] + prompt_classify = ChatPromptTemplate.from_template( + """You are a classifier. Determine if based on the previous query if the user was satisfied with the output : {query}""" + ) + json_structure = [{ + "name": "classifier", + "description": "Classification indicating if it's output is satisfactory", + "parameters": { + "type": "object", + "properties": { + "classification": { + "type": "boolean", + "description": "The classification true or false" + } + }, "required": ["classification"]} + }] + chain_filter = prompt_classify | self.llm.bind(function_call= {"name": "classifier"}, functions= json_structure) + classifier_output = await chain_filter.ainvoke({"query": lookup_value_episodic}) + arguments_str = classifier_output.additional_kwargs['function_call']['arguments'] + print("This is the arguments string", arguments_str) + arguments_dict = json.loads(arguments_str) + classfier_value = arguments_dict.get('classification', None) + + print("This is the classifier value", classfier_value) + + if classfier_value: + # adjust the weights of the modulators by adding a positive value + print("Lookup value, episodic", lookup_value_episodic["data"]["Get"]["EPISODICMEMORY"][0]["text"]) + prompt_classify = ChatPromptTemplate.from_template( + """ We know we need to increase the classifiers for our AI system. The classifiers are {modulators} The query is: {query}. Which of the classifiers should we decrease? Return just the modulator and desired value""" + ) + chain_modulator = prompt_classify | self.llm + classifier_output = await chain_modulator.ainvoke({"query": lookup_value_episodic, "modulators": str(attention_modulators)}) + print("classifier output 1", classifier_output) + diff_layer = DifferentiableLayer(attention_modulators) + adjusted_modulator = await diff_layer.adjust_weights(classifier_output) + _input = prompt.format_prompt(query=adjusted_modulator) + document_context_result = self.llm_base(_input.to_string()) + document_context_result_parsed = parser.parse(document_context_result) + print("Updating with the following weights", str(document_context_result_parsed)) + await self.add_memories(observation=str(document_context_result_parsed), params=params, namespace="BUFFERMEMORY") + else: + # adjust the weights of the modulators by adding a negative value + print("Lookup value, episodic", lookup_value_episodic) + prompt_classify = ChatPromptTemplate.from_template( + """ We know we need to decrease the classifiers for our AI system. The classifiers are {modulators} The query is: {query}. Which of the classifiers should we decrease? Return just the modulator and desired value""" + ) + chain_modulator_reduction = prompt_classify | self.llm + + classifier_output = await chain_modulator_reduction.ainvoke({"query": lookup_value_episodic, "modulators": str(attention_modulators)}) + print("classifier output 2", classifier_output) + diff_layer = DifferentiableLayer(attention_modulators) + adjusted_modulator =diff_layer.adjust_weights(classifier_output) + _input = prompt.format_prompt(query=adjusted_modulator) + document_context_result = self.llm_base(_input.to_string()) + document_context_result_parsed = parser.parse(document_context_result) + print("Updating with the following weights", str(document_context_result_parsed)) + await self.add_memories(observation=str(document_context_result_parsed), params=params, namespace="BUFFERMEMORY") + # except: + # # initialize the modulators with default values if they are not provided + # print("Starting with default modulators") + # attention_modulators = { + # "freshness": 0.5, + # "frequency": 0.5, + # "relevance": 0.5, + # "saliency": 0.5, + # } + # _input = prompt.format_prompt(query=attention_modulators) + # document_context_result = self.llm_base(_input.to_string()) + # document_context_result_parsed = parser.parse(document_context_result) + # await self.add_memories(observation=str(document_context_result_parsed), params=params, namespace="BUFFERMEMORY") + + elif attention_modulators: + pass + + + lookup_value_semantic = await self.fetch_memories( + observation=str(output), namespace="SEMANTICMEMORY" + ) + print("This is the lookup value semantic", len(lookup_value_semantic)) + context = [] + memory_scores = [] + + async def compute_score_for_memory(memory, output, attention_modulators): + modulators = list(attention_modulators.keys()) + total_score = 0 + num_scores = 0 + individual_scores = {} # Store individual scores with their modulator names + + for modulator in modulators: + result = await self.handle_modulator( + modulator_name=modulator, + attention_modulators=attention_modulators, + observation=str(output), + namespace="EPISODICMEMORY", + memory=memory, + ) + if result: + score = float(result[0]) # Assuming the first value in result is the score + individual_scores[modulator] = score # Store the score with its modulator name + total_score += score + num_scores += 1 + + average_score = total_score / num_scores if num_scores else 0 + return { + "memory": memory, + "average_score": average_score, + "individual_scores": individual_scores + } + + tasks = [ + compute_score_for_memory(memory=memory, output=output, attention_modulators=attention_modulators) + for memory in lookup_value_semantic["data"]["Get"]["SEMANTICMEMORY"] + ] + + print("HERE IS THE LENGTH OF THE TASKS", str(tasks)) + memory_scores = await asyncio.gather(*tasks) + # Sort the memories based on their average scores + sorted_memories = sorted(memory_scores, key=lambda x: x["average_score"], reverse=True)[:5] + # Store the sorted memories in the context + context.extend([item for item in sorted_memories]) + print("HERE IS THE CONTEXT", context) + + class BufferModulators(BaseModel): + attention_modulators: Dict[str, float] = Field(... , description="Attention modulators") class BufferRawContextTerms(BaseModel): """Schema for documentGroups""" @@ -745,18 +552,29 @@ class EpisodicBuffer(BaseMemory): document_content: str = Field( None, description="Shortened original content of the document" ) - document_relevance: str = Field( - None, - description="The relevance of the document for the task on the scale from 0 to 1", - ) attention_modulators_list: List[BufferModulators] = Field( ..., description="List of modulators" ) + average_modulator_score: str = Field(None, description="Average modulator score") + class StructuredEpisodicEvents(BaseModel): + """Schema for documentGroups""" + + event_order: str = Field( + ..., + description="Order when event occured", + ) + event_type: str = Field( + None, description="Type of the event" + ) + event_context: List[BufferModulators] = Field( + ..., description="Context of the event" + ) class BufferRawContextList(BaseModel): """Buffer raw context processed by the buffer""" docs: List[BufferRawContextTerms] = Field(..., description="List of docs") + events: List[StructuredEpisodicEvents] = Field(..., description="List of events") user_query: str = Field(..., description="The original user query") # we structure the data here to make it easier to work with @@ -774,6 +592,7 @@ class EpisodicBuffer(BaseMemory): _input = prompt.format_prompt(query=user_input, context=context) document_context_result = self.llm_base(_input.to_string()) document_context_result_parsed = parser.parse(document_context_result) + # print(document_context_result_parsed) return document_context_result_parsed async def get_task_list( @@ -939,12 +758,7 @@ class EpisodicBuffer(BaseMemory): result_tasks.append(task) result_tasks.append(output) - # print("HERE IS THE RESULT TASKS", str(result_tasks)) - # # buffer_result = await self.fetch_memories(observation=str(user_input)) - # - # print("HERE IS THE RESULT TASKS", str(buffer_result)) - class EpisodicTask(BaseModel): """Schema for an individual task.""" @@ -972,28 +786,29 @@ class EpisodicBuffer(BaseMemory): user_query: str = Field( ..., description="The order at which the task needs to be performed" ) - + attention_modulators: str = Field(..., description="List of attention modulators") parser = PydanticOutputParser(pydantic_object=EpisodicList) + date = datetime.now().strftime('%Y-%m-%d %H:%M:%S') prompt = PromptTemplate( - template="Format the result.\n{format_instructions}\nOriginal query is: {query}\n Steps are: {steps}, buffer is: {buffer}", - input_variables=["query", "steps", "buffer"], + template="Format the result.\n{format_instructions}\nOriginal query is: {query}\n Steps are: {steps}, buffer is: {buffer}, date is:{date}, attention modulators are: {attention_modulators} \n", + input_variables=["query", "steps", "buffer", "date", "attention_modulators"], partial_variables={"format_instructions": parser.get_format_instructions()}, ) _input = prompt.format_prompt( query=user_input, steps=str(tasks_list) - , buffer=str(result_tasks) + , buffer=str(result_tasks), date= date, attention_modulators=attention_modulators ) - + print("HERE ARE THE STEPS, BUFFER AND DATE", str(tasks_list)) + print("here are the result_tasks", str(result_tasks)) # return "a few things to do like load episodic memory in a structured format" output = self.llm_base(_input.to_string()) result_parsing = parser.parse(output) lookup_value = await self.add_memories( observation=str(result_parsing.json()), params=params, namespace='EPISODICMEMORY' ) - # print("THE RESULT OF THIS QUERY IS ", result_parsing.json()) - await self.delete_memories() + # await self.delete_memories() return result_parsing.json() @@ -1188,11 +1003,14 @@ class Memory: async def _available_operations(self): return await self.long_term_memory.episodic_buffer.available_operations() + async def _provide_feedback(self, score:str =None, params: dict = None, attention_modulators: dict = None): + return await self.short_term_memory.episodic_buffer.provide_feedback(score=score, params=params, attention_modulators=attention_modulators) + async def main(): # if you want to run the script as a standalone script, do so with the examples below - memory = Memory(user_id="123") + memory = Memory(user_id="TestUser") await memory.async_init() params = { "version": "1.0", @@ -1212,17 +1030,25 @@ async def main(): "source": "url", "path": "https://www.ibiblio.org/ebooks/London/Call%20of%20Wild.pdf" } - load_jack_london = await memory._add_semantic_memory(observation = "bla", loader_settings=loader_settings, params=params) - print(load_jack_london) + # load_jack_london = await memory._add_semantic_memory(observation = "bla", loader_settings=loader_settings, params=params) + # print(load_jack_london) - modulator = {"relevance": 0.0, "saliency": 0.0, "frequency": 0.0} - # # - run_main_buffer = await memory._run_main_buffer( + modulator = {"relevance": 0.1, "frequency": 0.1} + # await memory._delete_episodic_memory() + # + run_main_buffer = await memory._create_buffer_context( user_input="I want to know how does Buck adapt to life in the wild and then have that info translated to german ", params=params, attention_modulators=modulator, ) print(run_main_buffer) + # # + # run_main_buffer = await memory._run_main_buffer( + # user_input="I want to know how does Buck adapt to life in the wild and then have that info translated to german ", + # params=params, + # attention_modulators=None, + # ) + # print(run_main_buffer) # del_semantic = await memory._delete_semantic_memory() # print(del_semantic) diff --git a/level_2/modulators/modulators.py b/level_2/modulators/modulators.py new file mode 100644 index 000000000..4efefb044 --- /dev/null +++ b/level_2/modulators/modulators.py @@ -0,0 +1,32 @@ +import numpy as np + + +class DifferentiableLayer: + def __init__(self, attention_modulators: dict): + self.weights = {modulator: 1.0 for modulator in attention_modulators} + self.learning_rate = 0.1 + self.regularization_lambda = 0.01 + self.weight_decay = 0.99 + + async def adjust_weights(self, feedbacks: list[float]): + """ + Adjusts the weights of the attention modulators based on user feedbacks. + + Parameters: + - feedbacks: A list of feedback scores (between 0 and 1). + """ + avg_feedback = np.mean(feedbacks) + feedback_diff = 1.0 - avg_feedback + + # Adjust weights based on average feedback + for modulator in self.weights: + self.weights[modulator] += self.learning_rate * (-feedback_diff) - self.regularization_lambda * \ + self.weights[modulator] + self.weights[modulator] *= self.weight_decay + + # Decaying the learning rate + self.learning_rate *= 0.99 + + async def get_weights(self): + return self.weights + diff --git a/level_2/poetry.lock b/level_2/poetry.lock index 8233ad038..ba0c3e970 100644 --- a/level_2/poetry.lock +++ b/level_2/poetry.lock @@ -261,17 +261,17 @@ numpy = {version = ">=1.19.0", markers = "python_version >= \"3.9\""} [[package]] name = "boto3" -version = "1.28.32" +version = "1.28.37" description = "The AWS SDK for Python" optional = false python-versions = ">= 3.7" files = [ - {file = "boto3-1.28.32-py3-none-any.whl", hash = "sha256:ed787f250ce2562c7744395bdf32b5a7bc9184126ef50a75e97bcb66043dccf3"}, - {file = "boto3-1.28.32.tar.gz", hash = "sha256:b505faa126db84e226f6f8d242a798fae30a725f0cac8a76c6aca9ace4e8eb28"}, + {file = "boto3-1.28.37-py3-none-any.whl", hash = "sha256:709cf438ad3ea48d426e4659538fe1148fc2719469b52179d07a11c5d26abac6"}, + {file = "boto3-1.28.37.tar.gz", hash = "sha256:4aec1b54ba6cd352abba2cdd7cdc76e631a4d3ce79c55c0719f85f9c9842e4a2"}, ] [package.dependencies] -botocore = ">=1.31.32,<1.32.0" +botocore = ">=1.31.37,<1.32.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.6.0,<0.7.0" @@ -280,13 +280,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.31.32" +version = "1.31.37" description = "Low-level, data-driven core of boto 3." optional = false python-versions = ">= 3.7" files = [ - {file = "botocore-1.31.32-py3-none-any.whl", hash = "sha256:8992ac186988c4b4cc168e8e479e9472da1442b193c1bf7c9dcd1877ec62d23c"}, - {file = "botocore-1.31.32.tar.gz", hash = "sha256:7a07d8dc8cc47bf23af39409ada81f388eb78233e1bb2cde0c415756da753664"}, + {file = "botocore-1.31.37-py3-none-any.whl", hash = "sha256:72e10759be3dff39c5eeb29f85c11a227c369c946d044f2caf62c352d6a6fc06"}, + {file = "botocore-1.31.37.tar.gz", hash = "sha256:5c92c8bc3c6b49950c95501b30f0ac551fd4952359b53a6fba243094028157de"}, ] [package.dependencies] @@ -662,6 +662,32 @@ ai = ["openai (>=0.27.6,<0.28.0)"] docx = ["docx2txt (>=0.8,<0.9)"] pdf = ["pypdf (>=3.3.0,<4.0.0)"] +[[package]] +name = "deepeval" +version = "0.10.12" +description = "Deep eval provides evaluation platform to accelerate development of LLMs and Agents" +optional = false +python-versions = "*" +files = [ + {file = "deepeval-0.10.12-py3-none-any.whl", hash = "sha256:239eb720e8a205afab1ae2425e483177bd76cde658bdac98658a6559bdba4f3f"}, + {file = "deepeval-0.10.12.tar.gz", hash = "sha256:80968d57a9da6c4fce6247d31ebf7fea228c76393e0d985804be68b722090732"}, +] + +[package.dependencies] +protobuf = "<=3.20.5" +pytest = "*" +requests = "*" +rich = "*" +sentence-transformers = "*" +tabulate = "*" +tqdm = "*" +transformers = "*" +typer = "*" + +[package.extras] +bias = ["Dbias", "tensorflow"] +toxic = ["detoxify"] + [[package]] name = "deprecated" version = "1.2.14" @@ -681,13 +707,13 @@ dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "sphinx (<2)", "tox"] [[package]] name = "dlt" -version = "0.3.12" +version = "0.3.14" description = "DLT is an open-source python-native scalable data loading framework that does not require any devops efforts to run." optional = false python-versions = ">=3.8.1,<4.0" files = [ - {file = "dlt-0.3.12-py3-none-any.whl", hash = "sha256:f9695a7fb98f5802e34f3f27d64e6ec1c0ccede48f26a1fdcd2db2c9280de9ac"}, - {file = "dlt-0.3.12.tar.gz", hash = "sha256:1fb4a947b2215627c1ee5725f2af80c6c7fcab60e7c3a57d49ab84b495444117"}, + {file = "dlt-0.3.14-py3-none-any.whl", hash = "sha256:b7672e153065796d0e7b0bc7eacfc48feff32a28e091eeca30f5a7180e42da2c"}, + {file = "dlt-0.3.14.tar.gz", hash = "sha256:b398ee07a1b87a6ac93130fc8e143d77e99a30d1bf957468d0252f23f563c01e"}, ] [package.dependencies] @@ -732,6 +758,7 @@ gs = ["gcsfs (>=2022.4.0)"] motherduck = ["duckdb (>=0.6.1,<0.9.0)", "pyarrow (>=8.0.0)"] parquet = ["pyarrow (>=8.0.0)"] postgres = ["psycopg2-binary (>=2.9.1)", "psycopg2cffi (>=2.9.0)"] +pydantic = ["pydantic (>=1.10,<2.0)"] redshift = ["psycopg2-binary (>=2.9.1)", "psycopg2cffi (>=2.9.0)"] s3 = ["boto3 (>=1.25)", "s3fs (>=2022.4.0)"] snowflake = ["snowflake-connector-python[pandas] (>=2.9.0)"] @@ -881,6 +908,24 @@ files = [ [package.extras] devel = ["colorama", "json-spec", "jsonschema", "pylint", "pytest", "pytest-benchmark", "pytest-cache", "validictory"] +[[package]] +name = "filelock" +version = "3.12.3" +description = "A platform independent file lock." +optional = false +python-versions = ">=3.8" +files = [ + {file = "filelock-3.12.3-py3-none-any.whl", hash = "sha256:f067e40ccc40f2b48395a80fcbd4728262fab54e232e090a4063ab804179efeb"}, + {file = "filelock-3.12.3.tar.gz", hash = "sha256:0ecc1dd2ec4672a10c8550a8182f1bd0c0a5088470ecd5a125e45f49472fac3d"}, +] + +[package.dependencies] +typing-extensions = {version = ">=4.7.1", markers = "python_version < \"3.11\""} + +[package.extras] +docs = ["furo (>=2023.7.26)", "sphinx (>=7.1.2)", "sphinx-autodoc-typehints (>=1.24)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.3)", "diff-cover (>=7.7)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)", "pytest-timeout (>=2.1)"] + [[package]] name = "frozenlist" version = "1.4.0" @@ -1232,6 +1277,38 @@ cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] +[[package]] +name = "huggingface-hub" +version = "0.16.4" +description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "huggingface_hub-0.16.4-py3-none-any.whl", hash = "sha256:0d3df29932f334fead024afc7cb4cc5149d955238b8b5e42dcf9740d6995a349"}, + {file = "huggingface_hub-0.16.4.tar.gz", hash = "sha256:608c7d4f3d368b326d1747f91523dbd1f692871e8e2e7a4750314a2dd8b63e14"}, +] + +[package.dependencies] +filelock = "*" +fsspec = "*" +packaging = ">=20.9" +pyyaml = ">=5.1" +requests = "*" +tqdm = ">=4.42.1" +typing-extensions = ">=3.7.4.3" + +[package.extras] +all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "black (>=23.1,<24.0)", "gradio", "jedi", "mypy (==0.982)", "numpy", "pydantic", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "urllib3 (<2.0)"] +cli = ["InquirerPy (==0.3.4)"] +dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "black (>=23.1,<24.0)", "gradio", "jedi", "mypy (==0.982)", "numpy", "pydantic", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "urllib3 (<2.0)"] +fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"] +inference = ["aiohttp", "pydantic"] +quality = ["black (>=23.1,<24.0)", "mypy (==0.982)", "ruff (>=0.0.241)"] +tensorflow = ["graphviz", "pydot", "tensorflow"] +testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "gradio", "jedi", "numpy", "pydantic", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"] +torch = ["torch"] +typing = ["pydantic", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"] + [[package]] name = "humanize" version = "4.8.0" @@ -1414,13 +1491,13 @@ data = ["language-data (>=1.1,<2.0)"] [[package]] name = "langsmith" -version = "0.0.26" +version = "0.0.28" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." optional = false python-versions = ">=3.8.1,<4.0" files = [ - {file = "langsmith-0.0.26-py3-none-any.whl", hash = "sha256:61c1d4582104d96edde04e1eea1dae347645b691c44489a5871341a2a1a2a1eb"}, - {file = "langsmith-0.0.26.tar.gz", hash = "sha256:80a4ef1b663a24a460d25b9986ab2010c5d06b6061c65be473abafc0647d191a"}, + {file = "langsmith-0.0.28-py3-none-any.whl", hash = "sha256:f398782f41526c74e141e68fa28b9020e0be4bde18a1d4a76b357c8272fb81bd"}, + {file = "langsmith-0.0.28.tar.gz", hash = "sha256:34c15f9a8908be180001c58048b659ece6320d0bf8ffce4ca496a2428b35646e"}, ] [package.dependencies] @@ -1620,6 +1697,23 @@ files = [ {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, ] +[[package]] +name = "mpmath" +version = "1.3.0" +description = "Python library for arbitrary-precision floating-point arithmetic" +optional = false +python-versions = "*" +files = [ + {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, + {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, +] + +[package.extras] +develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"] +docs = ["sphinx"] +gmpy = ["gmpy2 (>=2.1.0a4)"] +tests = ["pytest (>=4.6)"] + [[package]] name = "multidict" version = "6.0.4" @@ -1751,6 +1845,24 @@ files = [ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] +[[package]] +name = "networkx" +version = "3.1" +description = "Python package for creating and manipulating graphs and networks" +optional = false +python-versions = ">=3.8" +files = [ + {file = "networkx-3.1-py3-none-any.whl", hash = "sha256:4f33f68cb2afcf86f28a45f43efc27a9386b535d567d2127f8f61d51dec58d36"}, + {file = "networkx-3.1.tar.gz", hash = "sha256:de346335408f84de0eada6ff9fafafff9bcda11f0a0dfaa931133debb146ab61"}, +] + +[package.extras] +default = ["matplotlib (>=3.4)", "numpy (>=1.20)", "pandas (>=1.3)", "scipy (>=1.8)"] +developer = ["mypy (>=1.1)", "pre-commit (>=3.2)"] +doc = ["nb2plots (>=0.6)", "numpydoc (>=1.5)", "pillow (>=9.4)", "pydata-sphinx-theme (>=0.13)", "sphinx (>=6.1)", "sphinx-gallery (>=0.12)", "texext (>=0.6.7)"] +extra = ["lxml (>=4.6)", "pydot (>=1.4.2)", "pygraphviz (>=1.10)", "sympy (>=1.10)"] +test = ["codecov (>=2.1)", "pytest (>=7.2)", "pytest-cov (>=4.0)"] + [[package]] name = "nltk" version = "3.8.1" @@ -2053,6 +2165,75 @@ files = [ [package.dependencies] ptyprocess = ">=0.5" +[[package]] +name = "pillow" +version = "10.0.0" +description = "Python Imaging Library (Fork)" +optional = false +python-versions = ">=3.8" +files = [ + {file = "Pillow-10.0.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:1f62406a884ae75fb2f818694469519fb685cc7eaff05d3451a9ebe55c646891"}, + {file = "Pillow-10.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d5db32e2a6ccbb3d34d87c87b432959e0db29755727afb37290e10f6e8e62614"}, + {file = "Pillow-10.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edf4392b77bdc81f36e92d3a07a5cd072f90253197f4a52a55a8cec48a12483b"}, + {file = "Pillow-10.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:520f2a520dc040512699f20fa1c363eed506e94248d71f85412b625026f6142c"}, + {file = "Pillow-10.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:8c11160913e3dd06c8ffdb5f233a4f254cb449f4dfc0f8f4549eda9e542c93d1"}, + {file = "Pillow-10.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:a74ba0c356aaa3bb8e3eb79606a87669e7ec6444be352870623025d75a14a2bf"}, + {file = "Pillow-10.0.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d5d0dae4cfd56969d23d94dc8e89fb6a217be461c69090768227beb8ed28c0a3"}, + {file = "Pillow-10.0.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:22c10cc517668d44b211717fd9775799ccec4124b9a7f7b3635fc5386e584992"}, + {file = "Pillow-10.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:dffe31a7f47b603318c609f378ebcd57f1554a3a6a8effbc59c3c69f804296de"}, + {file = "Pillow-10.0.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:9fb218c8a12e51d7ead2a7c9e101a04982237d4855716af2e9499306728fb485"}, + {file = "Pillow-10.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d35e3c8d9b1268cbf5d3670285feb3528f6680420eafe35cccc686b73c1e330f"}, + {file = "Pillow-10.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ed64f9ca2f0a95411e88a4efbd7a29e5ce2cea36072c53dd9d26d9c76f753b3"}, + {file = "Pillow-10.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b6eb5502f45a60a3f411c63187db83a3d3107887ad0d036c13ce836f8a36f1d"}, + {file = "Pillow-10.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:c1fbe7621c167ecaa38ad29643d77a9ce7311583761abf7836e1510c580bf3dd"}, + {file = "Pillow-10.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:cd25d2a9d2b36fcb318882481367956d2cf91329f6892fe5d385c346c0649629"}, + {file = "Pillow-10.0.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:3b08d4cc24f471b2c8ca24ec060abf4bebc6b144cb89cba638c720546b1cf538"}, + {file = "Pillow-10.0.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d737a602fbd82afd892ca746392401b634e278cb65d55c4b7a8f48e9ef8d008d"}, + {file = "Pillow-10.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:3a82c40d706d9aa9734289740ce26460a11aeec2d9c79b7af87bb35f0073c12f"}, + {file = "Pillow-10.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:bc2ec7c7b5d66b8ec9ce9f720dbb5fa4bace0f545acd34870eff4a369b44bf37"}, + {file = "Pillow-10.0.0-cp312-cp312-macosx_10_10_x86_64.whl", hash = "sha256:d80cf684b541685fccdd84c485b31ce73fc5c9b5d7523bf1394ce134a60c6883"}, + {file = "Pillow-10.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:76de421f9c326da8f43d690110f0e79fe3ad1e54be811545d7d91898b4c8493e"}, + {file = "Pillow-10.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81ff539a12457809666fef6624684c008e00ff6bf455b4b89fd00a140eecd640"}, + {file = "Pillow-10.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce543ed15570eedbb85df19b0a1a7314a9c8141a36ce089c0a894adbfccb4568"}, + {file = "Pillow-10.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:685ac03cc4ed5ebc15ad5c23bc555d68a87777586d970c2c3e216619a5476223"}, + {file = "Pillow-10.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:d72e2ecc68a942e8cf9739619b7f408cc7b272b279b56b2c83c6123fcfa5cdff"}, + {file = "Pillow-10.0.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d50b6aec14bc737742ca96e85d6d0a5f9bfbded018264b3b70ff9d8c33485551"}, + {file = "Pillow-10.0.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:00e65f5e822decd501e374b0650146063fbb30a7264b4d2744bdd7b913e0cab5"}, + {file = "Pillow-10.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:f31f9fdbfecb042d046f9d91270a0ba28368a723302786c0009ee9b9f1f60199"}, + {file = "Pillow-10.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:1ce91b6ec08d866b14413d3f0bbdea7e24dfdc8e59f562bb77bc3fe60b6144ca"}, + {file = "Pillow-10.0.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:349930d6e9c685c089284b013478d6f76e3a534e36ddfa912cde493f235372f3"}, + {file = "Pillow-10.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3a684105f7c32488f7153905a4e3015a3b6c7182e106fe3c37fbb5ef3e6994c3"}, + {file = "Pillow-10.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b4f69b3700201b80bb82c3a97d5e9254084f6dd5fb5b16fc1a7b974260f89f43"}, + {file = "Pillow-10.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f07ea8d2f827d7d2a49ecf1639ec02d75ffd1b88dcc5b3a61bbb37a8759ad8d"}, + {file = "Pillow-10.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:040586f7d37b34547153fa383f7f9aed68b738992380ac911447bb78f2abe530"}, + {file = "Pillow-10.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:f88a0b92277de8e3ca715a0d79d68dc82807457dae3ab8699c758f07c20b3c51"}, + {file = "Pillow-10.0.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:c7cf14a27b0d6adfaebb3ae4153f1e516df54e47e42dcc073d7b3d76111a8d86"}, + {file = "Pillow-10.0.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:3400aae60685b06bb96f99a21e1ada7bc7a413d5f49bce739828ecd9391bb8f7"}, + {file = "Pillow-10.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:dbc02381779d412145331789b40cc7b11fdf449e5d94f6bc0b080db0a56ea3f0"}, + {file = "Pillow-10.0.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:9211e7ad69d7c9401cfc0e23d49b69ca65ddd898976d660a2fa5904e3d7a9baa"}, + {file = "Pillow-10.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:faaf07ea35355b01a35cb442dd950d8f1bb5b040a7787791a535de13db15ed90"}, + {file = "Pillow-10.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9f72a021fbb792ce98306ffb0c348b3c9cb967dce0f12a49aa4c3d3fdefa967"}, + {file = "Pillow-10.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f7c16705f44e0504a3a2a14197c1f0b32a95731d251777dcb060aa83022cb2d"}, + {file = "Pillow-10.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:76edb0a1fa2b4745fb0c99fb9fb98f8b180a1bbceb8be49b087e0b21867e77d3"}, + {file = "Pillow-10.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:368ab3dfb5f49e312231b6f27b8820c823652b7cd29cfbd34090565a015e99ba"}, + {file = "Pillow-10.0.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:608bfdee0d57cf297d32bcbb3c728dc1da0907519d1784962c5f0c68bb93e5a3"}, + {file = "Pillow-10.0.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5c6e3df6bdd396749bafd45314871b3d0af81ff935b2d188385e970052091017"}, + {file = "Pillow-10.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:7be600823e4c8631b74e4a0d38384c73f680e6105a7d3c6824fcf226c178c7e6"}, + {file = "Pillow-10.0.0-pp310-pypy310_pp73-macosx_10_10_x86_64.whl", hash = "sha256:92be919bbc9f7d09f7ae343c38f5bb21c973d2576c1d45600fce4b74bafa7ac0"}, + {file = "Pillow-10.0.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f8182b523b2289f7c415f589118228d30ac8c355baa2f3194ced084dac2dbba"}, + {file = "Pillow-10.0.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:38250a349b6b390ee6047a62c086d3817ac69022c127f8a5dc058c31ccef17f3"}, + {file = "Pillow-10.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:88af2003543cc40c80f6fca01411892ec52b11021b3dc22ec3bc9d5afd1c5334"}, + {file = "Pillow-10.0.0-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:c189af0545965fa8d3b9613cfdb0cd37f9d71349e0f7750e1fd704648d475ed2"}, + {file = "Pillow-10.0.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce7b031a6fc11365970e6a5686d7ba8c63e4c1cf1ea143811acbb524295eabed"}, + {file = "Pillow-10.0.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:db24668940f82321e746773a4bc617bfac06ec831e5c88b643f91f122a785684"}, + {file = "Pillow-10.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:efe8c0681042536e0d06c11f48cebe759707c9e9abf880ee213541c5b46c5bf3"}, + {file = "Pillow-10.0.0.tar.gz", hash = "sha256:9c82b5b3e043c7af0d95792d0d20ccf68f61a1fec6b3530e718b688422727396"}, +] + +[package.extras] +docs = ["furo", "olefile", "sphinx (>=2.4)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinx-removed-in", "sphinxext-opengraph"] +tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] + [[package]] name = "pinecone-client" version = "2.2.2" @@ -2100,13 +2281,13 @@ pyee = "9.0.4" [[package]] name = "pluggy" -version = "1.2.0" +version = "1.3.0" description = "plugin and hook calling mechanisms for python" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "pluggy-1.2.0-py3-none-any.whl", hash = "sha256:c2fd55a7d7a3863cba1a013e4e2414658b1d07b6bc57b3919e0c63c9abb99849"}, - {file = "pluggy-1.2.0.tar.gz", hash = "sha256:d12f0c4b579b15f5e054301bb226ee85eeeba08ffec228092f8defbaa3a4c4b3"}, + {file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"}, + {file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"}, ] [package.extras] @@ -2165,6 +2346,37 @@ files = [ cymem = ">=2.0.2,<2.1.0" murmurhash = ">=0.28.0,<1.1.0" +[[package]] +name = "protobuf" +version = "3.20.3" +description = "Protocol Buffers" +optional = false +python-versions = ">=3.7" +files = [ + {file = "protobuf-3.20.3-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:f4bd856d702e5b0d96a00ec6b307b0f51c1982c2bf9c0052cf9019e9a544ba99"}, + {file = "protobuf-3.20.3-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:9aae4406ea63d825636cc11ffb34ad3379335803216ee3a856787bcf5ccc751e"}, + {file = "protobuf-3.20.3-cp310-cp310-win32.whl", hash = "sha256:28545383d61f55b57cf4df63eebd9827754fd2dc25f80c5253f9184235db242c"}, + {file = "protobuf-3.20.3-cp310-cp310-win_amd64.whl", hash = "sha256:67a3598f0a2dcbc58d02dd1928544e7d88f764b47d4a286202913f0b2801c2e7"}, + {file = "protobuf-3.20.3-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:899dc660cd599d7352d6f10d83c95df430a38b410c1b66b407a6b29265d66469"}, + {file = "protobuf-3.20.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e64857f395505ebf3d2569935506ae0dfc4a15cb80dc25261176c784662cdcc4"}, + {file = "protobuf-3.20.3-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:d9e4432ff660d67d775c66ac42a67cf2453c27cb4d738fc22cb53b5d84c135d4"}, + {file = "protobuf-3.20.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:74480f79a023f90dc6e18febbf7b8bac7508420f2006fabd512013c0c238f454"}, + {file = "protobuf-3.20.3-cp37-cp37m-win32.whl", hash = "sha256:b6cc7ba72a8850621bfec987cb72623e703b7fe2b9127a161ce61e61558ad905"}, + {file = "protobuf-3.20.3-cp37-cp37m-win_amd64.whl", hash = "sha256:8c0c984a1b8fef4086329ff8dd19ac77576b384079247c770f29cc8ce3afa06c"}, + {file = "protobuf-3.20.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:de78575669dddf6099a8a0f46a27e82a1783c557ccc38ee620ed8cc96d3be7d7"}, + {file = "protobuf-3.20.3-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:f4c42102bc82a51108e449cbb32b19b180022941c727bac0cfd50170341f16ee"}, + {file = "protobuf-3.20.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:44246bab5dd4b7fbd3c0c80b6f16686808fab0e4aca819ade6e8d294a29c7050"}, + {file = "protobuf-3.20.3-cp38-cp38-win32.whl", hash = "sha256:c02ce36ec760252242a33967d51c289fd0e1c0e6e5cc9397e2279177716add86"}, + {file = "protobuf-3.20.3-cp38-cp38-win_amd64.whl", hash = "sha256:447d43819997825d4e71bf5769d869b968ce96848b6479397e29fc24c4a5dfe9"}, + {file = "protobuf-3.20.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:398a9e0c3eaceb34ec1aee71894ca3299605fa8e761544934378bbc6c97de23b"}, + {file = "protobuf-3.20.3-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:bf01b5720be110540be4286e791db73f84a2b721072a3711efff6c324cdf074b"}, + {file = "protobuf-3.20.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:daa564862dd0d39c00f8086f88700fdbe8bc717e993a21e90711acfed02f2402"}, + {file = "protobuf-3.20.3-cp39-cp39-win32.whl", hash = "sha256:819559cafa1a373b7096a482b504ae8a857c89593cf3a25af743ac9ecbd23480"}, + {file = "protobuf-3.20.3-cp39-cp39-win_amd64.whl", hash = "sha256:03038ac1cfbc41aa21f6afcbcd357281d7521b4157926f30ebecc8d4ea59dcb7"}, + {file = "protobuf-3.20.3-py2.py3-none-any.whl", hash = "sha256:a7ca6d488aa8ff7f329d4c545b2dbad8ac31464f1d8b1c87ad1346717731e4db"}, + {file = "protobuf-3.20.3.tar.gz", hash = "sha256:2e3427429c9cffebf259491be0af70189607f365c2f41c7c3764af6f337105f2"}, +] + [[package]] name = "ptyprocess" version = "0.7.0" @@ -2281,13 +2493,13 @@ plugins = ["importlib-metadata"] [[package]] name = "pypdf" -version = "3.15.2" +version = "3.15.4" description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files" optional = false python-versions = ">=3.6" files = [ - {file = "pypdf-3.15.2-py3-none-any.whl", hash = "sha256:f6e598292be34187287a609c72815c1502b3dc2c997b374ba0870ce79d2e975a"}, - {file = "pypdf-3.15.2.tar.gz", hash = "sha256:cdf7d75ebb8901f3352cf9488c5f662c6de9c52e432c429d15cada67ba372fce"}, + {file = "pypdf-3.15.4-py3-none-any.whl", hash = "sha256:791f0a52ddf390709f1f1b0c05c4d8cde13829b4f7cb91b4003b9bdd352bc944"}, + {file = "pypdf-3.15.4.tar.gz", hash = "sha256:a2780ed01dc4da23ac1542209f58fd3d951d8dd37c3c0309d123cd2f2679fb03"}, ] [package.extras] @@ -2732,6 +2944,165 @@ botocore = ">=1.12.36,<2.0a.0" [package.extras] crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] +[[package]] +name = "safetensors" +version = "0.3.3" +description = "Fast and Safe Tensor serialization" +optional = false +python-versions = "*" +files = [ + {file = "safetensors-0.3.3-cp310-cp310-macosx_10_11_x86_64.whl", hash = "sha256:92e4d0c8b2836120fddd134474c5bda8963f322333941f8b9f643e5b24f041eb"}, + {file = "safetensors-0.3.3-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:3dcadb6153c42addc9c625a622ebde9293fabe1973f9ef31ba10fb42c16e8536"}, + {file = "safetensors-0.3.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:08f26b61e1b0a14dc959aa9d568776bd038805f611caef1de04a80c468d4a7a4"}, + {file = "safetensors-0.3.3-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:17f41344d9a075f2f21b289a49a62e98baff54b5754240ba896063bce31626bf"}, + {file = "safetensors-0.3.3-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:f1045f798e1a16a6ced98d6a42ec72936d367a2eec81dc5fade6ed54638cd7d2"}, + {file = "safetensors-0.3.3-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:eaf0e4bc91da13f21ac846a39429eb3f3b7ed06295a32321fa3eb1a59b5c70f3"}, + {file = "safetensors-0.3.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25149180d4dc8ca48bac2ac3852a9424b466e36336a39659b35b21b2116f96fc"}, + {file = "safetensors-0.3.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c9e943bf78c39de8865398a71818315e7d5d1af93c7b30d4da3fc852e62ad9bc"}, + {file = "safetensors-0.3.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cccfcac04a010354e87c7a2fe16a1ff004fc4f6e7ef8efc966ed30122ce00bc7"}, + {file = "safetensors-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a07121f427e646a50d18c1be0fa1a2cbf6398624c31149cd7e6b35486d72189e"}, + {file = "safetensors-0.3.3-cp310-cp310-win32.whl", hash = "sha256:a85e29cbfddfea86453cc0f4889b4bcc6b9c155be9a60e27be479a34e199e7ef"}, + {file = "safetensors-0.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:e13adad4a3e591378f71068d14e92343e626cf698ff805f61cdb946e684a218e"}, + {file = "safetensors-0.3.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:cbc3312f134baf07334dd517341a4b470b2931f090bd9284888acb7dfaf4606f"}, + {file = "safetensors-0.3.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:d15030af39d5d30c22bcbc6d180c65405b7ea4c05b7bab14a570eac7d7d43722"}, + {file = "safetensors-0.3.3-cp311-cp311-macosx_12_0_universal2.whl", hash = "sha256:f84a74cbe9859b28e3d6d7715ac1dd3097bebf8d772694098f6d42435245860c"}, + {file = "safetensors-0.3.3-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:10d637423d98ab2e6a4ad96abf4534eb26fcaf8ca3115623e64c00759374e90d"}, + {file = "safetensors-0.3.3-cp311-cp311-macosx_13_0_universal2.whl", hash = "sha256:3b46f5de8b44084aff2e480874c550c399c730c84b2e8ad1bddb062c94aa14e9"}, + {file = "safetensors-0.3.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e76da691a82dfaf752854fa6d17c8eba0c8466370c5ad8cf1bfdf832d3c7ee17"}, + {file = "safetensors-0.3.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4e342fd54e66aa9512dd13e410f791e47aa4feeb5f4c9a20882c72f3d272f29"}, + {file = "safetensors-0.3.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:178fd30b5dc73bce14a39187d948cedd0e5698e2f055b7ea16b5a96c9b17438e"}, + {file = "safetensors-0.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e8fdf7407dba44587ed5e79d5de3533d242648e1f2041760b21474bd5ea5c8c"}, + {file = "safetensors-0.3.3-cp311-cp311-win32.whl", hash = "sha256:7d3b744cee8d7a46ffa68db1a2ff1a1a432488e3f7a5a97856fe69e22139d50c"}, + {file = "safetensors-0.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:f579877d30feec9b6ba409d05fa174633a4fc095675a4a82971d831a8bb60b97"}, + {file = "safetensors-0.3.3-cp37-cp37m-macosx_10_11_x86_64.whl", hash = "sha256:2fff5b19a1b462c17322998b2f4b8bce43c16fe208968174d2f3a1446284ceed"}, + {file = "safetensors-0.3.3-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:41adb1d39e8aad04b16879e3e0cbcb849315999fad73bc992091a01e379cb058"}, + {file = "safetensors-0.3.3-cp37-cp37m-macosx_12_0_x86_64.whl", hash = "sha256:0f2b404250b3b877b11d34afcc30d80e7035714a1116a3df56acaca6b6c00096"}, + {file = "safetensors-0.3.3-cp37-cp37m-macosx_13_0_x86_64.whl", hash = "sha256:b43956ef20e9f4f2e648818a9e7b3499edd6b753a0f5526d4f6a6826fbee8446"}, + {file = "safetensors-0.3.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d61a99b34169981f088ccfbb2c91170843efc869a0a0532f422db7211bf4f474"}, + {file = "safetensors-0.3.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c0008aab36cd20e9a051a68563c6f80d40f238c2611811d7faa5a18bf3fd3984"}, + {file = "safetensors-0.3.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:93d54166072b143084fdcd214a080a088050c1bb1651016b55942701b31334e4"}, + {file = "safetensors-0.3.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c32ee08f61cea56a5d62bbf94af95df6040c8ab574afffaeb7b44ae5da1e9e3"}, + {file = "safetensors-0.3.3-cp37-cp37m-win32.whl", hash = "sha256:351600f367badd59f7bfe86d317bb768dd8c59c1561c6fac43cafbd9c1af7827"}, + {file = "safetensors-0.3.3-cp37-cp37m-win_amd64.whl", hash = "sha256:034717e297849dae1af0a7027a14b8647bd2e272c24106dced64d83e10d468d1"}, + {file = "safetensors-0.3.3-cp38-cp38-macosx_10_11_x86_64.whl", hash = "sha256:8530399666748634bc0b301a6a5523756931b0c2680d188e743d16304afe917a"}, + {file = "safetensors-0.3.3-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:9d741c1f1621e489ba10aa3d135b54202684f6e205df52e219d5eecd673a80c9"}, + {file = "safetensors-0.3.3-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:0c345fd85b4d2093a5109596ff4cd9dfc2e84992e881b4857fbc4a93a3b89ddb"}, + {file = "safetensors-0.3.3-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:69ccee8d05f55cdf76f7e6c87d2bdfb648c16778ef8acfd2ecc495e273e9233e"}, + {file = "safetensors-0.3.3-cp38-cp38-macosx_13_0_arm64.whl", hash = "sha256:c08a9a4b7a4ca389232fa8d097aebc20bbd4f61e477abc7065b5c18b8202dede"}, + {file = "safetensors-0.3.3-cp38-cp38-macosx_13_0_x86_64.whl", hash = "sha256:a002868d2e3f49bbe81bee2655a411c24fa1f8e68b703dec6629cb989d6ae42e"}, + {file = "safetensors-0.3.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3bd2704cb41faa44d3ec23e8b97330346da0395aec87f8eaf9c9e2c086cdbf13"}, + {file = "safetensors-0.3.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4b2951bf3f0ad63df5e6a95263652bd6c194a6eb36fd4f2d29421cd63424c883"}, + {file = "safetensors-0.3.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:07114cec116253ca2e7230fdea30acf76828f21614afd596d7b5438a2f719bd8"}, + {file = "safetensors-0.3.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ab43aeeb9eadbb6b460df3568a662e6f1911ecc39387f8752afcb6a7d96c087"}, + {file = "safetensors-0.3.3-cp38-cp38-win32.whl", hash = "sha256:f2f59fce31dd3429daca7269a6b06f65e6547a0c248f5116976c3f1e9b73f251"}, + {file = "safetensors-0.3.3-cp38-cp38-win_amd64.whl", hash = "sha256:c31ca0d8610f57799925bf08616856b39518ab772c65093ef1516762e796fde4"}, + {file = "safetensors-0.3.3-cp39-cp39-macosx_10_11_x86_64.whl", hash = "sha256:59a596b3225c96d59af412385981f17dd95314e3fffdf359c7e3f5bb97730a19"}, + {file = "safetensors-0.3.3-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:82a16e92210a6221edd75ab17acdd468dd958ef5023d9c6c1289606cc30d1479"}, + {file = "safetensors-0.3.3-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:98a929e763a581f516373ef31983ed1257d2d0da912a8e05d5cd12e9e441c93a"}, + {file = "safetensors-0.3.3-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:12b83f1986cd16ea0454c636c37b11e819d60dd952c26978310a0835133480b7"}, + {file = "safetensors-0.3.3-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:f439175c827c2f1bbd54df42789c5204a10983a30bc4242bc7deaf854a24f3f0"}, + {file = "safetensors-0.3.3-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:0085be33b8cbcb13079b3a8e131656e05b0bc5e6970530d4c24150f7afd76d70"}, + {file = "safetensors-0.3.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e3ec70c87b1e910769034206ad5efc051069b105aac1687f6edcd02526767f4"}, + {file = "safetensors-0.3.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f490132383e5e490e710608f4acffcb98ed37f91b885c7217d3f9f10aaff9048"}, + {file = "safetensors-0.3.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:79d1b6c7ed5596baf79c80fbce5198c3cdcc521ae6a157699f427aba1a90082d"}, + {file = "safetensors-0.3.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad3cc8006e7a86ee7c88bd2813ec59cd7cc75b03e6fa4af89b9c7b235b438d68"}, + {file = "safetensors-0.3.3-cp39-cp39-win32.whl", hash = "sha256:ab29f54c6b8c301ca05fa014728996bd83aac6e21528f893aaf8945c71f42b6d"}, + {file = "safetensors-0.3.3-cp39-cp39-win_amd64.whl", hash = "sha256:0fa82004eae1a71e2aa29843ef99de9350e459a0fc2f65fc6ee0da9690933d2d"}, + {file = "safetensors-0.3.3.tar.gz", hash = "sha256:edb7072d788c4f929d0f5735d3a2fb51e5a27f833587828583b7f5747af1a2b8"}, +] + +[package.extras] +all = ["black (==22.3)", "click (==8.0.4)", "flake8 (>=3.8.3)", "flax (>=0.6.3)", "h5py (>=3.7.0)", "huggingface-hub (>=0.12.1)", "isort (>=5.5.4)", "jax (>=0.3.25)", "jaxlib (>=0.3.25)", "numpy (>=1.21.6)", "paddlepaddle (>=2.4.1)", "pytest (>=7.2.0)", "pytest-benchmark (>=4.0.0)", "setuptools-rust (>=1.5.2)", "tensorflow (==2.11.0)", "torch (>=1.10)"] +dev = ["black (==22.3)", "click (==8.0.4)", "flake8 (>=3.8.3)", "flax (>=0.6.3)", "h5py (>=3.7.0)", "huggingface-hub (>=0.12.1)", "isort (>=5.5.4)", "jax (>=0.3.25)", "jaxlib (>=0.3.25)", "numpy (>=1.21.6)", "paddlepaddle (>=2.4.1)", "pytest (>=7.2.0)", "pytest-benchmark (>=4.0.0)", "setuptools-rust (>=1.5.2)", "tensorflow (==2.11.0)", "torch (>=1.10)"] +jax = ["flax (>=0.6.3)", "jax (>=0.3.25)", "jaxlib (>=0.3.25)", "numpy (>=1.21.6)"] +numpy = ["numpy (>=1.21.6)"] +paddlepaddle = ["numpy (>=1.21.6)", "paddlepaddle (>=2.4.1)"] +pinned-tf = ["tensorflow (==2.11.0)"] +quality = ["black (==22.3)", "click (==8.0.4)", "flake8 (>=3.8.3)", "isort (>=5.5.4)"] +tensorflow = ["numpy (>=1.21.6)", "tensorflow (>=2.11.0)"] +testing = ["h5py (>=3.7.0)", "huggingface-hub (>=0.12.1)", "numpy (>=1.21.6)", "pytest (>=7.2.0)", "pytest-benchmark (>=4.0.0)", "setuptools-rust (>=1.5.2)"] +torch = ["numpy (>=1.21.6)", "torch (>=1.10)"] + +[[package]] +name = "scikit-learn" +version = "1.3.0" +description = "A set of python modules for machine learning and data mining" +optional = false +python-versions = ">=3.8" +files = [ + {file = "scikit-learn-1.3.0.tar.gz", hash = "sha256:8be549886f5eda46436b6e555b0e4873b4f10aa21c07df45c4bc1735afbccd7a"}, + {file = "scikit_learn-1.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:981287869e576d42c682cf7ca96af0c6ac544ed9316328fd0d9292795c742cf5"}, + {file = "scikit_learn-1.3.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:436aaaae2c916ad16631142488e4c82f4296af2404f480e031d866863425d2a2"}, + {file = "scikit_learn-1.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c7e28d8fa47a0b30ae1bd7a079519dd852764e31708a7804da6cb6f8b36e3630"}, + {file = "scikit_learn-1.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ae80c08834a473d08a204d966982a62e11c976228d306a2648c575e3ead12111"}, + {file = "scikit_learn-1.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:552fd1b6ee22900cf1780d7386a554bb96949e9a359999177cf30211e6b20df6"}, + {file = "scikit_learn-1.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:79970a6d759eb00a62266a31e2637d07d2d28446fca8079cf9afa7c07b0427f8"}, + {file = "scikit_learn-1.3.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:850a00b559e636b23901aabbe79b73dc604b4e4248ba9e2d6e72f95063765603"}, + {file = "scikit_learn-1.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee04835fb016e8062ee9fe9074aef9b82e430504e420bff51e3e5fffe72750ca"}, + {file = "scikit_learn-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d953531f5d9f00c90c34fa3b7d7cfb43ecff4c605dac9e4255a20b114a27369"}, + {file = "scikit_learn-1.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:151ac2bf65ccf363664a689b8beafc9e6aae36263db114b4ca06fbbbf827444a"}, + {file = "scikit_learn-1.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6a885a9edc9c0a341cab27ec4f8a6c58b35f3d449c9d2503a6fd23e06bbd4f6a"}, + {file = "scikit_learn-1.3.0-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:9877af9c6d1b15486e18a94101b742e9d0d2f343d35a634e337411ddb57783f3"}, + {file = "scikit_learn-1.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c470f53cea065ff3d588050955c492793bb50c19a92923490d18fcb637f6383a"}, + {file = "scikit_learn-1.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd6e2d7389542eae01077a1ee0318c4fec20c66c957f45c7aac0c6eb0fe3c612"}, + {file = "scikit_learn-1.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:3a11936adbc379a6061ea32fa03338d4ca7248d86dd507c81e13af428a5bc1db"}, + {file = "scikit_learn-1.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:998d38fcec96584deee1e79cd127469b3ad6fefd1ea6c2dfc54e8db367eb396b"}, + {file = "scikit_learn-1.3.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:ded35e810438a527e17623ac6deae3b360134345b7c598175ab7741720d7ffa7"}, + {file = "scikit_learn-1.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e8102d5036e28d08ab47166b48c8d5e5810704daecf3a476a4282d562be9a28"}, + {file = "scikit_learn-1.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7617164951c422747e7c32be4afa15d75ad8044f42e7d70d3e2e0429a50e6718"}, + {file = "scikit_learn-1.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:1d54fb9e6038284548072df22fd34777e434153f7ffac72c8596f2d6987110dd"}, +] + +[package.dependencies] +joblib = ">=1.1.1" +numpy = ">=1.17.3" +scipy = ">=1.5.0" +threadpoolctl = ">=2.0.0" + +[package.extras] +benchmark = ["matplotlib (>=3.1.3)", "memory-profiler (>=0.57.0)", "pandas (>=1.0.5)"] +docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.1.3)", "memory-profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.0.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.16.2)", "seaborn (>=0.9.0)", "sphinx (>=6.0.0)", "sphinx-copybutton (>=0.5.2)", "sphinx-gallery (>=0.10.1)", "sphinx-prompt (>=1.3.0)", "sphinxext-opengraph (>=0.4.2)"] +examples = ["matplotlib (>=3.1.3)", "pandas (>=1.0.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.16.2)", "seaborn (>=0.9.0)"] +tests = ["black (>=23.3.0)", "matplotlib (>=3.1.3)", "mypy (>=1.3)", "numpydoc (>=1.2.0)", "pandas (>=1.0.5)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.0.272)", "scikit-image (>=0.16.2)"] + +[[package]] +name = "scipy" +version = "1.9.3" +description = "Fundamental algorithms for scientific computing in Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "scipy-1.9.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1884b66a54887e21addf9c16fb588720a8309a57b2e258ae1c7986d4444d3bc0"}, + {file = "scipy-1.9.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:83b89e9586c62e787f5012e8475fbb12185bafb996a03257e9675cd73d3736dd"}, + {file = "scipy-1.9.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a72d885fa44247f92743fc20732ae55564ff2a519e8302fb7e18717c5355a8b"}, + {file = "scipy-1.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d01e1dd7b15bd2449c8bfc6b7cc67d630700ed655654f0dfcf121600bad205c9"}, + {file = "scipy-1.9.3-cp310-cp310-win_amd64.whl", hash = "sha256:68239b6aa6f9c593da8be1509a05cb7f9efe98b80f43a5861cd24c7557e98523"}, + {file = "scipy-1.9.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b41bc822679ad1c9a5f023bc93f6d0543129ca0f37c1ce294dd9d386f0a21096"}, + {file = "scipy-1.9.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:90453d2b93ea82a9f434e4e1cba043e779ff67b92f7a0e85d05d286a3625df3c"}, + {file = "scipy-1.9.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:83c06e62a390a9167da60bedd4575a14c1f58ca9dfde59830fc42e5197283dab"}, + {file = "scipy-1.9.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:abaf921531b5aeaafced90157db505e10345e45038c39e5d9b6c7922d68085cb"}, + {file = "scipy-1.9.3-cp311-cp311-win_amd64.whl", hash = "sha256:06d2e1b4c491dc7d8eacea139a1b0b295f74e1a1a0f704c375028f8320d16e31"}, + {file = "scipy-1.9.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5a04cd7d0d3eff6ea4719371cbc44df31411862b9646db617c99718ff68d4840"}, + {file = "scipy-1.9.3-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:545c83ffb518094d8c9d83cce216c0c32f8c04aaf28b92cc8283eda0685162d5"}, + {file = "scipy-1.9.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d54222d7a3ba6022fdf5773931b5d7c56efe41ede7f7128c7b1637700409108"}, + {file = "scipy-1.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cff3a5295234037e39500d35316a4c5794739433528310e117b8a9a0c76d20fc"}, + {file = "scipy-1.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:2318bef588acc7a574f5bfdff9c172d0b1bf2c8143d9582e05f878e580a3781e"}, + {file = "scipy-1.9.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d644a64e174c16cb4b2e41dfea6af722053e83d066da7343f333a54dae9bc31c"}, + {file = "scipy-1.9.3-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:da8245491d73ed0a994ed9c2e380fd058ce2fa8a18da204681f2fe1f57f98f95"}, + {file = "scipy-1.9.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4db5b30849606a95dcf519763dd3ab6fe9bd91df49eba517359e450a7d80ce2e"}, + {file = "scipy-1.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c68db6b290cbd4049012990d7fe71a2abd9ffbe82c0056ebe0f01df8be5436b0"}, + {file = "scipy-1.9.3-cp39-cp39-win_amd64.whl", hash = "sha256:5b88e6d91ad9d59478fafe92a7c757d00c59e3bdc3331be8ada76a4f8d683f58"}, + {file = "scipy-1.9.3.tar.gz", hash = "sha256:fbc5c05c85c1a02be77b1ff591087c83bc44579c6d2bd9fb798bb64ea5e1a027"}, +] + +[package.dependencies] +numpy = ">=1.18.5,<1.26.0" + +[package.extras] +dev = ["flake8", "mypy", "pycodestyle", "typing_extensions"] +doc = ["matplotlib (>2)", "numpydoc", "pydata-sphinx-theme (==0.9.0)", "sphinx (!=4.1.0)", "sphinx-panels (>=0.5.2)", "sphinx-tabs"] +test = ["asv", "gmpy2", "mpmath", "pytest", "pytest-cov", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] + [[package]] name = "selenium" version = "4.11.2" @@ -2760,6 +3131,82 @@ files = [ {file = "semver-3.0.1.tar.gz", hash = "sha256:9ec78c5447883c67b97f98c3b6212796708191d22e4ad30f4570f840171cbce1"}, ] +[[package]] +name = "sentence-transformers" +version = "2.2.2" +description = "Multilingual text embeddings" +optional = false +python-versions = ">=3.6.0" +files = [ + {file = "sentence-transformers-2.2.2.tar.gz", hash = "sha256:dbc60163b27de21076c9a30d24b5b7b6fa05141d68cf2553fa9a77bf79a29136"}, +] + +[package.dependencies] +huggingface-hub = ">=0.4.0" +nltk = "*" +numpy = "*" +scikit-learn = "*" +scipy = "*" +sentencepiece = "*" +torch = ">=1.6.0" +torchvision = "*" +tqdm = "*" +transformers = ">=4.6.0,<5.0.0" + +[[package]] +name = "sentencepiece" +version = "0.1.99" +description = "SentencePiece python wrapper" +optional = false +python-versions = "*" +files = [ + {file = "sentencepiece-0.1.99-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0eb528e70571b7c02723e5804322469b82fe7ea418c96051d0286c0fa028db73"}, + {file = "sentencepiece-0.1.99-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:77d7fafb2c4e4659cbdf303929503f37a26eabc4ff31d3a79bf1c5a1b338caa7"}, + {file = "sentencepiece-0.1.99-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:be9cf5b9e404c245aeb3d3723c737ba7a8f5d4ba262ef233a431fa6c45f732a0"}, + {file = "sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:baed1a26464998f9710d20e52607c29ffd4293e7c71c6a1f83f51ad0911ec12c"}, + {file = "sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9832f08bb372d4c8b567612f8eab9e36e268dff645f1c28f9f8e851be705f6d1"}, + {file = "sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:019e7535108e309dae2b253a75834fc3128240aa87c00eb80732078cdc182588"}, + {file = "sentencepiece-0.1.99-cp310-cp310-win32.whl", hash = "sha256:fa16a830416bb823fa2a52cbdd474d1f7f3bba527fd2304fb4b140dad31bb9bc"}, + {file = "sentencepiece-0.1.99-cp310-cp310-win_amd64.whl", hash = "sha256:14b0eccb7b641d4591c3e12ae44cab537d68352e4d3b6424944f0c447d2348d5"}, + {file = "sentencepiece-0.1.99-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6d3c56f24183a1e8bd61043ff2c58dfecdc68a5dd8955dc13bab83afd5f76b81"}, + {file = "sentencepiece-0.1.99-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ed6ea1819fd612c989999e44a51bf556d0ef6abfb553080b9be3d347e18bcfb7"}, + {file = "sentencepiece-0.1.99-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a2a0260cd1fb7bd8b4d4f39dc2444a8d5fd4e0a0c4d5c899810ef1abf99b2d45"}, + {file = "sentencepiece-0.1.99-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8a1abff4d1ff81c77cac3cc6fefa34fa4b8b371e5ee51cb7e8d1ebc996d05983"}, + {file = "sentencepiece-0.1.99-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:004e6a621d4bc88978eecb6ea7959264239a17b70f2cbc348033d8195c9808ec"}, + {file = "sentencepiece-0.1.99-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db361e03342c41680afae5807590bc88aa0e17cfd1a42696a160e4005fcda03b"}, + {file = "sentencepiece-0.1.99-cp311-cp311-win32.whl", hash = "sha256:2d95e19168875b70df62916eb55428a0cbcb834ac51d5a7e664eda74def9e1e0"}, + {file = "sentencepiece-0.1.99-cp311-cp311-win_amd64.whl", hash = "sha256:f90d73a6f81248a909f55d8e6ef56fec32d559e1e9af045f0b0322637cb8e5c7"}, + {file = "sentencepiece-0.1.99-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:62e24c81e74bd87a6e0d63c51beb6527e4c0add67e1a17bac18bcd2076afcfeb"}, + {file = "sentencepiece-0.1.99-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:57efcc2d51caff20d9573567d9fd3f854d9efe613ed58a439c78c9f93101384a"}, + {file = "sentencepiece-0.1.99-cp36-cp36m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6a904c46197993bd1e95b93a6e373dca2f170379d64441041e2e628ad4afb16f"}, + {file = "sentencepiece-0.1.99-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d89adf59854741c0d465f0e1525b388c0d174f611cc04af54153c5c4f36088c4"}, + {file = "sentencepiece-0.1.99-cp36-cp36m-win32.whl", hash = "sha256:47c378146928690d1bc106fdf0da768cebd03b65dd8405aa3dd88f9c81e35dba"}, + {file = "sentencepiece-0.1.99-cp36-cp36m-win_amd64.whl", hash = "sha256:9ba142e7a90dd6d823c44f9870abdad45e6c63958eb60fe44cca6828d3b69da2"}, + {file = "sentencepiece-0.1.99-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b7b1a9ae4d7c6f1f867e63370cca25cc17b6f4886729595b885ee07a58d3cec3"}, + {file = "sentencepiece-0.1.99-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0f644c9d4d35c096a538507b2163e6191512460035bf51358794a78515b74f7"}, + {file = "sentencepiece-0.1.99-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c8843d23a0f686d85e569bd6dcd0dd0e0cbc03731e63497ca6d5bacd18df8b85"}, + {file = "sentencepiece-0.1.99-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33e6f690a1caebb4867a2e367afa1918ad35be257ecdb3455d2bbd787936f155"}, + {file = "sentencepiece-0.1.99-cp37-cp37m-win32.whl", hash = "sha256:8a321866c2f85da7beac74a824b4ad6ddc2a4c9bccd9382529506d48f744a12c"}, + {file = "sentencepiece-0.1.99-cp37-cp37m-win_amd64.whl", hash = "sha256:c42f753bcfb7661c122a15b20be7f684b61fc8592c89c870adf52382ea72262d"}, + {file = "sentencepiece-0.1.99-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:85b476406da69c70586f0bb682fcca4c9b40e5059814f2db92303ea4585c650c"}, + {file = "sentencepiece-0.1.99-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cfbcfe13c69d3f87b7fcd5da168df7290a6d006329be71f90ba4f56bc77f8561"}, + {file = "sentencepiece-0.1.99-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:445b0ec381af1cd4eef95243e7180c63d9c384443c16c4c47a28196bd1cda937"}, + {file = "sentencepiece-0.1.99-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c6890ea0f2b4703f62d0bf27932e35808b1f679bdb05c7eeb3812b935ba02001"}, + {file = "sentencepiece-0.1.99-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fb71af492b0eefbf9f2501bec97bcd043b6812ab000d119eaf4bd33f9e283d03"}, + {file = "sentencepiece-0.1.99-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27b866b5bd3ddd54166bbcbf5c8d7dd2e0b397fac8537991c7f544220b1f67bc"}, + {file = "sentencepiece-0.1.99-cp38-cp38-win32.whl", hash = "sha256:b133e8a499eac49c581c3c76e9bdd08c338cc1939e441fee6f92c0ccb5f1f8be"}, + {file = "sentencepiece-0.1.99-cp38-cp38-win_amd64.whl", hash = "sha256:0eaf3591dd0690a87f44f4df129cf8d05d8a4029b5b6709b489b8e27f9a9bcff"}, + {file = "sentencepiece-0.1.99-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:38efeda9bbfb55052d482a009c6a37e52f42ebffcea9d3a98a61de7aee356a28"}, + {file = "sentencepiece-0.1.99-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6c030b081dc1e1bcc9fadc314b19b740715d3d566ad73a482da20d7d46fd444c"}, + {file = "sentencepiece-0.1.99-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:84dbe53e02e4f8a2e45d2ac3e430d5c83182142658e25edd76539b7648928727"}, + {file = "sentencepiece-0.1.99-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b0f55d0a0ee1719b4b04221fe0c9f0c3461dc3dabd77a035fa2f4788eb3ef9a"}, + {file = "sentencepiece-0.1.99-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18e800f206cd235dc27dc749299e05853a4e4332e8d3dfd81bf13d0e5b9007d9"}, + {file = "sentencepiece-0.1.99-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ae1c40cda8f9d5b0423cfa98542735c0235e7597d79caf318855cdf971b2280"}, + {file = "sentencepiece-0.1.99-cp39-cp39-win32.whl", hash = "sha256:c84ce33af12ca222d14a1cdd37bd76a69401e32bc68fe61c67ef6b59402f4ab8"}, + {file = "sentencepiece-0.1.99-cp39-cp39-win_amd64.whl", hash = "sha256:350e5c74d739973f1c9643edb80f7cc904dc948578bcb1d43c6f2b173e5d18dd"}, + {file = "sentencepiece-0.1.99.tar.gz", hash = "sha256:189c48f5cb2949288f97ccdb97f0473098d9c3dcf5a3d99d4eabe719ec27297f"}, +] + [[package]] name = "setuptools" version = "68.1.2" @@ -3190,6 +3637,34 @@ anyio = ">=3.4.0,<5" [package.extras] full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyaml"] +[[package]] +name = "sympy" +version = "1.12" +description = "Computer algebra system (CAS) in Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "sympy-1.12-py3-none-any.whl", hash = "sha256:c3588cd4295d0c0f603d0f2ae780587e64e2efeedb3521e46b9bb1d08d184fa5"}, + {file = "sympy-1.12.tar.gz", hash = "sha256:ebf595c8dac3e0fdc4152c51878b498396ec7f30e7a914d6071e674d49420fb8"}, +] + +[package.dependencies] +mpmath = ">=0.19" + +[[package]] +name = "tabulate" +version = "0.9.0" +description = "Pretty-print tabular data" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"}, + {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"}, +] + +[package.extras] +widechars = ["wcwidth"] + [[package]] name = "tenacity" version = "8.2.3" @@ -3290,6 +3765,17 @@ mxnet = ["mxnet (>=1.5.1,<1.6.0)"] tensorflow = ["tensorflow (>=2.0.0,<2.6.0)"] torch = ["torch (>=1.6.0)"] +[[package]] +name = "threadpoolctl" +version = "3.2.0" +description = "threadpoolctl" +optional = false +python-versions = ">=3.8" +files = [ + {file = "threadpoolctl-3.2.0-py3-none-any.whl", hash = "sha256:2b7818516e423bdaebb97c723f86a7c6b0a83d3f3b0970328d66f4d9104dc032"}, + {file = "threadpoolctl-3.2.0.tar.gz", hash = "sha256:c96a0ba3bdddeaca37dc4cc7344aafad41cdb8c313f74fdfe387a867bba93355"}, +] + [[package]] name = "tiktoken" version = "0.4.0" @@ -3335,6 +3821,60 @@ requests = ">=2.26.0" [package.extras] blobfile = ["blobfile (>=2)"] +[[package]] +name = "tokenizers" +version = "0.13.3" +description = "Fast and Customizable Tokenizers" +optional = false +python-versions = "*" +files = [ + {file = "tokenizers-0.13.3-cp310-cp310-macosx_10_11_x86_64.whl", hash = "sha256:f3835c5be51de8c0a092058a4d4380cb9244fb34681fd0a295fbf0a52a5fdf33"}, + {file = "tokenizers-0.13.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:4ef4c3e821730f2692489e926b184321e887f34fb8a6b80b8096b966ba663d07"}, + {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5fd1a6a25353e9aa762e2aae5a1e63883cad9f4e997c447ec39d071020459bc"}, + {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ee0b1b311d65beab83d7a41c56a1e46ab732a9eed4460648e8eb0bd69fc2d059"}, + {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ef4215284df1277dadbcc5e17d4882bda19f770d02348e73523f7e7d8b8d396"}, + {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4d53976079cff8a033f778fb9adca2d9d69d009c02fa2d71a878b5f3963ed30"}, + {file = "tokenizers-0.13.3-cp310-cp310-win32.whl", hash = "sha256:1f0e3b4c2ea2cd13238ce43548959c118069db7579e5d40ec270ad77da5833ce"}, + {file = "tokenizers-0.13.3-cp310-cp310-win_amd64.whl", hash = "sha256:89649c00d0d7211e8186f7a75dfa1db6996f65edce4b84821817eadcc2d3c79e"}, + {file = "tokenizers-0.13.3-cp311-cp311-macosx_10_11_universal2.whl", hash = "sha256:56b726e0d2bbc9243872b0144515ba684af5b8d8cd112fb83ee1365e26ec74c8"}, + {file = "tokenizers-0.13.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:cc5c022ce692e1f499d745af293ab9ee6f5d92538ed2faf73f9708c89ee59ce6"}, + {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f55c981ac44ba87c93e847c333e58c12abcbb377a0c2f2ef96e1a266e4184ff2"}, + {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f247eae99800ef821a91f47c5280e9e9afaeed9980fc444208d5aa6ba69ff148"}, + {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4b3e3215d048e94f40f1c95802e45dcc37c5b05eb46280fc2ccc8cd351bff839"}, + {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ba2b0bf01777c9b9bc94b53764d6684554ce98551fec496f71bc5be3a03e98b"}, + {file = "tokenizers-0.13.3-cp311-cp311-win32.whl", hash = "sha256:cc78d77f597d1c458bf0ea7c2a64b6aa06941c7a99cb135b5969b0278824d808"}, + {file = "tokenizers-0.13.3-cp311-cp311-win_amd64.whl", hash = "sha256:ecf182bf59bd541a8876deccf0360f5ae60496fd50b58510048020751cf1724c"}, + {file = "tokenizers-0.13.3-cp37-cp37m-macosx_10_11_x86_64.whl", hash = "sha256:0527dc5436a1f6bf2c0327da3145687d3bcfbeab91fed8458920093de3901b44"}, + {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:07cbb2c307627dc99b44b22ef05ff4473aa7c7cc1fec8f0a8b37d8a64b1a16d2"}, + {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4560dbdeaae5b7ee0d4e493027e3de6d53c991b5002d7ff95083c99e11dd5ac0"}, + {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:64064bd0322405c9374305ab9b4c07152a1474370327499911937fd4a76d004b"}, + {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8c6e2ab0f2e3d939ca66aa1d596602105fe33b505cd2854a4c1717f704c51de"}, + {file = "tokenizers-0.13.3-cp37-cp37m-win32.whl", hash = "sha256:6cc29d410768f960db8677221e497226e545eaaea01aa3613fa0fdf2cc96cff4"}, + {file = "tokenizers-0.13.3-cp37-cp37m-win_amd64.whl", hash = "sha256:fc2a7fdf864554a0dacf09d32e17c0caa9afe72baf9dd7ddedc61973bae352d8"}, + {file = "tokenizers-0.13.3-cp38-cp38-macosx_10_11_x86_64.whl", hash = "sha256:8791dedba834c1fc55e5f1521be325ea3dafb381964be20684b92fdac95d79b7"}, + {file = "tokenizers-0.13.3-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:d607a6a13718aeb20507bdf2b96162ead5145bbbfa26788d6b833f98b31b26e1"}, + {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3791338f809cd1bf8e4fee6b540b36822434d0c6c6bc47162448deee3f77d425"}, + {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c2f35f30e39e6aab8716f07790f646bdc6e4a853816cc49a95ef2a9016bf9ce6"}, + {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:310204dfed5aa797128b65d63538a9837cbdd15da2a29a77d67eefa489edda26"}, + {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0f9b92ea052305166559f38498b3b0cae159caea712646648aaa272f7160963"}, + {file = "tokenizers-0.13.3-cp38-cp38-win32.whl", hash = "sha256:9a3fa134896c3c1f0da6e762d15141fbff30d094067c8f1157b9fdca593b5806"}, + {file = "tokenizers-0.13.3-cp38-cp38-win_amd64.whl", hash = "sha256:8e7b0cdeace87fa9e760e6a605e0ae8fc14b7d72e9fc19c578116f7287bb873d"}, + {file = "tokenizers-0.13.3-cp39-cp39-macosx_10_11_x86_64.whl", hash = "sha256:00cee1e0859d55507e693a48fa4aef07060c4bb6bd93d80120e18fea9371c66d"}, + {file = "tokenizers-0.13.3-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:a23ff602d0797cea1d0506ce69b27523b07e70f6dda982ab8cf82402de839088"}, + {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70ce07445050b537d2696022dafb115307abdffd2a5c106f029490f84501ef97"}, + {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:280ffe95f50eaaf655b3a1dc7ff1d9cf4777029dbbc3e63a74e65a056594abc3"}, + {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97acfcec592f7e9de8cadcdcda50a7134423ac8455c0166b28c9ff04d227b371"}, + {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd7730c98a3010cd4f523465867ff95cd9d6430db46676ce79358f65ae39797b"}, + {file = "tokenizers-0.13.3-cp39-cp39-win32.whl", hash = "sha256:48625a108029cb1ddf42e17a81b5a3230ba6888a70c9dc14e81bc319e812652d"}, + {file = "tokenizers-0.13.3-cp39-cp39-win_amd64.whl", hash = "sha256:bc0a6f1ba036e482db6453571c9e3e60ecd5489980ffd95d11dc9f960483d783"}, + {file = "tokenizers-0.13.3.tar.gz", hash = "sha256:2e546dbb68b623008a5442353137fbb0123d311a6d7ba52f2667c8862a75af2e"}, +] + +[package.extras] +dev = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] +docs = ["setuptools-rust", "sphinx", "sphinx-rtd-theme"] +testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] + [[package]] name = "tomli" version = "2.0.1" @@ -3357,6 +3897,83 @@ files = [ {file = "tomlkit-0.12.1.tar.gz", hash = "sha256:38e1ff8edb991273ec9f6181244a6a391ac30e9f5098e7535640ea6be97a7c86"}, ] +[[package]] +name = "torch" +version = "2.0.1" +description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" +optional = false +python-versions = ">=3.8.0" +files = [ + {file = "torch-2.0.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:8ced00b3ba471856b993822508f77c98f48a458623596a4c43136158781e306a"}, + {file = "torch-2.0.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:359bfaad94d1cda02ab775dc1cc386d585712329bb47b8741607ef6ef4950747"}, + {file = "torch-2.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:7c84e44d9002182edd859f3400deaa7410f5ec948a519cc7ef512c2f9b34d2c4"}, + {file = "torch-2.0.1-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:567f84d657edc5582d716900543e6e62353dbe275e61cdc36eda4929e46df9e7"}, + {file = "torch-2.0.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:787b5a78aa7917465e9b96399b883920c88a08f4eb63b5a5d2d1a16e27d2f89b"}, + {file = "torch-2.0.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:e617b1d0abaf6ced02dbb9486803abfef0d581609b09641b34fa315c9c40766d"}, + {file = "torch-2.0.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:b6019b1de4978e96daa21d6a3ebb41e88a0b474898fe251fd96189587408873e"}, + {file = "torch-2.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:dbd68cbd1cd9da32fe5d294dd3411509b3d841baecb780b38b3b7b06c7754434"}, + {file = "torch-2.0.1-cp311-none-macosx_10_9_x86_64.whl", hash = "sha256:ef654427d91600129864644e35deea761fb1fe131710180b952a6f2e2207075e"}, + {file = "torch-2.0.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:25aa43ca80dcdf32f13da04c503ec7afdf8e77e3a0183dd85cd3e53b2842e527"}, + {file = "torch-2.0.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:5ef3ea3d25441d3957348f7e99c7824d33798258a2bf5f0f0277cbcadad2e20d"}, + {file = "torch-2.0.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:0882243755ff28895e8e6dc6bc26ebcf5aa0911ed81b2a12f241fc4b09075b13"}, + {file = "torch-2.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:f66aa6b9580a22b04d0af54fcd042f52406a8479e2b6a550e3d9f95963e168c8"}, + {file = "torch-2.0.1-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:1adb60d369f2650cac8e9a95b1d5758e25d526a34808f7448d0bd599e4ae9072"}, + {file = "torch-2.0.1-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:1bcffc16b89e296826b33b98db5166f990e3b72654a2b90673e817b16c50e32b"}, + {file = "torch-2.0.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:e10e1597f2175365285db1b24019eb6f04d53dcd626c735fc502f1e8b6be9875"}, + {file = "torch-2.0.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:423e0ae257b756bb45a4b49072046772d1ad0c592265c5080070e0767da4e490"}, + {file = "torch-2.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:8742bdc62946c93f75ff92da00e3803216c6cce9b132fbca69664ca38cfb3e18"}, + {file = "torch-2.0.1-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:c62df99352bd6ee5a5a8d1832452110435d178b5164de450831a3a8cc14dc680"}, + {file = "torch-2.0.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:671a2565e3f63b8fe8e42ae3e36ad249fe5e567435ea27b94edaa672a7d0c416"}, +] + +[package.dependencies] +filelock = "*" +jinja2 = "*" +networkx = "*" +sympy = "*" +typing-extensions = "*" + +[package.extras] +opt-einsum = ["opt-einsum (>=3.3)"] + +[[package]] +name = "torchvision" +version = "0.15.2" +description = "image and video datasets and models for torch deep learning" +optional = false +python-versions = ">=3.8" +files = [ + {file = "torchvision-0.15.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7754088774e810c5672b142a45dcf20b1bd986a5a7da90f8660c43dc43fb850c"}, + {file = "torchvision-0.15.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:37eb138e13f6212537a3009ac218695483a635c404b6cc1d8e0d0d978026a86d"}, + {file = "torchvision-0.15.2-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:54143f7cc0797d199b98a53b7d21c3f97615762d4dd17ad45a41c7e80d880e73"}, + {file = "torchvision-0.15.2-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:1eefebf5fbd01a95fe8f003d623d941601c94b5cec547b420da89cb369d9cf96"}, + {file = "torchvision-0.15.2-cp310-cp310-win_amd64.whl", hash = "sha256:96fae30c5ca8423f4b9790df0f0d929748e32718d88709b7b567d2f630c042e3"}, + {file = "torchvision-0.15.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5f35f6bd5bcc4568e6522e4137fa60fcc72f4fa3e615321c26cd87e855acd398"}, + {file = "torchvision-0.15.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:757505a0ab2be7096cb9d2bf4723202c971cceddb72c7952a7e877f773de0f8a"}, + {file = "torchvision-0.15.2-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:012ad25cfd9019ff9b0714a168727e3845029be1af82296ff1e1482931fa4b80"}, + {file = "torchvision-0.15.2-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:b02a7ffeaa61448737f39a4210b8ee60234bda0515a0c0d8562f884454105b0f"}, + {file = "torchvision-0.15.2-cp311-cp311-win_amd64.whl", hash = "sha256:10be76ceded48329d0a0355ac33da131ee3993ff6c125e4a02ab34b5baa2472c"}, + {file = "torchvision-0.15.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8f12415b686dba884fb086f53ac803f692be5a5cdd8a758f50812b30fffea2e4"}, + {file = "torchvision-0.15.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:31211c01f8b8ec33b8a638327b5463212e79a03e43c895f88049f97af1bd12fd"}, + {file = "torchvision-0.15.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:c55f9889e436f14b4f84a9c00ebad0d31f5b4626f10cf8018e6c676f92a6d199"}, + {file = "torchvision-0.15.2-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:9a192f2aa979438f23c20e883980b23d13268ab9f819498774a6d2eb021802c2"}, + {file = "torchvision-0.15.2-cp38-cp38-win_amd64.whl", hash = "sha256:c07071bc8d02aa8fcdfe139ab6a1ef57d3b64c9e30e84d12d45c9f4d89fb6536"}, + {file = "torchvision-0.15.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4790260fcf478a41c7ecc60a6d5200a88159fdd8d756e9f29f0f8c59c4a67a68"}, + {file = "torchvision-0.15.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:987ab62225b4151a11e53fd06150c5258ced24ac9d7c547e0e4ab6fbca92a5ce"}, + {file = "torchvision-0.15.2-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:63df26673e66cba3f17e07c327a8cafa3cce98265dbc3da329f1951d45966838"}, + {file = "torchvision-0.15.2-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:b85f98d4cc2f72452f6792ab4463a3541bc5678a8cdd3da0e139ba2fe8b56d42"}, + {file = "torchvision-0.15.2-cp39-cp39-win_amd64.whl", hash = "sha256:07c462524cc1bba5190c16a9d47eac1fca024d60595a310f23c00b4ffff18b30"}, +] + +[package.dependencies] +numpy = "*" +pillow = ">=5.3.0,<8.3.dev0 || >=8.4.dev0" +requests = "*" +torch = "2.0.1" + +[package.extras] +scipy = ["scipy"] + [[package]] name = "tqdm" version = "4.66.1" @@ -3377,6 +3994,75 @@ notebook = ["ipywidgets (>=6)"] slack = ["slack-sdk"] telegram = ["requests"] +[[package]] +name = "transformers" +version = "4.32.1" +description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" +optional = false +python-versions = ">=3.8.0" +files = [ + {file = "transformers-4.32.1-py3-none-any.whl", hash = "sha256:b930d3dbd907a3f300cf49e54d63a56f8a0ab16b01a2c2a61ecff37c6de1da08"}, + {file = "transformers-4.32.1.tar.gz", hash = "sha256:1edc8ae1de357d97c3d36b04412aa63d55e6fc0c4b39b419a7d380ed947d2252"}, +] + +[package.dependencies] +filelock = "*" +huggingface-hub = ">=0.15.1,<1.0" +numpy = ">=1.17" +packaging = ">=20.0" +pyyaml = ">=5.1" +regex = "!=2019.12.17" +requests = "*" +safetensors = ">=0.3.1" +tokenizers = ">=0.11.1,<0.11.3 || >0.11.3,<0.14" +tqdm = ">=4.27" + +[package.extras] +accelerate = ["accelerate (>=0.20.3)"] +agents = ["Pillow (<10.0.0)", "accelerate (>=0.20.3)", "datasets (!=2.5.0)", "diffusers", "opencv-python", "sentencepiece (>=0.1.91,!=0.1.92)", "torch (>=1.9,!=1.12.0)"] +all = ["Pillow (<10.0.0)", "accelerate (>=0.20.3)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune]", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.6,<2.14)", "tensorflow-text (<2.14)", "tf2onnx", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.9,!=1.12.0)", "torchaudio", "torchvision"] +audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] +codecarbon = ["codecarbon (==1.2.0)"] +deepspeed = ["accelerate (>=0.20.3)", "deepspeed (>=0.9.3)"] +deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.20.3)", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder (>=0.3.0)", "nltk", "optuna", "parameterized", "protobuf", "psutil", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "timeout-decorator"] +dev = ["GitPython (<3.1.19)", "Pillow (<10.0.0)", "accelerate (>=0.20.3)", "av (==9.2.0)", "beautifulsoup4", "black (>=23.1,<24.0)", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "decord (==0.6.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "ray[tune]", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorflow (>=2.6,<2.14)", "tensorflow-text (<2.14)", "tf2onnx", "timeout-decorator", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.9,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] +dev-tensorflow = ["GitPython (<3.1.19)", "Pillow (<10.0.0)", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorflow (>=2.6,<2.14)", "tensorflow-text (<2.14)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "urllib3 (<2.0.0)"] +dev-torch = ["GitPython (<3.1.19)", "Pillow (<10.0.0)", "accelerate (>=0.20.3)", "beautifulsoup4", "black (>=23.1,<24.0)", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "librosa", "nltk", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "ray[tune]", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "timeout-decorator", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.9,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] +docs = ["Pillow (<10.0.0)", "accelerate (>=0.20.3)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "hf-doc-builder", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune]", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.6,<2.14)", "tensorflow-text (<2.14)", "tf2onnx", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.9,!=1.12.0)", "torchaudio", "torchvision"] +docs-specific = ["hf-doc-builder"] +fairscale = ["fairscale (>0.3)"] +flax = ["flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "optax (>=0.0.8,<=0.1.4)"] +flax-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] +ftfy = ["ftfy"] +integrations = ["optuna", "ray[tune]", "sigopt"] +ja = ["fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "rhoknp (>=1.1.0,<1.3.1)", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)"] +modelcreation = ["cookiecutter (==1.7.3)"] +natten = ["natten (>=0.14.6)"] +onnx = ["onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "tf2onnx"] +onnxruntime = ["onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)"] +optuna = ["optuna"] +quality = ["GitPython (<3.1.19)", "black (>=23.1,<24.0)", "datasets (!=2.5.0)", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "ruff (>=0.0.241,<=0.0.259)", "urllib3 (<2.0.0)"] +ray = ["ray[tune]"] +retrieval = ["datasets (!=2.5.0)", "faiss-cpu"] +sagemaker = ["sagemaker (>=2.31.0)"] +sentencepiece = ["protobuf", "sentencepiece (>=0.1.91,!=0.1.92)"] +serving = ["fastapi", "pydantic (<2)", "starlette", "uvicorn"] +sigopt = ["sigopt"] +sklearn = ["scikit-learn"] +speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] +testing = ["GitPython (<3.1.19)", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder (>=0.3.0)", "nltk", "parameterized", "protobuf", "psutil", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "timeout-decorator"] +tf = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow (>=2.6,<2.14)", "tensorflow-text (<2.14)", "tf2onnx"] +tf-cpu = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow-cpu (>=2.6,<2.14)", "tensorflow-text (<2.14)", "tf2onnx"] +tf-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] +timm = ["timm"] +tokenizers = ["tokenizers (>=0.11.1,!=0.11.3,<0.14)"] +torch = ["accelerate (>=0.20.3)", "torch (>=1.9,!=1.12.0)"] +torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] +torch-vision = ["Pillow (<10.0.0)", "torchvision"] +torchhub = ["filelock", "huggingface-hub (>=0.15.1,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.9,!=1.12.0)", "tqdm (>=4.27)"] +video = ["av (==9.2.0)", "decord (==0.6.0)"] +vision = ["Pillow (<10.0.0)"] + [[package]] name = "trio" version = "0.22.2" @@ -3546,13 +4232,13 @@ colorama = {version = ">=0.4.6", markers = "sys_platform == \"win32\" and python [[package]] name = "weaviate-client" -version = "3.23.0" +version = "3.23.2" description = "A python native Weaviate client" optional = false python-versions = ">=3.8" files = [ - {file = "weaviate-client-3.23.0.tar.gz", hash = "sha256:3ffd7f1460c9e32755d84d4f5fc63dfc0bd990dbe2c3dc20d5c68119d467680e"}, - {file = "weaviate_client-3.23.0-py3-none-any.whl", hash = "sha256:3d3bb75c1d96b2b71e213c5eb885ae3e3f42e4304955383c467d100187d9ff8e"}, + {file = "weaviate-client-3.23.2.tar.gz", hash = "sha256:1c8c94df032dd2fa5a4ea615fc69ccb983ffad5cc02974f78c793839e61ac150"}, + {file = "weaviate_client-3.23.2-py3-none-any.whl", hash = "sha256:88ffc38cca07806d64726cc74bc194c7da50b222aa4e2cd129f4c1f5e53e9b61"}, ] [package.dependencies] @@ -3780,4 +4466,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "5629225437c5aec01f9f862d46d6d1e68abde4c42a0c1ad709df875883171991" +content-hash = "761b58204631452d77e13bbc2d61034704e8e109619db4addd26ec159b9bb176" diff --git a/level_2/pyproject.toml b/level_2/pyproject.toml index 5a252ba37..9f4e66302 100644 --- a/level_2/pyproject.toml +++ b/level_2/pyproject.toml @@ -40,6 +40,7 @@ weaviate-client = "^3.22.1" python-multipart = "^0.0.6" deep-translator = "^1.11.4" humanize = "^4.8.0" +deepeval = "^0.10.12" diff --git a/level_2/tests/__pycache__/crud_test.cpython-311-pytest-7.4.0.pyc b/level_2/tests/__pycache__/crud_test.cpython-311-pytest-7.4.0.pyc new file mode 100644 index 000000000..e540734a9 Binary files /dev/null and b/level_2/tests/__pycache__/crud_test.cpython-311-pytest-7.4.0.pyc differ diff --git a/level_2/tests/__pycache__/semantic_tests.cpython-311-pytest-7.4.0.pyc b/level_2/tests/__pycache__/semantic_tests.cpython-311-pytest-7.4.0.pyc new file mode 100644 index 000000000..4d29a55c3 Binary files /dev/null and b/level_2/tests/__pycache__/semantic_tests.cpython-311-pytest-7.4.0.pyc differ diff --git a/level_2/tests/base_test_set.json b/level_2/tests/base_test_set.json new file mode 100644 index 000000000..1f5dab926 --- /dev/null +++ b/level_2/tests/base_test_set.json @@ -0,0 +1,38 @@ + { + "q1": { + "question": "What does Buck learn from being harnessed and made to work with François?", + "answer": "Buck learns several important lessons from being harnessed and made to work with François. First, he learns the lesson of obedience and the consequences of disobedience. He quickly realizes that François demands instant obedience and uses his whip to enforce it, and that he must listen and follow commands promptly to avoid the whip [16]. Second, Buck learns the value of hard work and the satisfaction it brings. Despite the new and strange nature of the work, Buck learns quickly and makes remarkable progress, earning respect from François [16][19]. Furthermore, Buck learns important skills related to pulling a sled, such as stopping at command ('ho'), going ahead ('mush'), swinging wide on bends, and staying clear of the wheeler when the sled goes downhill. He also learns the dynamics of working in a team and how to respond to the lead dog's instructions [16][19]." + }, + "q2": { + "question": "How many chapters does the document have", + "answer": "The document has a total of 7 chapters. The chapter titles are: Into the Primitive [Chapter 1012], The Law of Club and Fang [Chapter 1013], The Dominant Primordial Beast [Chapter 1014], Who Has Won to Mastership [Chapter 1015], The Toil of Trace and Trail [Chapter 1016], For the Love of a Man [Chapter 1017], The Sounding of the Call [Chapter 1018]." + }, + "q3": { + "question": "Who kidnapped Buck?", + "answer": "Buck was kidnapped by one of the gardener's helpers named Manuel, who sold him to strangers for a profit." + }, + "q4": { + "question": "What is the name of the gardener's helper who kidnapped Buck?", + "answer": "The name of the gardener's helper who kidnapped Buck is Manuel." + }, + "q5": { + "question": "Where was Buck taken after being kidnapped?", + "answer": "After being kidnapped, Buck was taken by Manuel through the orchard to a little flag station known as College Park [7a]. Eventually, Buck was thrown into a baggage car of a train, where he remained unconscious until he woke up and watched the man in the red sweater [8] [11a]. The specific location Buck was taken to after being kidnapped is not explicitly mentioned in the given texts." + }, + "q6": { + "question": "What is the law of club and fang?", + "answer": "The law of club and fang refers to the harsh and primal rules of survival in the wild, where physical strength and aggression determine dominance and power. The club represents the power wielded by humans over animals. Buck realizes that in order to survive in this new environment, he must adapt and submit to this law. The law of club and fang signifies the brutal and primitive nature of life in the wild." + }, + "q7": { + "question": "What is the mother of Buck?", + "answer": "The mother of Buck, the dog in the story 'The Call of the Wild' by Jack London, is a Scottish shepherd dog named Shep." + }, + "q8": { + "question": "How did Buck feel after being kidnapped?", + "answer": "After being kidnapped, Buck felt anger and resentment towards his captors [7]. He was initially cooperative but grew angry as he was mistreated and felt violated and vilely treated during his transportation [8]." + }, + "q9": { + "question": "Was Buck beaten in captivity?", + "answer": "Yes, Buck was beaten while in captivity. In document snippet [11], Buck rushed at the man who had been tormenting him, and the man delivered a blow that rendered Buck senseless." + } + } \ No newline at end of file diff --git a/level_2/tests/crud_test.py b/level_2/tests/crud_test.py index 403273481..38165d39b 100644 --- a/level_2/tests/crud_test.py +++ b/level_2/tests/crud_test.py @@ -74,3 +74,5 @@ class TestMemory(unittest.TestCase): if __name__ == '__main__': unittest.main() + + diff --git a/level_2/tests/semantic_tests.py b/level_2/tests/semantic_tests.py new file mode 100644 index 000000000..ba957dae7 --- /dev/null +++ b/level_2/tests/semantic_tests.py @@ -0,0 +1,119 @@ +import os +import openai +from deepeval.metrics.factual_consistency import assert_factual_consistency +import dotenv +dotenv.load_dotenv() +from level_2.level_2_pdf_vectorstore__dlt_contracts import Memory +openai.api_key = os.getenv("OPENAI_API_KEY", "") +from deepeval.metrics.overall_score import assert_overall_score +import json +from deepeval.metrics.overall_score import OverallScoreMetric + +# Write a sample ChatGPT function + + +async def main(): + async def generate_context(query: str='bla', context:str=None): + memory = Memory(user_id="TestUser") + + await memory.async_init() + + + memory_loaded = await memory._fetch_semantic_memory(observation=query, params=None) + + if memory_loaded: + return memory_loaded["data"]["Get"]["SEMANTICMEMORY"][0]["text"] + else: + params = { + "version": "1.0", + "agreement_id": "AG123456", + "privacy_policy": "https://example.com/privacy", + "terms_of_service": "https://example.com/terms", + "format": "json", + "schema_version": "1.1", + "checksum": "a1b2c3d4e5f6", + "owner": "John Doe", + "license": "MIT", + "validity_start": "2023-08-01", + "validity_end": "2024-07-31", + } + loader_settings = { + "format": "PDF", + "source": "url", + "path": "https://www.ibiblio.org/ebooks/London/Call%20of%20Wild.pdf" + } + load_jack_london = await memory._add_semantic_memory(observation = query, loader_settings=loader_settings, params=params) + memory_loaded = await memory._fetch_semantic_memory(observation=query, params=None) + return memory_loaded["data"]["Get"]["SEMANTICMEMORY"][0]["text"] + + # return load_jack_london + # + # modulator = {"relevance": 0.0, "saliency": 0.0, "frequency": 0.0} + # # # + # run_main_buffer = await memory._create_buffer_context( + # user_input="I want to know how does Buck adapt to life in the wild and then have that info translated to german ", + # params=params, + # attention_modulators=modulator, + # ) + + async def generate_chatgpt_output(query:str, context:str=None): + if context is None: + context = await generate_context(query=query) + # print(context) + else: + pass + response = openai.ChatCompletion.create( + model="gpt-3.5-turbo", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "assistant", "content": f"{context}"}, + {"role": "user", "content": query} + ] + ) + llm_output = response.choices[0].message.content + # print(llm_output) + return llm_output + + with open('base_test_set.json', 'r') as f: + data = json.load(f) + # + async def test_overall_score(query:str, output:str=None, expected_output:str=None, context:str=None, context_type:str=None): + if context_type == "gpt_search": + context = "" + elif context_type == "base_memory_context": + context = await generate_context(query=query) + output = context + elif context_type == "hybrid_search": + context = await generate_context(query=query) + output = await generate_chatgpt_output(query) + elif context_type == "memory_search": + pass + + metric = OverallScoreMetric() + score = metric.measure( + query=query, + output=output, + expected_output=expected_output, + context=context + ) + print('here is the score', score) + + return score + + # await generate_chatgpt_output(query=" When was call of the wild written?") + scores = {} + for key, item in data.items(): + question = item['question'] + expected_ans = item['answer'] + values = await test_overall_score(query=question, expected_output=expected_ans, context_type="hybrid_search") + scores[key] = values + + print(scores) + + + + +if __name__ == "__main__": + import asyncio + + asyncio.run(main()) \ No newline at end of file diff --git a/level_2/utils.py b/level_2/utils.py new file mode 100644 index 000000000..0baa766d7 --- /dev/null +++ b/level_2/utils.py @@ -0,0 +1,166 @@ +import os +from datetime import datetime +from typing import List + +from langchain import PromptTemplate, OpenAI +from langchain.output_parsers import PydanticOutputParser +from pydantic import BaseModel, Field +import dotenv +from level_2.level_2_pdf_vectorstore__dlt_contracts import Memory +dotenv.load_dotenv() + +llm_base = OpenAI( + temperature=0.0, + max_tokens=1200, + openai_api_key=os.environ.get("OPENAI_API_KEY"), + model_name="gpt-4-0613", + ) +async def _add_to_episodic(user_input, tasks_list, result_tasks, attention_modulators, params): + + + memory = Memory(user_id="TestUser") + await memory.async_init() + class EpisodicTask(BaseModel): + """Schema for an individual task.""" + + task_order: str = Field( + ..., description="The order at which the task needs to be performed" + ) + task_name: str = Field( + None, description="The task that needs to be performed" + ) + operation: str = Field(None, description="The operation to be performed") + operation_result: str = Field( + None, description="The result of the operation" + ) + + class EpisodicList(BaseModel): + """Schema for the record containing a list of tasks.""" + + tasks: List[EpisodicTask] = Field(..., description="List of tasks") + start_date: str = Field( + ..., description="The order at which the task needs to be performed" + ) + end_date: str = Field( + ..., description="The order at which the task needs to be performed" + ) + user_query: str = Field( + ..., description="The order at which the task needs to be performed" + ) + attention_modulators: str = Field(..., description="List of attention modulators") + + parser = PydanticOutputParser(pydantic_object=EpisodicList) + date = datetime.now().strftime('%Y-%m-%d %H:%M:%S') + + prompt = PromptTemplate( + template="Format the result.\n{format_instructions}\nOriginal query is: {query}\n Steps are: {steps}, buffer is: {buffer}, date is:{date}, attention modulators are: {attention_modulators} \n", + input_variables=["query", "steps", "buffer", "date", "attention_modulators"], + partial_variables={"format_instructions": parser.get_format_instructions()}, + ) + + _input = prompt.format_prompt( + query=user_input, steps=str(tasks_list) + , buffer=str(result_tasks), date=date, attention_modulators=attention_modulators + ) + + # return "a few things to do like load episodic memory in a structured format" + output = llm_base(_input.to_string()) + result_parsing = parser.parse(output) + lookup_value = await memory._add_episodic_memory( + observation=str(result_parsing.json()), params=params + ) + + +async def add_to_buffer(adjusted_modulator=None, params={}): + memory = Memory(user_id="TestUser") + await memory.async_init() + class BufferModulators(BaseModel): + """Value of buffer modulators""" + frequency: str = Field(..., description="Frequency score of the document") + saliency: str = Field(..., description="Saliency score of the document") + relevance: str = Field(..., description="Relevance score of the document") + description: str = Field(..., description="Latest buffer modulators") + direction: str = Field(..., description="Increase or a decrease of the modulator") + + parser = PydanticOutputParser(pydantic_object=BufferModulators) + + prompt = PromptTemplate( + template="""Structure the buffer modulators to be used for the buffer. \n + {format_instructions} \nOriginal observation is: + {query}\n """, + input_variables=["query"], + partial_variables={"format_instructions": parser.get_format_instructions()}, + ) + _input = prompt.format_prompt(query=adjusted_modulator) + document_context_result = llm_base(_input.to_string()) + document_context_result_parsed = parser.parse(document_context_result) + await memory._add_buffer_memory(user_input=str(document_context_result_parsed), params=params) + return document_context_result_parsed.json() + + +async def delete_from_buffer(): + from level_2.level_2_pdf_vectorstore__dlt_contracts import Memory + memory = Memory(user_id="TestUser") + await memory.async_init() + await memory._delete_buffer_memory() + +async def delete_from_episodic(): + from level_2.level_2_pdf_vectorstore__dlt_contracts import Memory + memory = Memory(user_id="TestUser") + await memory.async_init() + await memory._delete_episodic_memory() + + +async def get_from_episodic(observation=None): + from level_2.level_2_pdf_vectorstore__dlt_contracts import Memory + memory = Memory(user_id="TestUser") + await memory.async_init() + return await memory._fetch_episodic_memory(observation=observation) + +async def get_from_buffer(observation=None): + from level_2.level_2_pdf_vectorstore__dlt_contracts import Memory + memory = Memory(user_id="TestUser") + await memory.async_init() + return await memory._fetch_buffer_memory(user_input=observation) + + +async def main(): + params = { + "version": "1.0", + "agreement_id": "AG123456", + "privacy_policy": "https://example.com/privacy", + "terms_of_service": "https://example.com/terms", + "format": "json", + "schema_version": "1.1", + "checksum": "a1b2c3d4e5f6", + "owner": "John Doe", + "license": "MIT", + "validity_start": "2023-08-01", + "validity_end": "2024-07-31", + } + loader_settings = { + "format": "PDF", + "source": "url", + "path": "https://www.ibiblio.org/ebooks/London/Call%20of%20Wild.pdf" + } + modulator = {"relevance": 1.0, "saliency": 1.0, "frequency": 1.0, "freshness": 1.0, "repetition": 1.0} + user_input = "I want to know how does Buck adapt to life in the wild" + # tasks_list = """tasks": [{"task_order": "1", "task_name": "Fetch Information", "operation": "fetch from vector store", "original_query": "I want to know how does Buck adapt to life in the wild"]""" + out_tasks = """here are the result_tasks [{'task_order': '1', 'task_name': 'Fetch Information', 'operation': 'fetch from vector store', 'original_query': 'I want to know how does Buck adapt to life in the wild'}, {'docs': [{'semantic_search_term': "Buck's adaptation to wild life", 'document_content': 'THE CALL OF THE WILD 30 \nout of his desire for mastery. He was preëminently cunning, and could \nbide his time with a patience that was nothing less than primitive. \nIt was inevitable that the clash for leadership should come. Buck \nwanted it. He wanted it because it was his nature, because he had been \ngripped tight by that nameless, incomprehensible pride of the trail and \ntrace—that pride which holds dogs in the toil to the last gasp, which \nlures them to die joyfully in the harness, and breaks their hearts if they \nare cut out of the harness. This was the pride of Dave as wheel-dog, of \nSol-leks as he pulled with all his strength; the pride that laid hold of \nthem at break of camp, transforming them from sour and sullen brutes \ninto straining, eager, ambitious creatures; the pride that spurred them on \nall day and dropped them at pitch of camp at night, letting them fall back \ninto gloomy unrest and uncontent. This was the pride that bore up Spitz \nand made him thrash the sled-dogs who blundered and shirked in the \ntraces or hid away at harness-up time in the morning. Likewise it was \nthis pride that made him fear Buck as a possible lead-dog. And this was \nBuck’s pride, too. \nHe openly threatened the other’s leadership. He came between him \nand the shirks he should have punished. And he did it deliberately. One \nnight there was a heavy snowfall, and in the morning Pike, the \nmalingerer, did not appear. He was securely hidden in his nest under a \nfoot of snow. François called him and sought him in vain. Spitz was wild \nwith wrath. He raged through the camp, smelling and digging in every \nlikely place, snarling so frightfully that Pike heard and shivered in his \nhiding-place. \nBut when he was at last unearthed, and Spitz flew at him to punish \nhim, Buck flew, with equal rage, in between. So unexpected was it, and \nso shrewdly managed, that Spitz was hurled backward and off his feet. \nPike, who had been trembling abjectly, took heart at this open mutiny, \nand sprang upon his overthrown leader. Buck, to whom fairplay was a \nforgotten code, likewise sprang upon Spitz. But François, chuckling at \nthe incident while unswerving in the administration of justice, brought \nhis lash down upon Buck with all his might. This failed to drive Buck \nfrom his prostrate rival, and the butt of the whip was brought into play. \nHalf-stunned by the blow, Buck was knocked backward and the lash laid', 'document_relevance': '0.75', 'attention_modulators_list': [{'frequency': 'High', 'saliency': 'High', 'relevance': 'High'}]}], 'user_query': 'I want to know how does Buck adapt to life in the wild and then have that info translated to german'}, {'task_order': '2', 'task_name': 'Translate Information', 'operation': 'translate', 'original_query': 'then have that info translated to german'}, 'DER RUF DER WILDNIS 30\naus seinem Wunsch nach Meisterschaft. Er war überaus schlau und konnte es\nwartete seine Zeit mit einer Geduld ab, die geradezu primitiv war.\nEs war unvermeidlich, dass es zu einem Kampf um die Führung kam. Bock\nwollte es. Er wollte es, weil es in seiner Natur lag, weil er es gewesen war\nfestgehalten von diesem namenlosen, unverständlichen Stolz des Weges und\nSpur – dieser Stolz, der Hunde bis zum letzten Atemzug in der Mühsal hält, der\nlockt sie dazu, freudig im Geschirr zu sterben, und bricht ihnen das Herz, wenn sie es tun\nwerden aus dem Kabelbaum herausgeschnitten. Das war der Stolz von Dave als Radhund\nSol-leks, als er mit aller Kraft zog; der Stolz, der mich ergriff\nsie beim Abbruch des Lagers und verwandelte sie in mürrische und mürrische Bestien\nin anstrengende, eifrige, ehrgeizige Wesen; der Stolz, der sie anspornte\nden ganzen Tag und setzte sie nachts auf dem Stellplatz des Lagers ab und ließ sie zurückfallen\nin düstere Unruhe und Unzufriedenheit. Das war der Stolz, der Spitz trug\nund ließ ihn die Schlittenhunde verprügeln, die in der Gegend herumstolperten und sich scheuten\nSpuren hinterlassen oder sich morgens beim Angurten versteckt haben. Ebenso war es\nDieser Stolz ließ ihn Buck als möglichen Leithund fürchten. Und das war\nAuch Bucks Stolz.\nEr bedrohte offen die Führung des anderen. Er kam zwischen ihn\nund die Schirks hätte er bestrafen sollen. Und er hat es absichtlich getan. Eins\nNachts gab es starken Schneefall und am Morgen Pike, der\nSimulant, erschien nicht. Er war sicher in seinem Nest unter einer Decke versteckt\nFuß Schnee. François rief ihn an und suchte ihn vergeblich. Spitz war wild\nmit Zorn. Er tobte durch das Lager, schnupperte und wühlte darin herum\nWahrscheinlicher Ort, er knurrte so schrecklich, dass Pike es hörte und in seinem Kopf zitterte\nVersteck.\nAber als er endlich ausgegraben wurde, flog Spitz auf ihn zu, um ihn zu bestrafen\nihm, Buck flog mit der gleichen Wut dazwischen. So unerwartet war es, und\nEs gelang ihm so geschickt, dass Spitz nach hinten geschleudert wurde und von den Füßen fiel.\nPike, der erbärmlich gezittert hatte, fasste angesichts dieser offenen Meuterei Mut.\nund sprang auf seinen gestürzten Anführer. Buck, für den Fairplay wichtig war\nvergessener Code, sprang ebenfalls Spitz auf. Aber François kicherte\nder Vorfall, während unerschütterlich in der Rechtspflege, gebracht\nEr schlug mit aller Kraft auf Buck ein. Das gelang Buck nicht\nvon seinem am Boden liegenden Rivalen, und der Peitschenkolben wurde ins Spiel gebracht.\nVon dem Schlag halb betäubt, wurde Buck nach hinten geschleudert und mit der Peitsche niedergeschlagen']""" + + await _add_to_episodic(user_input=user_input, result_tasks=out_tasks, tasks_list=None, attention_modulators=modulator, params=params) + # await delete_from_episodic() + # aa = await get_from_episodic(observation="summary") + # await delete_from_buffer() + modulator_changed = {"relevance": 0.9, "saliency": 0.9, "frequency": 0.9} + await add_to_buffer(adjusted_modulator=modulator_changed) + + # aa = await get_from_buffer(observation="summary") + # print(aa) + +if __name__ == "__main__": + import asyncio + + asyncio.run(main()) + + diff --git a/level_2/vectordb/basevectordb.py b/level_2/vectordb/basevectordb.py new file mode 100644 index 000000000..3093286f7 --- /dev/null +++ b/level_2/vectordb/basevectordb.py @@ -0,0 +1,131 @@ +# Make sure to install the following packages: dlt, langchain, duckdb, python-dotenv, openai, weaviate-client +import logging +from io import BytesIO + +from level_2.vectordb.vectordb import PineconeVectorDB, WeaviateVectorDB + +logging.basicConfig(level=logging.INFO) +import marvin +import requests +from dotenv import load_dotenv +from langchain.document_loaders import PyPDFLoader +from langchain.retrievers import WeaviateHybridSearchRetriever +from weaviate.gql.get import HybridFusion + +load_dotenv() +from typing import Optional + +import tracemalloc + +tracemalloc.start() + +import os +from datetime import datetime +from langchain.embeddings.openai import OpenAIEmbeddings +from dotenv import load_dotenv +from langchain.schema import Document +import uuid +import weaviate + +load_dotenv() + +OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") +marvin.settings.openai.api_key = os.environ.get("OPENAI_API_KEY") + +LTM_MEMORY_ID_DEFAULT = "00000" +ST_MEMORY_ID_DEFAULT = "0000" +BUFFER_ID_DEFAULT = "0000" + + +class VectorDBFactory: + def create_vector_db( + self, + user_id: str, + index_name: str, + memory_id: str, + ltm_memory_id: str = LTM_MEMORY_ID_DEFAULT, + st_memory_id: str = ST_MEMORY_ID_DEFAULT, + buffer_id: str = BUFFER_ID_DEFAULT, + db_type: str = "pinecone", + namespace: str = None, + ): + db_map = {"pinecone": PineconeVectorDB, "weaviate": WeaviateVectorDB} + + if db_type in db_map: + return db_map[db_type]( + user_id, + index_name, + memory_id, + ltm_memory_id, + st_memory_id, + buffer_id, + namespace, + ) + + raise ValueError(f"Unsupported database type: {db_type}") + + + + + + +class BaseMemory: + def __init__( + self, + user_id: str, + memory_id: Optional[str], + index_name: Optional[str], + db_type: str, + namespace: str, + ): + self.user_id = user_id + self.memory_id = memory_id + self.index_name = index_name + self.namespace = namespace + self.memory_type_id = str(uuid.uuid4()) + self.db_type = db_type + factory = VectorDBFactory() + self.vector_db = factory.create_vector_db( + self.user_id, + self.index_name, + self.memory_id, + db_type=self.db_type, + namespace=self.namespace, + ) + + def init_client(self, namespace: str): + + return self.vector_db.init_weaviate_client(namespace) + + async def add_memories( + self, + observation: Optional[str] = None, + loader_settings: dict = None, + params: Optional[dict] = None, + namespace: Optional[str] = None, + ): + + return await self.vector_db.add_memories( + observation=observation, loader_settings=loader_settings, + params=params, namespace=namespace + ) + # Add other db_type conditions if necessary + + async def fetch_memories( + self, + observation: str, + params: Optional[str] = None, + namespace: Optional[str] = None, + n_of_observations: Optional[int] = 2, + ): + + return await self.vector_db.fetch_memories( + observation=observation, params=params, + namespace=namespace, + n_of_observations=n_of_observations + ) + + async def delete_memories(self, params: Optional[str] = None): + return await self.vector_db.delete_memories(params) + + # Additional methods for specific Memory can be added here diff --git a/level_2/vectordb/vectordb.py b/level_2/vectordb/vectordb.py new file mode 100644 index 000000000..4fa4f0308 --- /dev/null +++ b/level_2/vectordb/vectordb.py @@ -0,0 +1,355 @@ + +# Make sure to install the following packages: dlt, langchain, duckdb, python-dotenv, openai, weaviate-client +import logging +from io import BytesIO + + + +logging.basicConfig(level=logging.INFO) +import marvin +import requests +from dotenv import load_dotenv +from langchain.document_loaders import PyPDFLoader +from langchain.retrievers import WeaviateHybridSearchRetriever +from weaviate.gql.get import HybridFusion + +load_dotenv() +from typing import Optional + +import tracemalloc + +tracemalloc.start() + +import os +from datetime import datetime +from langchain.embeddings.openai import OpenAIEmbeddings +from dotenv import load_dotenv +from langchain.schema import Document +import uuid +import weaviate + +load_dotenv() + + +LTM_MEMORY_ID_DEFAULT = "00000" +ST_MEMORY_ID_DEFAULT = "0000" +BUFFER_ID_DEFAULT = "0000" +class VectorDB: + OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") + + def __init__( + self, + user_id: str, + index_name: str, + memory_id: str, + ltm_memory_id: str = LTM_MEMORY_ID_DEFAULT, + st_memory_id: str = ST_MEMORY_ID_DEFAULT, + buffer_id: str = BUFFER_ID_DEFAULT, + namespace: str = None, + ): + self.user_id = user_id + self.index_name = index_name + self.namespace = namespace + self.memory_id = memory_id + self.ltm_memory_id = ltm_memory_id + self.st_memory_id = st_memory_id + self.buffer_id = buffer_id + +class PineconeVectorDB(VectorDB): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.init_pinecone(self.index_name) + + def init_pinecone(self, index_name): + # Pinecone initialization logic + pass + + +class WeaviateVectorDB(VectorDB): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.init_weaviate(self.namespace) + + def init_weaviate(self, namespace: str): + # Weaviate initialization logic + embeddings = OpenAIEmbeddings() + auth_config = weaviate.auth.AuthApiKey( + api_key=os.environ.get("WEAVIATE_API_KEY") + ) + client = weaviate.Client( + url=os.environ.get("WEAVIATE_URL"), + auth_client_secret=auth_config, + additional_headers={"X-OpenAI-Api-Key": os.environ.get("OPENAI_API_KEY")}, + ) + retriever = WeaviateHybridSearchRetriever( + client=client, + index_name=namespace, + text_key="text", + attributes=[], + embedding=embeddings, + create_schema_if_missing=True, + ) + return retriever # If this is part of the initialization, call it here. + + def init_weaviate_client(self, namespace: str): + # Weaviate client initialization logic + auth_config = weaviate.auth.AuthApiKey( + api_key=os.environ.get("WEAVIATE_API_KEY") + ) + client = weaviate.Client( + url=os.environ.get("WEAVIATE_URL"), + auth_client_secret=auth_config, + additional_headers={"X-OpenAI-Api-Key": os.environ.get("OPENAI_API_KEY")}, + ) + return client + + def _document_loader(self, observation: str, loader_settings: dict): + # Create an in-memory file-like object for the PDF content + + if loader_settings.get("format") == "PDF": + + if loader_settings.get("source") == "url": + pdf_response = requests.get(loader_settings["path"]) + pdf_stream = BytesIO(pdf_response.content) + contents = pdf_stream.read() + tmp_location = os.path.join("/tmp", "tmp.pdf") + with open(tmp_location, "wb") as tmp_file: + tmp_file.write(contents) + + # Process the PDF using PyPDFLoader + loader = PyPDFLoader(tmp_location) + # adapt this for different chunking strategies + pages = loader.load_and_split() + return pages + + if loader_settings.get("source") == "file": + # Process the PDF using PyPDFLoader + # might need adapting for different loaders + OCR + # need to test the path + loader = PyPDFLoader(loader_settings["path"]) + pages = loader.load_and_split() + + return pages + else: + # Process the text by just loading the base text + return observation + + + async def add_memories( + self, observation: str, loader_settings: dict = None, params: dict = None ,namespace:str=None + ): + # Update Weaviate memories here + print(self.namespace) + if namespace is None: + namespace = self.namespace + retriever = self.init_weaviate(namespace) + + def _stuct(observation, params): + """Utility function to not repeat metadata structure""" + # needs smarter solution, like dynamic generation of metadata + return [ + Document( + metadata={ + # "text": observation, + "user_id": str(self.user_id), + "memory_id": str(self.memory_id), + "ltm_memory_id": str(self.ltm_memory_id), + "st_memory_id": str(self.st_memory_id), + "buffer_id": str(self.buffer_id), + "version": params.get("version", None) or "", + "agreement_id": params.get("agreement_id", None) or "", + "privacy_policy": params.get("privacy_policy", None) or "", + "terms_of_service": params.get("terms_of_service", None) or "", + "format": params.get("format", None) or "", + "schema_version": params.get("schema_version", None) or "", + "checksum": params.get("checksum", None) or "", + "owner": params.get("owner", None) or "", + "license": params.get("license", None) or "", + "validity_start": params.get("validity_start", None) or "", + "validity_end": params.get("validity_end", None) or "" + # **source_metadata, + }, + page_content=observation, + ) + ] + + if loader_settings: + # Load the document + document = self._document_loader(observation, loader_settings) + print("DOC LENGTH", len(document)) + for doc in document: + document_to_load = _stuct(doc.page_content, params) + retriever.add_documents( + document_to_load + ) + + return retriever.add_documents( + _stuct(observation, params) + ) + + async def fetch_memories( + self, observation: str, namespace: str, params: dict = None, n_of_observations =int(2) + ): + """ + Get documents from weaviate. + + Parameters: + - observation (str): User query. + - namespace (str): Type of memory we access. + - params (dict, optional): + - n_of_observations (int, optional): For weaviate, equals to autocut, defaults to 1. Ranges from 1 to 3. Check weaviate docs for more info. + + Returns: + Describe the return type and what the function returns. + + Args a json containing: + query (str): The query string. + path (list): The path for filtering, e.g., ['year']. + operator (str): The operator for filtering, e.g., 'Equal'. + valueText (str): The value for filtering, e.g., '2017*'. + + Example: + get_from_weaviate(query="some query", path=['year'], operator='Equal', valueText='2017*') + + """ + client = self.init_weaviate_client(self.namespace) + + print(self.namespace) + print(str(datetime.now())) + print(observation) + if namespace is None: + namespace = self.namespace + + params_user_id = { + "path": ["user_id"], + "operator": "Like", + "valueText": self.user_id, + } + + if params: + query_output = ( + client.query.get( + namespace, + [ + # "text", + "user_id", + "memory_id", + "ltm_memory_id", + "st_memory_id", + "buffer_id", + "version", + "agreement_id", + "privacy_policy", + "terms_of_service", + "format", + "schema_version", + "checksum", + "owner", + "license", + "validity_start", + "validity_end", + ], + ) + .with_where(params) + .with_near_text({"concepts": [observation]}) + .with_additional( + ["id", "creationTimeUnix", "lastUpdateTimeUnix", "score",'distance'] + ) + .with_where(params_user_id) + .with_limit(10) + .do() + ) + return query_output + else: + query_output = ( + client.query.get( + namespace, + + [ + "text", + "user_id", + "memory_id", + "ltm_memory_id", + "st_memory_id", + "buffer_id", + "version", + "agreement_id", + "privacy_policy", + "terms_of_service", + "format", + "schema_version", + "checksum", + "owner", + "license", + "validity_start", + "validity_end", + ], + ) + .with_additional( + ["id", "creationTimeUnix", "lastUpdateTimeUnix", "score", 'distance'] + ) + .with_hybrid( + query=observation, + fusion_type=HybridFusion.RELATIVE_SCORE + ) + .with_autocut(n_of_observations) + .with_where(params_user_id) + .with_limit(10) + .do() + ) + return query_output + + async def delete_memories(self, params: dict = None): + client = self.init_weaviate_client(self.namespace) + if params: + where_filter = { + "path": ["id"], + "operator": "Equal", + "valueText": params.get("id", None), + } + return client.batch.delete_objects( + class_name=self.namespace, + # Same `where` filter as in the GraphQL API + where=where_filter, + ) + else: + # Delete all objects + print("HERE IS THE USER ID", self.user_id) + return client.batch.delete_objects( + class_name=self.namespace, + where={ + "path": ["user_id"], + "operator": "Equal", + "valueText": self.user_id, + }, + ) + + def update_memories(self, observation, namespace: str, params: dict = None): + client = self.init_weaviate_client(self.namespace) + + client.data_object.update( + data_object={ + # "text": observation, + "user_id": str(self.user_id), + "memory_id": str(self.memory_id), + "ltm_memory_id": str(self.ltm_memory_id), + "st_memory_id": str(self.st_memory_id), + "buffer_id": str(self.buffer_id), + "version": params.get("version", None) or "", + "agreement_id": params.get("agreement_id", None) or "", + "privacy_policy": params.get("privacy_policy", None) or "", + "terms_of_service": params.get("terms_of_service", None) or "", + "format": params.get("format", None) or "", + "schema_version": params.get("schema_version", None) or "", + "checksum": params.get("checksum", None) or "", + "owner": params.get("owner", None) or "", + "license": params.get("license", None) or "", + "validity_start": params.get("validity_start", None) or "", + "validity_end": params.get("validity_end", None) or "" + # **source_metadata, + }, + class_name="Test", + uuid=params.get("id", None), + consistency_level=weaviate.data.replication.ConsistencyLevel.ALL, # default QUORUM + ) + return