Build the Docker image and push it

Vasilije 2023-12-16 15:15:30 +01:00
parent 7d0ee16d46
commit 05aaee69b3
5 changed files with 146 additions and 94 deletions


@@ -115,10 +115,15 @@ async def user_query_to_graph(payload: Payload):
@app.post("/document-to-graph-db")
async def document_to_graph_db(payload: Payload):
logging.info("Adding documents to graph db")
try:
decoded_payload = payload.payload
if 'settings' in decoded_payload and decoded_payload['settings'] is not None:
settings_for_loader = decoded_payload['settings']
else:
settings_for_loader = None
async with session_scope(session=AsyncSessionLocal()) as session:
result = await add_documents_to_graph_db(session =session, user_id = decoded_payload['user_id'], loader_settings =decoded_payload['settings'])
result = await add_documents_to_graph_db(session =session, user_id = decoded_payload['user_id'], loader_settings =settings_for_loader)
return result
except Exception as e:

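Reviewer note: the new guard only forwards `settings` when the client actually sent a non-null value. A minimal equivalent sketch using `dict.get` (assuming `payload.payload` behaves like a plain dict, as the handler above treats it; `extract_loader_settings` is a hypothetical helper name):

```python
from typing import Any, Optional

def extract_loader_settings(decoded_payload: dict) -> Optional[Any]:
    # dict.get returns None when the key is absent and the stored value
    # (which may itself be None) otherwise -- same outcome as the if/else above.
    return decoded_payload.get("settings")
```
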
level_4/bin/dockerize Executable file

@@ -0,0 +1,36 @@
set -euo pipefail
AWS_REGION=${region:-eu-west-1}
AWS_DEPLOYMENT_ACCOUNT=${account:-463722570299}
AWS_REPOSITORY=${repo:-"${AWS_DEPLOYMENT_ACCOUNT}.dkr.ecr.${AWS_REGION}.amazonaws.com"}
STAGE=${stage:-"dev"}
SHA_SHORT="$(git rev-parse --short HEAD)"
CUR_DATE="$(date +%Y%m%d%H%M%S)"
VERSION="$STAGE-$CUR_DATE-$SHA_SHORT"
IMAGE_NAME=${image_name:-promethai-${STAGE}-promethai-backend}
REPO_NAME="${AWS_REPOSITORY}/${IMAGE_NAME}"
FULL_IMAGE_NAME="${REPO_NAME}:${VERSION}"
APP_DIR=${app_dir:-"."}
PUBLISH=${publish:-false}
echo "Building docker image ${FULL_IMAGE_NAME} located in dir ${APP_DIR}"
pushd "${APP_DIR}" &&
docker buildx build --platform linux/amd64 \
--build-arg STAGE=${STAGE} \
-t "${FULL_IMAGE_NAME}" . &&
echo "${VERSION}" >/tmp/.DOCKER_IMAGE_VERSION &&
echo "Successfully built docker image ${FULL_IMAGE_NAME}"
if [ "${PUBLISH}" = true ]; then
echo "Pushing docker image ${FULL_IMAGE_NAME} to the ECR repository in AWS account ${AWS_DEPLOYMENT_ACCOUNT}"
echo "logging in"
aws ecr get-login-password --region "${AWS_REGION}" | docker login --username AWS --password-stdin "${AWS_REPOSITORY}"
docker push "${FULL_IMAGE_NAME}" &&
echo "Successfully pushed docker image ${FULL_IMAGE_NAME} to ECR repository"
fi
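
Usage note for the script above: every setting is overridable through the lowercase environment variables it expands (`region`, `account`, `repo`, `stage`, `image_name`, `app_dir`, `publish`), so a production build-and-push would presumably be invoked as `stage=prod publish=true ./level_4/bin/dockerize`, while a bare `./level_4/bin/dockerize` only builds a `dev-<timestamp>-<sha>` tag locally and skips the ECR login and push.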


@@ -151,7 +151,7 @@ class WeaviateVectorDB(VectorDB):
# Update Weaviate memories here
if namespace is None:
namespace = self.namespace
retriever = self.init_weaviate(embeddings=embeddings,namespace = namespace, retriever_type="single_document_context")
retriever = self.init_weaviate(embeddings=OpenAIEmbeddings(),namespace = namespace, retriever_type="single_document_context")
if loader_settings:
# Assuming _document_loader returns a list of documents
documents = await _document_loader(observation, loader_settings)
@@ -167,15 +167,19 @@ class WeaviateVectorDB(VectorDB):
Document(metadata=params, page_content=doc.page_content)])
else:
chunk_count = 0
documents = await _document_loader(observation, loader_settings)
from cognitive_architecture.database.vectordb.chunkers.chunkers import chunk_data
documents = [chunk_data(chunk_strategy="VANILLA", source_data=observation, chunk_size=50,
chunk_overlap=20)]
for doc in documents[0]:
chunk_count += 1
params['chunk_order'] = chunk_count
# document_to_load = self._stuct(observation, params, metadata_schema_class)
logging.info("Loading document with default loader settings %s", str(doc))
# logging.info("Loading document with default loader settings %s", str(document_to_load))
retriever.add_documents([
Document(metadata=params, page_content=doc)])
Document(metadata=params, page_content=doc.page_content)])
async def fetch_memories(self, observation: str, namespace: str = None, search_type: str = 'hybrid', **kwargs):
"""

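Reviewer note on the fallback branch above: every chunk shares the single `params` dict and only `params['chunk_order']` is rewritten in place, so whether each stored `Document` keeps its own chunk number depends on whether `Document` copies the metadata it is handed. A defensive sketch that gives each chunk an independent metadata dict (`build_chunk_documents` is a hypothetical helper; it assumes, as the loop above implies, that `chunk_data` yields chunk objects exposing `page_content`):

```python
from langchain.schema import Document

def build_chunk_documents(chunks, params: dict) -> list:
    """Attach a per-chunk copy of params so chunk_order is never shared."""
    docs = []
    for order, chunk in enumerate(chunks, start=1):
        metadata = {**params, "chunk_order": order}  # copy first, then set the order
        docs.append(Document(metadata=metadata, page_content=chunk.page_content))
    return docs
```
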

@@ -4,6 +4,7 @@ import string
import uuid
from graphviz import Digraph
from sqlalchemy import or_
from sqlalchemy.orm import contains_eager
@@ -194,34 +195,37 @@ async def get_unsumarized_vector_db_namespace(session: AsyncSession, user_id: st
Example Usage:
"""
try:
result = await session.execute(
select(Operation)
.join(Operation.docs) # Explicit join with docs table
.join(Operation.memories) # Explicit join with memories table
.options(
contains_eager(Operation.docs), # Informs ORM of the join for docs
contains_eager(Operation.memories) # Informs ORM of the join for memories
)
.where(
(Operation.user_id == user_id) & # Filter by user_id
(Operation.docs.graph_summary == False) # Filter by user_id
)
.order_by(Operation.created_at.desc()) # Order by creation date
# try:
result = await session.execute(
select(Operation)
.join(Operation.docs) # Explicit join with docs table
.join(Operation.memories) # Explicit join with memories table
.options(
contains_eager(Operation.docs), # Informs ORM of the join for docs
contains_eager(Operation.memories) # Informs ORM of the join for memories
)
.where(
(Operation.user_id == user_id) & # Filter by user_id
or_(
DocsModel.graph_summary == False, # Condition 1: graph_summary is False
DocsModel.graph_summary == None # Condition 2: graph_summary is None
) # Filter docs whose graph summary has not been produced yet
)
.order_by(Operation.created_at.desc()) # Order by creation date
)
operations = result.unique().scalars().all()
# Extract memory names and document names and IDs
memory_names = [memory.memory_name for op in operations for memory in op.memories]
docs = [(doc.doc_name, doc.id) for op in operations for doc in op.docs]
return memory_names, docs
except Exception as e:
# Handle the exception as needed
print(f"An error occurred: {e}")
return None
# except Exception as e:
# # Handle the exception as needed
# print(f"An error occurred: {e}")
# return None
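
Reviewer note: the widened filter now also catches documents whose `graph_summary` was never set, which a plain `== False` misses under SQL's three-valued logic. An equivalent, slightly tighter form is a single `IS NOT TRUE` predicate; a minimal sketch against the same `Operation`/`DocsModel` models used above (`unsummarized_ops_stmt` is a hypothetical name):

```python
from sqlalchemy import select
from sqlalchemy.orm import contains_eager

def unsummarized_ops_stmt(user_id: str):
    # graph_summary IS NOT TRUE matches both FALSE and NULL,
    # i.e. exactly what the or_() above spells out.
    return (
        select(Operation)
        .join(Operation.docs)
        .join(Operation.memories)
        .options(contains_eager(Operation.docs), contains_eager(Operation.memories))
        .where(Operation.user_id == user_id, DocsModel.graph_summary.isnot(True))
        .order_by(Operation.created_at.desc())
    )
```
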
async def get_memory_name_by_doc_id(session: AsyncSession, docs_id: str):
"""
Asynchronously retrieves memory names associated with a specific document ID.


@@ -100,6 +100,7 @@ async def load_documents_to_vectorstore(session: AsyncSession, user_id: str, con
DocsModel(
id=str(uuid.uuid4()),
operation_id=job_id,
graph_summary= False,
doc_name=doc
)
)
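
Reviewer note: only rows created after this change carry `graph_summary=False`; earlier rows stay NULL, which is presumably why the namespace query above also matches `None`. If the older rows should be normalised instead, a one-off backfill could look roughly like this (a sketch against the same `DocsModel`; `backfill_graph_summary` is a hypothetical helper):

```python
from sqlalchemy import update
from sqlalchemy.ext.asyncio import AsyncSession

async def backfill_graph_summary(session: AsyncSession) -> None:
    # Mark every previously-inserted doc whose flag was never populated.
    await session.execute(
        update(DocsModel)
        .where(DocsModel.graph_summary.is_(None))
        .values(graph_summary=False)
    )
    await session.commit()
```
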
@@ -139,7 +140,7 @@ async def load_documents_to_vectorstore(session: AsyncSession, user_id: str, con
observation=content, params=params, loader_settings=loader_settings)
await update_entity(session, Operation, job_id, "SUCCESS")
return result, namespace_id
# return result, namespace_id
async def user_query_to_graph_db(session: AsyncSession, user_id: str, query_input: str):
@@ -175,74 +176,76 @@ async def user_query_to_graph_db(session: AsyncSession, user_id: str, query_inpu
async def add_documents_to_graph_db(session: AsyncSession, user_id: str= None, loader_settings:dict=None, stupid_local_testing_flag=False): #clean this up Vasilije, don't be sloppy
""""""
try:
# await update_document_vectordb_namespace(postgres_session, user_id)
memory_names, docs = await get_unsumarized_vector_db_namespace(session, user_id)
logging.info("Memory names are", memory_names)
logging.info("Docs are", docs)
for doc, memory_name in zip(docs, memory_names):
doc_name, doc_id = doc
if stupid_local_testing_flag:
classification = [{
"DocumentCategory": "Literature",
"Title": "Bartleby, the Scrivener",
"Summary": "The document is a narrative about an enigmatic copyist named Bartleby who works in a law office. Despite initially being a diligent employee, Bartleby begins to refuse tasks with the phrase 'I would prefer not to' and eventually stops working altogether. His passive resistance and mysterious behavior confound the narrator, who is also his employer. Bartleby's refusal to leave the office leads to various complications, and he is eventually taken to the Tombs as a vagrant. The story ends with Bartleby's death and the revelation that he may have previously worked in the Dead Letter Office, which adds a layer of poignancy to his character.",
"d_id": "2a5c571f-bad6-4649-a4ac-36e4bb4f34cd"
},
{
"DocumentCategory": "Science",
"Title": "The Mysterious World of Quantum Mechanics",
"Summary": "This article delves into the fundamentals of quantum mechanics, exploring its paradoxical nature where particles can exist in multiple states simultaneously. It discusses key experiments and theories that have shaped our understanding of the quantum world, such as the double-slit experiment, Schrödinger's cat, and quantum entanglement. The piece also touches upon the implications of quantum mechanics for future technology, including quantum computing and cryptography.",
"d_id": "f4e2c3b1-4567-8910-11a2-b3c4d5e6f7g8"
},
{
"DocumentCategory": "History",
"Title": "The Rise and Fall of the Roman Empire",
"Summary": "This essay provides an overview of the Roman Empire's history, from its foundation to its eventual decline. It examines the political, social, and economic factors that contributed to the empire's expansion and success, as well as those that led to its downfall. Key events and figures such as Julius Caesar, the Punic Wars, and the transition from republic to empire are discussed. The essay concludes with an analysis of the empire's lasting impact on Western civilization.",
"d_id": "8h7g6f5e-4d3c-2b1a-09e8-d7c6b5a4f3e2"
},
{
"DocumentCategory": "Technology",
"Title": "The Future of Artificial Intelligence",
"Summary": "This report explores the current state and future prospects of artificial intelligence (AI). It covers the evolution of AI from simple algorithms to advanced neural networks capable of deep learning. The document discusses various applications of AI in industries such as healthcare, finance, and transportation, as well as ethical considerations and potential risks associated with AI development. Predictions for future advancements and their societal impact are also presented.",
"d_id": "3c2b1a09-d8e7-f6g5-h4i3-j1k2l3m4n5o6"
},
{
"DocumentCategory": "Economics",
"Title": "Global Economic Trends and Predictions",
"Summary": "This analysis examines major trends in the global economy, including the rise of emerging markets, the impact of technology on job markets, and shifts in international trade. It delves into the economic effects of recent global events, such as pandemics and geopolitical conflicts, and discusses how these might shape future economic policies and practices. The document provides predictions for economic growth, inflation rates, and currency fluctuations in the coming years.",
"d_id": "7k6j5h4g-3f2e-1d0c-b8a9-m7n6o5p4q3r2"
}
]
for classification in classification:
neo4j_graph_db = Neo4jGraphDB(url=config.graph_database_url, username=config.graph_database_username,
password=config.graph_database_password)
rs = neo4j_graph_db.create_document_node_cypher(classification, user_id)
neo4j_graph_db.query(rs, classification)
# select doc from the store
neo4j_graph_db.update_document_node_with_namespace(user_id, vectordb_namespace=memory_name, document_id=doc_id)
else:
try:
classification_content = fetch_document_vectordb_namespace(session, user_id, memory_name)
except:
classification_content = "None"
classification = await classify_documents(doc_name, document_id =doc_id, content=classification_content)
logging.info("Classification is", str(classification))
neo4j_graph_db = Neo4jGraphDB(url=config.graph_database_url, username=config.graph_database_username,
password=config.graph_database_password)
rs = neo4j_graph_db.create_document_node_cypher(classification, user_id)
neo4j_graph_db.query(rs, classification)
# select doc from the store
neo4j_graph_db.update_document_node_with_namespace(user_id, vectordb_namespace=memory_name,
document_id=doc_id)
await update_entity(session, DocsModel, doc_id, True)
except:
pass
# try:
# await update_document_vectordb_namespace(postgres_session, user_id)
memory_names, docs = await get_unsumarized_vector_db_namespace(session, user_id)
logging.info("Memory names are %s", memory_names)
logging.info("Docs are %s", docs)
for doc, memory_name in zip(docs, memory_names):
doc_name, doc_id = doc
# if stupid_local_testing_flag:
# classification = [{
# "DocumentCategory": "Literature",
# "Title": "Bartleby, the Scrivener",
# "Summary": "The document is a narrative about an enigmatic copyist named Bartleby who works in a law office. Despite initially being a diligent employee, Bartleby begins to refuse tasks with the phrase 'I would prefer not to' and eventually stops working altogether. His passive resistance and mysterious behavior confound the narrator, who is also his employer. Bartleby's refusal to leave the office leads to various complications, and he is eventually taken to the Tombs as a vagrant. The story ends with Bartleby's death and the revelation that he may have previously worked in the Dead Letter Office, which adds a layer of poignancy to his character.",
# "d_id": "2a5c571f-bad6-4649-a4ac-36e4bb4f34cd"
# },
# {
# "DocumentCategory": "Science",
# "Title": "The Mysterious World of Quantum Mechanics",
# "Summary": "This article delves into the fundamentals of quantum mechanics, exploring its paradoxical nature where particles can exist in multiple states simultaneously. It discusses key experiments and theories that have shaped our understanding of the quantum world, such as the double-slit experiment, Schrödinger's cat, and quantum entanglement. The piece also touches upon the implications of quantum mechanics for future technology, including quantum computing and cryptography.",
# "d_id": "f4e2c3b1-4567-8910-11a2-b3c4d5e6f7g8"
# },
# {
# "DocumentCategory": "History",
# "Title": "The Rise and Fall of the Roman Empire",
# "Summary": "This essay provides an overview of the Roman Empire's history, from its foundation to its eventual decline. It examines the political, social, and economic factors that contributed to the empire's expansion and success, as well as those that led to its downfall. Key events and figures such as Julius Caesar, the Punic Wars, and the transition from republic to empire are discussed. The essay concludes with an analysis of the empire's lasting impact on Western civilization.",
# "d_id": "8h7g6f5e-4d3c-2b1a-09e8-d7c6b5a4f3e2"
# },
# {
# "DocumentCategory": "Technology",
# "Title": "The Future of Artificial Intelligence",
# "Summary": "This report explores the current state and future prospects of artificial intelligence (AI). It covers the evolution of AI from simple algorithms to advanced neural networks capable of deep learning. The document discusses various applications of AI in industries such as healthcare, finance, and transportation, as well as ethical considerations and potential risks associated with AI development. Predictions for future advancements and their societal impact are also presented.",
# "d_id": "3c2b1a09-d8e7-f6g5-h4i3-j1k2l3m4n5o6"
# },
# {
# "DocumentCategory": "Economics",
# "Title": "Global Economic Trends and Predictions",
# "Summary": "This analysis examines major trends in the global economy, including the rise of emerging markets, the impact of technology on job markets, and shifts in international trade. It delves into the economic effects of recent global events, such as pandemics and geopolitical conflicts, and discusses how these might shape future economic policies and practices. The document provides predictions for economic growth, inflation rates, and currency fluctuations in the coming years.",
# "d_id": "7k6j5h4g-3f2e-1d0c-b8a9-m7n6o5p4q3r2"
# }
# ]
# for classification in classification:
#
# neo4j_graph_db = Neo4jGraphDB(url=config.graph_database_url, username=config.graph_database_username,
# password=config.graph_database_password)
# rs = neo4j_graph_db.create_document_node_cypher(classification, user_id)
# neo4j_graph_db.query(rs, classification)
#
# # select doc from the store
# neo4j_graph_db.update_document_node_with_namespace(user_id, vectordb_namespace=memory_name, document_id=doc_id)
# else:
try:
classification_content = await fetch_document_vectordb_namespace(session, user_id, memory_name)
except:
classification_content = "None"
#
# classification = await classify_documents(doc_name, document_id =doc_id, content=classification_content)
#
# logging.info("Classification is", str(classification))
# neo4j_graph_db = Neo4jGraphDB(url=config.graph_database_url, username=config.graph_database_username,
# password=config.graph_database_password)
# rs = neo4j_graph_db.create_document_node_cypher(classification, user_id)
# neo4j_graph_db.query(rs, classification)
#
# # select doc from the store
# neo4j_graph_db.update_document_node_with_namespace(user_id, vectordb_namespace=memory_name,
# document_id=doc_id)
await update_entity(session, DocsModel, doc_id, True)
# except:
# pass
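
Reviewer note: with the local-testing branch and the classification/Neo4j calls commented out, the loop above effectively reduces to reading each namespace and flagging the document row. A distilled sketch of the surviving path (`mark_documents_processed` is a hypothetical name; it reuses only the helpers called above):

```python
async def mark_documents_processed(session, user_id: str) -> None:
    # Pair each (doc_name, doc_id) with the vector-db namespace it was loaded into.
    memory_names, docs = await get_unsumarized_vector_db_namespace(session, user_id)
    for (doc_name, doc_id), memory_name in zip(docs, memory_names):
        try:
            # Fetched but currently unused while classification stays commented out.
            await fetch_document_vectordb_namespace(session, user_id, memory_name)
        except Exception:
            pass
        # The only remaining live write: flag the document as handled.
        await update_entity(session, DocsModel, doc_id, True)
```
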
class ResponseString(BaseModel):
response: str = Field(..., default_factory=list)