Merge pull request #44 from topoteretes/feature/postgres_deployment

Fixes to database manager
This commit is contained in:
Vasilije 2024-02-20 21:23:09 +01:00 committed by GitHub
commit 2fe437c92a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 105 additions and 71 deletions

View file

@ -41,6 +41,8 @@ RUN apt-get update -q && \
/var/tmp/*
WORKDIR /app
# Set the PYTHONPATH environment variable to include the /app directory
ENV PYTHONPATH=/app
COPY cognitive_architecture/ /app/cognitive_architecture
COPY main.py /app

View file

@ -1,4 +1,5 @@
"""Configuration for cognee - cognitive architecture framework."""
import logging
import os
import configparser
import uuid
@ -36,6 +37,7 @@ class Config:
db_user: str = os.getenv("DB_USER", "cognee")
db_password: str = os.getenv("DB_PASSWORD", "cognee")
sqlalchemy_logging: bool = os.getenv("SQLALCHEMY_LOGGING", True)
graph_name = os.getenv("GRAPH_NAME", "cognee_graph.pkl")
# Model parameters
model: str = "gpt-4-1106-preview"
@ -55,29 +57,34 @@ class Config:
or os.getenv("AWS_ENV") == "dev"
or os.getenv("AWS_ENV") == "prd"
):
load_dotenv()
logging.info("graph_db_url: %s", os.getenv("GRAPH_DB_URL_PROD"))
graph_database_url: str = os.getenv("GRAPH_DB_URL_PROD")
graph_database_username: str = os.getenv("GRAPH_DB_USER")
graph_database_password: str = os.getenv("GRAPH_DB_PW")
else:
logging.info("graph_db_urlvvv: %s", os.getenv("GRAPH_DB_URL"))
graph_database_url: str = os.getenv("GRAPH_DB_URL")
graph_database_username: str = os.getenv("GRAPH_DB_USER")
graph_database_password: str = os.getenv("GRAPH_DB_PW")
weaviate_url: str = os.getenv("WEAVIATE_URL")
weaviate_api_key: str = os.getenv("WEAVIATE_API_KEY")
postgres_user: str = os.getenv("POSTGRES_USER")
postgres_password: str = os.getenv("POSTGRES_PASSWORD")
postgres_db: str = os.getenv("POSTGRES_DB")
if (
os.getenv("ENV") == "prod"
or os.getenv("ENV") == "dev"
or os.getenv("AWS_ENV") == "dev"
or os.getenv("AWS_ENV") == "prd"
):
postgres_host: str = os.getenv("POSTGRES_PROD_HOST")
elif os.getenv("ENV") == "docker":
postgres_host: str = os.getenv("POSTGRES_HOST_DOCKER")
elif os.getenv("ENV") == "local":
postgres_host: str = os.getenv("POSTGRES_HOST_LOCAL")
load_dotenv()
db_type = 'postgresql'
db_host: str = os.getenv("POSTGRES_HOST")
logging.info("db_host: %s", db_host)
db_user: str = os.getenv("POSTGRES_USER")
db_password: str = os.getenv("POSTGRES_PASSWORD")
db_name: str = os.getenv("POSTGRES_DB")
# Client ID
anon_clientid: Optional[str] = field(default_factory=lambda: uuid.uuid4().hex)

View file

@ -1,9 +1,15 @@
import pickle
from pathlib import Path
from cognitive_architecture.config import Config
import networkx as nx
config = Config()
config = config.load()
class NetworkXGraphDB:
"""A class to manage a graph database using NetworkX"""
# graph_path = (Path(config.db_path) / config.graph_name).absolute()
def __init__(self, filename="cognee_graph.pkl"):
self.filename = filename
try:

View file

@ -14,17 +14,19 @@ RETRY_DELAY = 5
def get_sqlalchemy_database_url(
db_type = globalConfig.db_type,
db_name = globalConfig.db_name,
base_path = globalConfig.db_path,
db_path = globalConfig.db_path,
user = globalConfig.db_user,
password = globalConfig.db_password,
host = globalConfig.db_host,
port = globalConfig.db_port,
):
"""Get the SQLAlchemy database URL based on parameters."""
db_path = (Path(base_path) / db_name).absolute()
if db_type == "sqlite":
db_path = (Path(db_path) / db_name).absolute()
return f"sqlite+aiosqlite:///{db_path}" # SQLite uses file path
elif db_type == "duckdb":
db_path = (Path(db_path) / db_name).absolute()
return f"duckdb+aiosqlite:///{db_path}"
elif db_type == "postgresql":
# Ensure optional parameters are handled gracefully

View file

@ -1,3 +1,4 @@
""" This module contains the MemoryModel class, which is a SQLAlchemy model for the memory table in the relational database. """
from datetime import datetime
from sqlalchemy import Column, String, DateTime, ForeignKey, Boolean
from sqlalchemy.orm import relationship
@ -7,6 +8,7 @@ from ..database import Base
class DocsModel(Base):
""" Docs model"""
__tablename__ = "docs"
id = Column(String, primary_key=True)

View file

@ -1,3 +1,4 @@
""" This module contains the MemoryModel class, which is a SQLAlchemy model for the memory table in the relational database. """
from datetime import datetime
from sqlalchemy import Column, String, DateTime, ForeignKey
from sqlalchemy.orm import relationship
@ -5,6 +6,7 @@ from ..database import Base
class MemoryModel(Base):
""" Memory model"""
__tablename__ = "memories"
id = Column(String, primary_key=True)

View file

@ -1,4 +1,5 @@
# metadata.py
""" MetaData model """
from datetime import datetime
from sqlalchemy import Column, String, DateTime, ForeignKey
from sqlalchemy.orm import relationship
@ -8,6 +9,7 @@ from ..database import Base
class MetaDatas(Base):
""" MetaData model"""
__tablename__ = "metadatas"
id = Column(String, primary_key=True)

View file

@ -1,4 +1,5 @@
# operation.py
""" Operation model """
from datetime import datetime
from sqlalchemy import Column, Integer, String, DateTime, ForeignKey
from sqlalchemy.orm import relationship
@ -8,6 +9,7 @@ from ..database import Base
class Operation(Base):
""" Operation model"""
__tablename__ = "operations"
id = Column(String, primary_key=True)

View file

@ -1,4 +1,5 @@
# session.py
""" Session model """
from datetime import datetime
from sqlalchemy import Column, Integer, String, DateTime, ForeignKey
from sqlalchemy.orm import relationship
@ -9,6 +10,7 @@ from ..database import Base
class Session(Base):
""" Session model"""
__tablename__ = "sessions"
id = Column(String, primary_key=True)

View file

@ -1,4 +1,5 @@
# user.py
""" User model """
from datetime import datetime
from sqlalchemy import Column, String, DateTime
from sqlalchemy.orm import relationship
@ -14,6 +15,7 @@ from ..database import Base
class User(Base):
""" User model"""
__tablename__ = "users"
id = Column(String, primary_key=True, index=True)

View file

@ -12,12 +12,11 @@ parent_dir = os.path.dirname(current_dir)
# Add the parent directory to sys.path
sys.path.insert(0, parent_dir)
# API_ENABLED = os.environ.get("API_ENABLED", "False").lower() == "true"
environment = os.getenv("AWS_ENV", "dev")
def fetch_secret(secret_name, region_name, env_file_path):
def fetch_secret(secret_name:str, region_name:str, env_file_path:str):
"""Fetch the secret from AWS Secrets Manager and write it to the .env file."""
print("Initializing session")
session = boto3.session.Session()
print("Session initialized")
@ -28,20 +27,19 @@ def fetch_secret(secret_name, region_name, env_file_path):
response = client.get_secret_value(SecretId=secret_name)
except Exception as e:
print(f"Error retrieving secret: {e}")
return None
return f"Error retrieving secret: {e}"
if "SecretString" in response:
secret = response["SecretString"]
else:
secret = response["SecretBinary"]
with open(env_file_path, "w") as env_file:
env_file.write(secret)
print("Secrets are added to the .env file.")
if os.path.exists(env_file_path):
print(f"The .env file is located at: {env_file_path}")
with open(env_file_path, "w") as env_file:
env_file.write(secret)
print("Secrets are added to the .env file.")
load_dotenv()
print("The .env file is loaded.")
else:

View file

@ -1,38 +1,38 @@
""" This module contains the functions that are used to query the language model. """
import os
from ..shared.data_models import Node, Edge, KnowledgeGraph, GraphQLQuery, MemorySummary
from ..config import Config
import instructor
from openai import OpenAI
import logging
from ..shared.data_models import KnowledgeGraph, MemorySummary
from ..config import Config
config = Config()
config.load()
print(config.model)
print(config.openai_key)
OPENAI_API_KEY = config.openai_key
aclient = instructor.patch(OpenAI())
import logging
# Function to read query prompts from files
def read_query_prompt(filename):
"""Read a query prompt from a file."""
try:
with open(filename, "r") as file:
return file.read()
except FileNotFoundError:
logging.info(f"Error: File not found. Attempted to read: {filename}")
logging.info(f"Current working directory: {os.getcwd()}")
logging.info(f"Error: File not found. Attempted to read: %s {filename}")
logging.info(f"Current working directory: %s {os.getcwd()}")
return None
except Exception as e:
logging.info(f"An error occurred: {e}")
logging.info(f"An error occurred: %s {e}")
return None
def generate_graph(input) -> KnowledgeGraph:
"""Generate a knowledge graph from a user query."""
model = "gpt-4-1106-preview"
user_prompt = f"Use the given format to extract information from the following input: {input}."
system_prompt = read_query_prompt(
@ -57,20 +57,26 @@ def generate_graph(input) -> KnowledgeGraph:
async def generate_summary(input) -> MemorySummary:
"""Generate a summary from a user query."""
out = aclient.chat.completions.create(
model=config.model,
messages=[
{
"role": "user",
"content": f"""Use the given format summarize and reduce the following input: {input}. """,
"content": f"""Use the given format summarize
and reduce the following input: {input}. """,
},
{
"role": "system",
"content": """You are a top-tier algorithm
designed for summarizing existing knowledge graphs in structured formats based on a knowledge graph.
designed for summarizing existing knowledge
graphs in structured formats based on a knowledge graph.
## 1. Strict Compliance
Adhere to the rules strictly. Non-compliance will result in termination.
## 2. Don't forget your main goal is to reduce the number of nodes in the knowledge graph while preserving the information contained in it.""",
Adhere to the rules strictly.
Non-compliance will result in termination.
## 2. Don't forget your main goal
is to reduce the number of nodes in the knowledge graph
while preserving the information contained in it.""",
},
],
response_model=MemorySummary,
@ -79,6 +85,7 @@ async def generate_summary(input) -> MemorySummary:
def user_query_to_edges_and_nodes(input: str) -> KnowledgeGraph:
"""Generate a knowledge graph from a user query."""
system_prompt = read_query_prompt(
"cognitive_architecture/llm/prompts/generate_graph_prompt.txt"
)
@ -87,7 +94,8 @@ def user_query_to_edges_and_nodes(input: str) -> KnowledgeGraph:
messages=[
{
"role": "user",
"content": f"""Use the given format to extract information from the following input: {input}. """,
"content": f"""Use the given format to
extract information from the following input: {input}. """,
},
{"role": "system", "content": system_prompt},
],

View file

@ -1,3 +1,4 @@
"""Tools for interacting with OpenAI's GPT-3, GPT-4 API"""
import asyncio
import random
import os

View file

@ -1,9 +1,10 @@
"""Data models for the cognitive architecture."""
from typing import Optional, List
from pydantic import BaseModel, Field
class Node(BaseModel):
"""Node in a knowledge graph."""
id: int
description: str
category: str
@ -14,6 +15,7 @@ class Node(BaseModel):
class Edge(BaseModel):
"""Edge in a knowledge graph."""
source: int
target: int
description: str
@ -23,14 +25,17 @@ class Edge(BaseModel):
class KnowledgeGraph(BaseModel):
"""Knowledge graph."""
nodes: List[Node] = Field(..., default_factory=list)
edges: List[Edge] = Field(..., default_factory=list)
class GraphQLQuery(BaseModel):
"""GraphQL query."""
query: str
class MemorySummary(BaseModel):
""" Memory summary. """
nodes: List[Node] = Field(..., default_factory=list)
edges: List[Edge] = Field(..., default_factory=list)

View file

@ -1,9 +1,10 @@
""" This module provides language processing functions for language detection and translation. """
import logging
import boto3
from botocore.exceptions import BotoCoreError, ClientError
from langdetect import detect, LangDetectException
import iso639
import logging
# Basic configuration of the logging system
logging.basicConfig(
@ -30,7 +31,7 @@ def detect_language(text):
try:
# Detect the language using langdetect
detected_lang_iso639_1 = detect(trimmed_text)
logging.info(f"Detected ISO 639-1 code: {detected_lang_iso639_1}")
logging.info(f"Detected ISO 639-1 code: %s {detected_lang_iso639_1}")
# Special case: map 'hr' (Croatian) to 'sr' (Serbian ISO 639-2)
if detected_lang_iso639_1 == "hr":
@ -38,9 +39,9 @@ def detect_language(text):
return detected_lang_iso639_1
except LangDetectException as e:
logging.error(f"Language detection error: {e}")
logging.error(f"Language detection error: %s {e}")
except Exception as e:
logging.error(f"Unexpected error: {e}")
logging.error(f"Unexpected error: %s {e}")
return -1
@ -57,8 +58,10 @@ def translate_text(
Parameters:
text (str): The text to be translated.
source_language (str): The source language code (e.g., 'sr' for Serbian). ISO 639-2 Code https://www.loc.gov/standards/iso639-2/php/code_list.php
target_language (str): The target language code (e.g., 'en' for English). ISO 639-2 Code https://www.loc.gov/standards/iso639-2/php/code_list.php
source_language (str): The source language code (e.g., 'sr' for Serbian).
ISO 639-2 Code https://www.loc.gov/standards/iso639-2/php/code_list.php
target_language (str): The target language code (e.g., 'en' for English).
ISO 639-2 Code https://www.loc.gov/standards/iso639-2/php/code_list.php
region_name (str): AWS region name.
Returns:
@ -82,20 +85,9 @@ def translate_text(
return result.get("TranslatedText", "No translation found.")
except BotoCoreError as e:
logging.info(f"BotoCoreError occurred: {e}")
logging.info(f"BotoCoreError occurred: %s {e}")
return "Error with AWS Translate service configuration or request."
except ClientError as e:
logging.info(f"ClientError occurred: {e}")
logging.info(f"ClientError occurred: %s {e}")
return "Error with AWS client or network issue."
source_language = "sr"
target_language = "en"
text_to_translate = "Ja volim da pecam i idem na reku da šetam pored nje ponekad"
translated_text = translate_text(text_to_translate, source_language, target_language)
print(translated_text)
# print(detect_language("Koliko krava ide u setnju?"))

View file

@ -1,3 +1,5 @@
""" This module contains utility functions for the cognitive architecture. """
import os
import random
import string
@ -13,7 +15,13 @@ from cognitive_architecture.database.relationaldb.models.user import User
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
import logging
from cognitive_architecture.database.relationaldb.models.operation import Operation
from cognitive_architecture.database.relationaldb.database_crud import (
session_scope,
add_entity,
update_entity,
fetch_job_id,
)
class Node:
def __init__(self, id, description, color):
@ -72,6 +80,7 @@ def get_document_names(doc_input):
def format_dict(d):
""" Format a dictionary as a string."""
# Initialize an empty list to store formatted items
formatted_items = []
@ -93,6 +102,7 @@ def format_dict(d):
def append_uuid_to_variable_names(variable_mapping):
""" Append a UUID to the variable names to make them unique."""
unique_variable_mapping = {}
for original_name in variable_mapping.values():
unique_name = f"{original_name}_{uuid.uuid4().hex}"
@ -102,6 +112,7 @@ def append_uuid_to_variable_names(variable_mapping):
# Update the functions to use the unique variable names
def create_node_variable_mapping(nodes):
""" Create a mapping of node identifiers to unique variable names."""
mapping = {}
for node in nodes:
variable_name = f"{node['category']}{node['id']}".lower()
@ -110,6 +121,7 @@ def create_node_variable_mapping(nodes):
def create_edge_variable_mapping(edges):
""" Create a mapping of edge identifiers to unique variable names."""
mapping = {}
for edge in edges:
# Construct a unique identifier for the edge
@ -124,17 +136,10 @@ def generate_letter_uuid(length=8):
return "".join(random.choice(letters) for _ in range(length))
from cognitive_architecture.database.relationaldb.models.operation import Operation
from cognitive_architecture.database.relationaldb.database_crud import (
session_scope,
add_entity,
update_entity,
fetch_job_id,
)
async def get_vectordb_namespace(session: AsyncSession, user_id: str):
""" Asynchronously retrieves the latest memory names for a given user."""
try:
result = await session.execute(
select(MemoryModel.memory_name)
@ -151,6 +156,7 @@ async def get_vectordb_namespace(session: AsyncSession, user_id: str):
async def get_vectordb_document_name(session: AsyncSession, user_id: str):
""" Asynchronously retrieves the latest memory names for a given user."""
try:
result = await session.execute(
select(DocsModel.doc_name)
@ -167,6 +173,7 @@ async def get_vectordb_document_name(session: AsyncSession, user_id: str):
async def get_model_id_name(session: AsyncSession, id: str):
""" Asynchronously retrieves the latest memory names for a given user."""
try:
result = await session.execute(
select(MemoryModel.memory_name)
@ -236,12 +243,6 @@ async def get_unsumarized_vector_db_namespace(session: AsyncSession, user_id: st
return memory_details, docs
# except Exception as e:
# # Handle the exception as needed
# print(f"An error occurred: {e}")
# return None
async def get_memory_name_by_doc_id(session: AsyncSession, docs_id: str):
"""
Asynchronously retrieves memory names associated with a specific document ID.