Merge pull request #44 from topoteretes/feature/postgres_deployment

Fixes to database manager
2024-02-20 21:23:09 +01:00 · 2024-02-20 21:23:09 +01:00 · 2fe437c92a
commit 2fe437c92a
parent 0ee8899f68 423e0d508b
16 changed files with 105 additions and 71 deletions
--- a/2
+++ b/2
@ -41,6 +41,8 @@ RUN apt-get update -q && \
        /var/tmp/*

 WORKDIR /app
+# Set the PYTHONPATH environment variable to include the /app directory
+ENV PYTHONPATH=/app

 COPY cognitive_architecture/ /app/cognitive_architecture
 COPY main.py /app
--- a/cognitive_architecture/config.py
+++ b/cognitive_architecture/config.py
@ -1,4 +1,5 @@
 """Configuration for cognee - cognitive architecture framework."""
+import logging
 import os
 import configparser
 import uuid
@ -36,6 +37,7 @@ class Config:
    db_user: str = os.getenv("DB_USER", "cognee")
    db_password: str = os.getenv("DB_PASSWORD", "cognee")
    sqlalchemy_logging: bool = os.getenv("SQLALCHEMY_LOGGING", True)
+    graph_name = os.getenv("GRAPH_NAME", "cognee_graph.pkl")

    # Model parameters
    model: str = "gpt-4-1106-preview"
@ -55,29 +57,34 @@ class Config:
        or os.getenv("AWS_ENV") == "dev"
        or os.getenv("AWS_ENV") == "prd"
    ):
+        load_dotenv()
+        logging.info("graph_db_url: %s", os.getenv("GRAPH_DB_URL_PROD"))
        graph_database_url: str = os.getenv("GRAPH_DB_URL_PROD")
        graph_database_username: str = os.getenv("GRAPH_DB_USER")
        graph_database_password: str = os.getenv("GRAPH_DB_PW")
    else:
+        logging.info("graph_db_urlvvv: %s", os.getenv("GRAPH_DB_URL"))
        graph_database_url: str = os.getenv("GRAPH_DB_URL")
        graph_database_username: str = os.getenv("GRAPH_DB_USER")
        graph_database_password: str = os.getenv("GRAPH_DB_PW")
    weaviate_url: str = os.getenv("WEAVIATE_URL")
    weaviate_api_key: str = os.getenv("WEAVIATE_API_KEY")
-    postgres_user: str = os.getenv("POSTGRES_USER")
-    postgres_password: str = os.getenv("POSTGRES_PASSWORD")
-    postgres_db: str = os.getenv("POSTGRES_DB")
+
    if (
        os.getenv("ENV") == "prod"
        or os.getenv("ENV") == "dev"
        or os.getenv("AWS_ENV") == "dev"
        or os.getenv("AWS_ENV") == "prd"
    ):
-        postgres_host: str = os.getenv("POSTGRES_PROD_HOST")
-    elif os.getenv("ENV") == "docker":
-        postgres_host: str = os.getenv("POSTGRES_HOST_DOCKER")
-    elif os.getenv("ENV") == "local":
-        postgres_host: str = os.getenv("POSTGRES_HOST_LOCAL")
+        load_dotenv()
+        db_type = 'postgresql'
+
+        db_host: str = os.getenv("POSTGRES_HOST")
+        logging.info("db_host: %s", db_host)
+        db_user: str = os.getenv("POSTGRES_USER")
+        db_password: str = os.getenv("POSTGRES_PASSWORD")
+        db_name: str = os.getenv("POSTGRES_DB")
+

    # Client ID
    anon_clientid: Optional[str] = field(default_factory=lambda: uuid.uuid4().hex)
--- a/cognitive_architecture/database/graphdb/networkx/networkx_graph.py
+++ b/cognitive_architecture/database/graphdb/networkx/networkx_graph.py
@ -1,9 +1,15 @@
 import pickle
-
+from pathlib import Path
+from cognitive_architecture.config import Config
 import networkx as nx
+config = Config()
+config  = config.load()
+


 class NetworkXGraphDB:
+    """A class to manage a graph database using NetworkX"""
+    # graph_path = (Path(config.db_path) / config.graph_name).absolute()
    def __init__(self, filename="cognee_graph.pkl"):
        self.filename = filename
        try:
--- a/cognitive_architecture/database/relationaldb/database.py
+++ b/cognitive_architecture/database/relationaldb/database.py
@ -14,17 +14,19 @@ RETRY_DELAY = 5
 def get_sqlalchemy_database_url(
    db_type = globalConfig.db_type,
    db_name = globalConfig.db_name,
-    base_path = globalConfig.db_path,
+    db_path = globalConfig.db_path,
    user = globalConfig.db_user,
    password = globalConfig.db_password,
    host = globalConfig.db_host,
    port = globalConfig.db_port,
 ):
    """Get the SQLAlchemy database URL based on parameters."""
-    db_path = (Path(base_path) / db_name).absolute()
+
    if db_type == "sqlite":
+        db_path = (Path(db_path) / db_name).absolute()
        return f"sqlite+aiosqlite:///{db_path}"  # SQLite uses file path
    elif db_type == "duckdb":
+        db_path = (Path(db_path) / db_name).absolute()
        return f"duckdb+aiosqlite:///{db_path}"
    elif db_type == "postgresql":
        # Ensure optional parameters are handled gracefully
--- a/cognitive_architecture/database/relationaldb/models/docs.py
+++ b/cognitive_architecture/database/relationaldb/models/docs.py
@ -1,3 +1,4 @@
+""" This module contains the MemoryModel class, which is a SQLAlchemy model for the memory table in the relational database. """
 from datetime import datetime
 from sqlalchemy import Column, String, DateTime, ForeignKey, Boolean
 from sqlalchemy.orm import relationship
@ -7,6 +8,7 @@ from ..database import Base


 class DocsModel(Base):
+    """ Docs model"""
    __tablename__ = "docs"

    id = Column(String, primary_key=True)
--- a/cognitive_architecture/database/relationaldb/models/memory.py
+++ b/cognitive_architecture/database/relationaldb/models/memory.py
@ -1,3 +1,4 @@
+""" This module contains the MemoryModel class, which is a SQLAlchemy model for the memory table in the relational database. """
 from datetime import datetime
 from sqlalchemy import Column, String, DateTime, ForeignKey
 from sqlalchemy.orm import relationship
@ -5,6 +6,7 @@ from ..database import Base


 class MemoryModel(Base):
+    """ Memory model"""
    __tablename__ = "memories"

    id = Column(String, primary_key=True)
--- a/cognitive_architecture/database/relationaldb/models/metadatas.py
+++ b/cognitive_architecture/database/relationaldb/models/metadatas.py
@ -1,4 +1,5 @@
 # metadata.py
+""" MetaData model """
 from datetime import datetime
 from sqlalchemy import Column, String, DateTime, ForeignKey
 from sqlalchemy.orm import relationship
@ -8,6 +9,7 @@ from ..database import Base


 class MetaDatas(Base):
+    """ MetaData model"""
    __tablename__ = "metadatas"

    id = Column(String, primary_key=True)
--- a/cognitive_architecture/database/relationaldb/models/operation.py
+++ b/cognitive_architecture/database/relationaldb/models/operation.py
@ -1,4 +1,5 @@
 # operation.py
+""" Operation model """
 from datetime import datetime
 from sqlalchemy import Column, Integer, String, DateTime, ForeignKey
 from sqlalchemy.orm import relationship
@ -8,6 +9,7 @@ from ..database import Base


 class Operation(Base):
+    """ Operation model"""
    __tablename__ = "operations"

    id = Column(String, primary_key=True)
--- a/cognitive_architecture/database/relationaldb/models/sessions.py
+++ b/cognitive_architecture/database/relationaldb/models/sessions.py
@ -1,4 +1,5 @@
 # session.py
+""" Session model """
 from datetime import datetime
 from sqlalchemy import Column, Integer, String, DateTime, ForeignKey
 from sqlalchemy.orm import relationship
@ -9,6 +10,7 @@ from ..database import Base


 class Session(Base):
+    """ Session model"""
    __tablename__ = "sessions"

    id = Column(String, primary_key=True)
--- a/cognitive_architecture/database/relationaldb/models/user.py
+++ b/cognitive_architecture/database/relationaldb/models/user.py
@ -1,4 +1,5 @@
 # user.py
+""" User model """
 from datetime import datetime
 from sqlalchemy import Column, String, DateTime
 from sqlalchemy.orm import relationship
@ -14,6 +15,7 @@ from ..database import Base


 class User(Base):
+    """ User model"""
    __tablename__ = "users"

    id = Column(String, primary_key=True, index=True)
--- a/cognitive_architecture/fetch_secret.py
+++ b/cognitive_architecture/fetch_secret.py
@ -12,12 +12,11 @@ parent_dir = os.path.dirname(current_dir)
 # Add the parent directory to sys.path
 sys.path.insert(0, parent_dir)

-# API_ENABLED = os.environ.get("API_ENABLED", "False").lower() == "true"
-
 environment = os.getenv("AWS_ENV", "dev")


-def fetch_secret(secret_name, region_name, env_file_path):
+def fetch_secret(secret_name:str, region_name:str, env_file_path:str):
+    """Fetch the secret from AWS Secrets Manager and write it to the .env file."""
    print("Initializing session")
    session = boto3.session.Session()
    print("Session initialized")
@ -28,20 +27,19 @@ def fetch_secret(secret_name, region_name, env_file_path):
        response = client.get_secret_value(SecretId=secret_name)
    except Exception as e:
        print(f"Error retrieving secret: {e}")
-        return None
+        return f"Error retrieving secret: {e}"

    if "SecretString" in response:
        secret = response["SecretString"]
    else:
        secret = response["SecretBinary"]

+    with open(env_file_path, "w") as env_file:
+        env_file.write(secret)
+        print("Secrets are added to the .env file.")
+
    if os.path.exists(env_file_path):
        print(f"The .env file is located at: {env_file_path}")
-
-        with open(env_file_path, "w") as env_file:
-            env_file.write(secret)
-            print("Secrets are added to the .env file.")
-
        load_dotenv()
        print("The .env file is loaded.")
    else:
--- a/cognitive_architecture/llm/queries.py
+++ b/cognitive_architecture/llm/queries.py
@ -1,38 +1,38 @@
+""" This module contains the functions that are used to query the language model. """
 import os
-
-from ..shared.data_models import Node, Edge, KnowledgeGraph, GraphQLQuery, MemorySummary
-from ..config import Config
 import instructor
 from openai import OpenAI
+import logging
+from ..shared.data_models import  KnowledgeGraph,  MemorySummary
+from ..config import Config
+
+

 config = Config()
 config.load()

-print(config.model)
-print(config.openai_key)
-
 OPENAI_API_KEY = config.openai_key

 aclient = instructor.patch(OpenAI())

-import logging
-

 # Function to read query prompts from files
 def read_query_prompt(filename):
+    """Read a query prompt from a file."""
    try:
        with open(filename, "r") as file:
            return file.read()
    except FileNotFoundError:
-        logging.info(f"Error: File not found. Attempted to read: {filename}")
-        logging.info(f"Current working directory: {os.getcwd()}")
+        logging.info(f"Error: File not found. Attempted to read: %s {filename}")
+        logging.info(f"Current working directory: %s {os.getcwd()}")
        return None
    except Exception as e:
-        logging.info(f"An error occurred: {e}")
+        logging.info(f"An error occurred: %s {e}")
        return None


 def generate_graph(input) -> KnowledgeGraph:
+    """Generate a knowledge graph from a user query."""
    model = "gpt-4-1106-preview"
    user_prompt = f"Use the given format to extract information from the following input: {input}."
    system_prompt = read_query_prompt(
@ -57,20 +57,26 @@ def generate_graph(input) -> KnowledgeGraph:


 async def generate_summary(input) -> MemorySummary:
+    """Generate a summary from a user query."""
    out = aclient.chat.completions.create(
        model=config.model,
        messages=[
            {
                "role": "user",
-                "content": f"""Use the given format summarize and reduce the following input: {input}. """,
+                "content": f"""Use the given format summarize 
+                and reduce the following input: {input}. """,
            },
            {
                "role": "system",
                "content": """You are a top-tier algorithm
-                designed for summarizing existing knowledge graphs in structured formats based on a knowledge graph.
+                designed for summarizing existing knowledge 
+                graphs in structured formats based on a knowledge graph.
                ## 1. Strict Compliance
-                Adhere to the rules strictly. Non-compliance will result in termination.
-                ## 2. Don't forget your main goal is to reduce the number of nodes in the knowledge graph while preserving the information contained in it.""",
+                Adhere to the rules strictly. 
+                Non-compliance will result in termination.
+                ## 2. Don't forget your main goal 
+                is to reduce the number of nodes in the knowledge graph 
+                while preserving the information contained in it.""",
            },
        ],
        response_model=MemorySummary,
@ -79,6 +85,7 @@ async def generate_summary(input) -> MemorySummary:


 def user_query_to_edges_and_nodes(input: str) -> KnowledgeGraph:
+    """Generate a knowledge graph from a user query."""
    system_prompt = read_query_prompt(
        "cognitive_architecture/llm/prompts/generate_graph_prompt.txt"
    )
@ -87,7 +94,8 @@ def user_query_to_edges_and_nodes(input: str) -> KnowledgeGraph:
        messages=[
            {
                "role": "user",
-                "content": f"""Use the given format to extract information from the following input: {input}. """,
+                "content": f"""Use the given format to
+                 extract information from the following input: {input}. """,
            },
            {"role": "system", "content": system_prompt},
        ],
--- a/cognitive_architecture/openai_tools.py
+++ b/cognitive_architecture/openai_tools.py
@ -1,3 +1,4 @@
+"""Tools for interacting with OpenAI's GPT-3, GPT-4 API"""
 import asyncio
 import random
 import os
--- a/cognitive_architecture/shared/data_models.py
+++ b/cognitive_architecture/shared/data_models.py
@ -1,9 +1,10 @@
+"""Data models for the cognitive architecture."""
 from typing import Optional, List
-
 from pydantic import BaseModel, Field


 class Node(BaseModel):
+    """Node in a knowledge graph."""
    id: int
    description: str
    category: str
@ -14,6 +15,7 @@ class Node(BaseModel):


 class Edge(BaseModel):
+    """Edge in a knowledge graph."""
    source: int
    target: int
    description: str
@ -23,14 +25,17 @@ class Edge(BaseModel):


 class KnowledgeGraph(BaseModel):
+    """Knowledge graph."""
    nodes: List[Node] = Field(..., default_factory=list)
    edges: List[Edge] = Field(..., default_factory=list)


 class GraphQLQuery(BaseModel):
+    """GraphQL query."""
    query: str


 class MemorySummary(BaseModel):
+    """ Memory summary. """
    nodes: List[Node] = Field(..., default_factory=list)
    edges: List[Edge] = Field(..., default_factory=list)
--- a/cognitive_architecture/shared/language_processing.py
+++ b/cognitive_architecture/shared/language_processing.py
@ -1,9 +1,10 @@
+""" This module provides language processing functions for language detection and translation. """
+import logging
 import boto3
 from botocore.exceptions import BotoCoreError, ClientError
 from langdetect import detect, LangDetectException
 import iso639

-import logging

 # Basic configuration of the logging system
 logging.basicConfig(
@ -30,7 +31,7 @@ def detect_language(text):
    try:
        # Detect the language using langdetect
        detected_lang_iso639_1 = detect(trimmed_text)
-        logging.info(f"Detected ISO 639-1 code: {detected_lang_iso639_1}")
+        logging.info(f"Detected ISO 639-1 code: %s {detected_lang_iso639_1}")

        # Special case: map 'hr' (Croatian) to 'sr' (Serbian ISO 639-2)
        if detected_lang_iso639_1 == "hr":
@ -38,9 +39,9 @@ def detect_language(text):
        return detected_lang_iso639_1

    except LangDetectException as e:
-        logging.error(f"Language detection error: {e}")
+        logging.error(f"Language detection error: %s  {e}")
    except Exception as e:
-        logging.error(f"Unexpected error: {e}")
+        logging.error(f"Unexpected error: %s  {e}")

    return -1

@ -57,8 +58,10 @@ def translate_text(

    Parameters:
    text (str): The text to be translated.
-    source_language (str): The source language code (e.g., 'sr' for Serbian). ISO 639-2 Code https://www.loc.gov/standards/iso639-2/php/code_list.php
-    target_language (str): The target language code (e.g., 'en' for English). ISO 639-2 Code https://www.loc.gov/standards/iso639-2/php/code_list.php
+    source_language (str): The source language code (e.g., 'sr' for Serbian).
+     ISO 639-2 Code https://www.loc.gov/standards/iso639-2/php/code_list.php
+    target_language (str): The target language code (e.g., 'en' for English).
+    ISO 639-2 Code https://www.loc.gov/standards/iso639-2/php/code_list.php
    region_name (str): AWS region name.

    Returns:
@ -82,20 +85,9 @@ def translate_text(
        return result.get("TranslatedText", "No translation found.")

    except BotoCoreError as e:
-        logging.info(f"BotoCoreError occurred: {e}")
+        logging.info(f"BotoCoreError occurred: %s  {e}")
        return "Error with AWS Translate service configuration or request."

    except ClientError as e:
-        logging.info(f"ClientError occurred: {e}")
+        logging.info(f"ClientError occurred: %s  {e}")
        return "Error with AWS client or network issue."
-
-
-source_language = "sr"
-target_language = "en"
-text_to_translate = "Ja volim da pecam i idem na reku da šetam pored nje ponekad"
-
-translated_text = translate_text(text_to_translate, source_language, target_language)
-print(translated_text)
-
-
-# print(detect_language("Koliko krava ide u setnju?"))
--- a/cognitive_architecture/utils.py
+++ b/cognitive_architecture/utils.py
@ -1,3 +1,5 @@
+""" This module contains utility functions for the cognitive architecture. """
+
 import os
 import random
 import string
@ -13,7 +15,13 @@ from cognitive_architecture.database.relationaldb.models.user import User
 from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.future import select
 import logging
-
+from cognitive_architecture.database.relationaldb.models.operation import Operation
+from cognitive_architecture.database.relationaldb.database_crud import (
+    session_scope,
+    add_entity,
+    update_entity,
+    fetch_job_id,
+)

 class Node:
    def __init__(self, id, description, color):
@ -72,6 +80,7 @@ def get_document_names(doc_input):


 def format_dict(d):
+    """ Format a dictionary as a string."""
    # Initialize an empty list to store formatted items
    formatted_items = []

@ -93,6 +102,7 @@ def format_dict(d):


 def append_uuid_to_variable_names(variable_mapping):
+    """ Append a UUID to the variable names to make them unique."""
    unique_variable_mapping = {}
    for original_name in variable_mapping.values():
        unique_name = f"{original_name}_{uuid.uuid4().hex}"
@ -102,6 +112,7 @@ def append_uuid_to_variable_names(variable_mapping):

 # Update the functions to use the unique variable names
 def create_node_variable_mapping(nodes):
+    """ Create a mapping of node identifiers to unique variable names."""
    mapping = {}
    for node in nodes:
        variable_name = f"{node['category']}{node['id']}".lower()
@ -110,6 +121,7 @@ def create_node_variable_mapping(nodes):


 def create_edge_variable_mapping(edges):
+    """ Create a mapping of edge identifiers to unique variable names."""
    mapping = {}
    for edge in edges:
        # Construct a unique identifier for the edge
@ -124,17 +136,10 @@ def generate_letter_uuid(length=8):
    return "".join(random.choice(letters) for _ in range(length))


-from cognitive_architecture.database.relationaldb.models.operation import Operation
-from cognitive_architecture.database.relationaldb.database_crud import (
-    session_scope,
-    add_entity,
-    update_entity,
-    fetch_job_id,
-)
-


 async def get_vectordb_namespace(session: AsyncSession, user_id: str):
+    """ Asynchronously retrieves the latest memory names for a given user."""
    try:
        result = await session.execute(
            select(MemoryModel.memory_name)
@ -151,6 +156,7 @@ async def get_vectordb_namespace(session: AsyncSession, user_id: str):


 async def get_vectordb_document_name(session: AsyncSession, user_id: str):
+    """ Asynchronously retrieves the latest memory names for a given user."""
    try:
        result = await session.execute(
            select(DocsModel.doc_name)
@ -167,6 +173,7 @@ async def get_vectordb_document_name(session: AsyncSession, user_id: str):


 async def get_model_id_name(session: AsyncSession, id: str):
+    """ Asynchronously retrieves the latest memory names for a given user."""
    try:
        result = await session.execute(
            select(MemoryModel.memory_name)
@ -236,12 +243,6 @@ async def get_unsumarized_vector_db_namespace(session: AsyncSession, user_id: st

    return memory_details, docs

-    # except Exception as e:
-    #     # Handle the exception as needed
-    #     print(f"An error occurred: {e}")
-    #     return None
-
-
 async def get_memory_name_by_doc_id(session: AsyncSession, docs_id: str):
    """
    Asynchronously retrieves memory names associated with a specific document ID.