diff --git a/.gitignore b/.gitignore index 4d1265323..d28c68f55 100644 --- a/.gitignore +++ b/.gitignore @@ -162,3 +162,4 @@ cython_debug/ # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ +.vscode/ diff --git a/Dockerfile b/Dockerfile index f8e1163b4..f0642ded9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,13 +1,15 @@ - FROM python:3.11 # Set build argument +ARG DEBUG ARG API_ENABLED # Set environment variable based on the build argument -ENV API_ENABLED=${API_ENABLED} \ - PIP_NO_CACHE_DIR=true +ENV DEBUG=${DEBUG} +ENV API_ENABLED=${API_ENABLED} +ENV PIP_NO_CACHE_DIR=true ENV PATH="${PATH}:/root/.poetry/bin" + RUN pip install poetry WORKDIR /app @@ -15,6 +17,7 @@ COPY pyproject.toml poetry.lock /app/ # Install the dependencies RUN poetry config virtualenvs.create false && \ + poetry lock --no-update && \ poetry install --no-root --no-dev RUN apt-get update -q && \ @@ -37,15 +40,13 @@ RUN apt-get update -q && \ /tmp/* \ /var/tmp/* - - WORKDIR /app + COPY cognitive_architecture/ /app/cognitive_architecture COPY main.py /app COPY api.py /app - COPY entrypoint.sh /app/entrypoint.sh RUN chmod +x /app/entrypoint.sh -ENTRYPOINT ["/app/entrypoint.sh"] \ No newline at end of file +ENTRYPOINT ["/app/entrypoint.sh"] diff --git a/README.md b/README.md index f55db2149..e89ef71f0 100644 --- a/README.md +++ b/README.md @@ -2,107 +2,114 @@ AI Applications and RAGs - Cognitive Architecture, Testability, Production Ready Apps - - -

- - promethAI logo +

+ + Cognee logo - -

-

Open-source framework for building and testing RAGs and Cognitive Architectures, designed for accuracy, transparency, and control.

- -

- -cognee forks - - - -cognee stars - - -cognee pull-requests - - - - - +

+ Open-source framework for building and testing RAGs and Cognitive Architectures, designed for accuracy, transparency, and control.

-[//]: # (

Follow PromethAI

) - -[//]: # (

) - -[//]: # () - -[//]: # (Follow _promethAI) - -[//]: # () - -[//]: # (

) - -[//]: # () - -[//]: # (

) - -[//]: # () - -[//]: # (

) - - -

Share cognee Repository

- -

- - -Follow _promethAI -Share on Telegram - -Share on Reddit - Buy Me A Coffee +

+ + cognee forks + + + cognee stars + + + cognee pull-requests + + + cognee releases>
+  </a>
+</p>
 
+<p>
+  <b>Share cognee Repository</b>
+</p>
+<p>
+  <a href= + Follow Cognee + + + Share on Telegram + + + + + + Share on Reddit + + + + + + Buy Me A Coffee +


- - [Star us on Github!](https://www.github.com/topoteretes/cognee) Jump into the world of RAG architecture, inspired by human cognitive processes, using Python. -[cognee](www.cognee.ai) runs in iterations, from POC towards production ready code. +Cognee runs in iterations, from POC towards production ready code. -To read more about the approach and details on cognitive architecture, see the blog post: [AI Applications and RAGs - Cognitive Architecture, Testability, Production Ready Apps](https://topoteretes.notion.site/Going-beyond-Langchain-Weaviate-and-towards-a-production-ready-modern-data-platform-7351d77a1eba40aab4394c24bef3a278?pvs=4) +To read more about the approach and details on cognitive architecture, see the blog post: AI Applications and RAGs - Cognitive Architecture, Testability, Production Ready Apps -Try it yourself on Whatsapp with one of our [partners](www.keepi.ai) by typing /save _content_ followed by /query _content_ - - - -### Get Started in Moments - -Running [cognee](www.cognee.ai) is a breeze. Simply run `cp env.example .env` and `docker compose up cognee` in your terminal. -Send API requests add-memory, user-query-to-graph, document-to-graph-db, user-query-processor to the locahost:8000 - - - -### Current Focus - -#### Integration to keepi.ai and other apps -Uses Neo4j to map user preferences into a graph structure consisting of semantic, episodic, and procedural memory. - -Fetches information and stores information and files on Whatsapp chatbot using [keepi.ai](www.keepi.ai) - -Uses the graph to answer user queries and store new information in the graph. - - -### Architecture - -![Image](https://github.com/topoteretes/PromethAI-Memory/blob/main/assets/img.png) +Try it yourself on Whatsapp with one of our partners by typing `/save {content you want to save}` followed by `/query {knowledge you saved previously}` + +## Getting started + +In order to run cognee you need to have Docker installed on your machine. 
+ +Run Cognee in a couple of steps: +- Run `cp .env.template .env` in your terminal and set all the environment variables +- Run `docker compose up` in order to start graph and relational databases +- Run `docker compose up cognee` in order start Cognee + + + +## Debugging +In order to run Cognee with debugger attached you need to build the Cognee image with the `DEBUG` flag set to true. + +- `docker compose build cognee --no-cache --build-arg DEBUG=true` +- `docker compose up cognee` + +### Visual Studio Code debugger + +Add the following configuration to VSCode `Run and Debug` configurations array: +```json + { + "name": "Attach (remote debug)", + "type": "python", + "request": "attach", + "port": 5678, + "host": "127.0.0.1", + "pathMappings": [{ + "localRoot": "${workspaceFolder}", + "remoteRoot": "." + }] + } +``` + +It should look like this: + + + +## Current Focus + +### Integration with keepi.ai and other apps +- Cognee uses Neo4j graph database to map user data into a graph structure consisting of semantic, episodic, and procedural memory. +- Stores data and files through the WhatsApp chatbot keepi.ai +- Uses the graph to answer user queries and store new information in the graph. +## Architecture ### How Cognee Enhances Your Contextual Memory @@ -112,15 +119,6 @@ Our framework for the OpenAI, Graph (Neo4j) and Vector (Weaviate) databases intr - Document Topology: Structure and store documents in public and private domains. - Personalized Context: Provide a context object to the LLM for a better response. +
- - - - - - - - - - - +![Image](assets/architecture.png) diff --git a/api.py b/api.py index 48a7f6c21..c499702f6 100644 --- a/api.py +++ b/api.py @@ -15,6 +15,9 @@ from cognitive_architecture.vectorstore_manager import Memory from dotenv import load_dotenv from main import add_documents_to_graph_db, user_context_enrichment from cognitive_architecture.config import Config +from fastapi import Depends + +OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") # Set up logging logging.basicConfig( @@ -25,7 +28,10 @@ logging.basicConfig( logger = logging.getLogger(__name__) load_dotenv() -OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") + +config = Config() +config.load() + app = FastAPI(debug=True) # # from auth.cognito.JWTBearer import JWTBearer @@ -33,31 +39,12 @@ app = FastAPI(debug=True) # # auth = JWTBearer(jwks) -from fastapi import Depends - -config = Config() -config.load() - -@app.get( - "/", -) -""" -Root endpoint that returns a welcome message. -""" -async def root(): -class ImageResponse(BaseModel): - success: bool - message: str - - -@app.get( - "/", -) +@app.get("/") async def root(): """ Root endpoint that returns a welcome message. """ - return {"message": "Hello, World, I am alive!"} + return { "message": "Hello, World, I am alive!" 
} @app.get("/health") @@ -234,10 +221,11 @@ async def drop_db(payload: Payload): else: pass - from cognitive_architecture.database.create_database_tst import drop_database, create_admin_engine + from cognitive_architecture.database.create_database import drop_database, create_admin_engine engine = create_admin_engine(username, password, host, database_name) - drop_database(engine) + connection = engine.raw_connection() + drop_database(connection, database_name) return JSONResponse(content={"response": "DB dropped"}, status_code=200) else: @@ -249,10 +237,11 @@ async def drop_db(payload: Payload): else: pass - from cognitive_architecture.database.create_database_tst import create_database, create_admin_engine + from cognitive_architecture.database.create_database import create_database, create_admin_engine engine = create_admin_engine(username, password, host, database_name) - create_database(engine) + connection = engine.raw_connection() + create_database(connection, database_name) return JSONResponse(content={"response": " DB drop"}, status_code=200) diff --git a/assets/img.png b/assets/architecture.png similarity index 100% rename from assets/img.png rename to assets/architecture.png diff --git a/assets/cognee-logo.png b/assets/cognee-logo.png new file mode 100644 index 000000000..d02a831fb Binary files /dev/null and b/assets/cognee-logo.png differ diff --git a/assets/User_graph.png b/assets/memory-graph.png similarity index 100% rename from assets/User_graph.png rename to assets/memory-graph.png diff --git a/assets/vscode-debug-config.png b/assets/vscode-debug-config.png new file mode 100644 index 000000000..21f54400a Binary files /dev/null and b/assets/vscode-debug-config.png differ diff --git a/cognitive_architecture/config.py b/cognitive_architecture/config.py index 22e0be9ae..3892383f5 100644 --- a/cognitive_architecture/config.py +++ b/cognitive_architecture/config.py @@ -1,6 +1,7 @@ import os import json import configparser +import logging import uuid from 
typing import Optional, List, Dict, Any from dataclasses import dataclass, field diff --git a/cognitive_architecture/database/create_database.py b/cognitive_architecture/database/create_database.py index 2d0107807..ef0ddeacc 100644 --- a/cognitive_architecture/database/create_database.py +++ b/cognitive_architecture/database/create_database.py @@ -1,110 +1,63 @@ -# this is needed to import classes from other modules -# script_dir = os.path.dirname(os.path.abspath(__file__)) -# # Get the parent directory of your script and add it to sys.path -# parent_dir = os.path.dirname(script_dir) -# sys.path.append(parent_dir) +import os +import logging +import psycopg2 +from dotenv import load_dotenv +from postgres.database import Base +from sqlalchemy import create_engine, text + from postgres.models import memory from postgres.models import metadatas from postgres.models import operation from postgres.models import sessions from postgres.models import user from postgres.models import docs -# from cognitive_architecture.config import Config -# config = Config() -# config.load() - - -from postgres.database import Base - -from sqlalchemy import create_engine, text -import psycopg2 -from dotenv import load_dotenv load_dotenv() -import os - - -import os - - -if os.environ.get('AWS_ENV') == 'prd' or os.environ.get('AWS_ENV') == 'dev': - host = os.environ.get('POSTGRES_HOST') - username = os.environ.get('POSTGRES_USER') - password = os.environ.get('POSTGRES_PASSWORD') - database_name = os.environ.get('POSTGRES_DB') -elif os.environ.get('AWS_ENV') == 'local': - host = os.environ.get('POSTGRES_HOST') - username = os.environ.get('POSTGRES_USER') - password = os.environ.get('POSTGRES_PASSWORD') - database_name = os.environ.get('POSTGRES_DB') -else: - host = os.environ.get('POSTGRES_HOST') - username = os.environ.get('POSTGRES_USER') - password = os.environ.get('POSTGRES_PASSWORD') - database_name = os.environ.get('POSTGRES_DB') - - +logger = logging.getLogger(__name__) def 
create_admin_engine(username, password, host, database_name): admin_url = f"postgresql://{username}:{password}@{host}:5432/{database_name}" return create_engine(admin_url) - -def database_exists(username, password, host, db_name): - engine = create_admin_engine(username, password, host, db_name) - connection = engine.connect() +def database_exists(connection, db_name): query = text(f"SELECT 1 FROM pg_database WHERE datname='{db_name}'") result = connection.execute(query).fetchone() - connection.close() - engine.dispose() return result is not None - -def create_database(username, password, host, db_name): - engine = create_admin_engine(username, password, host, db_name) - connection = engine.raw_connection() +def create_database(connection, db_name): connection.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT) cursor = connection.cursor() cursor.execute(f"CREATE DATABASE {db_name}") cursor.close() - connection.close() - engine.dispose() - -def drop_database(username, password, host, db_name): - engine = create_admin_engine(username, password, host) - connection = engine.raw_connection() +def drop_database(connection, db_name): connection.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT) cursor = connection.cursor() cursor.execute(f"DROP DATABASE IF EXISTS {db_name}") cursor.close() - connection.close() - engine.dispose() - print(f"Database {db_name} dropped successfully.") def create_tables(engine): - Base.metadata.create_all(bind=engine) + Base.metadata.create_all(bind = engine) if __name__ == "__main__": - # - # username = os.getenv('POSTGRES_USER') - # password = os.getenv('POSTGRES_PASSWORD') - # database_name = os.getenv('POSTGRES_DB') - # environment = os.environ.get("ENV") - # host = config.postgres_host - # username = config.postgres_user - # password = config.postgres_password - # database_name = config.postgres_db + host = os.environ.get('POSTGRES_HOST') + username = os.environ.get('POSTGRES_USER') + password = 
os.environ.get('POSTGRES_PASSWORD') + database_name = os.environ.get('POSTGRES_DB') engine = create_admin_engine(username, password, host, database_name) + connection = engine.connect() - print(Base.metadata.tables) + # print(Base.metadata.tables) - if not database_exists(username, password, host, database_name): - print(f"Database {database_name} does not exist. Creating...") - create_database(username, password, host, database_name) - print(f"Database {database_name} created successfully.") + if not database_exists(connection, database_name): + logger.info(f"Database {database_name} does not exist. Creating...") + create_database(connection, database_name) + logger.info(f"Database {database_name} created successfully.") + + connection.close() + engine.dispose() create_tables(engine) \ No newline at end of file diff --git a/cognitive_architecture/database/create_database_tst.py b/cognitive_architecture/database/create_database_tst.py deleted file mode 100644 index 3af3a42f5..000000000 --- a/cognitive_architecture/database/create_database_tst.py +++ /dev/null @@ -1,112 +0,0 @@ -# this is needed to import classes from other modules -# script_dir = os.path.dirname(os.path.abspath(__file__)) -# # Get the parent directory of your script and add it to sys.path -# parent_dir = os.path.dirname(script_dir) -# sys.path.append(parent_dir) -from cognitive_architecture.database.postgres.models import memory -from cognitive_architecture.database.postgres.models import metadatas -from cognitive_architecture.database.postgres.models import operation -from cognitive_architecture.database.postgres.models import sessions -from cognitive_architecture.database.postgres.models import user -from cognitive_architecture.database.postgres.models import docs -# from cognitive_architecture.config import Config -# config = Config() -# config.load() - - - -from postgres.database import Base - -from sqlalchemy import create_engine, text -import psycopg2 -from dotenv import load_dotenv 
-load_dotenv() -import os - - -import os - - -if os.environ.get('AWS_ENV') == 'prd' or os.environ.get('AWS_ENV') == 'dev': - host = os.environ.get('POSTGRES_HOST') - username = os.environ.get('POSTGRES_USER') - password = os.environ.get('POSTGRES_PASSWORD') - database_name = os.environ.get('POSTGRES_DB') -elif os.environ.get('AWS_ENV') == 'local': - host = os.environ.get('POSTGRES_HOST') - username = os.environ.get('POSTGRES_USER') - password = os.environ.get('POSTGRES_PASSWORD') - database_name = os.environ.get('POSTGRES_DB') -else: - host = os.environ.get('POSTGRES_HOST') - username = os.environ.get('POSTGRES_USER') - password = os.environ.get('POSTGRES_PASSWORD') - database_name = os.environ.get('POSTGRES_DB') - - - -def create_admin_engine(username, password, host, database_name): - admin_url = f"postgresql://{username}:{password}@{host}:5432/{database_name}" - return create_engine(admin_url) - - -def database_exists(username, password, host, db_name): - engine = create_admin_engine(username, password, host, db_name) - connection = engine.connect() - query = text(f"SELECT 1 FROM pg_database WHERE datname='{db_name}'") - result = connection.execute(query).fetchone() - connection.close() - engine.dispose() - return result is not None - - -def create_database(username, password, host, db_name): - engine = create_admin_engine(username, password, host, db_name) - connection = engine.raw_connection() - connection.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT) - cursor = connection.cursor() - cursor.execute(f"CREATE DATABASE {db_name}") - cursor.close() - connection.close() - engine.dispose() - - -def drop_database(username, password, host, db_name): - engine = create_admin_engine(username, password, host) - connection = engine.raw_connection() - connection.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT) - cursor = connection.cursor() - cursor.execute(f"DROP DATABASE IF EXISTS {db_name}") - cursor.close() - connection.close() - 
engine.dispose() - print(f"Database {db_name} dropped successfully.") - - - -def create_tables(engine): - Base.metadata.create_all(bind=engine) - -if __name__ == "__main__": - # - # username = os.getenv('POSTGRES_USER') - # password = os.getenv('POSTGRES_PASSWORD') - # database_name = os.getenv('POSTGRES_DB') - # environment = os.environ.get("ENV") - # host = config.postgres_host - # username = config.postgres_user - # password = config.postgres_password - # database_name = config.postgres_db - - engine = create_admin_engine(username, password, host, database_name) - - print(Base.metadata.tables) - - if not database_exists(username, password, host, database_name): - print(f"Database {database_name} does not exist. Creating...") - create_database(username, password, host, database_name) - print(f"Database {database_name} created successfully.") - - create_tables(engine) - - diff --git a/cognitive_architecture/fetch_secret.py b/cognitive_architecture/fetch_secret.py index b8635b009..71ae7025c 100644 --- a/cognitive_architecture/fetch_secret.py +++ b/cognitive_architecture/fetch_secret.py @@ -1,7 +1,7 @@ import os -from dotenv import load_dotenv -import os import sys +import boto3 +from dotenv import load_dotenv # Get the directory that contains your script current_dir = os.path.dirname(os.path.abspath(__file__)) @@ -12,23 +12,19 @@ parent_dir = os.path.dirname(current_dir) # Add the parent directory to sys.path sys.path.insert(0, parent_dir) - # API_ENABLED = os.environ.get("API_ENABLED", "False").lower() == "true" -import boto3 environment = os.getenv("AWS_ENV", "dev") - - def fetch_secret(secret_name, region_name, env_file_path): print("Initializing session") session = boto3.session.Session() print("Session initialized") - client = session.client(service_name="secretsmanager", region_name=region_name) + client = session.client(service_name="secretsmanager", region_name = region_name) print("Client initialized") try: - response = 
client.get_secret_value(SecretId=secret_name) + response = client.get_secret_value(SecretId = secret_name) except Exception as e: print(f"Error retrieving secret: {e}") return None @@ -38,52 +34,27 @@ def fetch_secret(secret_name, region_name, env_file_path): else: secret = response["SecretBinary"] - with open(env_file_path, "w") as env_file: - env_file.write(secret) - if os.path.exists(env_file_path): - print(f"The .env file is located at: {os.path.abspath(env_file_path)}") + print(f"The .env file is located at: {env_file_path}") + + with open(env_file_path, "w") as env_file: + env_file.write(secret) + print("Secrets are added to the .env file.") + load_dotenv() - PINECONE_API_KEY = os.getenv("PINECONE_API_KEY", "") - - print("LEN OF PINECONE_API_KEY", len(PINECONE_API_KEY)) + print("The .env file is loaded.") else: - print("The .env file was not found.") - return "Success in loading env files" + print(f"The .env file was not found at: {env_file_path}.") +ENV_FILE_PATH = os.path.abspath("../.env") -env_file = "../.env" -if os.path.exists(env_file): +if os.path.exists(ENV_FILE_PATH): # Load default environment variables (.env) load_dotenv() - print("cognee is running") - - + print("Cognee is already running...") else: - secrets = fetch_secret( + fetch_secret( f"promethai-{environment}-backend-secretso-promethaijs-dotenv", "eu-west-1", - "../.env", + ENV_FILE_PATH, ) - if secrets: - print(secrets) - load_dotenv() - - -# Check if "dev" is present in the task ARN -if "dev" in environment: - # Fetch the secret - secrets = fetch_secret( - f"promethai-dev-backend-secretso-promethaijs-dotenv", - "eu-west-1", - "../.env", - ) - load_dotenv() -elif "prd" in environment: - # Fetch the secret - secrets = fetch_secret( - f"promethai-prd-backend-secretso-promethaijs-dotenv", - "eu-west-1", - "../.env", - ) - load_dotenv() diff --git a/docker-compose.yml b/docker-compose.yml index 28b65da18..80e6826cd 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,6 +14,7 @@ 
services: - cognee_backend cognee: + container_name: cognee networks: - cognee_backend build: @@ -30,6 +31,7 @@ services: - 443:443 - 80:80 - 50051:50051 + - 5678:5678 depends_on: - postgres - neo4j @@ -38,6 +40,7 @@ services: limits: cpus: "4.0" memory: 8GB + postgres: image: postgres container_name: postgres @@ -50,6 +53,7 @@ services: - cognee_backend ports: - "5432:5432" + networks: cognee_backend: name: cognee_backend diff --git a/entrypoint.sh b/entrypoint.sh index f1f1f6664..9642879da 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -1,24 +1,35 @@ #!/bin/bash -export ENVIRONMENT -# Run Python scripts with error handling -echo "Running fetch_secret.py" +# export ENVIRONMENT + +echo $DEBUG +echo $ENVIRONMENT + if [ "$ENVIRONMENT" != "local" ]; then echo "Running fetch_secret.py" + python cognitive_architecture/fetch_secret.py + if [ $? -ne 0 ]; then echo "Error: fetch_secret.py failed" exit 1 fi else - echo "ENVIRONMENT ($ENVIRONMENT) is active, skipping fetch_secret.py" + echo '"local" environment is active, skipping fetch_secret.py' fi echo "Running create_database.py" + python cognitive_architecture/database/create_database.py if [ $? -ne 0 ]; then echo "Error: create_database.py failed" exit 1 fi -# Start Gunicorn + echo "Starting Gunicorn" -gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --bind=0.0.0.0:443 --log-level debug api:app + +if [ "$DEBUG" = true ]; then + echo "Waiting for the debugger to attach..." 
+ python -m debugpy --wait-for-client --listen 0.0.0.0:5678 -m gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --bind=0.0.0.0:443 --log-level debug api:app +else + gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --bind=0.0.0.0:443 --log-level debug api:app +fi diff --git a/main.py b/main.py index b10220674..e1e61fba7 100644 --- a/main.py +++ b/main.py @@ -183,10 +183,17 @@ async def user_query_to_graph_db(session: AsyncSession, user_id: str, query_inpu ) detected_language = detect_language(query_input) - translated_query = translate_text(query_input, detected_language, "en") + + if detected_language != "en": + translated_query = translate_text(query_input, detected_language, "en") + else: + translated_query = query_input + neo4j_graph_db = Neo4jGraphDB(url=config.graph_database_url, username=config.graph_database_username, password=config.graph_database_password) - cypher_query = await neo4j_graph_db.generate_cypher_query_for_user_prompt_decomposition(user_id,translated_query) + + cypher_query = await neo4j_graph_db.generate_cypher_query_for_user_prompt_decomposition(user_id, translated_query) result = neo4j_graph_db.query(cypher_query) + neo4j_graph_db.run_merge_query(user_id=user_id, memory_type="SemanticMemory", similarity_threshold=0.8) neo4j_graph_db.run_merge_query(user_id=user_id, memory_type="EpisodicMemory", similarity_threshold=0.8) neo4j_graph_db.close() diff --git a/pyproject.toml b/pyproject.toml index c1ba66029..fa7d6174a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,10 +59,7 @@ neo4j = "^5.14.1" grpcio = "^1.60.0" langdetect = "^1.0.9" iso639 = "^0.1.4" - - - - +debugpy = "^1.8.0" [build-system] requires = ["poetry-core"]