diff --git a/README.md b/README.md index 33bd7e9f4..5afa0c6cc 100644 --- a/README.md +++ b/README.md @@ -15,18 +15,18 @@ AI Applications and RAGs - Cognitive Architecture, Testability, Production Ready
Open-source framework for building and testing RAGs and Cognitive Architectures, designed for accuracy, transparency, and control.
@@ -72,40 +72,28 @@ AI Applications and RAGs - Cognitive Architecture, Testability, Production Ready -This repo is built to test and evolve RAG architecture, inspired by human cognitive processes, using Python. It's aims to be production ready, testable, but give great visibility in how we build RAG applications. - -This project is a part of the [PromethAI](https://prometh.ai/) ecosystem. - -The project run in iterations, from POC towards production ready code. -The iterations are numbered from 0 to 7, with 0 being the simplest iteration and 7 being the most complex one. -To run a specific iteration, navigate to the iteration's folder and follow the instructions in the README file. +This repo is built to test and evolve RAG architecture, inspired by human cognitive processes, using Python. +It aims to be production ready and testable, and to give great visibility into how we build RAG applications. +It runs in iterations, from POC towards production ready code. To read more about the approach and details on cognitive architecture, see the blog post: [AI Applications and RAGs - Cognitive Architecture, Testability, Production Ready Apps](https://topoteretes.notion.site/Going-beyond-Langchain-Weaviate-and-towards-a-production-ready-modern-data-platform-7351d77a1eba40aab4394c24bef3a278?pvs=4) - -_Keep Ithaka always in your mind. -Arriving there is what you’re destined for. -But don’t hurry the journey at all. -Better if it lasts for years_ +Try it on WhatsApp with one of our partners, Keepi.ai, by typing /save {content} followed by /query {content} ### Current Focus -#### Level 4 - Dynamic Graph Memory Manager + DB + Rag Test Manager -Scope: Use Neo4j to map the user queries into a knowledge graph based on cognitive architecture -Blog post: Soon! 
-- Dynamic Memory Manager -> store the data in N hierarchical stores -- Dynamic Graph -> map the user queries into a knowledge graph -- Classification -> classify the user queries and choose relevant graph nodes -- Context manager -> generate context for LLM to process containing Semantic, Episodic and Vector store data -- Postgres DB to store metadata -- Docker -- API +#### Level 5 - Integration with keepi.ai and other apps +Scope: Use Neo4j to map user preferences into a graph structure consisting of semantic, episodic, and procedural memory. +Fetch and store information and files through the WhatsApp chatbot using Keepi.ai +Use the graph to answer user queries and store new information in the graph. + +  ### Installation -### Run the level 4 +### Run cognee Make sure you have Docker, Poetry, and Python 3.11 installed and postgres installed. @@ -115,28 +103,13 @@ Copy the .env.example to .env and fill in the variables ```docker compose up ``` -Run +Then send API requests (add-memory, user-query-to-graph, document-to-graph-db, user-query-processor) to localhost:8000 -``` python main.py ``` If you are running natively, change ENVIRONMENT to local in the .env file If you are running in docker, change ENVIRONMENT to postgres in the .env file -Run - -``` python main.py ``` - -Or run - - ``` docker compose up promethai-mem ``` - -And send API requests add-memory, user-query-to-graph, document-to-graph-db, user-query-processor to the locahost:8000 - - - - - diff --git a/cognitive_architecture/database/graph_database/graph.py b/cognitive_architecture/database/graph_database/graph.py index 2efd613af..6ee32d53f 100644 --- a/cognitive_architecture/database/graph_database/graph.py +++ b/cognitive_architecture/database/graph_database/graph.py @@ -602,7 +602,7 @@ class Neo4jGraphDB(AbstractGraphDB): return cypher_query - def construct_merge_query(self, user_id: str, relationship_type: str, memory_type: str, + def run_merge_query(self, user_id: str, memory_type: str, 
similarity_threshold: float) -> str: """ Constructs a Cypher query to merge nodes in a Neo4j database based on a similarity threshold. @@ -614,26 +614,50 @@ class Neo4jGraphDB(AbstractGraphDB): Parameters: user_id (str): The ID of the user whose related nodes are to be merged. - relationship_type (str): The type of relationship between 'Memory' nodes and the nodes to be merged. memory_type (str): The memory type property of the nodes to be merged. similarity_threshold (float): The threshold above which nodes will be considered similar enough to be merged. Returns: str: A Cypher query string that can be executed in a Neo4j session. """ - query = f""" - MATCH (u:User {{userId: '{user_id}'}})-[:HAS_MEMORY]->(m:Memory) - MATCH (m)-[r:HAS_KNOWLEDGE]->(n1), (m)-[r2:HAS_KNOWLEDGE]->(n2) - WHERE id(n1) < id(n2) AND - {relationship_type} = TYPE(r) AND - {relationship_type} = TYPE(r2) AND - n1.memory_type = {memory_type} AND - n2.memory_type = {memory_type} AND - apoc.text.levenshteinSimilarity(toLower(n1.description), toLower(n2.description)) > {str(similarity_threshold)} - WITH n1, n2 - CALL apoc.refactor.mergeNodes([n1, n2], {{mergeRels: true}}) YIELD node - RETURN node - """ + if memory_type == 'SemanticMemory': + relationship_base = 'HAS_SEMANTIC_MEMORY' + relationship_type = 'HAS_KNOWLEDGE' + memory_label = 'semantic' + elif memory_type == 'EpisodicMemory': + relationship_base = 'HAS_EPISODIC_MEMORY' + # relationship_type = 'EPISODIC_MEMORY' + relationship_type = 'HAS_EVENT' + memory_label='episodic' + elif memory_type == 'Buffer': + relationship_base = 'HAS_BUFFER_MEMORY' + relationship_type = 'CURRENTLY_HOLDING' + memory_label= 'buffer' + + + query= f"""MATCH (u:User {{userId: '{user_id}'}})-[:{relationship_base}]->(sm:{memory_type}) + MATCH (sm)-[:{relationship_type}]->(n) + RETURN labels(n) AS NodeType, collect(n) AS Nodes + """ + + node_results = self.query(query) + + node_types = [record['NodeType'] for record in node_results] + + for node in node_types: + query 
= f""" + MATCH (u:User {{userId: "{user_id}"}})-[:{relationship_base}]->(m:{memory_type}) + MATCH (m)-[:{relationship_type}]->(n1:{node[0]} {{memory_type: "{memory_label}"}}), + (m)-[:{relationship_type}]->(n2:{node[0]} {{memory_type: "{memory_label}"}}) + WHERE id(n1) < id(n2) AND + apoc.text.levenshteinSimilarity(toLower(n1.description), toLower(n2.description)) > {similarity_threshold} + WITH n1, n2 + LIMIT 1 + CALL apoc.refactor.mergeNodes([n1, n2], {{mergeRels: true}}) YIELD node + RETURN node + """ + self.query(query) + self.close() return query def get_namespaces_by_document_category(self, user_id: str, category: str): @@ -788,7 +812,7 @@ class Neo4jGraphDB(AbstractGraphDB): -from networkx_graph import NetworkXGraphDB +from .networkx_graph import NetworkXGraphDB class GraphDBFactory: def create_graph_db(self, db_type, **kwargs): if db_type == 'neo4j': diff --git a/cognitive_architecture/llm/queries.py b/cognitive_architecture/llm/queries.py index ac647d657..a3b16ed09 100644 --- a/cognitive_architecture/llm/queries.py +++ b/cognitive_architecture/llm/queries.py @@ -17,18 +17,27 @@ OPENAI_API_KEY = config.openai_key aclient = instructor.patch(OpenAI()) load_dotenv() +import logging # Function to read query prompts from files def read_query_prompt(filename): - with open(filename, 'r') as file: - return file.read() + try: + with open(filename, 'r') as file: + return file.read() + except FileNotFoundError: + logging.info(f"Error: File not found. Attempted to read: {filename}") + logging.info(f"Current working directory: {os.getcwd()}") + return None + except Exception as e: + logging.info(f"An error occurred: {e}") + return None def generate_graph(input) -> KnowledgeGraph: - model = "gpt-4-1106-preview" # Define the model here + model = "gpt-4-1106-preview" user_prompt = f"Use the given format to extract information from the following input: {input}." 
- system_prompt = read_query_prompt('prompts/generate_graph_prompt.txt') + system_prompt = read_query_prompt('cognitive_architecture/llm/prompts/generate_graph_prompt.txt') out = aclient.chat.completions.create( model=model, @@ -50,7 +59,7 @@ def generate_graph(input) -> KnowledgeGraph: async def generate_summary(input) -> MemorySummary: out = aclient.chat.completions.create( - model="gpt-4-1106-preview", + model=config.model, messages=[ { "role": "user", @@ -69,7 +78,7 @@ async def generate_summary(input) -> MemorySummary: def user_query_to_edges_and_nodes( input: str) ->KnowledgeGraph: - system_prompt = read_query_prompt('prompts/generate_graph_prompt.txt') + system_prompt = read_query_prompt('cognitive_architecture/llm/prompts/generate_graph_prompt.txt') return aclient.chat.completions.create( model=config.model, messages=[ diff --git a/docker-compose.yml b/docker-compose.yml index 8ad851f78..28b65da18 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -21,7 +21,6 @@ services: volumes: - "./:/app" - ./.data:/app/.data - environment: - HOST=0.0.0.0 - ENVIRONMENT=local @@ -39,8 +38,6 @@ services: limits: cpus: "4.0" memory: 8GB - - postgres: image: postgres container_name: postgres @@ -53,27 +50,6 @@ services: - cognee_backend ports: - "5432:5432" - -# superset: -# platform: linux/amd64 -# build: -# context: ./superset -# dockerfile: Dockerfile -# container_name: superset -# environment: -# - ADMIN_USERNAME=admin -# - ADMIN_EMAIL=vasilije@topoteretes.com -# - ADMIN_PASSWORD=admin -# - POSTGRES_USER=bla -# - POSTGRES_PASSWORD=bla -# - POSTGRES_DB=bubu -# networks: -# - promethai_mem_backend -# ports: -# - '8088:8088' -# depends_on: -# - postgres - networks: cognee_backend: name: cognee_backend diff --git a/main.py b/main.py index 0aa2bceca..abc59520d 100644 --- a/main.py +++ b/main.py @@ -160,6 +160,7 @@ async def load_documents_to_vectorstore(session: AsyncSession, user_id: str, con result = await memory.dynamic_method_call(dynamic_memory_class, 
'add_memories', observation=content, params=params, loader_settings=loader_settings) await update_entity(session, Operation, job_id, "SUCCESS") + return 1 async def user_query_to_graph_db(session: AsyncSession, user_id: str, query_input: str): @@ -181,9 +182,13 @@ async def user_query_to_graph_db(session: AsyncSession, user_id: str, query_inpu ), ) + detected_language = detect_language(query_input) + translated_query = translate_text(query_input, detected_language, "en") neo4j_graph_db = Neo4jGraphDB(url=config.graph_database_url, username=config.graph_database_username, password=config.graph_database_password) - cypher_query = await neo4j_graph_db.generate_cypher_query_for_user_prompt_decomposition(user_id,query_input) + cypher_query = await neo4j_graph_db.generate_cypher_query_for_user_prompt_decomposition(user_id,translated_query) result = neo4j_graph_db.query(cypher_query) + neo4j_graph_db.run_merge_query(user_id=user_id, memory_type="SemanticMemory", similarity_threshold=0.8) + neo4j_graph_db.run_merge_query(user_id=user_id, memory_type="EpisodicMemory", similarity_threshold=0.8) neo4j_graph_db.close() await update_entity(session, Operation, job_id, "SUCCESS") @@ -628,6 +633,8 @@ async def relevance_feedback(query: str, input_type: str): break # Exit the loop if a result of type bool is obtained return result + + async def main(): user_id = "user_test_1_1" @@ -640,8 +647,8 @@ async def main(): class GraphQLQuery(BaseModel): query: str - gg = await user_query_to_graph_db(session, user_id, "How does cognitive architecture work?") - print(gg) + # gg = await user_query_to_graph_db(session, user_id, "How does cognitive architecture work?") + # print(gg) # def cypher_statement_correcting( input: str) -> str: # out = aclient.chat.completions.create( @@ -709,8 +716,12 @@ async def main(): # await create_public_memory(user_id=user_id, labels=['sr'], topic="PublicMemory") # await add_documents_to_graph_db(session, user_id) # - # neo4j_graph_db = 
Neo4jGraphDB(url=config.graph_database_url, username=config.graph_database_username, - # password=config.graph_database_password) + neo4j_graph_db = Neo4jGraphDB(url=config.graph_database_url, username=config.graph_database_username, + password=config.graph_database_password) + + out = neo4j_graph_db.run_merge_query(user_id = user_id, memory_type="SemanticMemory", similarity_threshold=0.5) + bb = neo4j_graph_db.query(out) + print(bb) # await attach_user_to_memory(user_id=user_id, labels=['sr'], topic="PublicMemory")