Rename the build package and refactor the local runner

Vasilije 2024-01-25 16:46:39 +01:00
parent 52058c0696
commit 54772d038a
5 changed files with 91 additions and 98 deletions


@@ -15,18 +15,18 @@ AI Applications and RAGs - Cognitive Architecture, Testability, Production Ready
<p align="left"><i>Open-source framework for building and testing RAGs and Cognitive Architectures, designed for accuracy, transparency, and control.</i></p>
<p align="left">
<a href="https://github.com/topoteretes/PromethAI-Memory/fork" target="blank">
<img src="https://img.shields.io/github/forks/topoteretes/PromethAI-Memory?style=for-the-badge" alt="promethAI forks"/>
<a href="https://github.com/topoteretes/cognee/fork" target="blank">
<img src="https://img.shields.io/github/forks/topoteretes/cognee?style=for-the-badge" alt="cognee forks"/>
</a>
<a href="https://github.com/topoteretes/PromethAI-Backend/stargazers" target="blank">
<img src="https://img.shields.io/github/stars/topoteretes/PromethAI-Memory?style=for-the-badge" alt="promethAI stars"/>
<a href="https://github.com/topoteretes/cognee/stargazers" target="blank">
<img src="https://img.shields.io/github/stars/topoteretes/cognee?style=for-the-badge" alt="cognee stars"/>
</a>
<a href="https://github.com/topoteretes/PromethAI-Backend/pulls" target="blank">
<img src="https://img.shields.io/github/issues-pr/topoteretes/PromethAI-Memory?style=for-the-badge" alt="promethAI pull-requests"/>
<a href="https://github.com/topoteretes/cognee/pulls" target="blank">
<img src="https://img.shields.io/github/issues-pr/topoteretes/cognee?style=for-the-badge" alt="cognee pull-requests"/>
</a>
<a href='https://github.com/topoteretes/PromethAI-Backend/releases'>
<img src='https://img.shields.io/github/release/topoteretes/PromethAI-Memory?&label=Latest&style=for-the-badge'>
<a href='https://github.com/topoteretes/cognee/releases'>
<img src='https://img.shields.io/github/release/topoteretes/cognee?&label=Latest&style=for-the-badge'>
</a>
</p>
@@ -72,40 +72,28 @@ AI Applications and RAGs - Cognitive Architecture, Testability, Production Ready
This repo is built to test and evolve RAG architecture, inspired by human cognitive processes, using Python. It aims to be production ready and testable, while giving great visibility into how we build RAG applications.
This project is a part of the [PromethAI](https://prometh.ai/) ecosystem.
The project runs in iterations, from POC towards production-ready code.
The iterations are numbered from 0 to 7, with 0 being the simplest iteration and 7 being the most complex one.
To run a specific iteration, navigate to the iteration's folder and follow the instructions in the README file.
This repo is built to test and evolve RAG architecture, inspired by human cognitive processes, using Python.
It aims to be production ready and testable, while giving great visibility into how we build RAG applications.
It runs in iterations, from POC towards production-ready code.
To read more about the approach and details on cognitive architecture, see the blog post: [AI Applications and RAGs - Cognitive Architecture, Testability, Production Ready Apps](https://topoteretes.notion.site/Going-beyond-Langchain-Weaviate-and-towards-a-production-ready-modern-data-platform-7351d77a1eba40aab4394c24bef3a278?pvs=4)
_Keep Ithaka always in your mind.
Arriving there is what you're destined for.
But don't hurry the journey at all.
Better if it lasts for years_
Try it on WhatsApp with one of our partners, Keepi.ai, by typing /save {content} followed by /query {content}
### Current Focus
#### Level 4 - Dynamic Graph Memory Manager + DB + Rag Test Manager
Scope: Use Neo4j to map the user queries into a knowledge graph based on cognitive architecture
Blog post: Soon!
- Dynamic Memory Manager -> store the data in N hierarchical stores
- Dynamic Graph -> map the user queries into a knowledge graph
- Classification -> classify the user queries and choose relevant graph nodes
- Context manager -> generate context for the LLM to process, combining Semantic, Episodic, and Vector store data
- Postgres DB to store metadata
- Docker
- API
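The components listed above can be sketched end-to-end; everything in this sketch (store names, the toy classifier) is an illustrative placeholder, not the actual project API:

```python
# Illustrative sketch of the Level 4 flow; store names and the toy
# classifier are placeholders, not the actual project API.
SEMANTIC, EPISODIC, BUFFER = "semantic", "episodic", "buffer"

def classify_query(query: str) -> str:
    # Toy classifier: event-like wording routes to episodic memory,
    # everything else to semantic memory.
    event_words = {"yesterday", "last", "when", "happened"}
    return EPISODIC if event_words & set(query.lower().split()) else SEMANTIC

def build_context(query: str, stores: dict) -> str:
    # Context manager: combine the chosen memory store with the buffer,
    # producing the text handed to the LLM.
    memory_type = classify_query(query)
    parts = stores.get(memory_type, []) + stores.get(BUFFER, [])
    return "\n".join(parts)

stores = {
    SEMANTIC: ["Cognitive architectures model human memory."],
    BUFFER: ["Current task: answer the user's question."],
}
print(build_context("How does cognitive architecture work?", stores))
```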
#### Level 5 - Integration to keepi.ai and other apps
Scope: Use Neo4j to map user preferences into a graph structure consisting of semantic, episodic, and procedural memory.
Fetch and store information and files via the WhatsApp chatbot Keepi.ai.
Use the graph to answer user queries and store new information in the graph.
![Image](https://github.com/topoteretes/PromethAI-Memory/blob/main/level_4/User_graph.png)
### Installation
### Run the level 4
### Run cognee
Make sure you have Docker, Poetry, Python 3.11, and Postgres installed.
@@ -115,28 +103,13 @@ Copy the .env.example to .env and fill in the variables
```docker compose up ```
Run
And send API requests add-memory, user-query-to-graph, document-to-graph-db, user-query-processor to localhost:8000
``` python main.py ```
If you are running natively, change ENVIRONMENT to local in the .env file
If you are running in docker, change ENVIRONMENT to postgres in the .env file
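For reference, a hypothetical `.env` sketch; `ENVIRONMENT`, `HOST`, and `OPENAI_API_KEY` appear in this repo's compose file and code, but the full variable list should be taken from `.env.example`:

```shell
# Hypothetical .env sketch -- copy .env.example and fill in real values.
ENVIRONMENT=local        # "local" for native runs, "postgres" under docker compose
HOST=0.0.0.0
OPENAI_API_KEY=your-key-here
```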
Run
``` python main.py ```
Or run
``` docker compose up promethai-mem ```
And send API requests add-memory, user-query-to-graph, document-to-graph-db, user-query-processor to localhost:8000
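For example, a request to one of these endpoints could be built like this (the JSON field names are assumptions, not the actual API schema):

```python
import json
from urllib.request import Request

BASE = "http://localhost:8000"

def build_request(endpoint: str, payload: dict) -> Request:
    # Build a POST request for one of the endpoints listed above.
    # The payload field names used below are assumptions, not the real schema.
    return Request(
        f"{BASE}/{endpoint}",
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )

req = build_request("user-query-to-graph",
                    {"user_id": "user_test_1_1",
                     "query": "How does cognitive architecture work?"})
print(req.full_url)
```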


@@ -602,7 +602,7 @@ class Neo4jGraphDB(AbstractGraphDB):
return cypher_query
def construct_merge_query(self, user_id: str, relationship_type: str, memory_type: str,
def run_merge_query(self, user_id: str, memory_type: str,
similarity_threshold: float) -> str:
"""
Builds and runs Cypher queries to merge similar nodes in a Neo4j database based on a similarity threshold.
@@ -614,26 +614,50 @@ class Neo4jGraphDB(AbstractGraphDB):
Parameters:
user_id (str): The ID of the user whose related nodes are to be merged.
relationship_type (str): The type of relationship between 'Memory' nodes and the nodes to be merged.
memory_type (str): The memory type property of the nodes to be merged.
similarity_threshold (float): The threshold above which nodes will be considered similar enough to be merged.
Returns:
str: The last merge query string that was executed.
"""
if memory_type == 'SemanticMemory':
relationship_base = 'HAS_SEMANTIC_MEMORY'
relationship_type = 'HAS_KNOWLEDGE'
memory_label = 'semantic'
elif memory_type == 'EpisodicMemory':
relationship_base = 'HAS_EPISODIC_MEMORY'
relationship_type = 'HAS_EVENT'
memory_label = 'episodic'
elif memory_type == 'Buffer':
relationship_base = 'HAS_BUFFER_MEMORY'
relationship_type = 'CURRENTLY_HOLDING'
memory_label = 'buffer'
else:
raise ValueError(f"Unsupported memory_type: {memory_type}")
query= f"""MATCH (u:User {{userId: '{user_id}'}})-[:{relationship_base}]->(sm:{memory_type})
MATCH (sm)-[:{relationship_type}]->(n)
RETURN labels(n) AS NodeType, collect(n) AS Nodes
"""
node_results = self.query(query)
node_types = [record['NodeType'] for record in node_results]
for node in node_types:
query = f"""
MATCH (u:User {{userId: '{user_id}'}})-[:HAS_MEMORY]->(m:Memory)
MATCH (m)-[r:HAS_KNOWLEDGE]->(n1), (m)-[r2:HAS_KNOWLEDGE]->(n2)
MATCH (u:User {{userId: "{user_id}"}})-[:{relationship_base}]->(m:{memory_type})
MATCH (m)-[:{relationship_type}]->(n1:{node[0]} {{memory_type: "{memory_label}"}}),
(m)-[:{relationship_type}]->(n2:{node[0]} {{memory_type: "{memory_label}"}})
WHERE id(n1) < id(n2) AND
{relationship_type} = TYPE(r) AND
{relationship_type} = TYPE(r2) AND
n1.memory_type = {memory_type} AND
n2.memory_type = {memory_type} AND
apoc.text.levenshteinSimilarity(toLower(n1.description), toLower(n2.description)) > {str(similarity_threshold)}
apoc.text.levenshteinSimilarity(toLower(n1.description), toLower(n2.description)) > {similarity_threshold}
WITH n1, n2
LIMIT 1
CALL apoc.refactor.mergeNodes([n1, n2], {{mergeRels: true}}) YIELD node
RETURN node
"""
self.query(query)
self.close()
return query
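The merge condition in `run_merge_query` hinges on `apoc.text.levenshteinSimilarity`; to sanity-check a `similarity_threshold` value offline, here is a pure-Python sketch assuming APOC's usual definition, `1 - distance / max(len(a), len(b))`:

```python
def levenshtein_distance(a: str, b: str) -> int:
    # Classic dynamic-programming edit distance.
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, 1):
        curr = [i]
        for j, cb in enumerate(b, 1):
            curr.append(min(prev[j] + 1,                 # deletion
                            curr[j - 1] + 1,             # insertion
                            prev[j - 1] + (ca != cb)))   # substitution
        prev = curr
    return prev[-1]

def levenshtein_similarity(a: str, b: str) -> float:
    # Assumed to mirror apoc.text.levenshteinSimilarity: 1.0 for identical
    # strings, otherwise scaled by the longer string's length.
    if not a and not b:
        return 1.0
    return 1.0 - levenshtein_distance(a, b) / max(len(a), len(b))

print(levenshtein_similarity("cognitive architecture", "cognitive architectures"))
```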
def get_namespaces_by_document_category(self, user_id: str, category: str):
@@ -788,7 +812,7 @@ class Neo4jGraphDB(AbstractGraphDB):
from networkx_graph import NetworkXGraphDB
from .networkx_graph import NetworkXGraphDB
class GraphDBFactory:
def create_graph_db(self, db_type, **kwargs):
if db_type == 'neo4j':


@@ -17,18 +17,27 @@ OPENAI_API_KEY = config.openai_key
aclient = instructor.patch(OpenAI())
load_dotenv()
import logging
# Function to read query prompts from files
def read_query_prompt(filename):
try:
with open(filename, 'r') as file:
return file.read()
except FileNotFoundError:
logging.info(f"Error: File not found. Attempted to read: {filename}")
logging.info(f"Current working directory: {os.getcwd()}")
return None
except Exception as e:
logging.info(f"An error occurred: {e}")
return None
def generate_graph(input) -> KnowledgeGraph:
model = "gpt-4-1106-preview" # Define the model here
model = "gpt-4-1106-preview"
user_prompt = f"Use the given format to extract information from the following input: {input}."
system_prompt = read_query_prompt('prompts/generate_graph_prompt.txt')
system_prompt = read_query_prompt('cognitive_architecture/llm/prompts/generate_graph_prompt.txt')
out = aclient.chat.completions.create(
model=model,
@@ -50,7 +59,7 @@ def generate_graph(input) -> KnowledgeGraph:
async def generate_summary(input) -> MemorySummary:
out = aclient.chat.completions.create(
model="gpt-4-1106-preview",
model=config.model,
messages=[
{
"role": "user",
@@ -69,7 +78,7 @@ async def generate_summary(input) -> MemorySummary:
def user_query_to_edges_and_nodes(input: str) -> KnowledgeGraph:
system_prompt = read_query_prompt('prompts/generate_graph_prompt.txt')
system_prompt = read_query_prompt('cognitive_architecture/llm/prompts/generate_graph_prompt.txt')
return aclient.chat.completions.create(
model=config.model,
messages=[


@@ -21,7 +21,6 @@ services:
volumes:
- "./:/app"
- ./.data:/app/.data
environment:
- HOST=0.0.0.0
- ENVIRONMENT=local
@@ -39,8 +38,6 @@ services:
limits:
cpus: "4.0"
memory: 8GB
postgres:
image: postgres
container_name: postgres
@@ -53,27 +50,6 @@ services:
- cognee_backend
ports:
- "5432:5432"
# superset:
# platform: linux/amd64
# build:
# context: ./superset
# dockerfile: Dockerfile
# container_name: superset
# environment:
# - ADMIN_USERNAME=admin
# - ADMIN_EMAIL=vasilije@topoteretes.com
# - ADMIN_PASSWORD=admin
# - POSTGRES_USER=bla
# - POSTGRES_PASSWORD=bla
# - POSTGRES_DB=bubu
# networks:
# - promethai_mem_backend
# ports:
# - '8088:8088'
# depends_on:
# - postgres
networks:
cognee_backend:
name: cognee_backend

main.py

@@ -160,6 +160,7 @@ async def load_documents_to_vectorstore(session: AsyncSession, user_id: str, con
result = await memory.dynamic_method_call(dynamic_memory_class, 'add_memories',
observation=content, params=params, loader_settings=loader_settings)
await update_entity(session, Operation, job_id, "SUCCESS")
return 1
async def user_query_to_graph_db(session: AsyncSession, user_id: str, query_input: str):
@@ -181,9 +182,13 @@ async def user_query_to_graph_db(session: AsyncSession, user_id: str, query_inpu
),
)
detected_language = detect_language(query_input)
translated_query = translate_text(query_input, detected_language, "en")
neo4j_graph_db = Neo4jGraphDB(url=config.graph_database_url, username=config.graph_database_username, password=config.graph_database_password)
cypher_query = await neo4j_graph_db.generate_cypher_query_for_user_prompt_decomposition(user_id,query_input)
cypher_query = await neo4j_graph_db.generate_cypher_query_for_user_prompt_decomposition(user_id,translated_query)
result = neo4j_graph_db.query(cypher_query)
neo4j_graph_db.run_merge_query(user_id=user_id, memory_type="SemanticMemory", similarity_threshold=0.8)
neo4j_graph_db.run_merge_query(user_id=user_id, memory_type="EpisodicMemory", similarity_threshold=0.8)
neo4j_graph_db.close()
await update_entity(session, Operation, job_id, "SUCCESS")
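The refactored flow above (detect the language, translate the query, decompose it into Cypher, then merge similar nodes) can be sketched synchronously with stubbed collaborators; the stub names below are placeholders, not the project's real classes:

```python
def process_user_query(query: str, graph_db, detect_language, translate_text):
    # 1. Normalise the query to English before Cypher generation.
    lang = detect_language(query)
    text = query if lang == "en" else translate_text(query, lang, "en")
    # 2. Decompose the (translated) query into a Cypher write and run it.
    cypher = graph_db.generate_query(text)
    graph_db.query(cypher)
    # 3. Deduplicate similar nodes in both memory types.
    for memory_type in ("SemanticMemory", "EpisodicMemory"):
        graph_db.run_merge_query(memory_type=memory_type, similarity_threshold=0.8)
    graph_db.close()

class FakeGraphDB:
    # Records calls instead of talking to Neo4j, so the flow runs offline.
    def __init__(self):
        self.calls = []
    def generate_query(self, text):
        self.calls.append(("generate", text))
        return "MATCH (n) RETURN n"
    def query(self, cypher):
        self.calls.append(("query", cypher))
    def run_merge_query(self, memory_type, similarity_threshold):
        self.calls.append(("merge", memory_type, similarity_threshold))
    def close(self):
        self.calls.append(("close",))

db = FakeGraphDB()
process_user_query(
    "Wie funktioniert kognitive Architektur?", db,
    detect_language=lambda q: "de",
    translate_text=lambda q, src, dst: "How does cognitive architecture work?",
)
print([c[0] for c in db.calls])
```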
@@ -628,6 +633,8 @@ async def relevance_feedback(query: str, input_type: str):
break # Exit the loop if a result of type bool is obtained
return result
async def main():
user_id = "user_test_1_1"
@@ -640,8 +647,8 @@ async def main():
class GraphQLQuery(BaseModel):
query: str
gg = await user_query_to_graph_db(session, user_id, "How does cognitive architecture work?")
print(gg)
# gg = await user_query_to_graph_db(session, user_id, "How does cognitive architecture work?")
# print(gg)
# def cypher_statement_correcting( input: str) -> str:
# out = aclient.chat.completions.create(
@@ -709,8 +716,12 @@ async def main():
# await create_public_memory(user_id=user_id, labels=['sr'], topic="PublicMemory")
# await add_documents_to_graph_db(session, user_id)
#
# neo4j_graph_db = Neo4jGraphDB(url=config.graph_database_url, username=config.graph_database_username,
# password=config.graph_database_password)
neo4j_graph_db = Neo4jGraphDB(url=config.graph_database_url, username=config.graph_database_username,
password=config.graph_database_password)
out = neo4j_graph_db.run_merge_query(user_id = user_id, memory_type="SemanticMemory", similarity_threshold=0.5)
bb = neo4j_graph_db.query(out)
print(bb)
# await attach_user_to_memory(user_id=user_id, labels=['sr'], topic="PublicMemory")