diff --git a/.env.template b/.env.template
index 5dbb7eab3..e220517bb 100644
--- a/.env.template
+++ b/.env.template
@@ -7,7 +7,6 @@ POSTGRES_PASSWORD = bla
 POSTGRES_DB = bubu
 POSTGRES_HOST = localhost
 POSTGRES_HOST_DOCKER = postgres
-SEGMENT_KEY = Etl4WJwzOkeDPAjaOXOMgyU16hO7mV7B
 COG_ARCH_DIR = cognitive_architecture
 GRAPH_DB_URL =
 GRAPH_DB_PW =
diff --git a/cognitive_architecture/database/graph_database/graph.py b/cognitive_architecture/database/graph_database/graph.py
index 98f16bb28..2efd613af 100644
--- a/cognitive_architecture/database/graph_database/graph.py
+++ b/cognitive_architecture/database/graph_database/graph.py
@@ -602,6 +602,44 @@ class Neo4jGraphDB(AbstractGraphDB):
 
         return cypher_query
 
+    def construct_merge_query(self, user_id: str, relationship_type: str, memory_type: str,
+                              similarity_threshold: float) -> str:
+        """
+        Constructs a Cypher query to merge nodes in a Neo4j database based on a similarity threshold.
+
+        This method creates a Cypher query that finds pairs of nodes with a specified memory type
+        connected via a specified relationship type to the same 'Memory' node. If the Levenshtein
+        similarity between the 'description' properties of these nodes is greater than the
+        specified threshold, the nodes are merged using the apoc.refactor.mergeNodes procedure.
+
+        The string arguments are embedded in the query as single-quoted Cypher string literals
+        without escaping, so they must come from trusted code, never from raw user input.
+
+        Parameters:
+            user_id (str): The ID of the user whose related nodes are to be merged.
+            relationship_type (str): The type of relationship between 'Memory' nodes and the nodes to be merged.
+            memory_type (str): The memory type property of the nodes to be merged.
+            similarity_threshold (float): The threshold above which nodes will be considered similar enough to be merged.
+
+        Returns:
+            str: A Cypher query string that can be executed in a Neo4j session.
+        """
+        # Quote the interpolated values: a bare token would be parsed by Cypher as a variable name.
+        query = f"""
+        MATCH (u:User {{userId: '{user_id}'}})-[:HAS_MEMORY]->(m:Memory)
+        MATCH (m)-[r:HAS_KNOWLEDGE]->(n1), (m)-[r2:HAS_KNOWLEDGE]->(n2)
+        WHERE id(n1) < id(n2) AND
+              '{relationship_type}' = TYPE(r) AND
+              '{relationship_type}' = TYPE(r2) AND
+              n1.memory_type = '{memory_type}' AND
+              n2.memory_type = '{memory_type}' AND
+              apoc.text.levenshteinSimilarity(toLower(n1.description), toLower(n2.description)) > {str(similarity_threshold)}
+        WITH n1, n2
+        CALL apoc.refactor.mergeNodes([n1, n2], {{mergeRels: true}}) YIELD node
+        RETURN node
+        """
+        return query
+
     def get_namespaces_by_document_category(self, user_id: str, category: str):
         """
         Retrieve a list of Vectordb namespaces for documents of a specified category associated with a given user.
diff --git a/docker-compose.yml b/docker-compose.yml
index 8e10b99eb..8ad851f78 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -11,11 +11,11 @@ services:
       - NEO4J_AUTH=neo4j/pleaseletmein
       - NEO4J_PLUGINS=["apoc"]
     networks:
-      - promethai_mem_backend
+      - cognee_backend
 
-  promethai_mem:
+  cognee:
     networks:
-      - promethai_mem_backend
+      - cognee_backend
     build:
       context: ./
     volumes:
@@ -24,6 +24,7 @@ services:
 
     environment:
       - HOST=0.0.0.0
+      - ENVIRONMENT=local
     profiles: ["exclude-from-up"]
     ports:
       - 8000:8000
@@ -49,7 +50,7 @@ services:
       - POSTGRES_PASSWORD=bla
       - POSTGRES_DB=bubu
     networks:
-      - promethai_mem_backend
+      - cognee_backend
     ports:
       - "5432:5432"
 
@@ -74,6 +75,6 @@ services:
 #      - postgres
 
 networks:
-  promethai_mem_backend:
-    name: promethai_mem_backend
+  cognee_backend:
+    name: cognee_backend
 
diff --git a/entrypoint.sh b/entrypoint.sh
index 300813101..7fd150b85 100755
--- a/entrypoint.sh
+++ b/entrypoint.sh
@@ -2,10 +2,14 @@ export ENVIRONMENT
 
 # Run Python scripts with error handling
-echo "Running fetch_secret.py"
-python cognitive_architecture/fetch_secret.py
-if [ $? -ne 0 ]; then
-    echo "Error: fetch_secret.py failed"
-    exit 1
+if [ "$ENVIRONMENT" != "local" ]; then
+    echo "Running fetch_secret.py"
+    python cognitive_architecture/fetch_secret.py
+    if [ $? -ne 0 ]; then
+        echo "Error: fetch_secret.py failed"
+        exit 1
+    fi
+else
+    echo "ENVIRONMENT ($ENVIRONMENT) is active, skipping fetch_secret.py"
 fi
 
 echo "Running create_database.py"
diff --git a/iterations/level_3/vectordb/basevectordb.py b/iterations/level_3/vectordb/basevectordb.py
index 81f4f7618..4fde7675e 100644
--- a/iterations/level_3/vectordb/basevectordb.py
+++ b/iterations/level_3/vectordb/basevectordb.py
@@ -276,7 +276,7 @@ class BaseMemory:
         n_of_observations: Optional[int] = 2,
     ):
         logging.info(namespace)
-        logging.info("The search type is %", search_type)
+        logging.info("The search type is %s", str(search_type))
         logging.info(params)
         logging.info(observation)
 
diff --git a/iterations/level_3/vectordb/vectordb.py b/iterations/level_3/vectordb/vectordb.py
index 0cc66066b..1432564bc 100644
--- a/iterations/level_3/vectordb/vectordb.py
+++ b/iterations/level_3/vectordb/vectordb.py
@@ -185,7 +185,7 @@ class WeaviateVectorDB(VectorDB):
         client = self.init_weaviate(namespace =self.namespace)
         if search_type is None:
             search_type = 'hybrid'
-        logging.info("The search type is 2 %", search_type)
+        logging.info("The search type is %s", str(search_type))
 
         if not namespace:
             namespace = self.namespace
diff --git a/iterations/level_3/vectorstore_manager.py b/iterations/level_3/vectorstore_manager.py
index 217318b7e..67cd7a210 100644
--- a/iterations/level_3/vectorstore_manager.py
+++ b/iterations/level_3/vectorstore_manager.py
@@ -340,7 +340,7 @@ class Memory:
         )
 
     async def add_dynamic_memory_class(self, class_name: str, namespace: str):
-        logging.info("Here is the memory id %s", self.memory_id[0])
+        logging.info("Here is the memory id %s", str(self.memory_id[0]))
         new_memory_class = DynamicBaseMemory(
             class_name,
             self.user_id,
diff --git a/pyproject.toml b/pyproject.toml
index 84eeca667..c1ba66029 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,12 +1,12 @@
 [tool.poetry]
-name = "PromethAI_memory"
+name = "cognee"
 version = "0.1.0"
-description = "PromethAI cognitive architecture is a library for enriching LLM context by managing AI cognitive architecture"
+description = "Cognee cognitive architecture is a library for enriching LLM context with a semantic layer"
 authors = ["Vasilije Markovic"]
 readme = "README.md"
 license = "Apache-2.0"
-homepage = "https://github.com/topoteretes/PromethAI-Memory"
-repository = "https://www.prometh.ai"
+homepage = "https://www.cognee.ai"
+repository = "https://github.com/topoteretes/cognee"
 classifiers = [
     "Development Status :: 4 - Beta",
     "Intended Audience :: Developers",
@@ -50,7 +50,6 @@ dash = "^2.14.0"
 unstructured = {extras = ["pdf"], version = "^0.10.23"}
 sentence-transformers = "2.2.2"
 torch = "2.0.*"
-segment-analytics-python = "^2.2.3"
 pdf2image = "^1.16.3"
 instructor = "^0.3.4"
 networkx = "^3.2.1"