From 5d415dc1a08024c54e9f68150f5b01fd22a42097 Mon Sep 17 00:00:00 2001
From: Matea Pesic <80577904+matea16@users.noreply.github.com>
Date: Sat, 10 May 2025 13:55:37 +0200
Subject: [PATCH 1/2] feat: Add Memgraph integration (#751)

## Description

This PR adds support for the Memgraph graph database following the
[graph database integration guide](https://docs.cognee.ai/contributing/adding-providers/graph-db/graph-database-integration):

- Implemented `MemgraphAdapter` for interfacing with Memgraph.
- Updated `get_graph_engine.py` to return `MemgraphAdapter` when appropriate.
- Added a test script: `test_memgraph.py`.
- Created a dedicated test workflow: `.github/workflows/test_memgraph.yml`.

## DCO Affirmation

I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.

---------

Co-authored-by: Vasilije <8619304+Vasilije1990@users.noreply.github.com>
Co-authored-by: Boris
---
 .github/workflows/test_memgraph.yml           |  57 ++
 .../databases/graph/get_graph_engine.py       |  12 +
 .../databases/graph/memgraph/__init__.py      |   0
 .../graph/memgraph/memgraph_adapter.py        | 687 ++++++++++++++++++
 cognee/tests/test_memgraph.py                 | 107 +++
 5 files changed, 863 insertions(+)
 create mode 100644 .github/workflows/test_memgraph.yml
 create mode 100644 cognee/infrastructure/databases/graph/memgraph/__init__.py
 create mode 100644 cognee/infrastructure/databases/graph/memgraph/memgraph_adapter.py
 create mode 100644 cognee/tests/test_memgraph.py

diff --git a/.github/workflows/test_memgraph.yml b/.github/workflows/test_memgraph.yml
new file mode 100644
index 000000000..e160382f4
--- /dev/null
+++ b/.github/workflows/test_memgraph.yml
@@ -0,0 +1,57 @@
+name: test | memgraph
+
+on:
+  workflow_dispatch:
+  pull_request:
+    types: [labeled, synchronize]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+env:
+  RUNTIME__LOG_LEVEL: ERROR
+
+jobs:
+  run_memgraph_integration_test:
+    name: test
+    runs-on: ubuntu-22.04
+
+    defaults:
+      run:
+        shell: bash
+
+    steps:
+      - name: Check out
+        uses: actions/checkout@master
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10.x'
+
+      - name: Install Poetry
+        uses: snok/install-poetry@v1.4.1
+        with:
+          virtualenvs-create: true
+          virtualenvs-in-project: true
+          installer-parallel: true
+
+      - name: Install dependencies
+        run: poetry install -E memgraph --no-interaction
+
+      - name: Run default Memgraph
+        env:
+          ENV: 'dev'
+          LLM_MODEL: ${{ secrets.LLM_MODEL }}
+          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
+          GRAPH_DATABASE_URL: ${{ secrets.MEMGRAPH_API_URL }}
+          GRAPH_DATABASE_PASSWORD: ${{ secrets.MEMGRAPH_API_KEY }}
+          GRAPH_DATABASE_USERNAME: " "
+        run: poetry run python ./cognee/tests/test_memgraph.py
diff --git a/cognee/infrastructure/databases/graph/get_graph_engine.py b/cognee/infrastructure/databases/graph/get_graph_engine.py
index 67dc0d7c8..dcd228443 100644
--- a/cognee/infrastructure/databases/graph/get_graph_engine.py
+++ b/cognee/infrastructure/databases/graph/get_graph_engine.py
@@ -67,6 +67,18 @@ def create_graph_engine(
 
         return KuzuAdapter(db_path=graph_file_path)
 
"memgraph": + if not (graph_database_url and graph_database_username and graph_database_password): + raise EnvironmentError("Missing required Memgraph credentials.") + + from .memgraph.memgraph_adapter import MemgraphAdapter + + return MemgraphAdapter( + graph_database_url=graph_database_url, + graph_database_username=graph_database_username, + graph_database_password=graph_database_password, + ) + from .networkx.adapter import NetworkXAdapter graph_client = NetworkXAdapter(filename=graph_file_path) diff --git a/cognee/infrastructure/databases/graph/memgraph/__init__.py b/cognee/infrastructure/databases/graph/memgraph/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/cognee/infrastructure/databases/graph/memgraph/memgraph_adapter.py b/cognee/infrastructure/databases/graph/memgraph/memgraph_adapter.py new file mode 100644 index 000000000..f3a974359 --- /dev/null +++ b/cognee/infrastructure/databases/graph/memgraph/memgraph_adapter.py @@ -0,0 +1,687 @@ +"""Memgraph Adapter for Graph Database""" + +import json +from cognee.shared.logging_utils import get_logger, ERROR +import asyncio +from textwrap import dedent +from typing import Optional, Any, List, Dict +from contextlib import asynccontextmanager +from uuid import UUID +from neo4j import AsyncSession +from neo4j import AsyncGraphDatabase +from neo4j.exceptions import Neo4jError +from cognee.infrastructure.engine import DataPoint +from cognee.infrastructure.databases.graph.graph_db_interface import GraphDBInterface +from cognee.modules.storage.utils import JSONEncoder + +logger = get_logger("MemgraphAdapter", level=ERROR) + +class MemgraphAdapter(GraphDBInterface): + def __init__( + self, + graph_database_url: str, + graph_database_username: str, + graph_database_password: str, + driver: Optional[Any] = None, + ): + self.driver = driver or AsyncGraphDatabase.driver( + graph_database_url, + auth=(graph_database_username, graph_database_password), + max_connection_lifetime=120, + ) + + @asynccontextmanager + async def get_session(self) -> AsyncSession: + async with self.driver.session() as session: + yield session + + async def query( + self, + query: str, + params: Optional[Dict[str, Any]] = None, + ) -> List[Dict[str, Any]]: + try: + async with self.get_session() as session: + result = await session.run(query, params) + data = await result.data() + return data + except Neo4jError as error: + logger.error("Memgraph query error: %s", error, exc_info=True) + raise error + + async def has_node(self, node_id: str) -> bool: + results = await self.query( + """ + MATCH (n) + WHERE n.id = $node_id + RETURN COUNT(n) > 0 AS node_exists + """, + {"node_id": node_id}, + ) + return results[0]["node_exists"] if len(results) > 0 else False + + async def add_node(self, node: DataPoint): + serialized_properties = self.serialize_properties(node.model_dump()) + + query = """ + MERGE (node {id: $node_id}) + ON CREATE SET node:$node_label, node += $properties, node.updated_at = timestamp() + ON MATCH SET node:$node_label, node += $properties, node.updated_at = timestamp() + RETURN ID(node) AS internal_id,node.id AS nodeId + """ + + params = { + "node_id": str(node.id), + "node_label": type(node).__name__, + "properties": serialized_properties, + } + return await self.query(query, params) + + async def add_nodes(self, nodes: list[DataPoint]) -> None: + query = """ + UNWIND $nodes AS node + MERGE (n {id: node.node_id}) + ON CREATE SET n:node.label, n += node.properties, n.updated_at = timestamp() + ON MATCH SET n:node.label, n += 
node.properties, n.updated_at = timestamp() + RETURN ID(n) AS internal_id, n.id AS nodeId + """ + + nodes = [ + { + "node_id": str(node.id), + "label": type(node).__name__, + "properties": self.serialize_properties(node.model_dump()), + } + for node in nodes + ] + + results = await self.query(query, dict(nodes=nodes)) + return results + + async def extract_node(self, node_id: str): + results = await self.extract_nodes([node_id]) + + return results[0] if len(results) > 0 else None + + async def extract_nodes(self, node_ids: List[str]): + query = """ + UNWIND $node_ids AS id + MATCH (node {id: id}) + RETURN node""" + + params = {"node_ids": node_ids} + + results = await self.query(query, params) + + return [result["node"] for result in results] + + async def delete_node(self, node_id: str): + sanitized_id = node_id.replace(":", "_") + + query = "MATCH (node: {{id: $node_id}}) DETACH DELETE node" + params = {"node_id": sanitized_id} + + return await self.query(query, params) + + async def delete_nodes(self, node_ids: list[str]) -> None: + query = """ + UNWIND $node_ids AS id + MATCH (node {id: id}) + DETACH DELETE node""" + + params = {"node_ids": node_ids} + + return await self.query(query, params) + + async def has_edge(self, from_node: UUID, to_node: UUID, edge_label: str) -> bool: + query = """ + MATCH (from_node)-[relationship]->(to_node) + WHERE from_node.id = $from_node_id AND to_node.id = $to_node_id AND type(relationship) = $edge_label + RETURN COUNT(relationship) > 0 AS edge_exists + """ + + params = { + "from_node_id": str(from_node), + "to_node_id": str(to_node), + "edge_label": edge_label, + } + + records = await self.query(query, params) + return records[0]["edge_exists"] if records else False + + async def has_edges(self, edges): + query = """ + UNWIND $edges AS edge + MATCH (a)-[r]->(b) + WHERE id(a) = edge.from_node AND id(b) = edge.to_node AND type(r) = edge.relationship_name + RETURN edge.from_node AS from_node, edge.to_node AS to_node, edge.relationship_name AS relationship_name, count(r) > 0 AS edge_exists + """ + + try: + params = { + "edges": [ + { + "from_node": str(edge[0]), + "to_node": str(edge[1]), + "relationship_name": edge[2], + } + for edge in edges + ], + } + + results = await self.query(query, params) + return [result["edge_exists"] for result in results] + except Neo4jError as error: + logger.error("Memgraph query error: %s", error, exc_info=True) + raise error + + async def add_edge( + self, + from_node: UUID, + to_node: UUID, + relationship_name: str, + edge_properties: Optional[Dict[str, Any]] = None, + ): + serialized_properties = self.serialize_properties(edge_properties or {}) + + query = dedent( + f"""\ + MATCH (from_node {{id: $from_node}}), + (to_node {{id: $to_node}}) + MERGE (from_node)-[r:{relationship_name}]->(to_node) + ON CREATE SET r += $properties, r.updated_at = timestamp() + ON MATCH SET r += $properties, r.updated_at = timestamp() + RETURN r + """ + ) + + params = { + "from_node": str(from_node), + "to_node": str(to_node), + "relationship_name": relationship_name, + "properties": serialized_properties, + } + + return await self.query(query, params) + + async def add_edges(self, edges: list[tuple[str, str, str, dict[str, Any]]]) -> None: + query = """ + UNWIND $edges AS edge + MATCH (from_node {id: edge.from_node}) + MATCH (to_node {id: edge.to_node}) + CALL merge.relationship( + from_node, + edge.relationship_name, + { + source_node_id: edge.from_node, + target_node_id: edge.to_node + }, + edge.properties, + to_node, + {} + ) YIELD rel + 
RETURN rel""" + + edges = [ + { + "from_node": str(edge[0]), + "to_node": str(edge[1]), + "relationship_name": edge[2], + "properties": { + **(edge[3] if edge[3] else {}), + "source_node_id": str(edge[0]), + "target_node_id": str(edge[1]), + }, + } + for edge in edges + ] + + try: + results = await self.query(query, dict(edges=edges)) + return results + except Neo4jError as error: + logger.error("Memgraph query error: %s", error, exc_info=True) + raise error + + async def get_edges(self, node_id: str): + query = """ + MATCH (n {id: $node_id})-[r]-(m) + RETURN n, r, m + """ + + results = await self.query(query, dict(node_id=node_id)) + + return [ + (result["n"]["id"], result["m"]["id"], {"relationship_name": result["r"][1]}) + for result in results + ] + + async def get_disconnected_nodes(self) -> list[str]: + query = """ + // Step 1: Collect all nodes + MATCH (n) + WITH COLLECT(n) AS nodes + + // Step 2: Find all connected components + WITH nodes + CALL { + WITH nodes + UNWIND nodes AS startNode + MATCH path = (startNode)-[*]-(connectedNode) + WITH startNode, COLLECT(DISTINCT connectedNode) AS component + RETURN component + } + + // Step 3: Aggregate components + WITH COLLECT(component) AS components + + // Step 4: Identify the largest connected component + UNWIND components AS component + WITH component + ORDER BY SIZE(component) DESC + LIMIT 1 + WITH component AS largestComponent + + // Step 5: Find nodes not in the largest connected component + MATCH (n) + WHERE NOT n IN largestComponent + RETURN COLLECT(ID(n)) AS ids + """ + + results = await self.query(query) + return results[0]["ids"] if len(results) > 0 else [] + + async def get_predecessors(self, node_id: str, edge_label: str = None) -> list[str]: + if edge_label is not None: + query = """ + MATCH (node)<-[r]-(predecessor) + WHERE node.id = $node_id AND type(r) = $edge_label + RETURN predecessor + """ + + results = await self.query( + query, + dict( + node_id=node_id, + edge_label=edge_label, + ), + ) + + return [result["predecessor"] for result in results] + else: + query = """ + MATCH (node)<-[r]-(predecessor) + WHERE node.id = $node_id + RETURN predecessor + """ + + results = await self.query( + query, + dict( + node_id=node_id, + ), + ) + + return [result["predecessor"] for result in results] + + async def get_successors(self, node_id: str, edge_label: str = None) -> list[str]: + if edge_label is not None: + query = """ + MATCH (node)-[r]->(successor) + WHERE node.id = $node_id AND type(r) = $edge_label + RETURN successor + """ + + results = await self.query( + query, + dict( + node_id=node_id, + edge_label=edge_label, + ), + ) + + return [result["successor"] for result in results] + else: + query = """ + MATCH (node)-[r]->(successor) + WHERE node.id = $node_id + RETURN successor + """ + + results = await self.query( + query, + dict( + node_id=node_id, + ), + ) + + return [result["successor"] for result in results] + + async def get_neighbours(self, node_id: str) -> List[Dict[str, Any]]: + predecessors, successors = await asyncio.gather( + self.get_predecessors(node_id), self.get_successors(node_id) + ) + + return predecessors + successors + + async def get_connections(self, node_id: UUID) -> list: + predecessors_query = """ + MATCH (node)<-[relation]-(neighbour) + WHERE node.id = $node_id + RETURN neighbour, relation, node + """ + successors_query = """ + MATCH (node)-[relation]->(neighbour) + WHERE node.id = $node_id + RETURN node, relation, neighbour + """ + + predecessors, successors = await asyncio.gather( + 
self.query(predecessors_query, dict(node_id=str(node_id))), + self.query(successors_query, dict(node_id=str(node_id))), + ) + + connections = [] + + for neighbour in predecessors: + neighbour = neighbour["relation"] + connections.append((neighbour[0], {"relationship_name": neighbour[1]}, neighbour[2])) + + for neighbour in successors: + neighbour = neighbour["relation"] + connections.append((neighbour[0], {"relationship_name": neighbour[1]}, neighbour[2])) + + return connections + + async def remove_connection_to_predecessors_of( + self, node_ids: list[str], edge_label: str + ) -> None: + query = f""" + UNWIND $node_ids AS nid + MATCH (node {id: nid})-[r]->(predecessor) + WHERE type(r) = $edge_label + DELETE r; + """ + + params = {"node_ids": node_ids, "edge_label": edge_label} + + return await self.query(query, params) + + async def remove_connection_to_successors_of( + self, node_ids: list[str], edge_label: str + ) -> None: + query = f""" + UNWIND $node_ids AS id + MATCH (node:`{id}`)<-[r:{edge_label}]-(successor) + DELETE r; + """ + + params = {"node_ids": node_ids} + + return await self.query(query, params) + + async def delete_graph(self): + query = """MATCH (node) + DETACH DELETE node;""" + + return await self.query(query) + + def serialize_properties(self, properties=dict()): + serialized_properties = {} + + for property_key, property_value in properties.items(): + if isinstance(property_value, UUID): + serialized_properties[property_key] = str(property_value) + continue + + if isinstance(property_value, dict): + serialized_properties[property_key] = json.dumps(property_value, cls=JSONEncoder) + continue + + serialized_properties[property_key] = property_value + + return serialized_properties + + async def get_model_independent_graph_data(self): + query_nodes = "MATCH (n) RETURN collect(n) AS nodes" + nodes = await self.query(query_nodes) + + query_edges = "MATCH (n)-[r]->(m) RETURN collect([n, r, m]) AS elements" + edges = await self.query(query_edges) + + return (nodes, edges) + + async def get_graph_data(self): + query = "MATCH (n) RETURN ID(n) AS id, labels(n) AS labels, properties(n) AS properties" + + result = await self.query(query) + + nodes = [ + ( + record["id"], + record["properties"], + ) + for record in result + ] + + query = """ + MATCH (n)-[r]->(m) + RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties + """ + result = await self.query(query) + edges = [ + ( + record["properties"]["source_node_id"], + record["properties"]["target_node_id"], + record["type"], + record["properties"], + ) + for record in result + ] + + return (nodes, edges) + + async def get_filtered_graph_data(self, attribute_filters): + """ + Fetches nodes and relationships filtered by specified attribute values. + + Args: + attribute_filters (list of dict): A list of dictionaries where keys are attributes and values are lists of values to filter on. + Example: [{"community": ["1", "2"]}] + + Returns: + tuple: A tuple containing two lists: nodes and edges. 
+ """ + where_clauses = [] + for attribute, values in attribute_filters[0].items(): + values_str = ", ".join( + f"'{value}'" if isinstance(value, str) else str(value) for value in values + ) + where_clauses.append(f"n.{attribute} IN [{values_str}]") + + where_clause = " AND ".join(where_clauses) + + query_nodes = f""" + MATCH (n) + WHERE {where_clause} + RETURN ID(n) AS id, labels(n) AS labels, properties(n) AS properties + """ + result_nodes = await self.query(query_nodes) + + nodes = [ + ( + record["id"], + record["properties"], + ) + for record in result_nodes + ] + + query_edges = f""" + MATCH (n)-[r]->(m) + WHERE {where_clause} AND {where_clause.replace("n.", "m.")} + RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties + """ + result_edges = await self.query(query_edges) + + edges = [ + ( + record["source"], + record["target"], + record["type"], + record["properties"], + ) + for record in result_edges + ] + + return (nodes, edges) + + async def get_node_labels_string(self): + node_labels_query = f""" + MATCH (n) + WITH DISTINCT labels(n) AS labelList + UNWIND labelList AS label + RETURN collect(DISTINCT label) AS labels; + """ + node_labels_result = await self.query(node_labels_query) + node_labels = node_labels_result[0]["labels"] if node_labels_result else [] + + if not node_labels: + raise ValueError("No node labels found in the database") + + node_labels_str = "[" + ", ".join(f"'{label}'" for label in node_labels) + "]" + return node_labels_str + + async def get_relationship_labels_string(self): + relationship_types_query = "MATCH ()-[r]->() RETURN collect(DISTINCT type(r)) AS relationships;" + relationship_types_result = await self.query(relationship_types_query) + relationship_types = ( + relationship_types_result[0]["relationships"] if relationship_types_result else [] + ) + + if not relationship_types: + raise ValueError("No relationship types found in the database.") + + relationship_types_undirected_str = ( + "{" + + ", ".join(f"{rel}" + ": {orientation: 'UNDIRECTED'}" for rel in relationship_types) + + "}" + ) + return relationship_types_undirected_str + + async def get_graph_metrics(self, include_optional=False): + """For the definition of these metrics, please refer to + https://docs.cognee.ai/core_concepts/graph_generation/descriptive_metrics""" + + try: + # Basic metrics + node_count = await self.query("MATCH (n) RETURN count(n)") + edge_count = await self.query("MATCH ()-[r]->() RETURN count(r)") + num_nodes = node_count[0][0] if node_count else 0 + num_edges = edge_count[0][0] if edge_count else 0 + + # Calculate mandatory metrics + mandatory_metrics = { + "num_nodes": num_nodes, + "num_edges": num_edges, + "mean_degree": (2 * num_edges) / num_nodes if num_nodes > 0 else 0, + "edge_density": (num_edges) / (num_nodes * (num_nodes - 1)) if num_nodes > 1 else 0, + } + + # Calculate connected components + components_query = """ + MATCH (n:Node) + WITH n.id AS node_id + MATCH path = (n)-[:EDGE*0..]-() + WITH COLLECT(DISTINCT node_id) AS component + RETURN COLLECT(component) AS components + """ + components_result = await self.query(components_query) + component_sizes = ( + [len(comp) for comp in components_result[0][0]] if components_result else [] + ) + + mandatory_metrics.update( + { + "num_connected_components": len(component_sizes), + "sizes_of_connected_components": component_sizes, + } + ) + + if include_optional: + # Self-loops + self_loops_query = """ + MATCH (n:Node)-[r:EDGE]->(n) + RETURN COUNT(r) + """ + self_loops = await 
self.query(self_loops_query) + num_selfloops = self_loops[0][0] if self_loops else 0 + + # Shortest paths (simplified for Kuzu) + paths_query = """ + MATCH (n:Node), (m:Node) + WHERE n.id < m.id + MATCH path = (n)-[:EDGE*]-(m) + RETURN MIN(LENGTH(path)) AS length + """ + paths = await self.query(paths_query) + path_lengths = [p[0] for p in paths if p[0] is not None] + + # Local clustering coefficient + clustering_query = """ + /// Step 1: Get each node with its neighbors and degree + MATCH (n:Node)-[:EDGE]-(neighbor) + WITH n, COLLECT(DISTINCT neighbor) AS neighbors, COUNT(DISTINCT neighbor) AS degree + + // Step 2: Pair up neighbors and check if they are connected + UNWIND neighbors AS n1 + UNWIND neighbors AS n2 + WITH n, degree, n1, n2 + WHERE id(n1) < id(n2) // avoid duplicate pairs + + // Step 3: Use OPTIONAL MATCH to see if n1 and n2 are connected + OPTIONAL MATCH (n1)-[:EDGE]-(n2) + WITH n, degree, COUNT(n2) AS triangle_count + + // Step 4: Compute local clustering coefficient + WITH n, degree, + CASE WHEN degree <= 1 THEN 0.0 + ELSE (1.0 * triangle_count) / (degree * (degree - 1) / 2.0) + END AS local_cc + + // Step 5: Compute average + RETURN AVG(local_cc) AS avg_clustering_coefficient + """ + clustering = await self.query(clustering_query) + + optional_metrics = { + "num_selfloops": num_selfloops, + "diameter": max(path_lengths) if path_lengths else -1, + "avg_shortest_path_length": sum(path_lengths) / len(path_lengths) + if path_lengths + else -1, + "avg_clustering": clustering[0][0] if clustering and clustering[0][0] else -1, + } + else: + optional_metrics = { + "num_selfloops": -1, + "diameter": -1, + "avg_shortest_path_length": -1, + "avg_clustering": -1, + } + + return {**mandatory_metrics, **optional_metrics} + + except Exception as e: + logger.error(f"Failed to get graph metrics: {e}") + return { + "num_nodes": 0, + "num_edges": 0, + "mean_degree": 0, + "edge_density": 0, + "num_connected_components": 0, + "sizes_of_connected_components": [], + "num_selfloops": -1, + "diameter": -1, + "avg_shortest_path_length": -1, + "avg_clustering": -1, + } \ No newline at end of file diff --git a/cognee/tests/test_memgraph.py b/cognee/tests/test_memgraph.py new file mode 100644 index 000000000..f3363d2f1 --- /dev/null +++ b/cognee/tests/test_memgraph.py @@ -0,0 +1,107 @@ +import os + +import pathlib +import cognee +from cognee.modules.search.operations import get_history +from cognee.modules.users.methods import get_default_user +from cognee.shared.logging_utils import get_logger +from cognee.modules.search.types import SearchType + + +logger = get_logger() + + +async def main(): + cognee.config.set_graph_database_provider("memgraph") + data_directory_path = str( + pathlib.Path( + os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_memgraph") + ).resolve() + ) + cognee.config.data_root_directory(data_directory_path) + cognee_directory_path = str( + pathlib.Path( + os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_memgraph") + ).resolve() + ) + cognee.config.system_root_directory(cognee_directory_path) + + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + + dataset_name = "cs_explanations" + + explanation_file_path = os.path.join( + pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt" + ) + await cognee.add([explanation_file_path], dataset_name) + + text = """A quantum computer is a computer that takes advantage of quantum mechanical phenomena. 
+ At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the preparation and manipulation of quantum states. + Classical physics cannot explain the operation of these quantum devices, and a scalable quantum computer could perform some calculations exponentially faster (with respect to input size scaling) than any modern "classical" computer. In particular, a large-scale quantum computer could break widely used encryption schemes and aid physicists in performing physical simulations; however, the current state of the technology is largely experimental and impractical, with several obstacles to useful applications. Moreover, scalable quantum computers do not hold promise for many practical tasks, and for many important tasks quantum speedups are proven impossible. + The basic unit of information in quantum computing is the qubit, similar to the bit in traditional digital electronics. Unlike a classical bit, a qubit can exist in a superposition of its two "basis" states. When measuring a qubit, the result is a probabilistic output of a classical bit, therefore making quantum computers nondeterministic in general. If a quantum computer manipulates the qubit in a particular way, wave interference effects can amplify the desired measurement results. The design of quantum algorithms involves creating procedures that allow a quantum computer to perform calculations efficiently and quickly. + Physically engineering high-quality qubits has proven challenging. If a physical qubit is not sufficiently isolated from its environment, it suffers from quantum decoherence, introducing noise into calculations. Paradoxically, perfectly isolating qubits is also undesirable because quantum computations typically need to initialize qubits, perform controlled qubit interactions, and measure the resulting quantum states. Each of those operations introduces errors and suffers from noise, and such inaccuracies accumulate. + In principle, a non-quantum (classical) computer can solve the same computational problems as a quantum computer, given enough time. Quantum advantage comes in the form of time complexity rather than computability, and quantum complexity theory shows that some quantum algorithms for carefully selected tasks require exponentially fewer computational steps than the best known non-quantum algorithms. Such tasks can in theory be solved on a large-scale quantum computer whereas classical computers would not finish computations in any reasonable amount of time. However, quantum speedup is not universal or even typical across computational tasks, since basic tasks such as sorting are proven to not allow any asymptotic quantum speedup. Claims of quantum supremacy have drawn significant attention to the discipline, but are demonstrated on contrived tasks, while near-term practical use cases remain limited. + """ + + await cognee.add([text], dataset_name) + + await cognee.cognify([dataset_name]) + + from cognee.infrastructure.databases.vector import get_vector_engine + + vector_engine = get_vector_engine() + random_node = (await vector_engine.search("Entity_name", "Quantum computer"))[0] + random_node_name = random_node.payload["text"] + + search_results = await cognee.search( + query_type=SearchType.INSIGHTS, query_text=random_node_name + ) + assert len(search_results) != 0, "The search results list is empty." 
+ print("\n\nExtracted sentences are:\n") + for result in search_results: + print(f"{result}\n") + + search_results = await cognee.search(query_type=SearchType.CHUNKS, query_text=random_node_name) + assert len(search_results) != 0, "The search results list is empty." + print("\n\nExtracted chunks are:\n") + for result in search_results: + print(f"{result}\n") + + search_results = await cognee.search( + query_type=SearchType.SUMMARIES, query_text=random_node_name + ) + assert len(search_results) != 0, "Query related summaries don't exist." + print("\nExtracted results are:\n") + for result in search_results: + print(f"{result}\n") + + search_results = await cognee.search( + query_type=SearchType.NATURAL_LANGUAGE, + query_text=f"Find nodes connected to node with name {random_node_name}", + ) + assert len(search_results) != 0, "Query related natural language don't exist." + print("\nExtracted results are:\n") + for result in search_results: + print(f"{result}\n") + + user = await get_default_user() + history = await get_history(user.id) + + assert len(history) == 8, "Search history is not correct." + + await cognee.prune.prune_data() + assert not os.path.isdir(data_directory_path), "Local data files are not deleted" + + await cognee.prune.prune_system(metadata=True) + from cognee.infrastructure.databases.graph import get_graph_engine + + graph_engine = await get_graph_engine() + nodes, edges = await graph_engine.get_graph_data() + assert len(nodes) == 0 and len(edges) == 0, "Memgraph graph database is not empty" + + +if __name__ == "__main__": + import asyncio + + asyncio.run(main()) From 34b95b687c123470a05f47b3b6d324161df20a55 Mon Sep 17 00:00:00 2001 From: Igor Ilic <30923996+dexters1@users.noreply.github.com> Date: Sat, 10 May 2025 08:24:31 -0400 Subject: [PATCH 2/2] refactor: Handle boto3 s3fs dependencies better (#809) ## Description refactor: Handle boto3 s3fs dependencies better ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. --- poetry.lock | 30 +++++++++++++++++------------- pyproject.toml | 16 +++++++--------- uv.lock | 30 ++++++++++++++++++------------ 3 files changed, 42 insertions(+), 34 deletions(-) diff --git a/poetry.lock b/poetry.lock index f88d77218..e43faf821 100644 --- a/poetry.lock +++ b/poetry.lock @@ -15,6 +15,7 @@ files = [ [package.dependencies] aiohttp = ">=3.9.2,<4.0.0" aioitertools = ">=0.5.1,<1.0.0" +boto3 = {version = ">=1.37.0,<1.37.2", optional = true, markers = "extra == \"boto3\""} botocore = ">=1.37.0,<1.37.2" jmespath = ">=0.7.1,<2.0.0" multidict = ">=6.0.0,<7.0.0" @@ -1225,7 +1226,7 @@ description = "Cross-platform colored terminal text." 
optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" groups = ["main"] -markers = "(sys_platform == \"win32\" or platform_system == \"Windows\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"chromadb\") and (sys_platform == \"win32\" or platform_system == \"Windows\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\") and (platform_system == \"Windows\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"chromadb\" or extra == \"codegraph\") and (sys_platform == \"win32\" or platform_system == \"Windows\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dev\" or extra == \"chromadb\") and (sys_platform == \"win32\" or platform_system == \"Windows\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dev\") and (sys_platform == \"win32\" or platform_system == \"Windows\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dev\" or extra == \"chromadb\" or extra == \"codegraph\") and (python_version < \"3.13\" or platform_system == \"Windows\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"chromadb\")" +markers = "(sys_platform == \"win32\" or platform_system == \"Windows\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"chromadb\") and (platform_system == \"Windows\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"chromadb\" or extra == \"codegraph\") and (sys_platform == \"win32\" or platform_system == \"Windows\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\") and (sys_platform == \"win32\" or platform_system == \"Windows\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dev\" or extra == \"chromadb\") and (sys_platform == \"win32\" or platform_system == \"Windows\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dev\" or extra == \"chromadb\" or extra == \"codegraph\") and (sys_platform == \"win32\" or platform_system == \"Windows\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dev\") and (python_version < \"3.13\" or platform_system == \"Windows\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"chromadb\")" files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, @@ -1238,7 +1239,7 @@ description = "Colored terminal output for Python's logging module" optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" groups = ["main"] -markers = "python_version == \"3.10\" and (extra == \"codegraph\" or extra == \"chromadb\") or extra == \"chromadb\" or python_version == \"3.11\" and (extra == \"chromadb\" or extra == \"codegraph\") or python_version == \"3.12\" and (extra == \"chromadb\" or extra == \"codegraph\")" +markers = "python_version == \"3.10\" and (extra == \"chromadb\" or extra == \"codegraph\") or extra == \"chromadb\" or python_version == \"3.12\" and (extra == \"chromadb\" or extra == \"codegraph\") or python_version == \"3.11\" and 
(extra == \"chromadb\" or extra == \"codegraph\")" files = [ {file = "coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934"}, {file = "coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0"}, @@ -2298,7 +2299,7 @@ description = "The FlatBuffers serialization format for Python" optional = true python-versions = "*" groups = ["main"] -markers = "python_version == \"3.10\" and (extra == \"codegraph\" or extra == \"chromadb\") or extra == \"chromadb\" or python_version == \"3.11\" and (extra == \"chromadb\" or extra == \"codegraph\") or python_version == \"3.12\" and (extra == \"chromadb\" or extra == \"codegraph\")" +markers = "python_version == \"3.10\" and (extra == \"chromadb\" or extra == \"codegraph\") or extra == \"chromadb\" or python_version == \"3.12\" and (extra == \"chromadb\" or extra == \"codegraph\") or python_version == \"3.11\" and (extra == \"chromadb\" or extra == \"codegraph\")" files = [ {file = "flatbuffers-25.2.10-py2.py3-none-any.whl", hash = "sha256:ebba5f4d5ea615af3f7fd70fc310636fbb2bbd1f566ac0a23d98dd412de50051"}, {file = "flatbuffers-25.2.10.tar.gz", hash = "sha256:97e451377a41262f8d9bd4295cc836133415cc03d8cb966410a4af92eb00d26e"}, @@ -3451,7 +3452,7 @@ description = "Human friendly output for text interfaces using Python" optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" groups = ["main"] -markers = "python_version == \"3.10\" and (extra == \"codegraph\" or extra == \"chromadb\") or extra == \"chromadb\" or python_version == \"3.11\" and (extra == \"chromadb\" or extra == \"codegraph\") or python_version == \"3.12\" and (extra == \"chromadb\" or extra == \"codegraph\")" +markers = "python_version == \"3.10\" and (extra == \"chromadb\" or extra == \"codegraph\") or extra == \"chromadb\" or python_version == \"3.12\" and (extra == \"chromadb\" or extra == \"codegraph\") or python_version == \"3.11\" and (extra == \"chromadb\" or extra == \"codegraph\")" files = [ {file = "humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477"}, {file = "humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc"}, @@ -5954,7 +5955,7 @@ description = "Python library for arbitrary-precision floating-point arithmetic" optional = true python-versions = "*" groups = ["main"] -markers = "python_version == \"3.10\" and (extra == \"codegraph\" or extra == \"chromadb\") or extra == \"chromadb\" or python_version == \"3.11\" and (extra == \"chromadb\" or extra == \"codegraph\") or python_version == \"3.12\" and (extra == \"chromadb\" or extra == \"codegraph\")" +markers = "python_version == \"3.10\" and (extra == \"chromadb\" or extra == \"codegraph\") or extra == \"chromadb\" or python_version == \"3.12\" and (extra == \"chromadb\" or extra == \"codegraph\") or python_version == \"3.11\" and (extra == \"chromadb\" or extra == \"codegraph\")" files = [ {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, @@ -6536,7 +6537,7 @@ description = "ONNX Runtime is a runtime accelerator for Machine Learning models optional = true python-versions = ">=3.10" groups = ["main"] -markers = "python_version == \"3.10\" and (extra == \"codegraph\" or extra 
== \"chromadb\") or extra == \"chromadb\" or python_version == \"3.11\" and (extra == \"chromadb\" or extra == \"codegraph\") or python_version == \"3.12\" and (extra == \"chromadb\" or extra == \"codegraph\")" +markers = "python_version == \"3.10\" and (extra == \"chromadb\" or extra == \"codegraph\") or extra == \"chromadb\" or python_version == \"3.12\" and (extra == \"chromadb\" or extra == \"codegraph\") or python_version == \"3.11\" and (extra == \"chromadb\" or extra == \"codegraph\")" files = [ {file = "onnxruntime-1.21.1-cp310-cp310-macosx_13_0_universal2.whl", hash = "sha256:daedb5d33d8963062a25f4a3c788262074587f685a19478ef759a911b4b12c25"}, {file = "onnxruntime-1.21.1-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a402f9bda0b1cc791d9cf31d23c471e8189a55369b49ef2b9d0854eb11d22c4"}, @@ -6936,8 +6937,8 @@ files = [ [package.dependencies] numpy = [ {version = ">=1.22.4", markers = "python_version < \"3.11\""}, - {version = ">=1.23.2", markers = "python_version == \"3.11\""}, {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, + {version = ">=1.23.2", markers = "python_version == \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -7556,7 +7557,7 @@ description = "" optional = true python-versions = ">=3.8" groups = ["main"] -markers = "python_version == \"3.10\" and extra == \"codegraph\" or (extra == \"chromadb\" or extra == \"weaviate\" or extra == \"qdrant\" or extra == \"gemini\" or extra == \"deepeval\" or extra == \"milvus\") and python_version < \"3.11\" or python_version == \"3.11\" and (extra == \"codegraph\" or extra == \"chromadb\" or extra == \"gemini\" or extra == \"weaviate\" or extra == \"qdrant\" or extra == \"deepeval\" or extra == \"milvus\") or (python_version == \"3.12\" or extra == \"gemini\" or extra == \"weaviate\" or extra == \"qdrant\" or extra == \"deepeval\" or extra == \"milvus\" or extra == \"chromadb\") and (extra == \"codegraph\" or extra == \"chromadb\" or extra == \"gemini\" or extra == \"weaviate\" or extra == \"qdrant\" or extra == \"deepeval\" or extra == \"milvus\") and python_version >= \"3.12\"" +markers = "python_version == \"3.10\" and extra == \"codegraph\" or (extra == \"chromadb\" or extra == \"weaviate\" or extra == \"qdrant\" or extra == \"gemini\" or extra == \"deepeval\" or extra == \"milvus\") and python_version < \"3.11\" or (python_version == \"3.12\" or extra == \"gemini\" or extra == \"weaviate\" or extra == \"qdrant\" or extra == \"deepeval\" or extra == \"milvus\" or extra == \"chromadb\") and (extra == \"codegraph\" or extra == \"chromadb\" or extra == \"gemini\" or extra == \"weaviate\" or extra == \"qdrant\" or extra == \"deepeval\" or extra == \"milvus\") and python_version >= \"3.12\" or python_version == \"3.11\" and (extra == \"codegraph\" or extra == \"chromadb\" or extra == \"gemini\" or extra == \"weaviate\" or extra == \"qdrant\" or extra == \"deepeval\" or extra == \"milvus\")" files = [ {file = "protobuf-5.29.4-cp310-abi3-win32.whl", hash = "sha256:13eb236f8eb9ec34e63fc8b1d6efd2777d062fa6aaa68268fb67cf77f6839ad7"}, {file = "protobuf-5.29.4-cp310-abi3-win_amd64.whl", hash = "sha256:bcefcdf3976233f8a502d265eb65ea740c989bacc6c30a58290ed0e519eb4b8d"}, @@ -8140,8 +8141,8 @@ astroid = ">=3.3.8,<=3.4.0.dev0" colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} dill = [ {version = ">=0.2", markers = "python_version < \"3.11\""}, - {version = ">=0.3.6", markers = "python_version >= \"3.11\""}, {version = ">=0.3.7", markers = "python_version >= 
\"3.12\""}, + {version = ">=0.3.6", markers = "python_version == \"3.11\""}, ] isort = ">=4.2.5,<5.13 || >5.13,<7" mccabe = ">=0.6,<0.8" @@ -8270,7 +8271,7 @@ description = "A python implementation of GNU readline." optional = true python-versions = ">=3.8" groups = ["main"] -markers = "sys_platform == \"win32\" and python_version == \"3.10\" and (extra == \"chromadb\" or extra == \"codegraph\") or sys_platform == \"win32\" and extra == \"chromadb\" or sys_platform == \"win32\" and (extra == \"chromadb\" or extra == \"codegraph\") and python_version == \"3.11\" or sys_platform == \"win32\" and (extra == \"chromadb\" or extra == \"codegraph\") and python_version == \"3.12\"" +markers = "sys_platform == \"win32\" and (python_version == \"3.10\" or extra == \"chromadb\" or extra == \"codegraph\") and (extra == \"chromadb\" or python_version == \"3.12\" or python_version == \"3.10\" or python_version == \"3.11\") and (extra == \"codegraph\" or extra == \"chromadb\")" files = [ {file = "pyreadline3-3.5.4-py3-none-any.whl", hash = "sha256:eaf8e6cc3c49bcccf145fc6067ba8643d1df34d604a1ec0eccbf7a18e6d3fae6"}, {file = "pyreadline3-3.5.4.tar.gz", hash = "sha256:8d57d53039a1c75adba8e50dd3d992b28143480816187ea5efbd5c78e6c885b7"}, @@ -9544,7 +9545,10 @@ files = [ ] [package.dependencies] -aiobotocore = ">=2.5.4,<3.0.0" +aiobotocore = [ + {version = ">=2.5.4,<3.0.0"}, + {version = ">=2.5.4,<3.0.0", extras = ["boto3"], optional = true, markers = "extra == \"boto3\""}, +] aiohttp = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1" fsspec = "==2025.3.2.*" @@ -10319,7 +10323,7 @@ description = "Computer algebra system (CAS) in Python" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "python_version == \"3.10\" and (extra == \"codegraph\" or extra == \"chromadb\") or extra == \"chromadb\" or python_version == \"3.11\" and (extra == \"chromadb\" or extra == \"codegraph\") or python_version == \"3.12\" and (extra == \"chromadb\" or extra == \"codegraph\")" +markers = "python_version == \"3.10\" and (extra == \"chromadb\" or extra == \"codegraph\") or extra == \"chromadb\" or python_version == \"3.12\" and (extra == \"chromadb\" or extra == \"codegraph\") or python_version == \"3.11\" and (extra == \"chromadb\" or extra == \"codegraph\")" files = [ {file = "sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5"}, {file = "sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517"}, @@ -12012,4 +12016,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<=3.13" -content-hash = "7a300700800488853f060001d915b84d108461a6a39ec8299dcb6390fa78ea41" +content-hash = "4683c16fc40ace9b32102a8463ec8bdd54ceac0998ddb369bc6453cf83453367" diff --git a/pyproject.toml b/pyproject.toml index 7ca75d318..7f57e1b53 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,27 +20,25 @@ classifiers = [ ] dependencies = [ "openai>=1.59.4,<2", - "python-dotenv==1.0.1", + "python-dotenv>=1.0.1", "pydantic==2.10.5", "pydantic-settings>=2.2.1,<3", "typing_extensions==4.12.2", "nltk==3.9.1", "numpy>=1.26.4, <=2.1", - "pandas==2.2.3", + "pandas>=2.2.2", # Note: New s3fs and boto3 versions don't work well together # Always use comaptible fixed versions of these two dependencies - "s3fs==2025.3.2", - "boto3==1.37.1", - "botocore>=1.35.54,<2", + "s3fs[boto3]==2025.3.2", "sqlalchemy==2.0.39", "aiosqlite>=0.20.0,<0.21", "tiktoken<=0.9.0", "litellm>=1.57.4", "instructor==1.7.2", - 
"langfuse>=2.32.0,<3", - "filetype>=1.2.0,<2", - "aiohttp>=3.11.14,<4", - "aiofiles>=23.2.1,<24", + "langfuse>=2.32.0", + "filetype>=1.2.0", + "aiohttp>=3.11.14", + "aiofiles>=23.2.1", "owlready2>=0.47,<0.48", "graphistry>=0.33.5,<0.34", "pypdf>=4.1.0,<6.0.0", diff --git a/uv.lock b/uv.lock index 00d716e41..90b579b23 100644 --- a/uv.lock +++ b/uv.lock @@ -26,6 +26,11 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/95/67/026598918f92145156f2feb7957f57daefda20375cc2ac1a0692a9b8010b/aiobotocore-2.21.1-py3-none-any.whl", hash = "sha256:bd7c49a6d6f8a3d9444b0a94417c8da13813b5c7eec1c4f0ec2db7e8ce8f23e7", size = 78313 }, ] +[package.optional-dependencies] +boto3 = [ + { name = "boto3" }, +] + [[package]] name = "aiofiles" version = "23.2.1" @@ -861,8 +866,6 @@ dependencies = [ { name = "aiohttp" }, { name = "aiosqlite" }, { name = "alembic" }, - { name = "boto3" }, - { name = "botocore" }, { name = "dlt", extra = ["sqlalchemy"] }, { name = "fastapi" }, { name = "fastapi-users", extra = ["sqlalchemy"] }, @@ -887,7 +890,7 @@ dependencies = [ { name = "pypdf" }, { name = "python-dotenv" }, { name = "python-multipart" }, - { name = "s3fs" }, + { name = "s3fs", extra = ["boto3"] }, { name = "scikit-learn" }, { name = "sentry-sdk", extra = ["fastapi"] }, { name = "sqlalchemy" }, @@ -1004,14 +1007,12 @@ weaviate = [ [package.metadata] requires-dist = [ - { name = "aiofiles", specifier = ">=23.2.1,<24" }, - { name = "aiohttp", specifier = ">=3.11.14,<4" }, + { name = "aiofiles", specifier = ">=23.2.1" }, + { name = "aiohttp", specifier = ">=3.11.14" }, { name = "aiosqlite", specifier = ">=0.20.0,<0.21" }, { name = "alembic", specifier = ">=1.13.3,<2" }, { name = "anthropic", marker = "extra == 'anthropic'", specifier = ">=0.26.1,<0.27" }, { name = "asyncpg", marker = "extra == 'postgres'", specifier = "==0.30.0" }, - { name = "boto3", specifier = "==1.37.1" }, - { name = "botocore", specifier = ">=1.35.54,<2" }, { name = "chromadb", marker = "extra == 'chromadb'", specifier = ">=0.3.0,<0.7" }, { name = "coverage", marker = "extra == 'dev'", specifier = ">=7.3.2,<8" }, { name = "debugpy", marker = "extra == 'dev'", specifier = "==1.8.9" }, @@ -1022,7 +1023,7 @@ requires-dist = [ { name = "fastapi", specifier = "==0.115.7" }, { name = "fastapi-users", extras = ["sqlalchemy"], specifier = "==14.0.1" }, { name = "fastembed", marker = "python_full_version < '3.13' and extra == 'codegraph'", specifier = "<=0.6.0" }, - { name = "filetype", specifier = ">=1.2.0,<2" }, + { name = "filetype", specifier = ">=1.2.0" }, { name = "gdown", marker = "extra == 'evals'", specifier = ">=5.2.0,<6" }, { name = "gitpython", marker = "extra == 'dev'", specifier = ">=3.1.43,<4" }, { name = "google-generativeai", marker = "extra == 'gemini'", specifier = ">=0.8.4,<0.9" }, @@ -1035,7 +1036,7 @@ requires-dist = [ { name = "kuzu", marker = "extra == 'kuzu'", specifier = "==0.8.2" }, { name = "lancedb", specifier = "==0.16.0" }, { name = "langchain-text-splitters", marker = "extra == 'langchain'", specifier = "==0.3.2" }, - { name = "langfuse", specifier = ">=2.32.0,<3" }, + { name = "langfuse", specifier = ">=2.32.0" }, { name = "langsmith", marker = "extra == 'langchain'", specifier = "==0.2.3" }, { name = "limits", specifier = ">=4.4.1,<5" }, { name = "litellm", specifier = ">=1.57.4" }, @@ -1054,7 +1055,7 @@ requires-dist = [ { name = "numpy", specifier = ">=1.26.4,<=2.1" }, { name = "openai", specifier = ">=1.59.4,<2" }, { name = "owlready2", specifier = ">=0.47,<0.48" }, - { name = "pandas", specifier = "==2.2.3" 
}, + { name = "pandas", specifier = ">=2.2.2" }, { name = "pgvector", marker = "extra == 'postgres'", specifier = ">=0.3.5,<0.4" }, { name = "plotly", marker = "extra == 'evals'", specifier = ">=6.0.0,<7" }, { name = "posthog", marker = "extra == 'posthog'", specifier = ">=3.5.0,<4" }, @@ -1071,12 +1072,12 @@ requires-dist = [ { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.4.0,<8" }, { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.21.1,<0.22" }, { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=6.1.1" }, - { name = "python-dotenv", specifier = "==1.0.1" }, + { name = "python-dotenv", specifier = ">=1.0.1" }, { name = "python-multipart", specifier = "==0.0.20" }, { name = "qasync", marker = "extra == 'gui'", specifier = ">=0.27.1,<0.28" }, { name = "qdrant-client", marker = "extra == 'qdrant'", specifier = ">=1.9.0,<2" }, { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.9.2,<1.0.0" }, - { name = "s3fs", specifier = "==2025.3.2" }, + { name = "s3fs", extras = ["boto3"], specifier = "==2025.3.2" }, { name = "scikit-learn", specifier = ">=1.6.1,<2" }, { name = "sentry-sdk", extras = ["fastapi"], specifier = ">=2.9.0,<3" }, { name = "sqlalchemy", specifier = "==2.0.39" }, @@ -6889,6 +6890,11 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/66/e1/4db0388df5655de92ce5f9b60d2bef220a58dde130e0453e5433c579986e/s3fs-2025.3.2-py3-none-any.whl", hash = "sha256:81eae3f37b4b04bcc08845d7bcc607c6ca45878813ef7e6a28d77b2688417130", size = 30485 }, ] +[package.optional-dependencies] +boto3 = [ + { name = "aiobotocore", extra = ["boto3"] }, +] + [[package]] name = "s3transfer" version = "0.11.3"