Updated evals, added falkordb

This commit is contained in:
Vasilije 2024-05-20 14:41:08 +02:00
parent d099cae128
commit 8ef23731a3
8 changed files with 247 additions and 8 deletions

View file

@ -0,0 +1,191 @@
""" FalcorDB Adapter for Graph Database"""
import json
import logging
from typing import Optional, Any, List, Dict
from contextlib import asynccontextmanager
from falkordb.asyncio import FalkorDB
from cognee.infrastructure.databases.graph.graph_db_interface import GraphDBInterface
logger = logging.getLogger("FalcorDBAdapter")
class FalcorDBAdapter(GraphDBInterface):
    """FalkorDB adapter implementing GraphDBInterface.

    Talks to a FalkorDB server through the async client and executes
    openCypher queries against a single named graph (`graph_name`).
    """

    def __init__(
        self,
        graph_database_url: str,
        graph_database_username: str,
        graph_database_password: str,
        graph_database_port: int,
        driver: Optional[Any] = None,
        graph_name: str = "DefaultGraph",
    ):
        # NOTE(review): username/password/driver are accepted only for
        # interface compatibility; the FalkorDB client here uses host/port.
        self.driver = FalkorDB(
            host = graph_database_url,
            port = graph_database_port)
        self.graph_name = graph_name

    async def query(
        self,
        query: str,
        params: Optional[Dict[str, Any]] = None,
    ) -> List[Dict[str, Any]]:
        """Run a Cypher query on the selected graph and return its result set.

        Logs and re-raises any client error.
        """
        try:
            selected_graph = self.driver.select_graph(self.graph_name)
            # Bug fix: `params` was previously dropped, so parameterized
            # queries ($node_id etc.) silently matched nothing.
            result = await selected_graph.query(query, params)
            return result.result_set
        except Exception as error:
            logger.error("Falkor query error: %s", error, exc_info = True)
            raise error

    async def graph(self):
        """Return the underlying FalkorDB driver object."""
        return self.driver

    async def add_node(self, node_id: str, node_properties: Optional[Dict[str, Any]] = None):
        """MERGE a node labeled with the sanitized id; set properties on create.

        Returns the query result (internal id and node id).
        """
        # ':' is label syntax in Cypher, so it is sanitized out of the id.
        node_id = node_id.replace(":", "_")
        serialized_properties = self.serialize_properties(node_properties)

        if "name" not in serialized_properties:
            serialized_properties["name"] = node_id

        query = f"""MERGE (node:`{node_id}` {{id: $node_id}})
        ON CREATE SET node += $properties
        RETURN ID(node) AS internal_id, node.id AS nodeId"""

        params = {
            "node_id": node_id,
            "properties": serialized_properties,
        }

        return await self.query(query, params)

    async def add_nodes(self, nodes: list[tuple[str, dict[str, Any]]]) -> None:
        """Add each (node_id, properties) pair via add_node, one at a time."""
        for node_id, node_properties in nodes:
            await self.add_node(
                node_id = node_id.replace(":", "_"),
                node_properties = node_properties,
            )

    async def extract_node_description(self, node_id: str):
        """Return {id, layer_id, description} for each complete neighbor of `node_id`.

        Neighbors whose id contains 'DefaultGraphModel' are excluded, as are
        neighbors missing any of the three required attributes.
        """
        query = """MATCH (n)-[r]->(m)
        WHERE n.id = $node_id
        AND NOT m.id CONTAINS 'DefaultGraphModel'
        RETURN m
        """
        result = await self.query(query, dict(node_id = node_id))

        descriptions = []
        for row in result:
            attributes = row.get("m", {})
            # Only keep neighbors that carry the full description triple.
            if all(key in attributes for key in ["id", "layer_id", "description"]):
                descriptions.append({
                    "id": attributes["id"],
                    "layer_id": attributes["layer_id"],
                    "description": attributes["description"],
                })

        return descriptions

    async def get_layer_nodes(self):
        """Return every node that has a non-null layer_id."""
        query = """MATCH (node) WHERE node.layer_id IS NOT NULL
        RETURN node"""
        return [row["node"] for row in (await self.query(query))]

    async def extract_node(self, node_id: str):
        """Return the node with the given id, or None if it does not exist."""
        query = """
        MATCH(node {id: $node_id})
        RETURN node
        """
        results = [row["node"] for row in (await self.query(query, dict(node_id = node_id)))]
        return results[0] if results else None

    async def delete_node(self, node_id: str):
        """Detach-delete the node whose label and id match `node_id`."""
        # Bug fix: previously read the builtin `id` instead of the argument,
        # and the query deleted the undefined variable `n` instead of `node`.
        node_id = node_id.replace(":", "_")
        query = f"MATCH (node:`{node_id}` {{id: $node_id}}) DETACH DELETE node"
        params = { "node_id": node_id }
        return await self.query(query, params)

    async def add_edge(self, from_node: str, to_node: str, relationship_name: str, edge_properties: Optional[Dict[str, Any]] = None):
        """MERGE a relationship between two existing nodes and set its properties."""
        # Default changed from `{}` to None to avoid the shared mutable
        # default-argument pitfall; serialize_properties handles None.
        serialized_properties = self.serialize_properties(edge_properties)
        from_node = from_node.replace(":", "_")
        to_node = to_node.replace(":", "_")
        query = f"""MATCH (from_node:`{from_node}` {{id: $from_node}}), (to_node:`{to_node}` {{id: $to_node}})
        MERGE (from_node)-[r:`{relationship_name}`]->(to_node)
        SET r += $properties
        RETURN r"""

        params = {
            "from_node": from_node,
            "to_node": to_node,
            "properties": serialized_properties
        }

        return await self.query(query, params)

    async def add_edges(self, edges: list[tuple[str, str, str, dict[str, Any]]]) -> None:
        """Add each (from, to, relationship, properties) tuple via add_edge."""
        for from_node, to_node, relationship_name, edge_properties in edges:
            await self.add_edge(
                from_node = from_node.replace(":", "_"),
                to_node = to_node.replace(":", "_"),
                relationship_name = relationship_name,
                edge_properties = edge_properties
            )

    async def filter_nodes(self, search_criteria):
        """Return all nodes whose id contains `search_criteria`."""
        # Parameterized (was an f-string) to avoid Cypher injection through
        # search_criteria.
        query = """MATCH (node)
        WHERE node.id CONTAINS $search_criteria
        RETURN node"""
        return await self.query(query, dict(search_criteria = search_criteria))

    async def delete_graph(self):
        """Detach-delete every node in the graph."""
        query = """MATCH (node)
        DETACH DELETE node;"""
        return await self.query(query)

    def serialize_properties(self, properties: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """JSON-encode dict/list values so they are storable as graph properties.

        Scalar values pass through unchanged; None yields an empty dict.
        """
        # Default changed from `dict()` to None: a shared mutable default
        # would be aliased across all calls.
        properties = properties if properties is not None else {}
        return {
            property_key: json.dumps(property_value)
            if isinstance(property_value, (dict, list))
            else property_value for property_key, property_value in properties.items()
        }

View file

@ -67,6 +67,14 @@ services:
- ./litellm-config.yaml:/app/config.yaml # Mount the local configuration file - ./litellm-config.yaml:/app/config.yaml # Mount the local configuration file
# You can change the port or number of workers as per your requirements or pass any new supported CLI argument. Make sure the port passed here matches with the container port defined above in `ports` value # You can change the port or number of workers as per your requirements or pass any new supported CLI argument. Make sure the port passed here matches with the container port defined above in `ports` value
command: [ "--config", "/app/config.yaml", "--port", "4000", "--num_workers", "8" ] command: [ "--config", "/app/config.yaml", "--port", "4000", "--num_workers", "8" ]
falkordb:
image: falkordb/falkordb:edge
container_name: falkordb
ports:
- "6379:6379"
- "3001:3000"
networks:
- cognee_backend
networks: networks:
cognee_backend: cognee_backend:

View file

@ -67,12 +67,34 @@ def get_answer(content: str,context, model: Type[BaseModel]= AnswerModel):
logger.error("Error extracting cognitive layers from content: %s", error, exc_info = True) logger.error("Error extracting cognitive layers from content: %s", error, exc_info = True)
raise error raise error
def run_cognify_base_rag_and_search(): async def run_cognify_base_rag():
from cognee.api.v1.add import add
from cognee.api.v1.prune import prune
from cognee.api.v1.cognify.cognify import cognify
await prune.prune_system()
await add("data://test_datasets", "initial_test")
graph = await cognify("initial_test")
pass pass
def run_cognify_and_search(): async def cognify_search_base_rag(content:str, context:str):
pass vector_client = infrastructure_config.get_config("vector_engine")
return_ = await vector_client.search(collection_name="basic_rag", query_text="show_all_processes", limit=10)
print("results", return_)
return return_
async def cognify_search_graph(content:str, context:str):
from cognee.api.v1.search.search import search
return_ = await search(content)
return return_
@ -90,12 +112,29 @@ def convert_goldens_to_test_cases(test_cases_raw: List[LLMTestCase]) -> List[LLM
test_cases.append(test_case) test_cases.append(test_case)
return test_cases return test_cases
# Data preprocessing before setting the dataset test cases # # Data preprocessing before setting the dataset test cases
dataset.test_cases = convert_goldens_to_test_cases(dataset.test_cases) # dataset.test_cases = convert_goldens_to_test_cases(dataset.test_cases)
#
#
# from deepeval.metrics import HallucinationMetric
#
#
# metric = HallucinationMetric()
# dataset.evaluate([metric])
from deepeval.metrics import HallucinationMetric if __name__ == "__main__":
import asyncio
metric = HallucinationMetric() async def main():
dataset.evaluate([metric]) await run_cognify_base_rag_and_search()
asyncio.run(main())
# run_cognify_base_rag_and_search()
# # Data preprocessing before setting the dataset test cases
# dataset.test_cases = convert_goldens_to_test_cases(dataset.test_cases)
# from deepeval.metrics import HallucinationMetric
# metric = HallucinationMetric()
# dataset.evaluate([metric])
pass

View file

@ -71,6 +71,7 @@ protobuf = "<5.0.0"
langchain-community = "0.0.38" langchain-community = "0.0.38"
langchain ="0.1.10" langchain ="0.1.10"
deepeval = "^0.21.42" deepeval = "^0.21.42"
falkordb = "^1.0.4"
[tool.poetry.extras] [tool.poetry.extras]