Merge branch 'main' into COG-698

This commit is contained in:
Vasilije 2024-12-11 13:17:15 +01:00 committed by GitHub
commit 0f0e34e097
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
60 changed files with 460 additions and 1399 deletions

.github/dependabot.yaml vendored Normal file

@ -0,0 +1,35 @@
# Configuration: https://dependabot.com/docs/config-file/
# Docs: https://docs.github.com/en/github/administering-a-repository/keeping-your-dependencies-updated-automatically
version: 2
updates:
  - package-ecosystem: "pip"
    directory: "/"
    schedule:
      interval: "weekly"
    allow:
      - dependency-type: "all"
    commit-message:
      prefix: ":arrow_up:"
    open-pull-requests-limit: 50
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: "weekly"
    allow:
      - dependency-type: "all"
    commit-message:
      prefix: ":arrow_up:"
    open-pull-requests-limit: 50
  - package-ecosystem: "docker"
    directory: "/"
    schedule:
      interval: "weekly"
    allow:
      - dependency-type: "all"
    commit-message:
      prefix: ":arrow_up:"
    open-pull-requests-limit: 50


@ -0,0 +1,16 @@
name: community | Greetings
on: [pull_request, issues]
jobs:
  greeting:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/first-interaction@v1
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
          pr-message: 'Hello @${{ github.actor }}, thank you for submitting a PR! We will respond as soon as possible.'
          issue-message: |
            Hello @${{ github.actor }}, thank you for your interest in our work!
            If this is a bug report, please provide screenshots and **minimum viable code to reproduce your issue**, otherwise we cannot help you.


@ -1,36 +0,0 @@
name: analytics | Update Cognee Stats Daily
on:
  schedule:
    - cron: '0 1 * * *' # Runs every day at 01:00 UTC
jobs:
  update_stats:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v3
        with:
          persist-credentials: false
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'
      - name: Install Dependencies
        run: |
          pip install requests posthog
      - name: Run Update Script
        env:
          POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
          POSTHOG_API_HOST: ${{ secrets.POSTHOG_API_HOST }}
        run: |
          cd tools # Change to the 'tools' directory
          echo "Current working directory after changing to tools:"
          pwd # Print the working directory again
          echo "List of folders in the tools directory:"
          ls -la # List all files and folders in the 'tools' directory
          python daily_pypi_downloads.py # Run the script


@ -1,44 +0,0 @@
name: analytics | Push GitHub Data to PostHog
on:
  schedule:
    - cron: '0 0 * * *' # Runs every day at midnight
  workflow_dispatch:
jobs:
  push-data:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install requests posthog
      - name: Print working directory, list folders, and run script
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
          POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
          GITHUB_REPOSITORY: ${{ github.repository }}
        run: |
          echo "Current working directory:"
          pwd # Print the current working directory
          echo "List of folders in the current directory:"
          ls -la # List all files and folders in the current directory
          echo "Changing to tools directory..."
          cd tools # Change to the 'tools' directory
          echo "Current working directory after changing to tools:"
          pwd # Print the working directory again
          echo "List of folders in the tools directory:"
          ls -la # List all files and folders in the 'tools' directory
          python push_to_posthog.py # Run the script


@ -46,7 +46,7 @@ jobs:
      - name: Load cached venv
        id: cached-poetry-dependencies
        uses: actions/cache@v3
        uses: actions/cache@v4
        with:
          path: .venv
          key: venv-${{ matrix.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}


@ -11,7 +11,7 @@ jobs:
      - name: Checkout
        uses: actions/checkout@v3
      - name: Github Releases To Discord
        uses: SethCohen/github-releases-to-discord@v1.15.0
        uses: SethCohen/github-releases-to-discord@v1.16.2
        with:
          webhook_url: ${{ secrets.WEBHOOK_URL }}
          color: "2105893"


@ -40,7 +40,7 @@ class CogneeGraph(CogneeAbstractGraph):
            edge.node1.add_skeleton_edge(edge)
            edge.node2.add_skeleton_edge(edge)
        else:
            raise EntityAlreadyExistsError(message=f"Edge {edge} already exists in the graph.")
            print(f"Edge {edge} already exists in the graph.")

    def get_node(self, node_id: str) -> Node:
        return self.nodes.get(node_id, None)


@ -65,6 +65,12 @@ class Node:
    def get_attribute(self, key: str) -> Union[str, int, float]:
        return self.attributes[key]

    def get_skeleton_edges(self):
        return self.skeleton_edges

    def get_skeleton_neighbours(self):
        return self.skeleton_neighbours

    def __repr__(self) -> str:
        return f"Node({self.id}, attributes={self.attributes})"
@ -109,8 +115,14 @@ class Edge:
    def add_attribute(self, key: str, value: Any) -> None:
        self.attributes[key] = value

    def get_attribute(self, key: str, value: Any) -> Union[str, int, float]:
        return self.attributes[key]

    def get_attribute(self, key: str) -> Optional[Union[str, int, float]]:
        return self.attributes.get(key)

    def get_source_node(self):
        return self.node1

    def get_destination_node(self):
        return self.node2

    def __repr__(self) -> str:
        direction = "->" if self.directed else "--"


@ -0,0 +1,116 @@
import asyncio
import logging
from typing import Set, List

from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.infrastructure.databases.vector import get_vector_engine
from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph
from cognee.modules.users.methods import get_default_user
from cognee.modules.users.models import User
from cognee.shared.utils import send_telemetry


async def code_description_to_code_part_search(query: str, user: User = None, top_k: int = 2) -> list:
    if user is None:
        user = await get_default_user()

    if user is None:
        raise PermissionError("No user found in the system. Please create a user.")

    retrieved_codeparts = await code_description_to_code_part(query, user, top_k)
    return retrieved_codeparts


async def code_description_to_code_part(
    query: str,
    user: User,
    top_k: int
) -> List[str]:
    """
    Maps a code description query to relevant code parts using a CodeGraph pipeline.

    Args:
        query (str): The search query describing the code parts.
        user (User): The user performing the search.
        top_k (int): Number of code-graph descriptions to match (the number of corresponding code parts returned can be higher).

    Returns:
        List[str]: A list of unique code parts matching the query.

    Raises:
        ValueError: If arguments are invalid.
        RuntimeError: If an unexpected error occurs during execution.
    """
    if not query or not isinstance(query, str):
        raise ValueError("The query must be a non-empty string.")
    if not isinstance(top_k, int) or top_k <= 0:
        raise ValueError("top_k must be a positive integer.")

    try:
        vector_engine = get_vector_engine()
        graph_engine = await get_graph_engine()
    except Exception as init_error:
        logging.error("Failed to initialize engines: %s", init_error, exc_info=True)
        raise RuntimeError("System initialization error. Please try again later.") from init_error

    send_telemetry("code_description_to_code_part_search EXECUTION STARTED", user.id)
    logging.info("Search initiated by user %s with query: '%s' and top_k: %d", user.id, query, top_k)

    try:
        results = await vector_engine.search(
            "code_summary_text", query_text=query, limit=top_k
        )
        if not results:
            logging.warning("No results found for query: '%s' by user: %s", query, user.id)
            return []

        memory_fragment = CogneeGraph()
        await memory_fragment.project_graph_from_db(
            graph_engine,
            node_properties_to_project=['id', 'type', 'text', 'source_code'],
            edge_properties_to_project=['relationship_name']
        )

        code_pieces_to_return = set()

        for node in results:
            node_id = str(node.id)
            node_to_search_from = memory_fragment.get_node(node_id)

            if not node_to_search_from:
                logging.debug("Node %s not found in memory fragment graph", node_id)
                continue

            for code_file in node_to_search_from.get_skeleton_neighbours():
                for code_file_edge in code_file.get_skeleton_edges():
                    if code_file_edge.get_attribute('relationship_name') == 'contains':
                        code_pieces_to_return.add(code_file_edge.get_destination_node())

        logging.info("Search completed for user: %s, query: '%s'. Found %d code pieces.",
                     user.id, query, len(code_pieces_to_return))

        return list(code_pieces_to_return)

    except Exception as exec_error:
        logging.error(
            "Error during code description to code part search for user: %s, query: '%s'. Error: %s",
            user.id, query, exec_error, exc_info=True
        )
        send_telemetry("code_description_to_code_part_search EXECUTION FAILED", user.id)
        raise RuntimeError("An error occurred while processing your request.") from exec_error


if __name__ == "__main__":
    async def main():
        query = "I am looking for a class with blue eyes"
        user = None

        try:
            results = await code_description_to_code_part_search(query, user)
            print("Retrieved Code Parts:", results)
        except Exception as e:
            print(f"An error occurred: {e}")

    asyncio.run(main())


@ -14,6 +14,7 @@ class TextSummary(DataPoint):
class CodeSummary(DataPoint):
    __tablename__ = "code_summary"
    text: str
    made_from: CodeFile


@ -42,19 +42,6 @@ def test_add_edge_success(setup_graph):
    assert edge in node2.skeleton_edges


def test_add_duplicate_edge(setup_graph):
    """Test adding a duplicate edge raises an exception."""
    graph = setup_graph
    node1 = Node("node1")
    node2 = Node("node2")
    graph.add_node(node1)
    graph.add_node(node2)
    edge = Edge(node1, node2)
    graph.add_edge(edge)
    with pytest.raises(EntityAlreadyExistsError, match="Edge .* already exists in the graph."):
        graph.add_edge(edge)


def test_get_node_success(setup_graph):
    """Test retrieving an existing node."""
    graph = setup_graph


@ -0,0 +1,76 @@
import pytest
from unittest.mock import AsyncMock, patch


@pytest.mark.asyncio
async def test_code_description_to_code_part_no_results():
    """Test that code_description_to_code_part handles no search results."""
    mock_user = AsyncMock()
    mock_user.id = "user123"
    mock_vector_engine = AsyncMock()
    mock_vector_engine.search.return_value = []

    with patch("cognee.modules.retrieval.description_to_codepart_search.get_vector_engine", return_value=mock_vector_engine), \
         patch("cognee.modules.retrieval.description_to_codepart_search.get_graph_engine", return_value=AsyncMock()), \
         patch("cognee.modules.retrieval.description_to_codepart_search.CogneeGraph", return_value=AsyncMock()):

        from cognee.modules.retrieval.description_to_codepart_search import code_description_to_code_part

        result = await code_description_to_code_part("search query", mock_user, 2)

        assert result == []


@pytest.mark.asyncio
async def test_code_description_to_code_part_invalid_query():
    """Test that code_description_to_code_part raises ValueError for invalid query."""
    mock_user = AsyncMock()

    with pytest.raises(ValueError, match="The query must be a non-empty string."):
        from cognee.modules.retrieval.description_to_codepart_search import code_description_to_code_part
        await code_description_to_code_part("", mock_user, 2)


@pytest.mark.asyncio
async def test_code_description_to_code_part_invalid_top_k():
    """Test that code_description_to_code_part raises ValueError for invalid top_k."""
    mock_user = AsyncMock()

    with pytest.raises(ValueError, match="top_k must be a positive integer."):
        from cognee.modules.retrieval.description_to_codepart_search import code_description_to_code_part
        await code_description_to_code_part("search query", mock_user, 0)


@pytest.mark.asyncio
async def test_code_description_to_code_part_initialization_error():
    """Test that code_description_to_code_part raises RuntimeError for engine initialization errors."""
    mock_user = AsyncMock()

    with patch("cognee.modules.retrieval.description_to_codepart_search.get_vector_engine", side_effect=Exception("Engine init failed")), \
         patch("cognee.modules.retrieval.description_to_codepart_search.get_graph_engine", return_value=AsyncMock()):

        from cognee.modules.retrieval.description_to_codepart_search import code_description_to_code_part

        with pytest.raises(RuntimeError, match="System initialization error. Please try again later."):
            await code_description_to_code_part("search query", mock_user, 2)


@pytest.mark.asyncio
async def test_code_description_to_code_part_execution_error():
    """Test that code_description_to_code_part raises RuntimeError for execution errors."""
    mock_user = AsyncMock()
    mock_user.id = "user123"
    mock_vector_engine = AsyncMock()
    mock_vector_engine.search.side_effect = Exception("Execution error")

    with patch("cognee.modules.retrieval.description_to_codepart_search.get_vector_engine", return_value=mock_vector_engine), \
         patch("cognee.modules.retrieval.description_to_codepart_search.get_graph_engine", return_value=AsyncMock()), \
         patch("cognee.modules.retrieval.description_to_codepart_search.CogneeGraph", return_value=AsyncMock()):

        from cognee.modules.retrieval.description_to_codepart_search import code_description_to_code_part

        with pytest.raises(RuntimeError, match="An error occurred while processing your request."):
            await code_description_to_code_part("search query", mock_user, 2)


@ -1,299 +0,0 @@
# Cognee API Reference
## Overview
The Cognee API provides a set of endpoints for managing datasets, performing cognitive tasks, and configuring various settings in the system. The API is built on FastAPI and includes multiple routes to handle different functionalities. This reference outlines the available endpoints and their usage.
## Base URL
The base URL for all API requests is determined by the server's deployment environment. Typically, this will be:
- **Development**: `http://localhost:8000`
- **Production**: Depending on your server setup.
## Endpoints
### 1. Root
- **URL**: `/`
- **Method**: `GET`
- **Auth Required**: No
- **Description**: Root endpoint that returns a welcome message.
**Response**:
```json
{
"status": 200,
"body": {
"message": "Hello, World, I am alive!"
}
}
```
### 2. Health Check
- **URL**: `/health`
- **Method**: `GET`
- **Auth Required**: No
- **Description**: Health check endpoint that returns the server status.
**Response**:
```json
{
"status": 200
}
```
### 3. Get Datasets
- **URL**: `/datasets`
- **Method**: `GET`
- **Auth Required**: No
- **Description**: Retrieve a list of available datasets.
**Response**:
```json
{
"status": 200,
"body": [
{
"id": "dataset_id_1",
"name": "Dataset Name 1",
"description": "Description of Dataset 1",
...
},
...
]
}
```
### 4. Delete Dataset
- **URL**: `/datasets/{dataset_id}`
- **Method**: `DELETE`
- **Auth Required**: No
- **Description**: Delete a specific dataset by its ID.
**Path Parameters**:
- `dataset_id`: The ID of the dataset to delete.
**Response**:
```json
{
"status": 200
}
```
### 5. Get Dataset Graph
- **URL**: `/datasets/{dataset_id}/graph`
- **Method**: `GET`
- **Auth Required**: No
- **Description**: Retrieve the graph visualization URL for a specific dataset.
**Path Parameters**:
- `dataset_id`: The ID of the dataset.
**Response**:
```json
"http://example.com/path/to/graph"
```
### 6. Get Dataset Data
- **URL**: `/datasets/{dataset_id}/data`
- **Method**: `GET`
- **Auth Required**: No
- **Description**: Retrieve data associated with a specific dataset.
**Path Parameters**:
- `dataset_id`: The ID of the dataset.
**Response**:
```json
{
"status": 200,
"body": [
{
"data_id": "data_id_1",
"content": "Data content here",
...
},
...
]
}
```
### 7. Get Dataset Status
- **URL**: `/datasets/status`
- **Method**: `GET`
- **Auth Required**: No
- **Description**: Retrieve the status of one or more datasets.
**Query Parameters**:
- `dataset`: A list of dataset IDs to check status for.
**Response**:
```json
{
"status": 200,
"body": {
"dataset_id_1": "Status 1",
"dataset_id_2": "Status 2",
...
}
}
```
### 8. Get Raw Data
- **URL**: `/datasets/{dataset_id}/data/{data_id}/raw`
- **Method**: `GET`
- **Auth Required**: No
- **Description**: Retrieve the raw data file for a specific data entry in a dataset.
**Path Parameters**:
- `dataset_id`: The ID of the dataset.
- `data_id`: The ID of the data entry.
**Response**: Raw file download.
### 9. Add Data
- **URL**: `/add`
- **Method**: `POST`
- **Auth Required**: No
- **Description**: Add new data to a dataset. The data can be uploaded from a file or a URL.
**Form Parameters**:
- `datasetId`: The ID of the dataset to add data to.
- `data`: A list of files to upload.
**Request**
```json
{
"dataset_id": "ID_OF_THE_DATASET_TO_PUT_DATA_IN", // Optional, we use "main" as default.
"files": File[]
}
```
**Response**:
```json
{
"status": 200
}
```
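For illustration, a minimal client call for this endpoint might look like the following sketch. It assumes a local server at `http://localhost:8000` and uses the form field names listed above; adjust both for your deployment.
```python
import requests

# Illustrative upload to the /add endpoint. "datasetId" and "data" are the
# form parameters documented above; the base URL is an assumption for a
# local development server.
BASE_URL = "http://localhost:8000"

with open("example.pdf", "rb") as file:
    response = requests.post(
        f"{BASE_URL}/add",
        data={"datasetId": "main"},
        files=[("data", ("example.pdf", file, "application/pdf"))],
    )

print(response.status_code)
```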
### 10. Cognify
- **URL**: `/cognify`
- **Method**: `POST`
- **Auth Required**: No
- **Description**: Perform cognitive processing on the specified datasets.
**Request Body**:
```json
{
"datasets": ["ID_OF_THE_DATASET_1", "ID_OF_THE_DATASET_2", ...]
}
```
**Response**:
```json
{
"status": 200
}
```
### 11. Search
- **URL**: `/search`
- **Method**: `POST`
- **Auth Required**: No
- **Description**: Search for nodes in the graph based on the provided query parameters.
**Request Body**:
```json
{
"searchType": "INSIGHTS", // Or "SUMMARIES" or "CHUNKS"
"query": "QUERY_TO_MATCH_DATA"
}
```
**Response**
For "INSIGHTS" search type:
```json
{
"status": 200,
"body": [[
{ "name" "source_node_name" },
{ "relationship_name" "between_nodes_relationship_name" },
{ "name" "target_node_name" },
]]
}
```
For "SUMMARIES" search type:
```json
{
"status": 200,
"body": [
{ "text" "summary_text" },
{ "text" "summary_text" },
{ "text" "summary_text" },
...
]
}
```
For "CHUNKS" search type:
```json
{
"status": 200,
"body": [
{ "text" "chunk_text" },
{ "text" "chunk_text" },
{ "text" "chunk_text" },
...
]
}
```
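To tie the last few endpoints together, here is a hedged end-to-end sketch: trigger processing with `/cognify`, then query with `/search`. The base URL, dataset ID, and response handling are assumptions based on the examples above.
```python
import requests

# Illustrative sketch of driving the /cognify and /search endpoints.
BASE_URL = "http://localhost:8000"  # assumption: local development server

# Process the "main" dataset (dataset ID is illustrative).
requests.post(f"{BASE_URL}/cognify", json={"datasets": ["main"]})

# Query the knowledge graph for insights.
search_response = requests.post(
    f"{BASE_URL}/search",
    json={"searchType": "INSIGHTS", "query": "Tell me about NLP"},
)

# Response shape follows the examples documented above.
for item in search_response.json()["body"]:
    print(item)
```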
### 12. Get Settings
- **URL**: `/settings`
- **Method**: `GET`
- **Auth Required**: No
- **Description**: Retrieve the current system settings.
**Response**:
```json
{
"status": 200,
"body": {
"llm": {...},
"vectorDB": {...},
...
}
}
```
### 13. Save Settings
- **URL**: `/settings`
- **Method**: `POST`
- **Auth Required**: No
- **Description**: Save new settings for the system, including LLM and vector DB configurations.
**Request Body**:
- `llm`: Optional. The configuration for the LLM provider.
- `vectorDB`: Optional. The configuration for the vector database provider.
**Response**:
```json
{
"status": 200
}
```


@ -1,6 +0,0 @@
Graph data models are fundamental structures used to represent and store data in the form of graphs, which consist of nodes (or vertices) and edges (or links). This model is particularly effective for illustrating relationships and connections among various data entities, making it invaluable in domains such as social networks, recommendation systems, logistics, biological networks, and more. Here's an overview of key concepts and types of graph data models:
Key Concepts:
Nodes (Vertices): Represent entities or objects within the graph, such as people in a social network, stations in a transportation map, or proteins in biological networks.
Edges (Links): Depict the relationships or interactions between nodes. Edges can be directed (indicating a one-way relationship) or undirected (indicating a mutual relationship).
Properties: Both nodes and edges can have properties (key-value pairs) that provide additional information, such as weights, types, or other attributes relevant to the application.
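To make these terms concrete, here is a minimal, dependency-free sketch of a property graph in Python; the node and edge names are invented purely for illustration.
```python
# Nodes: entities identified by a key, each carrying properties (key-value pairs).
nodes = {
    "alice": {"type": "person", "age": 34},
    "bob": {"type": "person", "age": 29},
    "acme": {"type": "company", "industry": "software"},
}

# Edges: directed relationships between nodes, also carrying properties.
edges = [
    ("alice", "bob", {"relationship": "knows", "since": 2018}),
    ("alice", "acme", {"relationship": "works_at", "role": "engineer"}),
]

# Traversal example: list Alice's outgoing relationships.
for source, target, properties in edges:
    if source == "alice":
        print(f"alice -[{properties['relationship']}]-> {target}")
```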


@ -1,8 +0,0 @@
Function calling in the context of Large Language Models (LLMs) like GPT-3, GPT-4, and their derivatives extends beyond traditional programming paradigms. In this scenario, function calling involves prompting the LLM to simulate the behavior of a function within its generated output. This capability allows users to interact with LLMs in a structured way, effectively requesting specific operations or information retrieval tasks by framing their prompts as function calls.
How LLM Function Calling Works:
Prompt Construction: The user constructs a prompt that mimics a function call in programming. This prompt includes the "name" of the function (often a description of the task) and the "arguments" (the specific inputs or conditions for the task). For example, a prompt might look like "Generate a summary for the following article:" followed by the article text.
LLM Interpretation: The LLM interprets this structured prompt and understands it as a request to perform a specific task, similar to how a function in a program would be invoked. The model then generates an output that aligns with the expected behavior of the function described in the prompt.
Parameters and Outputs: In LLM function calling, the parameters are the details provided in the prompt, and the output is the generated text that the model produces in response. This output is intended to fulfill the function's "purpose" as inferred from the prompt.
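In practice, this structured prompting is often expressed as a schema the model is asked to satisfy. The sketch below shows one common shape of such a definition (an OpenAI-style tool schema, used here only to illustrate the pattern; it is not part of this document's stack).
```python
# A function definition the model can "call": the name and parameter schema
# describe the task, and the model returns arguments matching this schema.
summarize_tool = {
    "type": "function",
    "function": {
        "name": "summarize_article",
        "description": "Generate a short summary of the provided article text.",
        "parameters": {
            "type": "object",
            "properties": {
                "summary": {"type": "string", "description": "Three-sentence summary."},
                "keywords": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Main topics mentioned in the article.",
                },
            },
            "required": ["summary", "keywords"],
        },
    },
}

print(summarize_tool["function"]["name"])
```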


@ -1 +0,0 @@
A multilayer graph network is a sophisticated structure used to model complex systems where entities and their interactions can exist in multiple layers, each representing a different type of relationship, context, or domain. Unlike traditional graphs that capture connections in a single, uniform setting, multilayer graphs provide a more nuanced framework, allowing for the representation of diverse interconnections and dependencies across various dimensions or layers.


@ -1,11 +0,0 @@
Propositions are fundamental elements in the study of logic, linguistics, and natural language processing. They represent atomic expressions within texts that encapsulate distinct factoids, conveying specific pieces of information. In essence, a proposition is a declarative statement that can either be true or false, but not both simultaneously.
This binary nature makes propositions crucial for logical deductions, reasoning, and the construction of arguments.
In a natural language context, propositions are presented in a concise and self-contained format.
They are designed to convey information clearly and unambiguously, making them easily interpretable by humans and computable by machines. For example, the statement "The Eiffel Tower is in Paris" is a proposition because it presents a specific fact about the location of the Eiffel Tower, and its truth value can be assessed as either true or false.
The concept of propositions extends beyond mere statements of fact to include assertions about concepts, relationships, and conditions.
For instance, "If it rains, the ground gets wet" is a conditional proposition that establishes a cause-and-effect relationship between two events.
In computational linguistics and natural language processing, propositions are vital for tasks such as information extraction, knowledge representation, and question answering.


@ -1,87 +0,0 @@
# Conceptual Overview - cognee
## Introduction
!!! info "What is cognee?"
cognee is a data processing framework that enables LLMs to produce deterministic and traceable outputs.
cognee assists developers in introducing greater predictability and management into their Retrieval-Augmented Generation (RAG) workflows through the use of graph architectures, vector stores, and auto-optimizing pipelines.
Displaying information as a graph is the clearest way to grasp the content of your documents. Crucially, graphs allow systematic navigation and extraction of data from documents based on their hierarchy.
## Core Concepts
### Concept 1: Data Pipelines
Most of the data we provide to a system can be categorized as unstructured, semi-structured, or structured. Rows from a database would belong to structured data, JSON documents to semi-structured data, and logs that we feed into the system could be considered unstructured.
To organize and process this data, we need to ensure we have custom loaders for all data types, which can help us unify and organize it properly.
<figure markdown>
![Data Pipelines](img/pipelines.png)
<figcaption>Data Pipeline Example</figcaption>
</figure>
In the example above, we have a pipeline in which data has been imported from various sources, normalized, and stored in a database. Relevant identifiers and relationships between the data are also created in this process.
To create an effective data pipeline for processing various types of data—structured, semi-structured, and unstructured—it's crucial to understand each type's specific handling and processing needs. Let's expand on the concepts involved in setting up such a data pipeline.
Data Types and Their Handling
- Structured Data: This includes data that adheres to a fixed schema, such as rows in a relational database or data in CSV files. The processing of structured data typically involves SQL queries for extraction, transformations through simple functions or procedures, and loading into destination tables or databases.
- Semi-structured Data: JSON files, XML, or even some APIs' data fit this category. These data types don't have a rigid schema but have some organizational properties that can be exploited. Semi-structured data often requires parsers that can navigate its structure (like trees for XML or key-value pairs for JSON) to extract necessary information. Libraries such as json in Python or lxml for XML handling can be very useful here.
- Unstructured Data: This category includes text files, logs, or even images and videos.
### Concept 2: Data Enrichment with LLMs
LLMs are adept at processing unstructured data. They can easily extract summaries, keywords, and other useful information from documents. We use function calling with Pydantic models to extract information from the unstructured data.
<figure markdown>
![Data Enrichment](img/enrichment.png)
<figcaption>Data Enrichment Example</figcaption>
</figure>
We decompose the loaded content into graphs, allowing us to more precisely map out the relationships between entities and concepts.
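As an illustration of the idea, a Pydantic schema that function calling could target might look like the sketch below. The class and field names are invented for this example and are not cognee's actual data models.
```python
from typing import List

from pydantic import BaseModel


class ExtractedEdge(BaseModel):
    source: str
    relationship: str
    target: str


class ExtractedGraph(BaseModel):
    summary: str
    keywords: List[str]
    edges: List[ExtractedEdge]


# The LLM is prompted (via function calling) to return JSON matching this
# schema; the validated result can then be turned into graph nodes and edges.
graph = ExtractedGraph(
    summary="NLP is an interdisciplinary subfield of computer science.",
    keywords=["NLP", "computer science"],
    edges=[ExtractedEdge(source="NLP", relationship="subfield_of", target="computer science")],
)
print(graph.edges[0])
```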
### Concept 3: Graphs
Knowledge graphs simply map out knowledge, linking specific facts and their connections.
When Large Language Models (LLMs) process text, they infer these links, leading to occasional inaccuracies due to their probabilistic nature.
Clearly defined relationships enhance their accuracy.
This structured approach can extend beyond concepts to document layouts, pages, or other organizational schemas.
<figure markdown>
![Graph structure](img/graph_structure.png)
<figcaption>Graph Structure</figcaption>
</figure>
### Concept 4: Vector and Graph Retrieval
Cognee lets you use multiple vector and graph retrieval methods to find the most relevant information.
!!! info "Learn more?"
Check out learning materials to see how you can use these methods in your projects.
### Concept 5: Auto-Optimizing Pipelines
Integrating knowledge graphs into Retrieval-Augmented Generation (RAG) pipelines leads to an intriguing outcome: the system's adeptness at contextual understanding allows it to be evaluated in a way Machine Learning (ML) engineers are accustomed to.
This involves bombarding the RAG system with hundreds of synthetic questions, enabling the knowledge graph to evolve and refine its context autonomously over time.
This method paves the way for developing self-improving memory engines that can adapt to new data and user feedback.
## Architecture Overview
A high-level diagram of cognee's architecture, illustrating the main components and their interactions.
<figure markdown>
![Architecture](img/architecture.png)
<figcaption>Architecture</figcaption>
</figure>
Main components:
- **Data Pipelines**: Responsible for ingesting, processing, and transforming data from various sources.
- **LLMs**: Large Language Models that process unstructured data and generate text.
- **Graph Store**: Knowledge graphs that represent relationships between entities and concepts.
- **Vector Store**: Database that stores vector representations of data for efficient retrieval.
- **Search**: Retrieves relevant information from the knowledge graph and vector stores.
## How It Fits Into Your Projects
!!! info "How cognee fits into your projects"
cognee is a self-contained library that simplifies the process of loading and structuring data in LLMs.
By integrating cognee into your data pipelines, you can leverage the power of LLMs, knowledge graphs, and vector retrieval to create accurate and explainable AI solutions.
cognee provides a self-contained library that simplifies the process of loading and structuring LLM context, enabling you to create accurate and explainable AI solutions.


@ -1,93 +0,0 @@
# Configuration
## 🚀 Configure Vector and Graph Stores
You can configure the vector and graph stores using the environment variables in your .env file or programmatically.
We use [Pydantic Settings](https://docs.pydantic.dev/latest/concepts/pydantic_settings/#dotenv-env-support)
We have a global configuration object (cognee.config) and individual configurations at the pipeline and data store levels.
Check available configuration options:
``` python
from cognee.infrastructure.databases.vector import get_vectordb_config
from cognee.infrastructure.databases.graph.config import get_graph_config
from cognee.infrastructure.databases.relational import get_relational_config
from cognee.infrastructure.llm.config import get_llm_config
print(get_vectordb_config().to_dict())
print(get_graph_config().to_dict())
print(get_relational_config().to_dict())
print(get_llm_config().to_dict())
```
Set the environment variables in your .env file and Pydantic will pick them up:
```bash
GRAPH_DATABASE_PROVIDER = 'lancedb'
```
Otherwise, you can set the configuration yourself:
```python
cognee.config.set_llm_provider('ollama')
```
## 🚀 Getting Started with Local Models
You'll need to run the local model on your machine or use one of the providers hosting the model.
!!! note "We had some success with mixtral, but 7b models did not work well. We recommend using mixtral for now."
### Ollama
Set up Ollama by following instructions on [Ollama website](https://ollama.com/)
Set the environment variable in your .env to use the model
```bash
LLM_PROVIDER = 'ollama'
```
Otherwise, you can set the configuration for the model:
```python
cognee.config.set_llm_provider('ollama')
```
You can also set the HOST and model name:
```python
cognee.config.set_llm_endpoint("http://localhost:11434/v1")
cognee.config.set_llm_model("mistral:instruct")
```
### Anyscale
```bash
LLM_PROVIDER = 'custom'
```
Otherwise, you can set the configuration for the model:
```bash
cognee.config.set_llm_provider('custom')
```
You can also set the HOST and model name:
```bash
LLM_MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1"
LLM_ENDPOINT = "https://api.endpoints.anyscale.com/v1"
LLM_API_KEY = "your_api_key"
```
You can set the host and model name in the same way for any other provider that exposes an API endpoint.


@ -1,32 +0,0 @@
# Data Engineering and LLMOps
!!! tip "This is a work in progress and any feedback is welcome"
## Table of Contents
1. [Data Engineering](#data-engineering)
2. [Large Language Model Operations (LLM Ops)](#large-language-model-operations-llm-ops)
## Data Engineering
Data Engineering focuses on managing and analyzing big data. It revolves around five key aspects:
### Volume
The size and amount of data that companies manage and analyze.
### Value
The insights and patterns derived from data that lead to business benefits.
### Variety
The diversity of data types, including unstructured, semi-structured, and raw data.
### Velocity
The speed at which data is received, stored, and managed.
### Veracity
The accuracy or truthfulness of data.
## Large Language Model Operations (LLM Ops)
The emerging field of Large Language Model Operations (LLM Ops) inherits many practices from data engineering. LLM Ops involves the deployment, monitoring, and maintenance of systems that use LLMs to manage and build a new generation of AI-powered applications.
For more in-depth information on LLM Ops, see [Resource Name](link-to-resource).


@ -1,46 +0,0 @@
# How data ingestion with cognee works
# Why bother with data ingestion?
In order to use cognee, you need to ingest data into the cognee data store.
This data can be events, customer data, or third-party data.
In order to build reliable models and pipelines, we need to structure and process various types of datasets and data sources in the same way.
Some of the operations like normalization, deduplication, and data cleaning are common across all data sources.
This is where cognee comes in. It provides a unified interface to ingest data from various sources and process it in a consistent way.
For this we use dlt (Data Loading Tool) which is a part of cognee infrastructure.
# Example
Let's say you have a dataset of customer reviews in a PDF file. You want to ingest this data into cognee and use it to train a model.
You can use the following code to ingest the data:
```python
dataset_name = "artificial_intelligence"
ai_text_file_path = os.path.join(pathlib.Path(__file__).parent, "test_data/artificial-intelligence.pdf")
await cognee.add([ai_text_file_path], dataset_name)
```
cognee uses dlt to ingest the data and allows you to use:
1. SQL databases. Supports PostgreSQL, MySQL, MS SQL Server, BigQuery, Redshift, and more.
2. REST API generic source. Loads data from REST APIs using declarative configuration.
3. OpenAPI source generator. Generates a source from an OpenAPI 3.x spec using the REST API source.
4. Cloud and local storage. Retrieves data from AWS S3, Google Cloud Storage, Azure Blob Storage, local files, and more.
# What happens under the hood?
We use dlt as a loader to ingest data into the cognee metadata store. We can ingest data from various sources like SQL databases, REST APIs, OpenAPI specs, and cloud storage.
This enables us to have a common data model we can then use to build models and pipelines.
The models and pipelines we build in this way end up in the cognee data store, which is a unified interface to access the data.
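For intuition, this is roughly what a standalone dlt load looks like. It is a generic dlt example, not cognee's internal wiring, and the pipeline, dataset, and table names are made up.
```python
import dlt

# A generic dlt pipeline: normalize a batch of records and load them into a
# local DuckDB destination. cognee drives dlt similarly to land ingested data
# in its metadata store.
pipeline = dlt.pipeline(
    pipeline_name="customer_reviews",
    destination="duckdb",
    dataset_name="raw_reviews",
)

reviews = [
    {"id": 1, "text": "Great product", "rating": 5},
    {"id": 2, "text": "Could be better", "rating": 3},
]

load_info = pipeline.run(reviews, table_name="reviews")
print(load_info)
```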


@ -1,31 +0,0 @@
# New to cognee?
The getting started guide covers adding a cognee data store to your AI app, sending data, identifying users, extracting actions and insights, and interconnecting separate datasets.
[Get started](quickstart.md)
## Ingest Data
Learn how to manage the ingestion of events, customer data, or third-party data for use with cognee.
[Explore](data_ingestion.md)
## Tasks and Pipelines
Analyze and enrich your data and improve LLM answers with a series of tasks and pipelines.
[Learn about tasks](templates.md)
## API
Push or pull data to build custom functionality or create bespoke views for your business needs.
[Explore](api_reference.md)
## Resources
### Resources
- [Research](research.md)
- [Community](https://discord.gg/52QTb5JK){:target="_blank"}


@ -1,60 +0,0 @@
# Running cognee with local models
## 🚀 Getting Started with Local Models
You'll need to run the local model on your machine or use one of the providers hosting the model.
!!! note "We had some success with mixtral, but 7b models did not work well. We recommend using mixtral for now."
### Ollama
Set up Ollama by following instructions on [Ollama website](https://ollama.com/)
Set the environment variable in your .env to use the model
```bash
LLM_PROVIDER = 'ollama'
```
Otherwise, you can set the configuration for the model:
```python
cognee.config.llm_provider = 'ollama'
```
You can also set the HOST and model name:
```python
cognee.config.llm_endpoint = "http://localhost:11434/v1"
cognee.config.llm_model = "mistral:instruct"
```
### Anyscale
```bash
LLM_PROVIDER = 'custom'
```
Otherwise, you can set the configuration for the model:
```python
cognee.config.llm_provider = 'custom'
```
You can also set the HOST and model name:
```bash
LLM_MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1"
LLM_ENDPOINT = "https://api.endpoints.anyscale.com/v1"
LLM_API_KEY = "your_api_key"
```
You can set the host and model name in the same way for any other provider that exposes an API endpoint.


@ -1,17 +0,0 @@
{% extends "base.html" %}
{% block meta %}
{{ super() }}
<meta property="og:type" content="website" />
<meta property="og:site_name" content="cognee" />
<meta property="og:description" content="Deterministic data engine for LLMs" />
<meta property="og:title" content="{{ page.title|striptags }}" />
<meta property="og:url" content="{{ page.canonical_url }}" />
<meta name="twitter:card" content="summary_large_image" />
<meta name="twitter:site" content="@tricalt" />
<meta name="twitter:creator" content="@tricalt" />
<meta name="twitter:title" content="{{ page.title|striptags }}" />
<meta name="twitter:description" content="desc" />
{% endblock %}


@ -1,15 +0,0 @@
<script>
var segmentKey = "{{ config.extra.analytics.key }}"
/* Wait for page to load and application to mount */
document.addEventListener("DOMContentLoaded", function() {
try {
!function(){var i="analytics",analytics=window[i]=window[i]||[];if(!analytics.initialize)if(analytics.invoked)window.console&&console.error&&console.error("Segment snippet included twice.");else{analytics.invoked=!0;analytics.methods=["trackSubmit","trackClick","trackLink","trackForm","pageview","identify","reset","group","track","ready","alias","debug","page","screen","once","off","on","addSourceMiddleware","addIntegrationMiddleware","setAnonymousId","addDestinationMiddleware","register"];analytics.factory=function(e){return function(){if(window[i].initialized)return window[i][e].apply(window[i],arguments);var n=Array.prototype.slice.call(arguments);if(["track","screen","alias","group","page","identify"].indexOf(e)>-1){var c=document.querySelector("link[rel='canonical']");n.push({__t:"bpc",c:c&&c.getAttribute("href")||void 0,p:location.pathname,u:location.href,s:location.search,t:document.title,r:document.referrer})}n.unshift(e);analytics.push(n);return analytics}};for(var n=0;n<analytics.methods.length;n++){var key=analytics.methods[n];analytics[key]=analytics.factory(key)}analytics.load=function(key,n){var t=document.createElement("script");t.type="text/javascript";t.async=!0;t.setAttribute("data-global-segment-analytics-key",i);t.src="https://cdn.segment.com/analytics.js/v1/" + key + "/analytics.min.js";var r=document.getElementsByTagName("script")[0];r.parentNode.insertBefore(t,r);analytics._loadOptions=n};analytics._writeKey=segmentKey;;analytics.SNIPPET_VERSION="5.2.0";
analytics.load(segmentKey);
analytics.page();
}}();
} catch (error) {
console.error("Failed to load Segment analytics", error);
}
});
</script>


@ -1,81 +0,0 @@
# PIPELINES
Cognee uses [tasks](https://github.com/topoteretes/cognee/blob/main/cognee/modules/pipelines/tasks/Task.py) grouped into pipelines that populate graph and vector stores. [These tasks](https://github.com/topoteretes/cognee/tree/main/cognee/tasks) analyze and enrich data, enhancing the quality of answers produced by Large Language Models (LLMs).
The tasks are managed and executed asynchronously using the `run_tasks` and `run_tasks_parallel` functions.
```python
pipeline = run_tasks(tasks, documents)
async for result in pipeline:
print(result)
```
## Main pipeline: [cognee.cognify](https://github.com/topoteretes/cognee/blob/168cb5d1bf1964b5b0c645b2f3d8638d84554fda/cognee/api/v1/cognify/cognify_v2.py#L38)
This is the main pipeline currently implemented in cognee. It is designed to process data in a structured way and populate the graph and vector stores.
This function is the entry point for processing datasets. It handles dataset retrieval, user authorization, and manages the execution of a pipeline of tasks that process documents.
### Parameters
- `datasets: Union[str, list[str]] = None`: A string or list of dataset names to be processed.
- `user: User = None`: The user requesting the processing. If not provided, the default user is retrieved.
### Steps in the Function
#### User Authentication
```python
if user is None:
    user = await get_default_user()
```
If no user is provided, the function retrieves the default user.
#### Handling Empty or String Dataset Input
```python
existing_datasets = await get_datasets(user.id)

if datasets is None or len(datasets) == 0:
    datasets = existing_datasets

if type(datasets[0]) == str:
    datasets = await get_datasets_by_name(datasets, user.id)
```
If no datasets are provided, the function retrieves all datasets owned by the user. If a list of dataset names (strings) is provided, they are converted into dataset objects.
#### Selecting datasets from the input list that are owned by the user
```python
existing_datasets_map = {
    generate_dataset_name(dataset.name): True for dataset in existing_datasets
}
```
#### Run Cognify Pipeline for Each Dataset
```python
awaitables = []

for dataset in datasets:
    dataset_name = generate_dataset_name(dataset.name)

    if dataset_name in existing_datasets_map:
        awaitables.append(run_cognify_pipeline(dataset, user))

return await asyncio.gather(*awaitables)
```
The `run_cognify_pipeline` function is defined within `cognify` and is responsible for processing a single dataset. This is where most of the heavy lifting occurs. The function processes multiple datasets concurrently using `asyncio.gather`.
#### Pipeline Tasks
The pipeline consists of several tasks, each responsible for different parts of the processing (a minimal composition sketch follows the list):
- `classify_documents`: Converts each of the documents into one of the specific Document types: PdfDocument, AudioDocument, ImageDocument or TextDocument
- `check_permissions_on_documents`: Checks if the user has the necessary permissions to access the documents. In this case, it checks for "write" permission.
- `extract_chunks_from_documents`: Extracts text chunks based on the document type.
- `add_data_points`: Creates nodes and edges from the chunks and their properties. Adds them to the graph engine.
- `extract_graph_from_data`: Generates knowledge graphs from the document chunks.
- `summarize_text`: Extracts a summary for each chunk using an LLM.
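Conceptually, each task is a callable whose output feeds the next task. The self-contained toy sketch below mimics that pattern; it is not cognee's actual `Task`/`run_tasks` implementation, and the helper names are invented for illustration.
```python
import asyncio


async def classify_documents(documents):
    # Toy stand-in: tag each document with a type.
    return [{"text": doc, "type": "TextDocument"} for doc in documents]


async def extract_chunks_from_documents(documents):
    # Toy stand-in: split each document's text into fixed-size chunks.
    return [doc["text"][i:i + 20] for doc in documents for i in range(0, len(doc["text"]), 20)]


async def run_tasks_sketch(tasks, data):
    # Each task receives the previous task's output, mirroring the run_tasks
    # idea described above (heavily simplified).
    for task in tasks:
        data = await task(data)
        yield data


async def main():
    documents = ["Natural language processing is an interdisciplinary subfield."]
    async for intermediate in run_tasks_sketch([classify_documents, extract_chunks_from_documents], documents):
        print(intermediate)


asyncio.run(main())
```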


@ -1,69 +0,0 @@
# QUICKSTART
!!! tip "To understand how cognee works check out the [conceptual overview](conceptual_overview.md)"
## Setup
To run cognee, you will need the following:
1. OpenAI API key (Ollama or Anyscale could work as [well](local_models.md))
Add your LLM API key to the environment variables
```
import os
os.environ["LLM_API_KEY"] = "YOUR_OPENAI_API_KEY"
```
or
```
cognee.config.llm_api_key = "YOUR_OPENAI_API_KEY"
```
If you are using Networkx, create an account on Graphistry to visualize results:
```
cognee.config.set_graphistry_config({
"username": "YOUR_USERNAME",
"password": "YOUR_PASSWORD"
})
```
If you want to run Postgres instead of SQLite, start the Postgres Docker container.
Navigate to cognee folder and run:
```
docker compose up postgres
```
Add the following environment variables to your .env file:
```
DB_HOST=127.0.0.1
DB_PORT=5432
DB_USERNAME=cognee # or any username you want
DB_PASSWORD=cognee # or any password you want
DB_NAME=cognee_db # or any db name you want
DB_PROVIDER=postgres
```
## Run
cognee is asynchronous by design, meaning that operations like adding information, processing it, and querying it can run concurrently without blocking the execution of other tasks.
Make sure to await the results of the functions that you call.
```
import cognee
from cognee import SearchType
text = """Natural language processing (NLP) is an interdisciplinary
subfield of computer science and information retrieval"""
await cognee.add(text) # Add a new piece of information
await cognee.cognify() # Use LLMs and cognee to create knowledge
search_results = await cognee.search(SearchType.INSIGHTS, query_text='Tell me about NLP') # Query cognee for the knowledge
for result_text in search_results:
print(result_text)
```
In the example above, we add a piece of information to cognee, use LLMs to create a GraphRAG, and then query cognee for the knowledge.
cognee is composable, and you can build your own cognee pipelines using our [templates](templates.md).


@ -1,78 +0,0 @@
## RAG Stack
Core elements of a RAG stack are the building blocks that we can use to get to more personalized and deterministic outputs.
!!! tip "This is a work in progress and any feedback is welcome"
## What is a RAG?
!!! note "What is RAG?"
RAG stands for Retrieval Augmented Generation. It is a model that combines the power of large language models (LLMs) like GPT-4 with the efficiency of information retrieval systems. The goal of RAG is to generate text that is both fluent and factually accurate by retrieving relevant information from a knowledge base.
To build a simple RAG and understand its limitations, check out this guide with examples: [RAGs: Retrieval-Augmented Generation Explained](rag/rag_explained.md)
## The Building Blocks of a RAG Stack
### 1. Data Sources
You can get your data from a variety of sources, including:
- APIs like Twitter, Reddit, and Google
- Web scraping tools like Scrapy and Beautiful Soup
- Documents like PDFs, Word, and Excel files
- Relational databases like DuckDB, PSQL and MySQL
- Data warehouses like Snowflake and Databricks
- Customer data platforms like Segment
<figure markdown>
![Data Sources](img/sources.png)
<figcaption>Some data sources</figcaption>
</figure>
The goal here is to give the data structure and connect it so that it can be used in your deterministic LLM stack.
### 2. Data Loaders
<figure markdown>
![Data Loader](img/loaders.png)
<figcaption>Data Loaders</figcaption>
</figure>
Data loading into a data lake or warehouse involves using tools like Apache Airflow, dlt, dbt, and Databricks. The process includes data extraction, transformation, and loading for model usage, aiming for a clean, structured dataset ready for enrichment.
Check out how we do it with dlt: [Data Loading Tool (dlt)](dlt/dlt.md)
### 3. Vector Computation and Vector Stores
Data is transformed into vectors using OpenAI or custom models. Understanding where to run these models and integrating your computing infrastructure with tools like custom spark pipelines is essential. The aim is to achieve ready-to-use pipelines and models.
<figure markdown>
![Vector Stores](img/vector_dbs.png)
<figcaption>Vector Stores </figcaption>
</figure>
Image [Source](https://blog.det.life/why-you-shouldnt-invest-in-vector-databases-c0cd3f59d23c)
### 4. Graph Computation and Graph Stores
Creating a knowledge graph from your data allows for querying and information retrieval. It's essential to know how to construct, maintain, and use it for text generation. The aim is an accurate, current, and easily queried knowledge graph.
<figure markdown>
![Graph Stores](img/graph_example.png)
<figcaption>Graph Example</figcaption>
</figure>
### 5. Search
The process involves querying and retrieving vectors from Vector DBs or hybrid DBs, and using search tools to rank these vectors. The aim is to index vectors and search for relevant ones as needed.
#### Vector Similarity Search
Identifies objects with vector representations closest to the query vector, finding the most similar items based on various dimensions of comparison.
#### Image Search
Utilizes images as the input for conducting a similarity search, analyzing the content of the image to find similar images based on visual features.
#### Keyword Search
Employs the BM25F algorithm for ranking results based on keyword matches. Relevance is calculated using term frequency, inverse document frequency, and field-length normalization.
#### Hybrid Search
Merges the BM25 algorithm with vector similarity search techniques to enhance the relevance and accuracy of search results. Leverages both textual and vector-based features for ranking.
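As a rough illustration of the idea (not any particular engine's implementation), a hybrid score can be computed as a weighted blend of a normalized keyword score and a vector-similarity score:
```python
def hybrid_score(bm25_score: float, cosine_similarity: float, alpha: float = 0.5) -> float:
    """Blend a keyword (BM25) score with a vector-similarity score.

    alpha = 1.0 ranks purely by vector similarity, alpha = 0.0 purely by keywords.
    Both scores are assumed to be normalized to [0, 1] beforehand.
    """
    return alpha * cosine_similarity + (1 - alpha) * bm25_score


# Example: re-rank three candidate documents by their blended score.
candidates = {
    "doc_a": {"bm25": 0.82, "cosine": 0.35},
    "doc_b": {"bm25": 0.40, "cosine": 0.91},
    "doc_c": {"bm25": 0.65, "cosine": 0.60},
}

ranked = sorted(
    candidates.items(),
    key=lambda item: hybrid_score(item[1]["bm25"], item[1]["cosine"]),
    reverse=True,
)
print(ranked)
```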
#### Generative Search
Utilizes the outputs of search results as prompts for a Large Language Model (LLM). Can generate summaries, extrapolations, or new content based on the aggregated search results.
#### Reranking
Involves the application of a reranker module to adjust the initial ranking of search results. Optimizes result relevance based on additional criteria or more complex models.
#### Aggregation
Involves compiling and summarizing data from a set of search results. Provides insights or overviews based on the collective information found.
#### Filters
Apply constraints or conditions to the search process to narrow down the results. Filters can be based on specific attributes, metadata, or other criteria relevant to the search domain.
#### Graph Search
Involves traversing a graph data structure to find specific nodes or paths. It can be used to find relationships between different entities in a knowledge graph.


@ -1,62 +0,0 @@
# Research
This page collects research gathered over the past year from various sources.
This is not an exhaustive list, and PRs are welcome.
### Research Papers
- [2024/06/04] [Symbolic reasoning](https://arxiv.org/abs/2402.01817)
- [2024/06/04] [Transformers and episodic memory](https://arxiv.org/abs/2405.14992)
- [2024/03/24] [Graph Chain-of-Thought: Augmenting Large Language Models by Reasoning on Graphs](https://arxiv.org/abs/2404.07103)
- [2024/03/24] [Leave No Context Behind: Efficient Infinite Context Transformers with Infini-attention](https://arxiv.org/abs/2404.07143)
- [2024/03/24] [Compound AI systems](https://bair.berkeley.edu/blog/2024/02/18/compound-ai-systems/)
- [2015/07/30] [Multilayer Network of Language](https://arxiv.org/abs/1507.08539)
- [2023/12/12] [Dense X Retrieval: What Retrieval Granularity Should We Use?](https://arxiv.org/pdf/2312.06648.pdf)
- [2024/01/05] [Retrieval-Augmented Generation for Large Language Models: A Survey](https://arxiv.org/pdf/2312.10997.pdf)
- [2022/10/20] [Cognitive modelling with multilayer networks: Insights, advancements and future challenges](https://arxiv.org/pdf/2210.00500.pdf)
- [2023/09/20] CoAla framework and relevant literature [literature](https://github.com/ysymyth/awesome-language-agents)
- [2023/06/09] [Mind2Web: Towards a Generalist Agent for the Web](https://arxiv.org/pdf/2306.06070.pdf), Xiang Deng, et al. [[code]](https://github.com/OSU-NLP-Group/Mind2Web) [[demo]](https://osu-nlp-group.github.io/Mind2Web/)
- [2023/06/28] AI Agents in Langchain [https://docs.google.com/presentation/d/1L_CHsg26sDxPmKj285Ob5T2xsAUejBlfiGQSnsSHTk0/edit#slide=id.g254e571859c_0_164](https://docs.google.com/presentation/d/1L_CHsg26sDxPmKj285Ob5T2xsAUejBlfiGQSnsSHTk0/edit#slide=id.g254e571859c_0_164)
- [2023/06/27] Agent infra [https://lilianweng.github.io/posts/2023-06-23-agent/](https://lilianweng.github.io/posts/2023-06-23-agent/)
- [2023/06/05] [Orca: Progressive Learning from Complex Explanation Traces of GPT-4](https://arxiv.org/pdf/2306.02707.pdf), Subhabrata Mukherjee et al.
- [2023/05/25] 📚[Voyager: An Open-Ended Embodied Agent with Large Language Models](https://arxiv.org/pdf/2305.16291.pdf), Guanzhi Wang, et al. [[code]](https://github.com/MineDojo/Voyager) [[website]](https://voyager.minedojo.org/)
- [2023/05/24] 📚[Gorilla: Large Language Model Connected with Massive APIs](https://arxiv.org/abs/2305.15334), Shishir G. Patil, et al.
- [2023/05/17] 📚[Tree of Thoughts: Deliberate Problem Solving with Large Language Models](https://arxiv.org/abs/2305.10601), Shunyu Yao, et al.[[code]](https://github.com/kyegomez/tree-of-thoughts) [[code-orig]](https://github.com/ysymyth/tree-of-thought-llm)
- [2023/05/12] 📚[MEGABYTE: Predicting Million-byte Sequences with Multiscale Transformers](https://arxiv.org/abs/2305.07185), Lili Yu, et al.
- [2023/05/09] 📚[FrugalGPT: How to Use Large Language Models While Reducing Cost and Improving Performance](https://arxiv.org/abs/2305.05176), Lingjiao Chen, et al.
- [2023/05/01] 📚[Learning to Reason and Memorize with Self-Notes](https://arxiv.org/abs/2305.00833), Jack Lanchantin, et al.
- [2023/04/24] 📚[WizardLM: Empowering Large Language Models to Follow Complex Instructions](https://arxiv.org/abs/2304.12244), Can Xu, et al.
- [2023/04/22] 📚[LLM+P: Empowering Large Language Models with Optimal Planning Proficiency](https://arxiv.org/abs/2304.11477), Bo Liu, et al.
- [2023/04/07] 📚[Generative Agents: Interactive Simulacra of Human Behavior](https://arxiv.org/abs/2304.03442), Joon Sung Park, et al. [[code]](https://github.com/mkturkcan/generative-agents)
- [2023/03/30] [Self-Refine: Iterative Refinement with Self-Feedback](https://arxiv.org/abs/2303.17651), Aman Madaan, et al.[[code]](https://github.com/madaan/self-refine)
- [2023/03/30] [HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in HuggingFace](https://arxiv.org/pdf/2303.17580.pdf), Yongliang Shen, et al. [[code]](https://github.com/microsoft/JARVIS) [[demo]](https://huggingface.co/spaces/microsoft/HuggingGPT)
- [2023/03/20] [Reflexion: Language Agents with Verbal Reinforcement Learning](https://arxiv.org/pdf/2303.11366.pdf), Noah Shinn , et al. [[code]](https://github.com/noahshinn024/reflexion)
- [2023/02/23] 📚[Not what you've signed up for: Compromising Real-World LLM-Integrated Applications with Indirect Prompt Injection](https://arxiv.org/abs/2302.12173), Sahar Abdelnabi, et al.
- [2023/02/09] 📚[Toolformer: Language Models Can Teach Themselves to Use Tools](https://arxiv.org/pdf/2302.04761.pdf), Timo Schick, et al. [[code]](https://github.com/lucidrains/toolformer-pytorch)
- [2022/12/12] 📚[LMQL: Prompting Is Programming: A Query Language for Large Language Models](https://arxiv.org/abs/2212.06094), Luca Beurer-Kellner, et al.
- [2022/10/06] [ReAct: Synergizing Reasoning and Acting in Language Models](https://arxiv.org/pdf/2210.03629.pdf), Shunyu Yao, et al. [[code]](https://github.com/ysymyth/ReAct)
- [2022/07/12] 📚[Inner Monologue: Embodied Reasoning through Planning with Language Models](https://arxiv.org/pdf/2207.05608.pdf), Wenlong Huang, et al. [[demo]](https://innermonologue.github.io/)
- [2022/04/04] [Do As I Can, Not As I Say: Grounding Language in Robotic Affordances](https://github.com/Significant-Gravitas/Nexus/wiki/Awesome-Resources), Michael Ahn, e al. [[demo]](https://say-can.github.io/)
- [2021/12/17] [WebGPT: Browser-assisted question-answering with human feedback](https://arxiv.org/pdf/2112.09332.pdf), Reiichiro Nakano, et al.
- [2021/06/17] 📚[LoRA: Low-Rank Adaptation of Large Language Models](https://arxiv.org/abs/2106.09685), Edward J. Hu, et al.
- [2023/04/03] [Generative Agents](https://arxiv.org/abs/2304.03442)
- [2023/05/17] [Tree of Thoughts: Deliberate Problem Solving with Large Language Models](https://arxiv.org/abs/2305.10601)
### Knowledge Graphs
- [2023/06/09] [Taxonomies: Overview](https://www.brighttalk.com/webcast/9273/605659?utm_source=brighttalk-portal&utm_medium=web&utm_campaign=topic&utm_content=upcoming)
### Blog Articles
- [2023/04/29] [AUTO-GPT: UNLEASHING THE POWER OF AUTONOMOUS AI AGENTS](https://www.leewayhertz.com/autogpt/) By Akash Takyar
- [2023/04/20] [Conscious Machines: Experiments, Theory, and Implementations(Chinese)](https://pattern.swarma.org/article/230) By Jiang Zhang
- [2023/04/18] [Autonomous Agents & Agent Simulations](https://blog.langchain.dev/agents-round/) By Langchain
- [2023/04/16] [4 Autonomous AI Agents you need to know](https://towardsdatascience.com/4-autonomous-ai-agents-you-need-to-know-d612a643fa92) By Sophia Yang
- [2023/03/31] [ChatGPT that learns to use tools](https://zhuanlan.zhihu.com/p/618448188) By Haojie Pan
### Talks
- [2023/06/05] [Two Paths to Intelligence](https://www.youtube.com/watch?v=rGgGOccMEiY&t=1497s) by Geoffrey Hinton
- [2023/05/24] [State of GPT](https://www.youtube.com/watch?v=bZQun8Y4L2A) by Andrej Karpathy | OpenAI
- [2024/03/15] Podcast on AI, Memory by Bill Gurley


@ -1,21 +0,0 @@
## Cognee Search Module
This module contains the search function that is used to search for nodes in the graph. It supports various search types and integrates with user permissions to filter results accordingly.
### Search Types
The `SearchType` enum defines the different types of searches that can be performed:
- `INSIGHTS`: Search for insights from the knowledge graph.
- `SUMMARIES`: Search for summaries of the texts provided.
- `CHUNKS`: Search for whole chunks of data.
### Search Function
The `search` function is the main entry point for performing a search. It handles user authentication, retrieves document IDs for the user, and filters the search results based on user permissions.
```python
from cognee import search, SearchType
await search(SearchType.INSIGHTS, "your_query")
```
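A slightly fuller sketch of a typical flow: ingest data, build the graph, then query it with each search type. `cognee.add` and `cognee.cognify` are assumed here to be available from the top-level `cognee` package; adjust the calls to match your installed version.

```python
import cognee
from cognee import search, SearchType

# Ingest a document and build the LLM-enriched knowledge graph (assumed API).
await cognee.add("Cognee turns your documents into a knowledge graph.")
await cognee.cognify()

# Query the same graph in three different ways.
insights = await search(SearchType.INSIGHTS, "knowledge graph")
summaries = await search(SearchType.SUMMARIES, "knowledge graph")
chunks = await search(SearchType.CHUNKS, "knowledge graph")
```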

View file

@ -1,51 +0,0 @@
[data-md-color-scheme = "cognee"] {
color-scheme: dark;
--md-default-bg-color: #0C0121;
--md-default-bg-color--light: #240067;
--md-default-fg-color: #57DFD7;
--md-default-fg-color--light: #85ded8;
--md-default-fg-color--dark: #4dc6be;
/* --md-primary-fg-color: #0C0121; */
--md-primary-fg-color: #7233BA;
--md-primary-fg-color--light: #8a49d4;
--md-primary-fg-color--dark: #522488;
/* --md-primary-bg-color: hsla(0, 0%, 100%, 1);
--md-primary-bg-color--light: */
--md-accent-fg-color: #41a29b;
--md-typeset-color: white;
--md-typeset-a-color: #57DFD7;
--md-footer-bg-color: #0C0121;
--md-footer-bg-color--dark: #0C0121;
}
.md-header {
background-color: var(--md-default-bg-color);
}
/* Remove unnecessary title from the header */
.md-header__title {
display: none;
}
/* Spread header elements evenly when there is no title */
.md-header__inner {
justify-content: space-between;
}
.md-tabs {
background-color: var(--md-default-bg-color);
}
.md-button--primary:hover {
background-color: #8a49d4 !important;
}
.md-typeset .md-button {
border-radius: 32px;
}

View file

@ -1,4 +0,0 @@
# Team
![About us](img/team.png)

View file

@ -1,29 +0,0 @@
# Why use cognee?
cognee is one of the first OSS tools that enables easy, scalable and flexible use of LLMs to process large volumes of documents using a GraphRAG approach.
LLMs don't have a semantic layer, and they don't have a way to understand the data they are processing. This is where cognee comes in.
We let you define logical structures for your data and then use these structures to guide the LLMs to process the data in a way that makes sense to you.
cognee helps you avoid the overly complicated set of tools and processes that would otherwise be needed to get reasonably reliable output.
***From***
![Bad Architecture](img/bad_architecture.png)
***To***
![Good Architecture](img/good_architecture.png)
??? note "Why use cognee?"
It's hard to answer the question of why to use cognee without answering why you need thin LLM frameworks in the first place. :)
- **Cost-effective** — cognee extends the capabilities of your LLMs without the need for expensive data processing tools.
- **Self-contained** — cognee runs as a simple-to-use library, meaning you can add it to your application easily.
- **Easy to use** — Navigate graphs instead of embeddings to understand your data faster and better.
- **Flexible** — cognee lets you control your input and provide your own Pydantic data models (see the sketch below).
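A minimal sketch of the kind of user-defined Pydantic models cognee can be guided with. The model names and fields below are purely illustrative, not part of the cognee API.

```python
from typing import List
from pydantic import BaseModel

# Hypothetical domain models: structures like these can be used to guide
# how the LLM organizes extracted entities and relations.
class Person(BaseModel):
    name: str
    role: str

class Organization(BaseModel):
    name: str
    members: List[Person]
```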

View file

@ -1,14 +1,72 @@
from deepeval.metrics import GEval
from deepeval.test_case import LLMTestCaseParams
from deepeval.metrics import BaseMetric, GEval
from deepeval.test_case import LLMTestCase, LLMTestCaseParams
from evals.official_hotpot_metrics import exact_match_score, f1_score
correctness_metric = GEval(
name="Correctness",
model="gpt-4o-mini",
evaluation_params=[
LLMTestCaseParams.ACTUAL_OUTPUT,
LLMTestCaseParams.EXPECTED_OUTPUT
],
evaluation_steps=[
"Determine whether the actual output is factually correct based on the expected output."
]
)
name="Correctness",
model="gpt-4o-mini",
evaluation_params=[
LLMTestCaseParams.ACTUAL_OUTPUT,
LLMTestCaseParams.EXPECTED_OUTPUT
],
evaluation_steps=[
"Determine whether the actual output is factually correct based on the expected output."
]
)
class f1_score_metric(BaseMetric):
    """F1 score taken directly from the official hotpot benchmark
    implementation and wrapped into a deepeval metric."""

    def __init__(self, threshold: float = 0.5):
        self.threshold = threshold

    def measure(self, test_case: LLMTestCase):
        f1, precision, recall = f1_score(
            prediction=test_case.actual_output,
            ground_truth=test_case.expected_output,
        )
        self.score = f1
        self.success = self.score >= self.threshold
        return self.score

    # Reusing regular measure as async F1 score is not implemented
    async def a_measure(self, test_case: LLMTestCase):
        return self.measure(test_case)

    def is_successful(self):
        return self.success

    @property
    def __name__(self):
        return "Official hotpot F1 score"


class em_score_metric(BaseMetric):
    """Exact Match score taken directly from the official hotpot benchmark
    implementation and wrapped into a deepeval metric."""

    def __init__(self, threshold: float = 0.5):
        self.threshold = threshold

    def measure(self, test_case: LLMTestCase):
        self.score = exact_match_score(
            prediction=test_case.actual_output,
            ground_truth=test_case.expected_output,
        )
        self.success = self.score >= self.threshold
        return self.score

    # Reusing regular measure as async EM score is not implemented
    async def a_measure(self, test_case: LLMTestCase):
        return self.measure(test_case)

    def is_successful(self):
        return self.success

    @property
    def __name__(self):
        return "Official hotpot EM score"

View file

@ -111,7 +111,9 @@ if __name__ == "__main__":
parser.add_argument("--with_cognee", action="store_true")
parser.add_argument("--num_samples", type=int, default=500)
parser.add_argument("--metric", type=str, default="correctness_metric")
parser.add_argument("--metric", type=str, default="correctness_metric",
help="Valid options are Deepeval metrics (e.g. AnswerRelevancyMetric) \
and metrics defined in evals/deepeval_metrics.py, e.g. f1_score_metric")
args = parser.parse_args()
@ -120,6 +122,8 @@ if __name__ == "__main__":
metric = metric_cls()
except AttributeError:
metric = getattr(evals.deepeval_metrics, args.metric)
if isinstance(metric, type):
metric = metric()
if args.with_cognee:
answer_provider = answer_with_cognee
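The hunk above lets `--metric` name either a built-in deepeval metric or one of the custom metrics in `evals/deepeval_metrics.py`, instantiating the latter when the lookup returns a class rather than an instance. A standalone sketch of that resolution logic (the helper function name is hypothetical):

```python
import deepeval.metrics
import evals.deepeval_metrics

def resolve_metric(name: str):
    # Prefer deepeval's built-in metrics, e.g. "AnswerRelevancyMetric".
    try:
        return getattr(deepeval.metrics, name)()
    except AttributeError:
        # Fall back to evals/deepeval_metrics.py, e.g. "f1_score_metric"
        # or the pre-built "correctness_metric" instance.
        metric = getattr(evals.deepeval_metrics, name)
        if isinstance(metric, type):  # class reference -> instantiate it
            metric = metric()
        return metric
```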

View file

@ -0,0 +1,86 @@
"""
These are the official evaluation metrics for HotpotQA taken from https://hotpotqa.github.io/
"""
import re
import string
import sys
from collections import Counter
import ujson as json
def normalize_answer(s):

    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))


def f1_score(prediction, ground_truth):
    normalized_prediction = normalize_answer(prediction)
    normalized_ground_truth = normalize_answer(ground_truth)

    ZERO_METRIC = (0, 0, 0)

    if normalized_prediction in ['yes', 'no', 'noanswer'] and normalized_prediction != normalized_ground_truth:
        return ZERO_METRIC
    if normalized_ground_truth in ['yes', 'no', 'noanswer'] and normalized_prediction != normalized_ground_truth:
        return ZERO_METRIC

    prediction_tokens = normalized_prediction.split()
    ground_truth_tokens = normalized_ground_truth.split()
    common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
    num_same = sum(common.values())
    if num_same == 0:
        return ZERO_METRIC
    precision = 1.0 * num_same / len(prediction_tokens)
    recall = 1.0 * num_same / len(ground_truth_tokens)
    f1 = (2 * precision * recall) / (precision + recall)
    return f1, precision, recall


def exact_match_score(prediction, ground_truth):
    return (normalize_answer(prediction) == normalize_answer(ground_truth))


def update_answer(metrics, prediction, gold):
    em = exact_match_score(prediction, gold)
    f1, prec, recall = f1_score(prediction, gold)
    metrics['em'] += float(em)
    metrics['f1'] += f1
    metrics['prec'] += prec
    metrics['recall'] += recall
    return em, prec, recall


def update_sp(metrics, prediction, gold):
    cur_sp_pred = set(map(tuple, prediction))
    gold_sp_pred = set(map(tuple, gold))
    tp, fp, fn = 0, 0, 0
    for e in cur_sp_pred:
        if e in gold_sp_pred:
            tp += 1
        else:
            fp += 1
    for e in gold_sp_pred:
        if e not in cur_sp_pred:
            fn += 1
    prec = 1.0 * tp / (tp + fp) if tp + fp > 0 else 0.0
    recall = 1.0 * tp / (tp + fn) if tp + fn > 0 else 0.0
    f1 = 2 * prec * recall / (prec + recall) if prec + recall > 0 else 0.0
    em = 1.0 if fp + fn == 0 else 0.0
    metrics['sp_em'] += em
    metrics['sp_f1'] += f1
    metrics['sp_prec'] += prec
    metrics['sp_recall'] += recall
    return em, prec, recall
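A small usage example for the answer-level metrics above, assuming the module is importable as `evals.official_hotpot_metrics`:

```python
from evals.official_hotpot_metrics import exact_match_score, f1_score

prediction = "The Eiffel Tower is in Paris."
ground_truth = "Paris"

print(exact_match_score(prediction, ground_truth))  # False: normalized strings differ
f1, precision, recall = f1_score(prediction, ground_truth)
print(f1, precision, recall)  # ~0.33, 0.2, 1.0: one shared token out of five predicted
```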

View file

@ -1,152 +0,0 @@
site_name: cognee
site_author: Vasilije Markovic
site_description: desc
repo_name: cognee
repo_url: https://github.com/topoteretes/cognee
site_url: https://www.congee.ai
edit_uri: edit/main/docs/
copyright: Copyright &copy; 2024 cognee
theme:
name: material
logo: assets/logo.png
favicon: assets/favicon.png
icon:
repo: fontawesome/brands/github
edit: material/pencil
view: material/eye
theme:
admonition:
note: octicons/tag-16
abstract: octicons/checklist-16
info: octicons/info-16
tip: octicons/squirrel-16
success: octicons/check-16
question: octicons/question-16
warning: octicons/alert-16
failure: octicons/x-circle-16
danger: octicons/zap-16
bug: octicons/bug-16
example: octicons/beaker-16
quote: octicons/quote-16
features:
- announce.dismiss
- content.action.edit
- content.action.view
- content.code.annotate
- content.code.copy
- content.code.select
- content.tabs.link
- content.tooltips
- header.autohide
- navigation.expand
- navigation.footer
- navigation.indexes
- navigation.instant
- navigation.instant.prefetch
- navigation.instant.progress
- navigation.prune
- navigation.sections
- navigation.tabs
- navigation.top
- navigation.tracking
- navigation.path
- search.highlight
- search.share
- search.suggest
- toc.follow
# - toc.integrate
palette:
- scheme: cognee
primary: custom
font:
text: Roboto
code: Roboto Mono
custom_dir: docs/overrides
extra:
analytics:
provider: segment
key: !ENV DOCS_SEGMENT_KEY
extra_css:
- stylesheets/extra.css
# Extensions
markdown_extensions:
- abbr
- admonition
- pymdownx.details
- attr_list
- def_list
- footnotes
- md_in_html
- toc:
permalink: true
- pymdownx.arithmatex:
generic: true
- pymdownx.betterem:
smart_enable: all
- pymdownx.caret
- pymdownx.details
- pymdownx.emoji:
emoji_generator: !!python/name:material.extensions.emoji.to_svg
emoji_index: !!python/name:material.extensions.emoji.twemoji
- pymdownx.highlight:
anchor_linenums: true
line_spans: __span
pygments_lang_class: true
- pymdownx.inlinehilite
- pymdownx.keys
- pymdownx.magiclink:
normalize_issue_symbols: true
repo_url_shorthand: true
user: tricalt
repo: cognee
- pymdownx.mark
- pymdownx.smartsymbols
- pymdownx.snippets:
auto_append:
- includes/mkdocs.md
- pymdownx.superfences:
custom_fences:
- name: mermaid
class: mermaid
format: !!python/name:pymdownx.superfences.fence_code_format
- pymdownx.tabbed:
alternate_style: true
combine_header_slug: true
- pymdownx.tasklist:
custom_checkbox: true
nav:
- Overview:
- Overview: 'index.md'
- Start here:
- Installation: 'quickstart.md'
- Add data: 'data_ingestion.md'
- Create LLM enriched data store: 'templates.md'
- Explore data: 'search.md'
- Configuration: 'configuration.md'
- What is cognee:
- Introduction: 'conceptual_overview.md'
- API reference: 'api_reference.md'
plugins:
- search:
separator: '[\s\u200b\-_,:!=\[\]()"`/]+|\.(?!\d)|&[lg]t;|(?!\b)(?=[A-Z][a-z])'
- minify:
minify_html: true
minify_js: true
minify_css: true
htmlmin_opts:
remove_comments: true
cache_safe: true
- mkdocstrings:
handlers:
python:
options:
members_order: alphabetical
allow_inspection: true
show_bases: true

71
poetry.lock generated
View file

@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
[[package]]
name = "aiofiles"
@ -2893,6 +2893,8 @@ optional = false
python-versions = "*"
files = [
{file = "jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c"},
{file = "jsonpath_ng-1.7.0-py2-none-any.whl", hash = "sha256:898c93fc173f0c336784a3fa63d7434297544b7198124a68f9a3ef9597b0ae6e"},
{file = "jsonpath_ng-1.7.0-py3-none-any.whl", hash = "sha256:f3d7f9e848cba1b6da28c55b1c26ff915dc9e0b1ba7e752a53d6da8d5cbd00b6"},
]
[package.dependencies]
@ -3103,13 +3105,13 @@ test = ["jupyter-server (>=2.0.0)", "pytest (>=7.0)", "pytest-jupyter[server] (>
[[package]]
name = "jupyterlab"
version = "4.2.6"
version = "4.3.3"
description = "JupyterLab computational environment"
optional = true
python-versions = ">=3.8"
files = [
{file = "jupyterlab-4.2.6-py3-none-any.whl", hash = "sha256:78dd42cae5b460f377624b03966a8730e3b0692102ddf5933a2a3730c1bc0a20"},
{file = "jupyterlab-4.2.6.tar.gz", hash = "sha256:625f3ac19da91f9706baf66df25723b2f1307c1159fc7293035b066786d62a4a"},
{file = "jupyterlab-4.3.3-py3-none-any.whl", hash = "sha256:32a8fd30677e734ffcc3916a4758b9dab21b02015b668c60eb36f84357b7d4b1"},
{file = "jupyterlab-4.3.3.tar.gz", hash = "sha256:76fa39e548fdac94dc1204af5956c556f54c785f70ee26aa47ea08eda4d5bbcd"},
]
[package.dependencies]
@ -3124,15 +3126,15 @@ jupyter-server = ">=2.4.0,<3"
jupyterlab-server = ">=2.27.1,<3"
notebook-shim = ">=0.2"
packaging = "*"
setuptools = ">=40.1.0"
setuptools = ">=40.8.0"
tomli = {version = ">=1.2.2", markers = "python_version < \"3.11\""}
tornado = ">=6.2.0"
traitlets = "*"
[package.extras]
dev = ["build", "bump2version", "coverage", "hatch", "pre-commit", "pytest-cov", "ruff (==0.3.5)"]
docs = ["jsx-lexer", "myst-parser", "pydata-sphinx-theme (>=0.13.0)", "pytest", "pytest-check-links", "pytest-jupyter", "sphinx (>=1.8,<7.3.0)", "sphinx-copybutton"]
docs-screenshots = ["altair (==5.3.0)", "ipython (==8.16.1)", "ipywidgets (==8.1.2)", "jupyterlab-geojson (==3.4.0)", "jupyterlab-language-pack-zh-cn (==4.1.post2)", "matplotlib (==3.8.3)", "nbconvert (>=7.0.0)", "pandas (==2.2.1)", "scipy (==1.12.0)", "vega-datasets (==0.9.0)"]
dev = ["build", "bump2version", "coverage", "hatch", "pre-commit", "pytest-cov", "ruff (==0.6.9)"]
docs = ["jsx-lexer", "myst-parser", "pydata-sphinx-theme (>=0.13.0)", "pytest", "pytest-check-links", "pytest-jupyter", "sphinx (>=1.8,<8.1.0)", "sphinx-copybutton"]
docs-screenshots = ["altair (==5.4.1)", "ipython (==8.16.1)", "ipywidgets (==8.1.5)", "jupyterlab-geojson (==3.4.0)", "jupyterlab-language-pack-zh-cn (==4.2.post3)", "matplotlib (==3.9.2)", "nbconvert (>=7.0.0)", "pandas (==2.2.3)", "scipy (==1.14.1)", "vega-datasets (==0.9.0)"]
test = ["coverage", "pytest (>=7.0)", "pytest-check-links (>=0.7)", "pytest-console-scripts", "pytest-cov", "pytest-jupyter (>=0.5.3)", "pytest-timeout", "pytest-tornasync", "requests", "requests-cache", "virtualenv"]
upgrade-extension = ["copier (>=9,<10)", "jinja2-time (<0.3)", "pydantic (<3.0)", "pyyaml-include (<3.0)", "tomli-w (<2.0)"]
@ -4529,18 +4531,18 @@ twitter = ["twython"]
[[package]]
name = "notebook"
version = "7.2.2"
version = "7.3.1"
description = "Jupyter Notebook - A web-based notebook environment for interactive computing"
optional = true
python-versions = ">=3.8"
files = [
{file = "notebook-7.2.2-py3-none-any.whl", hash = "sha256:c89264081f671bc02eec0ed470a627ed791b9156cad9285226b31611d3e9fe1c"},
{file = "notebook-7.2.2.tar.gz", hash = "sha256:2ef07d4220421623ad3fe88118d687bc0450055570cdd160814a59cf3a1c516e"},
{file = "notebook-7.3.1-py3-none-any.whl", hash = "sha256:212e1486b2230fe22279043f33c7db5cf9a01d29feb063a85cb139747b7c9483"},
{file = "notebook-7.3.1.tar.gz", hash = "sha256:84381c2a82d867517fd25b86e986dae1fe113a70b98f03edff9b94e499fec8fa"},
]
[package.dependencies]
jupyter-server = ">=2.4.0,<3"
jupyterlab = ">=4.2.0,<4.3"
jupyterlab = ">=4.3.2,<4.4"
jupyterlab-server = ">=2.27.1,<3"
notebook-shim = ">=0.2,<0.3"
tornado = ">=6.2.0"
@ -6889,28 +6891,29 @@ files = [
[[package]]
name = "ruff"
version = "0.2.2"
version = "0.8.2"
description = "An extremely fast Python linter and code formatter, written in Rust."
optional = false
python-versions = ">=3.7"
files = [
{file = "ruff-0.2.2-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:0a9efb032855ffb3c21f6405751d5e147b0c6b631e3ca3f6b20f917572b97eb6"},
{file = "ruff-0.2.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:d450b7fbff85913f866a5384d8912710936e2b96da74541c82c1b458472ddb39"},
{file = "ruff-0.2.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ecd46e3106850a5c26aee114e562c329f9a1fbe9e4821b008c4404f64ff9ce73"},
{file = "ruff-0.2.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5e22676a5b875bd72acd3d11d5fa9075d3a5f53b877fe7b4793e4673499318ba"},
{file = "ruff-0.2.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1695700d1e25a99d28f7a1636d85bafcc5030bba9d0578c0781ba1790dbcf51c"},
{file = "ruff-0.2.2-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:b0c232af3d0bd8f521806223723456ffebf8e323bd1e4e82b0befb20ba18388e"},
{file = "ruff-0.2.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f63d96494eeec2fc70d909393bcd76c69f35334cdbd9e20d089fb3f0640216ca"},
{file = "ruff-0.2.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6a61ea0ff048e06de273b2e45bd72629f470f5da8f71daf09fe481278b175001"},
{file = "ruff-0.2.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e1439c8f407e4f356470e54cdecdca1bd5439a0673792dbe34a2b0a551a2fe3"},
{file = "ruff-0.2.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:940de32dc8853eba0f67f7198b3e79bc6ba95c2edbfdfac2144c8235114d6726"},
{file = "ruff-0.2.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:0c126da55c38dd917621552ab430213bdb3273bb10ddb67bc4b761989210eb6e"},
{file = "ruff-0.2.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:3b65494f7e4bed2e74110dac1f0d17dc8e1f42faaa784e7c58a98e335ec83d7e"},
{file = "ruff-0.2.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:1ec49be4fe6ddac0503833f3ed8930528e26d1e60ad35c2446da372d16651ce9"},
{file = "ruff-0.2.2-py3-none-win32.whl", hash = "sha256:d920499b576f6c68295bc04e7b17b6544d9d05f196bb3aac4358792ef6f34325"},
{file = "ruff-0.2.2-py3-none-win_amd64.whl", hash = "sha256:cc9a91ae137d687f43a44c900e5d95e9617cb37d4c989e462980ba27039d239d"},
{file = "ruff-0.2.2-py3-none-win_arm64.whl", hash = "sha256:c9d15fc41e6054bfc7200478720570078f0b41c9ae4f010bcc16bd6f4d1aacdd"},
{file = "ruff-0.2.2.tar.gz", hash = "sha256:e62ed7f36b3068a30ba39193a14274cd706bc486fad521276458022f7bccb31d"},
{file = "ruff-0.8.2-py3-none-linux_armv6l.whl", hash = "sha256:c49ab4da37e7c457105aadfd2725e24305ff9bc908487a9bf8d548c6dad8bb3d"},
{file = "ruff-0.8.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:ec016beb69ac16be416c435828be702ee694c0d722505f9c1f35e1b9c0cc1bf5"},
{file = "ruff-0.8.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:f05cdf8d050b30e2ba55c9b09330b51f9f97d36d4673213679b965d25a785f3c"},
{file = "ruff-0.8.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:60f578c11feb1d3d257b2fb043ddb47501ab4816e7e221fbb0077f0d5d4e7b6f"},
{file = "ruff-0.8.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cbd5cf9b0ae8f30eebc7b360171bd50f59ab29d39f06a670b3e4501a36ba5897"},
{file = "ruff-0.8.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b402ddee3d777683de60ff76da801fa7e5e8a71038f57ee53e903afbcefdaa58"},
{file = "ruff-0.8.2-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:705832cd7d85605cb7858d8a13d75993c8f3ef1397b0831289109e953d833d29"},
{file = "ruff-0.8.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:32096b41aaf7a5cc095fa45b4167b890e4c8d3fd217603f3634c92a541de7248"},
{file = "ruff-0.8.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e769083da9439508833cfc7c23e351e1809e67f47c50248250ce1ac52c21fb93"},
{file = "ruff-0.8.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fe716592ae8a376c2673fdfc1f5c0c193a6d0411f90a496863c99cd9e2ae25d"},
{file = "ruff-0.8.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:81c148825277e737493242b44c5388a300584d73d5774defa9245aaef55448b0"},
{file = "ruff-0.8.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:d261d7850c8367704874847d95febc698a950bf061c9475d4a8b7689adc4f7fa"},
{file = "ruff-0.8.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:1ca4e3a87496dc07d2427b7dd7ffa88a1e597c28dad65ae6433ecb9f2e4f022f"},
{file = "ruff-0.8.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:729850feed82ef2440aa27946ab39c18cb4a8889c1128a6d589ffa028ddcfc22"},
{file = "ruff-0.8.2-py3-none-win32.whl", hash = "sha256:ac42caaa0411d6a7d9594363294416e0e48fc1279e1b0e948391695db2b3d5b1"},
{file = "ruff-0.8.2-py3-none-win_amd64.whl", hash = "sha256:2aae99ec70abf43372612a838d97bfe77d45146254568d94926e8ed5bbb409ea"},
{file = "ruff-0.8.2-py3-none-win_arm64.whl", hash = "sha256:fb88e2a506b70cfbc2de6fae6681c4f944f7dd5f2fe87233a7233d888bad73e8"},
{file = "ruff-0.8.2.tar.gz", hash = "sha256:b84f4f414dda8ac7f75075c1fa0b905ac0ff25361f42e6d5da681a465e0f78e5"},
]
[[package]]
@ -7183,13 +7186,13 @@ win32 = ["pywin32"]
[[package]]
name = "sentry-sdk"
version = "2.19.0"
version = "2.19.2"
description = "Python client for Sentry (https://sentry.io)"
optional = false
python-versions = ">=3.6"
files = [
{file = "sentry_sdk-2.19.0-py2.py3-none-any.whl", hash = "sha256:7b0b3b709dee051337244a09a30dbf6e95afe0d34a1f8b430d45e0982a7c125b"},
{file = "sentry_sdk-2.19.0.tar.gz", hash = "sha256:ee4a4d2ae8bfe3cac012dcf3e4607975904c137e1738116549fc3dbbb6ff0e36"},
{file = "sentry_sdk-2.19.2-py2.py3-none-any.whl", hash = "sha256:ebdc08228b4d131128e568d696c210d846e5b9d70aa0327dec6b1272d9d40b84"},
{file = "sentry_sdk-2.19.2.tar.gz", hash = "sha256:467df6e126ba242d39952375dd816fbee0f217d119bf454a8ce74cf1e7909e8d"},
]
[package.dependencies]
@ -8868,4 +8871,4 @@ weaviate = ["weaviate-client"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.9.0,<3.12"
content-hash = "c1f30981f79db94213a89aec3207f0b4775944968e97dda8aa49c3aa143ce7b5"
content-hash = "18d78e556471b4b63c948138233ef4b38bba02f649a469180c2b1c292f0d61df"

View file

@ -103,7 +103,7 @@ notebook = {version = "^7.1.0", optional = true}
deptry = "^0.20.0"
debugpy = "1.8.2"
pylint = "^3.0.3"
ruff = "^0.2.2"
ruff = ">=0.2.2,<0.9.0"
tweepy = "4.14.0"
gitpython = "^3.1.43"