Merge dev with main (#921)

## Description
Merge changes on main to dev

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
2025-06-07 16:48:47 +02:00 · 2025-06-07 16:48:47 +02:00 · 84c7aeb1a5
commit 84c7aeb1a5
parent 1ed6cfd918
14 changed files with 69 additions and 1047 deletions
--- a/.gitattributes
+++ b/.gitattributes
@ -0,0 +1,2 @@
 # ignore jupyter notebooks in the language bar on github
 notebooks/** linguist-vendored
--- a/.github/workflows/notebooks_tests.yml
+++ b/.github/workflows/notebooks_tests.yml
@ -11,30 +11,9 @@ jobs:
 #      notebook-location: notebooks/cognee_demo.ipynb
 #    secrets: inherit
  run-llama-index-integration:
    name: LlamaIndex Integration Notebook
    uses: ./.github/workflows/reusable_notebook.yml
    with:
      notebook-location: notebooks/llama_index_cognee_integration.ipynb
    secrets: inherit
  run-cognee-llama-index:
    name: Cognee LlamaIndex Notebook
    uses: ./.github/workflows/reusable_notebook.yml
    with:
      notebook-location: notebooks/cognee_llama_index.ipynb
    secrets: inherit
  run-cognee-multimedia:
    name: Cognee Multimedia Notebook
    uses: ./.github/workflows/reusable_notebook.yml
    with:
      notebook-location: notebooks/cognee_multimedia_demo.ipynb
    secrets: inherit
 #  run-graphrag-vs-rag:
 #    name: Graphrag vs Rag notebook
 #    uses: ./.github/workflows/reusable_notebook.yml
 #    with:
 #      notebook-location: notebooks/graphrag_vs_rag.ipynb
 #    secrets: inherit
--- a/README.md
+++ b/README.md
@ -13,6 +13,8 @@
  <a href="https://cognee.ai">Learn more</a>
  ·
  <a href="https://discord.gg/NQPKmU5CCg">Join Discord</a>
  ·
  <a href="https://www.reddit.com/r/AIMemory/">Join r/AIMemory</a>
  </p>
@ -46,12 +48,9 @@ More on [use-cases](https://docs.cognee.ai/use-cases) and [evals](https://github
 <div style="text-align: center">
  <img src="https://raw.githubusercontent.com/topoteretes/cognee/refs/heads/main/assets/cognee_benefits.png" alt="Why cognee?" width="50%" />
 </div>
 </div>
 ## Features
 - Interconnect and retrieve your past conversations, documents, images and audio transcriptions
@ -61,7 +60,7 @@ More on [use-cases](https://docs.cognee.ai/use-cases) and [evals](https://github
 ## Get Started
-Get started quickly with a Google Colab  <a href="https://colab.research.google.com/drive/1jHbWVypDgCLwjE71GSXhRL3YxYhCZzG1?usp=sharing">notebook</a>  or  <a href="https://github.com/topoteretes/cognee-starter">starter repo</a>
+Get started quickly with a Google Colab <a href="https://colab.research.google.com/drive/1jHbWVypDgCLwjE71GSXhRL3YxYhCZzG1?usp=sharing">notebook</a>, <a href="https://deepnote.com/workspace/cognee-382213d0-0444-4c89-8265-13770e333c02/project/cognee-demo-78ffacb9-5832-4611-bb1a-560386068b30/notebook/Notebook-1-75b24cda566d4c24ab348f7150792601?utm_source=share-modal&utm_medium=product-shared-content&utm_campaign=notebook&utm_content=78ffacb9-5832-4611-bb1a-560386068b30">Deepnote notebook</a> or <a href="https://github.com/topoteretes/cognee-starter">starter repo</a>
 ## Contributing
@ -141,7 +140,15 @@ Example output:
 ```
-### cognee UI
+## Our paper is out! <a href="https://arxiv.org/abs/2505.24478" target="_blank" rel="noopener noreferrer">Read here</a>
 <div style="text-align: center">
  <img src="assets/cognee-paper.png" alt="cognee paper" width="100%" />
 </div>
 </div>
 ## Cognee UI
 You can also cognify your files and query using cognee UI.
--- a/alembic/versions/482cd6517ce4_add_default_user.py
+++ b/alembic/versions/482cd6517ce4_add_default_user.py
@ -12,6 +12,8 @@ from sqlalchemy.util import await_only
 from cognee.modules.users.methods import create_default_user, delete_user
 from fastapi_users.exceptions import UserAlreadyExists
 # revision identifiers, used by Alembic.
 revision: str = "482cd6517ce4"
@ -21,7 +23,10 @@ depends_on: Union[str, Sequence[str], None] = "8057ae7329c2"
 def upgrade() -> None:
-    await_only(create_default_user())
+    try:
        await_only(create_default_user())
    except UserAlreadyExists:
        pass  # It's fine if the default user already exists
 def downgrade() -> None:
--- a/assets/cognee-paper.png
+++ b/assets/cognee-paper.png
--- a/cognee-frontend/src/modules/ingestion/DatasetsView/DatasetsView.tsx
+++ b/cognee-frontend/src/modules/ingestion/DatasetsView/DatasetsView.tsx
@ -53,7 +53,7 @@ export default function DatasetsView({
    setExplorationDataset(dataset);
    showExplorationWindow();
  }
-  
+
  return (
    <>
      <Stack orientation="vertical" gap="4">
@ -95,10 +95,10 @@ export default function DatasetsView({
          </DatasetItem>
        ))}
      </Stack>
-      <Modal onClose={hideExplorationWindow} isOpen={isExplorationWindowShown} className={styles.explorerModal}>
+      <Modal closeOnBackdropClick={false} onClose={hideExplorationWindow} isOpen={isExplorationWindowShown} className={styles.explorerModal}>
        <Spacer horizontal="2" vertical="3" wrap>
          <Text>{dataset?.name}</Text>
-        </Spacer> 
+        </Spacer>
        <Explorer dataset={dataset!} />
      </Modal>
    </>
--- a/cognee-frontend/src/ui/Partials/SearchView/SearchView.tsx
+++ b/cognee-frontend/src/ui/Partials/SearchView/SearchView.tsx
@ -3,7 +3,7 @@
 import { v4 } from 'uuid';
 import classNames from 'classnames';
 import { useCallback, useEffect, useState } from 'react';
-import { CTAButton, Stack, Text, DropdownSelect, TextArea, useBoolean } from 'ohmy-ui';
+import { CTAButton, Stack, Text, DropdownSelect, TextArea, useBoolean, Input } from 'ohmy-ui';
 import { fetch } from '@/utils';
 import styles from './SearchView.module.css';
 import getHistory from '@/modules/chat/getHistory';
@ -33,8 +33,15 @@ export default function SearchView() {
  }, {
    value: 'RAG_COMPLETION',
    label: 'Completion using RAG',
  }, {
    value: 'GRAPH_COMPLETION_COT',
    label: 'Cognee\'s Chain of Thought search',
  }, {
    value: 'GRAPH_COMPLETION_CONTEXT_EXTENSION',
    label: 'Cognee\'s Multi-Hop search',
  }];
  const [searchType, setSearchType] = useState(searchOptions[0]);
  const [rangeValue, setRangeValue] = useState(10);
  const scrollToBottom = useCallback(() => {
    setTimeout(() => {
@ -90,6 +97,7 @@ export default function SearchView() {
      body: JSON.stringify({
        query: inputValue.trim(),
        searchType: searchTypeValue,
        topK: rangeValue,
      }),
    })
      .then((response) => response.json())
@ -108,7 +116,7 @@ export default function SearchView() {
      .catch(() => {
        setInputValue(inputValue);
      });
-  }, [inputValue, scrollToBottom, searchType.value]);
+  }, [inputValue, rangeValue, scrollToBottom, searchType.value]);
  const {
    value: isInputExpanded,
@ -122,6 +130,10 @@ export default function SearchView() {
    }
  };
  /**
   * Change handler for the numeric "Search range" input.
   *
   * Parses the field's text as a base-10 integer and stores it in
   * `rangeValue`. Input that does not parse to a number (for example an
   * empty field while the user is editing) is ignored so that the
   * previously entered range is kept.
   *
   * @param event - Change event fired by the range `<input type="number">`.
   */
  const handleRangeValueChange = (event: React.ChangeEvent<HTMLInputElement>) => {
    // Always pass the radix explicitly so the text is read as decimal.
    const parsedRange = Number.parseInt(event.target.value, 10);

    // NaN must never reach state: JSON.stringify turns NaN into `null`,
    // which would send an empty `topK` with the next search request.
    if (Number.isNaN(parsedRange)) {
      return;
    }

    setRangeValue(parsedRange);
  };
  return (
    <Stack className={styles.searchViewContainer}>
      <DropdownSelect<SelectOption>
@ -146,9 +158,15 @@ export default function SearchView() {
        </Stack>
      </div>
      <form onSubmit={handleSearchSubmit}>
-        <Stack orientation="horizontal" align="end/" gap="2">
+        <Stack orientation="vertical" gap="2">
          <TextArea onKeyUp={handleSubmitOnEnter} style={{ transition: 'height 0.3s ease', height: isInputExpanded ? '128px' : '38px' }} onFocus={expandInput} onBlur={contractInput} value={inputValue} onChange={handleInputChange} name="searchInput" placeholder="Search" />
-          <CTAButton hugContent type="submit">Search</CTAButton>
+          <Stack orientation="horizontal" gap="between">
            <Stack orientation="horizontal" gap="2" align="center">
              <label><Text>Search range: </Text></label>
              <Input style={{ maxWidth: "90px" }} value={rangeValue} onChange={handleRangeValueChange} type="number" />
            </Stack>
            <CTAButton hugContent type="submit">Search</CTAButton>
          </Stack>
        </Stack>
      </form>
    </Stack>
--- a/cognee-frontend/tsconfig.json
+++ b/cognee-frontend/tsconfig.json
@ -1,6 +1,10 @@
 {
  "compilerOptions": {
-    "lib": ["dom", "dom.iterable", "esnext"],
+    "lib": [
      "dom",
      "dom.iterable",
      "esnext"
    ],
    "allowJs": true,
    "skipLibCheck": true,
    "strict": true,
@ -18,9 +22,19 @@
      }
    ],
    "paths": {
-      "@/*": ["./src/*"]
+      "@/*": [
-    }
+        "./src/*"
      ]
    },
    "target": "ES2017"
  },
-  "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
+  "include": [
-  "exclude": ["node_modules"]
+    "next-env.d.ts",
    "**/*.ts",
    "**/*.tsx",
    ".next/types/**/*.ts"
  ],
  "exclude": [
    "node_modules"
  ]
 }
--- a/cognee/api/v1/search/routers/get_search_router.py
+++ b/cognee/api/v1/search/routers/get_search_router.py
@ -1,5 +1,5 @@
 from uuid import UUID
-from typing import Optional, Union
+from typing import Optional
 from datetime import datetime
 from fastapi import Depends, APIRouter
 from fastapi.responses import JSONResponse
@ -17,6 +17,7 @@ class SearchPayloadDTO(InDTO):
    datasets: Optional[list[str]] = None
    dataset_ids: Optional[list[UUID]] = None
    query: str
    top_k: Optional[int] = 10
 def get_search_router() -> APIRouter:
@ -49,6 +50,7 @@ def get_search_router() -> APIRouter:
                user=user,
                datasets=payload.datasets,
                dataset_ids=payload.dataset_ids,
                top_k=payload.top_k,
            )
            return results
--- a/entrypoint.sh
+++ b/entrypoint.sh
@ -14,7 +14,7 @@ echo "Environment: $ENVIRONMENT"
 # smooth redeployments and container restarts while maintaining data integrity.
 echo "Running database migrations..."
-MIGRATION_OUTPUT=$(alembic upgrade head 2>&1)
+MIGRATION_OUTPUT=$(alembic upgrade head)
 MIGRATION_EXIT_CODE=$?
 if [[ $MIGRATION_EXIT_CODE -ne 0 ]]; then
@ -42,5 +42,5 @@ if [ "$ENVIRONMENT" = "dev" ] || [ "$ENVIRONMENT" = "local" ]; then
        gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --log-level debug --reload cognee.api.client:app
    fi
 else
-    gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --log-level error cognee.api.client:app 
+    gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --log-level error cognee.api.client:app
 fi
--- a/notebooks/cognee_graphiti_demo.ipynb
+++ b/notebooks/cognee_graphiti_demo.ipynb
@ -1,225 +0,0 @@
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Cognee Graphiti integration demo"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "vscode": {
     "languageId": "plaintext"
    }
   },
   "source": [
    "First we import the necessary libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import asyncio\n",
    "\n",
    "import cognee\n",
    "from cognee.shared.logging_utils import get_logger, ERROR\n",
    "from cognee.modules.pipelines import Task, run_tasks\n",
    "from cognee.tasks.temporal_awareness import build_graph_with_temporal_awareness\n",
    "from cognee.infrastructure.databases.relational import (\n",
    "    create_db_and_tables as create_relational_db_and_tables,\n",
    ")\n",
    "from cognee.tasks.temporal_awareness.index_graphiti_objects import (\n",
    "    index_and_transform_graphiti_nodes_and_edges,\n",
    ")\n",
    "from cognee.modules.retrieval.utils.brute_force_triplet_search import brute_force_triplet_search\n",
    "from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever\n",
    "from cognee.infrastructure.llm.prompts import read_query_prompt, render_prompt\n",
    "from cognee.infrastructure.llm.get_llm_client import get_llm_client\n",
    "from cognee.modules.users.methods import get_default_user"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Set environment variables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-15T10:43:57.893763Z",
     "start_time": "2025-01-15T10:43:57.891332Z"
    }
   },
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# We ignore warnings for now\n",
    "warnings.filterwarnings(\"ignore\")\n",
    "\n",
    "# API key for cognee\n",
    "if \"LLM_API_KEY\" not in os.environ:\n",
    "    os.environ[\"LLM_API_KEY\"] = \"\"\n",
    "\n",
    "# API key for graphiti\n",
    "if \"OPENAI_API_KEY\" not in os.environ:\n",
    "    os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
    "\n",
    "GRAPH_DATABASE_PROVIDER = \"neo4j\"\n",
    "GRAPH_DATABASE_USERNAME = \"neo4j\"\n",
    "GRAPH_DATABASE_PASSWORD = \"pleaseletmein\"\n",
    "GRAPH_DATABASE_URL = \"bolt://localhost:7687\"\n",
    "\n",
    "os.environ[\"GRAPH_DATABASE_PROVIDER\"] = GRAPH_DATABASE_PROVIDER\n",
    "os.environ[\"GRAPH_DATABASE_USERNAME\"] = GRAPH_DATABASE_USERNAME\n",
    "os.environ[\"GRAPH_DATABASE_PASSWORD\"] = GRAPH_DATABASE_PASSWORD\n",
    "os.environ[\"GRAPH_DATABASE_URL\"] = GRAPH_DATABASE_URL\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Input texts with temporal information"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-15T10:43:57.928664Z",
     "start_time": "2025-01-15T10:43:57.927105Z"
    }
   },
   "outputs": [],
   "source": [
    "text_list = [\n",
    "    \"Kamala Harris is the Attorney General of California. She was previously \"\n",
    "    \"the district attorney for San Francisco.\",\n",
    "    \"As AG, Harris was in office from January 3, 2011 – January 3, 2017\",\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Running graphiti + transforming its graph into cognee's core system (graph transformation + vector embeddings)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-15T10:44:25.008501Z",
     "start_time": "2025-01-15T10:43:57.932240Z"
    }
   },
   "outputs": [],
   "source": [
    "await cognee.prune.prune_data()\n",
    "await cognee.prune.prune_system(metadata=True)\n",
    "await create_relational_db_and_tables()\n",
    "\n",
    "# Initialize default user\n",
    "user = await get_default_user()\n",
    "\n",
    "for text in text_list:\n",
    "    await cognee.add(text)\n",
    "\n",
    "tasks = [\n",
    "    Task(build_graph_with_temporal_awareness, text_list=text_list),\n",
    "    ]\n",
    "\n",
    "pipeline = run_tasks(tasks, user=user)\n",
    "\n",
    "async for result in pipeline:\n",
    "    print(result)\n",
    "\n",
    "await index_and_transform_graphiti_nodes_and_edges()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Retrieving and generating answer from graphiti graph with cognee retriever"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-15T10:44:27.844438Z",
     "start_time": "2025-01-15T10:44:25.013325Z"
    }
   },
   "outputs": [],
   "source": [
    "# Step 1: Formulating the Query 🔍\n",
    "query = \"When was Kamala Harris in office?\"\n",
    "\n",
    "# Step 2: Searching for Relevant Triplets 📊\n",
    "triplets = await brute_force_triplet_search(\n",
    "    query=query,\n",
    "    top_k=3,\n",
    "    collections=[\"graphitinode_content\", \"graphitinode_name\", \"graphitinode_summary\"],\n",
    ")\n",
    "\n",
    "# Step 3: Preparing the Context for the LLM\n",
    "retriever = GraphCompletionRetriever()\n",
    "context = await retriever.resolve_edges_to_text(triplets)\n",
    "\n",
    "args = {\"question\": query, \"context\": context}\n",
    "\n",
    "# Step 4: Generating Prompts ✍️\n",
    "user_prompt = render_prompt(\"graph_context_for_question.txt\", args)\n",
    "system_prompt = read_query_prompt(\"answer_simple_question_restricted.txt\")\n",
    "\n",
    "# Step 5: Interacting with the LLM 🤖\n",
    "llm_client = get_llm_client()\n",
    "computed_answer = await llm_client.acreate_structured_output(\n",
    "    text_input=user_prompt,  # Input prompt for the user context\n",
    "    system_prompt=system_prompt,  # System-level instructions for the model\n",
    "    response_model=str,\n",
    ")\n",
    "\n",
    "# Step 6: Displaying the Computed Answer ✨\n",
    "print(f\"💡 Answer: {computed_answer}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
--- a/notebooks/cognee_llama_index.ipynb
+++ b/notebooks/cognee_llama_index.ipynb
@ -1,239 +0,0 @@
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Cognee GraphRAG with LlamaIndex Documents"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install llama-index-core\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load Data\n",
    "\n",
    "We will use a sample news article dataset retrieved from Diffbot, which Tomaz has conveniently made available on GitHub for easy access.\n",
    "\n",
    "The dataset contains 2,500 samples; for ease of experimentation, we will use 5 of these samples, which include the `title` and `text` of news articles."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from llama_index.core import Document\n",
    "\n",
    "news = pd.read_csv(\n",
    "    \"https://raw.githubusercontent.com/tomasonjo/blog-datasets/main/news_articles.csv\"\n",
    ")[:5]\n",
    "\n",
    "news.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Prepare documents as required by LlamaIndex"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "documents = [Document(text=f\"{row['title']}: {row['text']}\") for i, row in news.iterrows()]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Set environment variables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Setting environment variables\n",
    "if \"GRAPHISTRY_USERNAME\" not in os.environ:\n",
    "    os.environ[\"GRAPHISTRY_USERNAME\"] = \"\"\n",
    "\n",
    "if \"GRAPHISTRY_PASSWORD\" not in os.environ:\n",
    "    os.environ[\"GRAPHISTRY_PASSWORD\"] = \"\"\n",
    "\n",
    "if \"LLM_API_KEY\" not in os.environ:\n",
    "    os.environ[\"LLM_API_KEY\"] = \"\"\n",
    "\n",
    "# \"neo4j\" or \"networkx\"\n",
    "os.environ[\"GRAPH_DATABASE_PROVIDER\"] = \"networkx\"\n",
    "# Not needed if using networkx\n",
    "# os.environ[\"GRAPH_DATABASE_URL\"]=\"\"\n",
    "# os.environ[\"GRAPH_DATABASE_USERNAME\"]=\"\"\n",
    "# os.environ[\"GRAPH_DATABASE_PASSWORD\"]=\"\"\n",
    "\n",
    "# \"pgvector\", \"qdrant\", \"weaviate\" or \"lancedb\"\n",
    "os.environ[\"VECTOR_DB_PROVIDER\"] = \"lancedb\"\n",
    "# Not needed if using \"lancedb\" or \"pgvector\"\n",
    "# os.environ[\"VECTOR_DB_URL\"]=\"\"\n",
    "# os.environ[\"VECTOR_DB_KEY\"]=\"\"\n",
    "\n",
    "# Relational Database provider \"sqlite\" or \"postgres\"\n",
    "os.environ[\"DB_PROVIDER\"] = \"sqlite\"\n",
    "\n",
    "# Database name\n",
    "os.environ[\"DB_NAME\"] = \"cognee_db\"\n",
    "\n",
    "# Postgres specific parameters (Only if Postgres or PGVector is used)\n",
    "# os.environ[\"DB_HOST\"]=\"127.0.0.1\"\n",
    "# os.environ[\"DB_PORT\"]=\"5432\"\n",
    "# os.environ[\"DB_USERNAME\"]=\"cognee\"\n",
    "# os.environ[\"DB_PASSWORD\"]=\"cognee\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Run Cognee with LlamaIndex Documents"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from typing import Union, BinaryIO\n",
    "\n",
    "from cognee.infrastructure.databases.vector.pgvector import (\n",
    "    create_db_and_tables as create_pgvector_db_and_tables,\n",
    ")\n",
    "from cognee.infrastructure.databases.relational import (\n",
    "    create_db_and_tables as create_relational_db_and_tables,\n",
    ")\n",
    "from cognee.modules.users.models import User\n",
    "from cognee.modules.users.methods import get_default_user\n",
    "from cognee.tasks.ingestion.ingest_data import ingest_data\n",
    "import cognee\n",
    "\n",
    "# Create a clean slate for cognee -- reset data and system state\n",
    "await cognee.prune.prune_data()\n",
    "await cognee.prune.prune_system(metadata=True)\n",
    "\n",
    "\n",
    "# Add the LlamaIndex documents, and make it available for cognify\n",
    "async def add(\n",
    "    data: Union[BinaryIO, list[BinaryIO], str, list[str]],\n",
    "    dataset_name: str = \"main_dataset\",\n",
    "    user: User = None,\n",
    "):\n",
    "    await create_relational_db_and_tables()\n",
    "    await create_pgvector_db_and_tables()\n",
    "\n",
    "    if user is None:\n",
    "        user = await get_default_user()\n",
    "\n",
    "    await ingest_data(data, dataset_name, user)\n",
    "\n",
    "\n",
    "await add(documents)\n",
    "\n",
    "# Use LLMs and cognee to create knowledge graph\n",
    "await cognee.cognify()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Query Cognee for summaries related to data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from cognee import SearchType\n",
    "\n",
    "# Query cognee for summaries\n",
    "search_results = await cognee.search(\n",
    "    query_type=SearchType.SUMMARIES, query_text=\"What are the main news discussed in the document?\"\n",
    ")\n",
    "# Display search results\n",
    "print(\"\\n Summary of main news discussed:\\n\")\n",
    "print(search_results[0][\"text\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Render Knowledge Graph generated from provided data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import graphistry\n",
    "\n",
    "from cognee.infrastructure.databases.graph import get_graph_engine\n",
    "from cognee.shared.utils import render_graph\n",
    "\n",
    "# Get graph\n",
    "graphistry.login(\n",
    "    username=os.getenv(\"GRAPHISTRY_USERNAME\"), password=os.getenv(\"GRAPHISTRY_PASSWORD\")\n",
    ")\n",
    "graph_engine = await get_graph_engine()\n",
    "\n",
    "graph_url = await render_graph(graph_engine.graph)\n",
    "print(graph_url)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
--- a/notebooks/graphrag_vs_rag.ipynb
+++ b/notebooks/graphrag_vs_rag.ipynb
--- a/notebooks/llama_index_cognee_integration.ipynb
+++ b/notebooks/llama_index_cognee_integration.ipynb
		`@ -0,0 +1,2 @@`
							`# ignore jupyter notebooks in the language bar on github`
							`notebooks/** linguist-vendored`