diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 000000000..8bdf6cf28
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,2 @@
+# Ignore Jupyter notebooks in the language bar on GitHub
+notebooks/** linguist-vendored
diff --git a/.github/workflows/notebooks_tests.yml b/.github/workflows/notebooks_tests.yml
index a6fbae294..f33f90b4a 100644
--- a/.github/workflows/notebooks_tests.yml
+++ b/.github/workflows/notebooks_tests.yml
@@ -11,30 +11,9 @@ jobs:
# notebook-location: notebooks/cognee_demo.ipynb
# secrets: inherit
- run-llama-index-integration:
- name: LlamaIndex Integration Notebook
- uses: ./.github/workflows/reusable_notebook.yml
- with:
- notebook-location: notebooks/llama_index_cognee_integration.ipynb
- secrets: inherit
-
- run-cognee-llama-index:
- name: Cognee LlamaIndex Notebook
- uses: ./.github/workflows/reusable_notebook.yml
- with:
- notebook-location: notebooks/cognee_llama_index.ipynb
- secrets: inherit
-
run-cognee-multimedia:
name: Cognee Multimedia Notebook
uses: ./.github/workflows/reusable_notebook.yml
with:
notebook-location: notebooks/cognee_multimedia_demo.ipynb
secrets: inherit
-
-# run-graphrag-vs-rag:
-# name: Graphrag vs Rag notebook
-# uses: ./.github/workflows/reusable_notebook.yml
-# with:
-# notebook-location: notebooks/graphrag_vs_rag.ipynb
-# secrets: inherit
diff --git a/README.md b/README.md
index cbbad2a86..eeee1145e 100644
--- a/README.md
+++ b/README.md
@@ -13,6 +13,8 @@
Learn more
·
Join Discord
+ ·
+ Join r/AIMemory
@@ -46,12 +48,9 @@ More on [use-cases](https://docs.cognee.ai/use-cases) and [evals](https://github
-
-
-
## Features
- Interconnect and retrieve your past conversations, documents, images and audio transcriptions
@@ -61,7 +60,7 @@ More on [use-cases](https://docs.cognee.ai/use-cases) and [evals](https://github
## Get Started
-Get started quickly with a Google Colab notebook or starter repo
+Get started quickly with a Google Colab notebook, Deepnote notebook, or starter repo
## Contributing
@@ -141,7 +140,15 @@ Example output:
```
-### cognee UI
+## Our paper is out! Read here
+
+
+

+
+
+
+
+## Cognee UI
You can also cognify your files and query using cognee UI.
diff --git a/alembic/versions/482cd6517ce4_add_default_user.py b/alembic/versions/482cd6517ce4_add_default_user.py
index 92429e1e4..d85f0f146 100644
--- a/alembic/versions/482cd6517ce4_add_default_user.py
+++ b/alembic/versions/482cd6517ce4_add_default_user.py
@@ -12,6 +12,8 @@ from sqlalchemy.util import await_only
from cognee.modules.users.methods import create_default_user, delete_user
+from fastapi_users.exceptions import UserAlreadyExists
+
# revision identifiers, used by Alembic.
revision: str = "482cd6517ce4"
@@ -21,7 +23,10 @@ depends_on: Union[str, Sequence[str], None] = "8057ae7329c2"
def upgrade() -> None:
- await_only(create_default_user())
+ try:
+ await_only(create_default_user())
+ except UserAlreadyExists:
+ pass # It's fine if the default user already exists
def downgrade() -> None:
diff --git a/assets/cognee-paper.png b/assets/cognee-paper.png
new file mode 100644
index 000000000..df7113b0e
Binary files /dev/null and b/assets/cognee-paper.png differ
diff --git a/cognee-frontend/src/modules/ingestion/DatasetsView/DatasetsView.tsx b/cognee-frontend/src/modules/ingestion/DatasetsView/DatasetsView.tsx
index 13965230b..68a0d606c 100644
--- a/cognee-frontend/src/modules/ingestion/DatasetsView/DatasetsView.tsx
+++ b/cognee-frontend/src/modules/ingestion/DatasetsView/DatasetsView.tsx
@@ -53,7 +53,7 @@ export default function DatasetsView({
setExplorationDataset(dataset);
showExplorationWindow();
}
-
+
return (
<>
@@ -95,10 +95,10 @@ export default function DatasetsView({
))}
-
+
{dataset?.name}
-
+
>
diff --git a/cognee-frontend/src/ui/Partials/SearchView/SearchView.tsx b/cognee-frontend/src/ui/Partials/SearchView/SearchView.tsx
index 664a46fad..bea5e1277 100644
--- a/cognee-frontend/src/ui/Partials/SearchView/SearchView.tsx
+++ b/cognee-frontend/src/ui/Partials/SearchView/SearchView.tsx
@@ -3,7 +3,7 @@
import { v4 } from 'uuid';
import classNames from 'classnames';
import { useCallback, useEffect, useState } from 'react';
-import { CTAButton, Stack, Text, DropdownSelect, TextArea, useBoolean } from 'ohmy-ui';
+import { CTAButton, Stack, Text, DropdownSelect, TextArea, useBoolean, Input } from 'ohmy-ui';
import { fetch } from '@/utils';
import styles from './SearchView.module.css';
import getHistory from '@/modules/chat/getHistory';
@@ -33,8 +33,15 @@ export default function SearchView() {
}, {
value: 'RAG_COMPLETION',
label: 'Completion using RAG',
+ }, {
+ value: 'GRAPH_COMPLETION_COT',
+ label: 'Cognee\'s Chain of Thought search',
+ }, {
+ value: 'GRAPH_COMPLETION_CONTEXT_EXTENSION',
+ label: 'Cognee\'s Multi-Hop search',
}];
const [searchType, setSearchType] = useState(searchOptions[0]);
+ const [rangeValue, setRangeValue] = useState(10);
const scrollToBottom = useCallback(() => {
setTimeout(() => {
@@ -90,6 +97,7 @@ export default function SearchView() {
body: JSON.stringify({
query: inputValue.trim(),
searchType: searchTypeValue,
+ topK: rangeValue,
}),
})
.then((response) => response.json())
@@ -108,7 +116,7 @@ export default function SearchView() {
.catch(() => {
setInputValue(inputValue);
});
- }, [inputValue, scrollToBottom, searchType.value]);
+ }, [inputValue, rangeValue, scrollToBottom, searchType.value]);
const {
value: isInputExpanded,
@@ -122,6 +130,10 @@ export default function SearchView() {
}
};
+ const handleRangeValueChange = (event: React.ChangeEvent) => {
+ setRangeValue(parseInt(event.target.value));
+ };
+
return (
@@ -146,9 +158,15 @@ export default function SearchView() {
diff --git a/cognee-frontend/tsconfig.json b/cognee-frontend/tsconfig.json
index 7b2858930..f48e7ee6f 100644
--- a/cognee-frontend/tsconfig.json
+++ b/cognee-frontend/tsconfig.json
@@ -1,6 +1,10 @@
{
"compilerOptions": {
- "lib": ["dom", "dom.iterable", "esnext"],
+ "lib": [
+ "dom",
+ "dom.iterable",
+ "esnext"
+ ],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
@@ -18,9 +22,19 @@
}
],
"paths": {
- "@/*": ["./src/*"]
- }
+ "@/*": [
+ "./src/*"
+ ]
+ },
+ "target": "ES2017"
},
- "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
- "exclude": ["node_modules"]
+ "include": [
+ "next-env.d.ts",
+ "**/*.ts",
+ "**/*.tsx",
+ ".next/types/**/*.ts"
+ ],
+ "exclude": [
+ "node_modules"
+ ]
}
diff --git a/cognee/api/v1/search/routers/get_search_router.py b/cognee/api/v1/search/routers/get_search_router.py
index 22947866d..f4dc1989f 100644
--- a/cognee/api/v1/search/routers/get_search_router.py
+++ b/cognee/api/v1/search/routers/get_search_router.py
@@ -1,5 +1,5 @@
from uuid import UUID
-from typing import Optional, Union
+from typing import Optional
from datetime import datetime
from fastapi import Depends, APIRouter
from fastapi.responses import JSONResponse
@@ -17,6 +17,7 @@ class SearchPayloadDTO(InDTO):
datasets: Optional[list[str]] = None
dataset_ids: Optional[list[UUID]] = None
query: str
+ top_k: Optional[int] = 10
def get_search_router() -> APIRouter:
@@ -49,6 +50,7 @@ def get_search_router() -> APIRouter:
user=user,
datasets=payload.datasets,
dataset_ids=payload.dataset_ids,
+ top_k=payload.top_k,
)
return results
diff --git a/entrypoint.sh b/entrypoint.sh
index 9cd81939c..31a2b328d 100755
--- a/entrypoint.sh
+++ b/entrypoint.sh
@@ -14,7 +14,7 @@ echo "Environment: $ENVIRONMENT"
# smooth redeployments and container restarts while maintaining data integrity.
echo "Running database migrations..."
-MIGRATION_OUTPUT=$(alembic upgrade head 2>&1)
+MIGRATION_OUTPUT=$(alembic upgrade head)
MIGRATION_EXIT_CODE=$?
if [[ $MIGRATION_EXIT_CODE -ne 0 ]]; then
@@ -42,5 +42,5 @@ if [ "$ENVIRONMENT" = "dev" ] || [ "$ENVIRONMENT" = "local" ]; then
gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --log-level debug --reload cognee.api.client:app
fi
else
- gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --log-level error cognee.api.client:app
+ gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --log-level error cognee.api.client:app
fi
diff --git a/notebooks/cognee_graphiti_demo.ipynb b/notebooks/cognee_graphiti_demo.ipynb
deleted file mode 100644
index 79123a483..000000000
--- a/notebooks/cognee_graphiti_demo.ipynb
+++ /dev/null
@@ -1,225 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Cognee Graphiti integration demo"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "vscode": {
- "languageId": "plaintext"
- }
- },
- "source": [
- "First we import the necessary libraries"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import asyncio\n",
- "\n",
- "import cognee\n",
- "from cognee.shared.logging_utils import get_logger, ERROR\n",
- "from cognee.modules.pipelines import Task, run_tasks\n",
- "from cognee.tasks.temporal_awareness import build_graph_with_temporal_awareness\n",
- "from cognee.infrastructure.databases.relational import (\n",
- " create_db_and_tables as create_relational_db_and_tables,\n",
- ")\n",
- "from cognee.tasks.temporal_awareness.index_graphiti_objects import (\n",
- " index_and_transform_graphiti_nodes_and_edges,\n",
- ")\n",
- "from cognee.modules.retrieval.utils.brute_force_triplet_search import brute_force_triplet_search\n",
- "from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever\n",
- "from cognee.infrastructure.llm.prompts import read_query_prompt, render_prompt\n",
- "from cognee.infrastructure.llm.get_llm_client import get_llm_client\n",
- "from cognee.modules.users.methods import get_default_user"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Set environment variables"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "ExecuteTime": {
- "end_time": "2025-01-15T10:43:57.893763Z",
- "start_time": "2025-01-15T10:43:57.891332Z"
- }
- },
- "outputs": [],
- "source": [
- "import os\n",
- "\n",
- "# We ignore warnigns for now\n",
- "warnings.filterwarnings(\"ignore\")\n",
- "\n",
- "# API key for cognee\n",
- "if \"LLM_API_KEY\" not in os.environ:\n",
- " os.environ[\"LLM_API_KEY\"] = \"\"\n",
- "\n",
- "# API key for graphiti\n",
- "if \"OPENAI_API_KEY\" not in os.environ:\n",
- " os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
- "\n",
- "GRAPH_DATABASE_PROVIDER = \"neo4j\"\n",
- "GRAPH_DATABASE_USERNAME = \"neo4j\"\n",
- "GRAPH_DATABASE_PASSWORD = \"pleaseletmein\"\n",
- "GRAPH_DATABASE_URL = \"bolt://localhost:7687\"\n",
- "\n",
- "os.environ[\"GRAPH_DATABASE_PROVIDER\"] = GRAPH_DATABASE_PROVIDER\n",
- "os.environ[\"GRAPH_DATABASE_USERNAME\"] = GRAPH_DATABASE_USERNAME\n",
- "os.environ[\"GRAPH_DATABASE_PASSWORD\"] = GRAPH_DATABASE_PASSWORD\n",
- "os.environ[\"GRAPH_DATABASE_URL\"] = GRAPH_DATABASE_URL\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Input texts with temporal information"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "ExecuteTime": {
- "end_time": "2025-01-15T10:43:57.928664Z",
- "start_time": "2025-01-15T10:43:57.927105Z"
- }
- },
- "outputs": [],
- "source": [
- "text_list = [\n",
- " \"Kamala Harris is the Attorney General of California. She was previously \"\n",
- " \"the district attorney for San Francisco.\",\n",
- " \"As AG, Harris was in office from January 3, 2011 – January 3, 2017\",\n",
- "]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Running graphiti + transforming its graph into cognee's core system (graph transformation + vector embeddings)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "ExecuteTime": {
- "end_time": "2025-01-15T10:44:25.008501Z",
- "start_time": "2025-01-15T10:43:57.932240Z"
- }
- },
- "outputs": [],
- "source": [
- "await cognee.prune.prune_data()\n",
- "await cognee.prune.prune_system(metadata=True)\n",
- "await create_relational_db_and_tables()\n",
- "\n",
- "# Initialize default user\n",
- "user = await get_default_user()\n",
- "\n",
- "for text in text_list:\n",
- " await cognee.add(text)\n",
- "\n",
- "tasks = [\n",
- " Task(build_graph_with_temporal_awareness, text_list=text_list),\n",
- " ]\n",
- "\n",
- "pipeline = run_tasks(tasks, user=user)\n",
- "\n",
- "async for result in pipeline:\n",
- " print(result)\n",
- "\n",
- "await index_and_transform_graphiti_nodes_and_edges()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Retrieving and generating answer from graphiti graph with cognee retriever"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "ExecuteTime": {
- "end_time": "2025-01-15T10:44:27.844438Z",
- "start_time": "2025-01-15T10:44:25.013325Z"
- }
- },
- "outputs": [],
- "source": [
- "# Step 1: Formulating the Query 🔍\n",
- "query = \"When was Kamala Harris in office?\"\n",
- "\n",
- "# Step 2: Searching for Relevant Triplets 📊\n",
- "triplets = await brute_force_triplet_search(\n",
- " query=query,\n",
- " top_k=3,\n",
- " collections=[\"graphitinode_content\", \"graphitinode_name\", \"graphitinode_summary\"],\n",
- ")\n",
- "\n",
- "# Step 3: Preparing the Context for the LLM\n",
- "retriever = GraphCompletionRetriever()\n",
- "context = await retriever.resolve_edges_to_text(triplets)\n",
- "\n",
- "args = {\"question\": query, \"context\": context}\n",
- "\n",
- "# Step 4: Generating Prompts ✍️\n",
- "user_prompt = render_prompt(\"graph_context_for_question.txt\", args)\n",
- "system_prompt = read_query_prompt(\"answer_simple_question_restricted.txt\")\n",
- "\n",
- "# Step 5: Interacting with the LLM 🤖\n",
- "llm_client = get_llm_client()\n",
- "computed_answer = await llm_client.acreate_structured_output(\n",
- " text_input=user_prompt, # Input prompt for the user context\n",
- " system_prompt=system_prompt, # System-level instructions for the model\n",
- " response_model=str,\n",
- ")\n",
- "\n",
- "# Step 6: Displaying the Computed Answer ✨\n",
- "print(f\"💡 Answer: {computed_answer}\")"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": ".venv",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.11.8"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/notebooks/cognee_llama_index.ipynb b/notebooks/cognee_llama_index.ipynb
deleted file mode 100644
index 65f1b2ef0..000000000
--- a/notebooks/cognee_llama_index.ipynb
+++ /dev/null
@@ -1,239 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Cognee GraphRAG with LlamaIndex Documents"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%pip install llama-index-core\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Load Data\n",
- "\n",
- "We will use a sample news article dataset retrieved from Diffbot, which Tomaz has conveniently made available on GitHub for easy access.\n",
- "\n",
- "The dataset contains 2,500 samples; for ease of experimentation, we will use 5 of these samples, which include the `title` and `text` of news articles."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import pandas as pd\n",
- "from llama_index.core import Document\n",
- "\n",
- "news = pd.read_csv(\n",
- " \"https://raw.githubusercontent.com/tomasonjo/blog-datasets/main/news_articles.csv\"\n",
- ")[:5]\n",
- "\n",
- "news.head()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Prepare documents as required by LlamaIndex"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "documents = [Document(text=f\"{row['title']}: {row['text']}\") for i, row in news.iterrows()]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Set environment variables"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
- "source": [
- "import os\n",
- "\n",
- "# Setting environment variables\n",
- "if \"GRAPHISTRY_USERNAME\" not in os.environ:\n",
- " os.environ[\"GRAPHISTRY_USERNAME\"] = \"\"\n",
- "\n",
- "if \"GRAPHISTRY_PASSWORD\" not in os.environ:\n",
- " os.environ[\"GRAPHISTRY_PASSWORD\"] = \"\"\n",
- "\n",
- "if \"LLM_API_KEY\" not in os.environ:\n",
- " os.environ[\"LLM_API_KEY\"] = \"\"\n",
- "\n",
- "# \"neo4j\" or \"networkx\"\n",
- "os.environ[\"GRAPH_DATABASE_PROVIDER\"] = \"networkx\"\n",
- "# Not needed if using networkx\n",
- "# os.environ[\"GRAPH_DATABASE_URL\"]=\"\"\n",
- "# os.environ[\"GRAPH_DATABASE_USERNAME\"]=\"\"\n",
- "# os.environ[\"GRAPH_DATABASE_PASSWORD\"]=\"\"\n",
- "\n",
- "# \"pgvector\", \"qdrant\", \"weaviate\" or \"lancedb\"\n",
- "os.environ[\"VECTOR_DB_PROVIDER\"] = \"lancedb\"\n",
- "# Not needed if using \"lancedb\" or \"pgvector\"\n",
- "# os.environ[\"VECTOR_DB_URL\"]=\"\"\n",
- "# os.environ[\"VECTOR_DB_KEY\"]=\"\"\n",
- "\n",
- "# Relational Database provider \"sqlite\" or \"postgres\"\n",
- "os.environ[\"DB_PROVIDER\"] = \"sqlite\"\n",
- "\n",
- "# Database name\n",
- "os.environ[\"DB_NAME\"] = \"cognee_db\"\n",
- "\n",
- "# Postgres specific parameters (Only if Postgres or PGVector is used)\n",
- "# os.environ[\"DB_HOST\"]=\"127.0.0.1\"\n",
- "# os.environ[\"DB_PORT\"]=\"5432\"\n",
- "# os.environ[\"DB_USERNAME\"]=\"cognee\"\n",
- "# os.environ[\"DB_PASSWORD\"]=\"cognee\""
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Run Cognee with LlamaIndex Documents"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from typing import Union, BinaryIO\n",
- "\n",
- "from cognee.infrastructure.databases.vector.pgvector import (\n",
- " create_db_and_tables as create_pgvector_db_and_tables,\n",
- ")\n",
- "from cognee.infrastructure.databases.relational import (\n",
- " create_db_and_tables as create_relational_db_and_tables,\n",
- ")\n",
- "from cognee.modules.users.models import User\n",
- "from cognee.modules.users.methods import get_default_user\n",
- "from cognee.tasks.ingestion.ingest_data import ingest_data\n",
- "import cognee\n",
- "\n",
- "# Create a clean slate for cognee -- reset data and system state\n",
- "await cognee.prune.prune_data()\n",
- "await cognee.prune.prune_system(metadata=True)\n",
- "\n",
- "\n",
- "# Add the LlamaIndex documents, and make it available for cognify\n",
- "async def add(\n",
- " data: Union[BinaryIO, list[BinaryIO], str, list[str]],\n",
- " dataset_name: str = \"main_dataset\",\n",
- " user: User = None,\n",
- "):\n",
- " await create_relational_db_and_tables()\n",
- " await create_pgvector_db_and_tables()\n",
- "\n",
- " if user is None:\n",
- " user = await get_default_user()\n",
- "\n",
- " await ingest_data(data, dataset_name, user)\n",
- "\n",
- "\n",
- "await add(documents)\n",
- "\n",
- "# Use LLMs and cognee to create knowledge graph\n",
- "await cognee.cognify()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Query Cognee for summaries related to data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from cognee import SearchType\n",
- "\n",
- "# Query cognee for summaries\n",
- "search_results = await cognee.search(\n",
- " query_type=SearchType.SUMMARIES, query_text=\"What are the main news discussed in the document?\"\n",
- ")\n",
- "# Display search results\n",
- "print(\"\\n Summary of main news discussed:\\n\")\n",
- "print(search_results[0][\"text\"])"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Render Knowledge Graph generated from provided data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import graphistry\n",
- "\n",
- "from cognee.infrastructure.databases.graph import get_graph_engine\n",
- "from cognee.shared.utils import render_graph\n",
- "\n",
- "# Get graph\n",
- "graphistry.login(\n",
- " username=os.getenv(\"GRAPHISTRY_USERNAME\"), password=os.getenv(\"GRAPHISTRY_PASSWORD\")\n",
- ")\n",
- "graph_engine = await get_graph_engine()\n",
- "\n",
- "graph_url = await render_graph(graph_engine.graph)\n",
- "print(graph_url)"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": ".venv",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.9.6"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/notebooks/graphrag_vs_rag.ipynb b/notebooks/graphrag_vs_rag.ipynb
deleted file mode 100644
index 035015264..000000000
--- a/notebooks/graphrag_vs_rag.ipynb
+++ /dev/null
@@ -1,253 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Cognee GraphRAG\n",
- "\n",
- "Connecting external knowledge to the LLM efficiently and retrieving it is a key challenge faced by developers. For developers and data scientists, integrating structured and unstructured data into AI workflows often involves multiple tools, complex pipelines, and time-consuming processes.\n",
- "\n",
- "Enter **cognee,** a powerful framework for knowledge and memory management. Cognee streamlines the path from raw data to actionable insights.\n",
- "\n",
- "In this notebook, we’ll explore a demo that leverages cognee and creates a knowledge graph from a document, process it into a meaningful structure, and extract useful insights. By the end, you’ll see how cognee can give you new insights into your data by connecting various data sources in one big semantic layer you can analyze.\n",
- "\n",
- "## RAG: Retrieval Augmented Generation - Recap\n",
- "\n",
- "RAG enhances LLMs by integrating external knowledge sources during inference. It does so by turning the data into a vector representation and storing it in a vector store.\n",
- "\n",
- "### Key Benefits of RAG:\n",
- "\n",
- "1. Connecting domain specific data to LLMs\n",
- "2. Cost savings\n",
- "3. Higher accuracy than base LLM\n",
- "\n",
- "However, building a RAG system presents challenges: handling diverse data formats, data updates, creating a robust metadata layer, and mediocre accuracy\n",
- "\n",
- "## Introducing cognee\n",
- "\n",
- "cognee simplifies knowledge and memory management for LLMs\n",
- "\n",
- "cognee is inspired by human mind and higher cognitive functions. It mimics ways we construct our mental map of the world and build a semantic understanding of various objects, terms and issues in our everyday lives.\n",
- "\n",
- "cognee brings this approach to code by allowing developers to create semantic layers that would allow users to store their ontologies which are **a formalised depiction of knowledge** in graphs.\n",
- "\n",
- "This lets you use the knowledge you have about a system connect it to LLMs in a modular way, with the best data engineering practices, wide choice of vector and graph stores and various LLMs you can use.\n",
- "\n",
- "Together, they:\n",
- "\n",
- "- Turn unstructured and semi-structured data into a graph/vector representation.\n",
- "- Enable ontology generation for particular domains, making unique graphs for every vertical\n",
- "- Provide a deterministic layer for LLM outputs, ensuring consistency and reliability.\n",
- "\n",
- "## Step-by-Step Demo: Building a RAG System with Cognee\n",
- "\n",
- "### 1. Setting Up the Environment\n",
- "\n",
- "Start by importing the required libraries and defining the environment:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "!pip install cognee==0.1.39"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import os\n",
- "import cognee\n",
- "\n",
- "await cognee.prune.prune_data()\n",
- "await cognee.prune.prune_system(metadata=True)\n",
- "\n",
- "if \"OPENAI_API_KEY\" not in os.environ:\n",
- " os.environ[\"OPENAI_API_KEY\"] = \"\""
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Ensure you’ve set up your API keys and installed necessary dependencies.\n",
- "\n",
- "### 2. Preparing the Dataset\n",
- "\n",
- "We’ll use a brief profile of an individual as our sample dataset:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "documents = [\"Jessica Miller, Experienced Sales Manager with a strong track record in building high-performing teams.\",\n",
- " \"David Thompson, Creative Graphic Designer with over 8 years of experience in visual design and branding.\"\n",
- " ]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### 3. Adding Data to Cognee\n",
- "\n",
- "Load the dataset into the cognee framework:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "await cognee.add(documents)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "This step prepares the data for graph-based processing.\n",
- "\n",
- "### 5. Processing Data into a Knowledge Graph\n",
- "\n",
- "Transform the data into a structured knowledge graph:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "await cognee.cognify()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The graph now contains nodes and relationships derived from the dataset, creating a powerful structure for exploration.\n",
- "\n",
- "### 6. Performing Searches\n",
- "\n",
- "### Answer prompt based on knowledge graph approach:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from cognee.api.v1.search import SearchType\n",
- "search_results = await cognee.search(query_type=SearchType.GRAPH_COMPLETION, query_text=\"Tell me who are the people mentioned?\")\n",
- "\n",
- "print(\"\\n\\nAnswer based on knowledge graph:\\n\")\n",
- "for result in search_results:\n",
- " print(f\"{result}\\n\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Answer prompt based on RAG approach:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "search_results = await cognee.search(query_type=SearchType.RAG_COMPLETION, query_text=\"Tell me who are the people mentioned?\")\n",
- "\n",
- "print(\"\\n\\nAnswer based on RAG:\\n\")\n",
- "for result in search_results:\n",
- " print(f\"{result}\\n\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "In conclusion, the results demonstrate a significant advantage of the knowledge graph-based approach (Graphrag) over the RAG approach. Graphrag successfully identified all the mentioned individuals across multiple documents, showcasing its ability to aggregate and infer information from a global context. In contrast, the RAG approach was limited to identifying individuals within a single document due to its chunking-based processing constraints. This highlights Graphrag's superior capability in comprehensively resolving queries that span across a broader corpus of interconnected data."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### 7. Finding Related Nodes\n",
- "\n",
- "Explore relationships in the knowledge graph:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "related_nodes = await cognee.search(query_type=SearchType.INSIGHTS, query_text=\"person\")\n",
- "\n",
- "print(\"\\n\\nRelated nodes are:\\n\")\n",
- "for node in related_nodes:\n",
- " print(f\"{node}\\n\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Why Choose Cognee?\n",
- "\n",
- "### 1. Agentic Framework and Memory tied together\n",
- "\n",
- "Your agents can now get long-term, short-term memory and memory specific to their domains\n",
- "\n",
- "### 2. Enhanced Querying and Insights\n",
- "\n",
- "Your memory can now automatically optimize itself and allow to respond to questions better\n",
- "\n",
- "### 3. Simplified Deployment\n",
- "\n",
- "You can use the standard tools out of the box and get things done without much effort\n",
- "\n",
- "## Visualizing the Knowledge Graph\n",
- "\n",
- "Imagine a graph structure where each node represents a document or entity, and edges indicate relationships.\n",
- "\n",
- "Here’s the visualized knowledge graph from the simple example above:\n",
- "\n",
- "\n",
- "\n",
- "\n",
- "## Conclusion\n",
- "\n",
- "Try running it yourself\n",
- "\n",
- "[join the cognee community](https://discord.gg/tV7pr5XSj7)"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/notebooks/llama_index_cognee_integration.ipynb b/notebooks/llama_index_cognee_integration.ipynb
deleted file mode 100644
index be6c4ea13..000000000
--- a/notebooks/llama_index_cognee_integration.ipynb
+++ /dev/null
@@ -1,288 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "[](https://colab.research.google.com/drive/1EpokQ8Y_5jIJ7HdixZms81Oqgh2sp7-E?usp=sharing)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## LlamaIndex Cognee GraphRAG Integration\n",
- "\n",
- "Connecting external knowledge to the LLM efficiently and retrieving it is a key challenge faced by developers. For developers and data scientists, integrating structured and unstructured data into AI workflows often involves multiple tools, complex pipelines, and time-consuming processes.\n",
- "\n",
- "Enter **cognee,** a powerful framework for knowledge and memory management, and LlamaIndex, a versatile data integration library. Together, they enable us to transform retrieval-augmented generation (RAG) pipelines, into GraphRAG pipelines, streamlining the path from raw data to actionable insights.\n",
- "\n",
- "In this post, we’ll explore a demo that leverages cognee and LlamaIndex to create a knowledge graph from a LlamaIndex document, process it into a meaningful structure, and extract useful insights. By the end, you’ll see how these tools can give you new insights into your data by connecting various data sources in one big semantic layer you can analyze.\n",
- "\n",
- "## RAG - Recap\n",
- "\n",
- "RAG enhances LLMs by integrating external knowledge sources during inference. It does so by turning the data into a vector representation and storing it in a vector store.\n",
- "\n",
- "### Key Benefits of RAG:\n",
- "\n",
- "1. Connecting domain specific data to LLMs\n",
- "2. Cost savings\n",
- "3. Higher accuracy than base LLM\n",
- "\n",
- "However, building a RAG system presents challenges: handling diverse data formats, data updates, creating a robust metadata layer, and mediocre accuracy\n",
- "\n",
- "## Introducing cognee and LlamaIndex more\n",
- "\n",
- "cognee simplifies knowledge and memory management for LLMs, while LlamaIndex facilitates connecting LLMs to structured data sources and enabling agentic use-cases\n",
- "\n",
- "cognee is inspired by human mind and higer cognitive functions. It mimics ways we construct our mental map of the world and build a semantic understanding of various objects, terms and issues in our everyday lives.\n",
- "\n",
- "cognee brings this approach to code by allowing developers to create semantic layers that would allow users to store their ontologies which are **a formalised depiction of knowledge** in graphs.\n",
- "\n",
- "This lets you use the knowledge you have about a system connect it to LLMs in a modular way, with best data engineering practices, wide choice of vector and graph stores and various LLMs you can use.\n",
- "\n",
- "Together, they:\n",
- "\n",
- "- Turn unstructured and semi-structured data into a graph/vector representation.\n",
- "- Enable ontology generation for particular domains, making unique graphs for every vertical\n",
- "- Provide a deterministic layer for LLM outputs, ensuring consistency and reliability.\n",
- "\n",
- "## Step-by-Step Demo: Building a RAG System with Cognee and LlamaIndex\n",
- "\n",
- "### 1. Setting Up the Environment\n",
- "\n",
- "Start by importing the required libraries and defining the environment:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "!pip install llama-index-graph-rag-cognee==0.1.3"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import os\n",
- "import asyncio\n",
- "from llama_index.core import Document\n",
- "from llama_index.graph_rag.cognee import CogneeGraphRAG\n",
- "\n",
- "if \"OPENAI_API_KEY\" not in os.environ:\n",
- " os.environ[\"OPENAI_API_KEY\"] = \"\""
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Ensure you’ve set up your API keys and installed necessary dependencies.\n",
- "\n",
- "### 2. Preparing the Dataset\n",
- "\n",
- "We’ll use a brief profile of an individual as our sample dataset:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "documents = [\n",
- " Document(\n",
- " text=\"Jessica Miller, Experienced Sales Manager with a strong track record in driving sales growth and building high-performing teams.\"\n",
- " ),\n",
- " Document(\n",
- " text=\"David Thompson, Creative Graphic Designer with over 8 years of experience in visual design and branding.\"\n",
- " ),\n",
- "]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### 3. Initializing CogneeGraphRAG\n",
- "\n",
- "Instantiate the Cognee framework with configurations for LLM, graph, and database providers:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "cogneeRAG = CogneeGraphRAG(\n",
- " llm_api_key=os.environ[\"OPENAI_API_KEY\"],\n",
- " llm_provider=\"openai\",\n",
- " llm_model=\"gpt-4o-mini\",\n",
- " graph_db_provider=\"networkx\",\n",
- " vector_db_provider=\"lancedb\",\n",
- " relational_db_provider=\"sqlite\",\n",
- " relational_db_name=\"cognee_db\",\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### 4. Adding Data to Cognee\n",
- "\n",
- "Load the dataset into the cognee framework:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "await cogneeRAG.add(documents, \"test\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "This step prepares the data for graph-based processing.\n",
- "\n",
- "### 5. Processing Data into a Knowledge Graph\n",
- "\n",
- "Transform the data into a structured knowledge graph:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "await cogneeRAG.process_data(\"test\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The graph now contains nodes and relationships derived from the dataset, creating a powerful structure for exploration.\n",
- "\n",
- "### 6. Performing Searches\n",
- "\n",
- "### Answer prompt based on knowledge graph approach:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "search_results = await cogneeRAG.search(\"Tell me who are the people mentioned?\")\n",
- "\n",
- "print(\"\\n\\nAnswer based on knowledge graph:\\n\")\n",
- "for result in search_results:\n",
- " print(f\"{result}\\n\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Answer prompt based on RAG approach:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "search_results = await cogneeRAG.rag_search(\"Tell me who are the people mentioned?\")\n",
- "\n",
- "print(\"\\n\\nAnswer based on RAG:\\n\")\n",
- "for result in search_results:\n",
- " print(f\"{result}\\n\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "In conclusion, the results demonstrate a significant advantage of the knowledge graph-based approach (Graphrag) over the RAG approach. Graphrag successfully identified all the mentioned individuals across multiple documents, showcasing its ability to aggregate and infer information from a global context. In contrast, the RAG approach was limited to identifying individuals within a single document due to its chunking-based processing constraints. This highlights Graphrag's superior capability in comprehensively resolving queries that span across a broader corpus of interconnected data."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### 7. Finding Related Nodes\n",
- "\n",
- "Explore relationships in the knowledge graph:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "related_nodes = await cogneeRAG.get_related_nodes(\"person\")\n",
- "\n",
- "print(\"\\n\\nRelated nodes are:\\n\")\n",
- "for node in related_nodes:\n",
- " print(f\"{node}\\n\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Why Choose Cognee and LlamaIndex?\n",
- "\n",
- "### 1. Agentic Framework and Memory tied together\n",
- "\n",
- "Your agents can now get long-term, short-term memory and memory specific to their domains\n",
- "\n",
- "### 2. Enhanced Querying and Insights\n",
- "\n",
- "Your memory can now automatically optimize itself and allow to respond to questions better\n",
- "\n",
- "### 3. Simplified Deployment\n",
- "\n",
- "You can use the standard tools out of the box and get things done without much effort\n",
- "\n",
- "## Visualizing the Knowledge Graph\n",
- "\n",
- "Imagine a graph structure where each node represents a document or entity, and edges indicate relationships.\n",
- "\n",
- "Here’s the visualized knowledge graph from the simple example above:\n",
- "\n",
- "\n",
- "\n",
- "\n",
- "## Conclusion\n",
- "\n",
- "Try running it yourself\n",
- "\n",
- "[join the cognee community](https://discord.gg/tV7pr5XSj7)"
- ]
- }
- ],
- "metadata": {
- "language_info": {
- "name": "python",
- "version": "3.12.9"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}