Merge dev with main (#921)

<!-- .github/pull_request_template.md -->

## Description
Merge changes on main to dev

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
Igor Ilic 2025-06-07 16:48:47 +02:00 committed by GitHub
parent 1ed6cfd918
commit 84c7aeb1a5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 69 additions and 1047 deletions

2
.gitattributes vendored Normal file
View file

@ -0,0 +1,2 @@
# ignore jupyter notebooks in the language bar on github
notebooks/** linguist-vendored

View file

@ -11,30 +11,9 @@ jobs:
# notebook-location: notebooks/cognee_demo.ipynb # notebook-location: notebooks/cognee_demo.ipynb
# secrets: inherit # secrets: inherit
run-llama-index-integration:
name: LlamaIndex Integration Notebook
uses: ./.github/workflows/reusable_notebook.yml
with:
notebook-location: notebooks/llama_index_cognee_integration.ipynb
secrets: inherit
run-cognee-llama-index:
name: Cognee LlamaIndex Notebook
uses: ./.github/workflows/reusable_notebook.yml
with:
notebook-location: notebooks/cognee_llama_index.ipynb
secrets: inherit
run-cognee-multimedia: run-cognee-multimedia:
name: Cognee Multimedia Notebook name: Cognee Multimedia Notebook
uses: ./.github/workflows/reusable_notebook.yml uses: ./.github/workflows/reusable_notebook.yml
with: with:
notebook-location: notebooks/cognee_multimedia_demo.ipynb notebook-location: notebooks/cognee_multimedia_demo.ipynb
secrets: inherit secrets: inherit
# run-graphrag-vs-rag:
# name: Graphrag vs Rag notebook
# uses: ./.github/workflows/reusable_notebook.yml
# with:
# notebook-location: notebooks/graphrag_vs_rag.ipynb
# secrets: inherit

View file

@ -13,6 +13,8 @@
<a href="https://cognee.ai">Learn more</a> <a href="https://cognee.ai">Learn more</a>
· ·
<a href="https://discord.gg/NQPKmU5CCg">Join Discord</a> <a href="https://discord.gg/NQPKmU5CCg">Join Discord</a>
·
<a href="https://www.reddit.com/r/AIMemory/">Join r/AIMemory</a>
</p> </p>
@ -46,12 +48,9 @@ More on [use-cases](https://docs.cognee.ai/use-cases) and [evals](https://github
<div style="text-align: center"> <div style="text-align: center">
<img src="https://raw.githubusercontent.com/topoteretes/cognee/refs/heads/main/assets/cognee_benefits.png" alt="Why cognee?" width="50%" /> <img src="https://raw.githubusercontent.com/topoteretes/cognee/refs/heads/main/assets/cognee_benefits.png" alt="Why cognee?" width="50%" />
</div> </div>
</div> </div>
## Features ## Features
- Interconnect and retrieve your past conversations, documents, images and audio transcriptions - Interconnect and retrieve your past conversations, documents, images and audio transcriptions
@ -61,7 +60,7 @@ More on [use-cases](https://docs.cognee.ai/use-cases) and [evals](https://github
## Get Started ## Get Started
Get started quickly with a Google Colab <a href="https://colab.research.google.com/drive/1jHbWVypDgCLwjE71GSXhRL3YxYhCZzG1?usp=sharing">notebook</a> or <a href="https://github.com/topoteretes/cognee-starter">starter repo</a> Get started quickly with a Google Colab <a href="https://colab.research.google.com/drive/1jHbWVypDgCLwjE71GSXhRL3YxYhCZzG1?usp=sharing">notebook</a> , <a href="https://deepnote.com/workspace/cognee-382213d0-0444-4c89-8265-13770e333c02/project/cognee-demo-78ffacb9-5832-4611-bb1a-560386068b30/notebook/Notebook-1-75b24cda566d4c24ab348f7150792601?utm_source=share-modal&utm_medium=product-shared-content&utm_campaign=notebook&utm_content=78ffacb9-5832-4611-bb1a-560386068b30">Deepnote notebook</a> or <a href="https://github.com/topoteretes/cognee-starter">starter repo</a>
## Contributing ## Contributing
@ -141,7 +140,15 @@ Example output:
``` ```
### cognee UI ## Our paper is out! <a href="https://arxiv.org/abs/2505.24478" target="_blank" rel="noopener noreferrer">Read here</a>
<div style="text-align: center">
<img src="assets/cognee-paper.png" alt="cognee paper" width="100%" />
</div>
</div>
## Cognee UI
You can also cognify your files and query using cognee UI. You can also cognify your files and query using cognee UI.

View file

@ -12,6 +12,8 @@ from sqlalchemy.util import await_only
from cognee.modules.users.methods import create_default_user, delete_user from cognee.modules.users.methods import create_default_user, delete_user
from fastapi_users.exceptions import UserAlreadyExists
# revision identifiers, used by Alembic. # revision identifiers, used by Alembic.
revision: str = "482cd6517ce4" revision: str = "482cd6517ce4"
@ -21,7 +23,10 @@ depends_on: Union[str, Sequence[str], None] = "8057ae7329c2"
def upgrade() -> None: def upgrade() -> None:
await_only(create_default_user()) try:
await_only(create_default_user())
except UserAlreadyExists:
pass # It's fine if the default user already exists
def downgrade() -> None: def downgrade() -> None:

BIN
assets/cognee-paper.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 88 KiB

View file

@ -53,7 +53,7 @@ export default function DatasetsView({
setExplorationDataset(dataset); setExplorationDataset(dataset);
showExplorationWindow(); showExplorationWindow();
} }
return ( return (
<> <>
<Stack orientation="vertical" gap="4"> <Stack orientation="vertical" gap="4">
@ -95,10 +95,10 @@ export default function DatasetsView({
</DatasetItem> </DatasetItem>
))} ))}
</Stack> </Stack>
<Modal onClose={hideExplorationWindow} isOpen={isExplorationWindowShown} className={styles.explorerModal}> <Modal closeOnBackdropClick={false} onClose={hideExplorationWindow} isOpen={isExplorationWindowShown} className={styles.explorerModal}>
<Spacer horizontal="2" vertical="3" wrap> <Spacer horizontal="2" vertical="3" wrap>
<Text>{dataset?.name}</Text> <Text>{dataset?.name}</Text>
</Spacer> </Spacer>
<Explorer dataset={dataset!} /> <Explorer dataset={dataset!} />
</Modal> </Modal>
</> </>

View file

@ -3,7 +3,7 @@
import { v4 } from 'uuid'; import { v4 } from 'uuid';
import classNames from 'classnames'; import classNames from 'classnames';
import { useCallback, useEffect, useState } from 'react'; import { useCallback, useEffect, useState } from 'react';
import { CTAButton, Stack, Text, DropdownSelect, TextArea, useBoolean } from 'ohmy-ui'; import { CTAButton, Stack, Text, DropdownSelect, TextArea, useBoolean, Input } from 'ohmy-ui';
import { fetch } from '@/utils'; import { fetch } from '@/utils';
import styles from './SearchView.module.css'; import styles from './SearchView.module.css';
import getHistory from '@/modules/chat/getHistory'; import getHistory from '@/modules/chat/getHistory';
@ -33,8 +33,15 @@ export default function SearchView() {
}, { }, {
value: 'RAG_COMPLETION', value: 'RAG_COMPLETION',
label: 'Completion using RAG', label: 'Completion using RAG',
}, {
value: 'GRAPH_COMPLETION_COT',
label: 'Cognee\'s Chain of Thought search',
}, {
value: 'GRAPH_COMPLETION_CONTEXT_EXTENSION',
label: 'Cognee\'s Multi-Hop search',
}]; }];
const [searchType, setSearchType] = useState(searchOptions[0]); const [searchType, setSearchType] = useState(searchOptions[0]);
const [rangeValue, setRangeValue] = useState(10);
const scrollToBottom = useCallback(() => { const scrollToBottom = useCallback(() => {
setTimeout(() => { setTimeout(() => {
@ -90,6 +97,7 @@ export default function SearchView() {
body: JSON.stringify({ body: JSON.stringify({
query: inputValue.trim(), query: inputValue.trim(),
searchType: searchTypeValue, searchType: searchTypeValue,
topK: rangeValue,
}), }),
}) })
.then((response) => response.json()) .then((response) => response.json())
@ -108,7 +116,7 @@ export default function SearchView() {
.catch(() => { .catch(() => {
setInputValue(inputValue); setInputValue(inputValue);
}); });
}, [inputValue, scrollToBottom, searchType.value]); }, [inputValue, rangeValue, scrollToBottom, searchType.value]);
const { const {
value: isInputExpanded, value: isInputExpanded,
@ -122,6 +130,10 @@ export default function SearchView() {
} }
}; };
// Keeps the "Search range" state (sent as `topK` in the search request) in
// sync with the numeric input field.
//
// The field value is parsed as a base-10 integer. An empty or non-numeric
// field parses to NaN; storing NaN would make the request body serialize
// `topK` as `null`, so such values are ignored and the previous range is kept.
const handleRangeValueChange = (event: React.ChangeEvent<HTMLInputElement>) => {
  const nextRangeValue = Number.parseInt(event.target.value, 10);

  if (!Number.isNaN(nextRangeValue)) {
    setRangeValue(nextRangeValue);
  }
};
return ( return (
<Stack className={styles.searchViewContainer}> <Stack className={styles.searchViewContainer}>
<DropdownSelect<SelectOption> <DropdownSelect<SelectOption>
@ -146,9 +158,15 @@ export default function SearchView() {
</Stack> </Stack>
</div> </div>
<form onSubmit={handleSearchSubmit}> <form onSubmit={handleSearchSubmit}>
<Stack orientation="horizontal" align="end/" gap="2"> <Stack orientation="vertical" gap="2">
<TextArea onKeyUp={handleSubmitOnEnter} style={{ transition: 'height 0.3s ease', height: isInputExpanded ? '128px' : '38px' }} onFocus={expandInput} onBlur={contractInput} value={inputValue} onChange={handleInputChange} name="searchInput" placeholder="Search" /> <TextArea onKeyUp={handleSubmitOnEnter} style={{ transition: 'height 0.3s ease', height: isInputExpanded ? '128px' : '38px' }} onFocus={expandInput} onBlur={contractInput} value={inputValue} onChange={handleInputChange} name="searchInput" placeholder="Search" />
<CTAButton hugContent type="submit">Search</CTAButton> <Stack orientation="horizontal" gap="between">
<Stack orientation="horizontal" gap="2" align="center">
<label><Text>Search range: </Text></label>
<Input style={{ maxWidth: "90px" }} value={rangeValue} onChange={handleRangeValueChange} type="number" />
</Stack>
<CTAButton hugContent type="submit">Search</CTAButton>
</Stack>
</Stack> </Stack>
</form> </form>
</Stack> </Stack>

View file

@ -1,6 +1,10 @@
{ {
"compilerOptions": { "compilerOptions": {
"lib": ["dom", "dom.iterable", "esnext"], "lib": [
"dom",
"dom.iterable",
"esnext"
],
"allowJs": true, "allowJs": true,
"skipLibCheck": true, "skipLibCheck": true,
"strict": true, "strict": true,
@ -18,9 +22,19 @@
} }
], ],
"paths": { "paths": {
"@/*": ["./src/*"] "@/*": [
} "./src/*"
]
},
"target": "ES2017"
}, },
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], "include": [
"exclude": ["node_modules"] "next-env.d.ts",
"**/*.ts",
"**/*.tsx",
".next/types/**/*.ts"
],
"exclude": [
"node_modules"
]
} }

View file

@ -1,5 +1,5 @@
from uuid import UUID from uuid import UUID
from typing import Optional, Union from typing import Optional
from datetime import datetime from datetime import datetime
from fastapi import Depends, APIRouter from fastapi import Depends, APIRouter
from fastapi.responses import JSONResponse from fastapi.responses import JSONResponse
@ -17,6 +17,7 @@ class SearchPayloadDTO(InDTO):
datasets: Optional[list[str]] = None datasets: Optional[list[str]] = None
dataset_ids: Optional[list[UUID]] = None dataset_ids: Optional[list[UUID]] = None
query: str query: str
top_k: Optional[int] = 10
def get_search_router() -> APIRouter: def get_search_router() -> APIRouter:
@ -49,6 +50,7 @@ def get_search_router() -> APIRouter:
user=user, user=user,
datasets=payload.datasets, datasets=payload.datasets,
dataset_ids=payload.dataset_ids, dataset_ids=payload.dataset_ids,
top_k=payload.top_k,
) )
return results return results

View file

@ -14,7 +14,7 @@ echo "Environment: $ENVIRONMENT"
# smooth redeployments and container restarts while maintaining data integrity. # smooth redeployments and container restarts while maintaining data integrity.
echo "Running database migrations..." echo "Running database migrations..."
MIGRATION_OUTPUT=$(alembic upgrade head 2>&1) MIGRATION_OUTPUT=$(alembic upgrade head)
MIGRATION_EXIT_CODE=$? MIGRATION_EXIT_CODE=$?
if [[ $MIGRATION_EXIT_CODE -ne 0 ]]; then if [[ $MIGRATION_EXIT_CODE -ne 0 ]]; then
@ -42,5 +42,5 @@ if [ "$ENVIRONMENT" = "dev" ] || [ "$ENVIRONMENT" = "local" ]; then
gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --log-level debug --reload cognee.api.client:app gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --log-level debug --reload cognee.api.client:app
fi fi
else else
gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --log-level error cognee.api.client:app gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --log-level error cognee.api.client:app
fi fi

View file

@ -1,225 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Cognee Graphiti integration demo"
]
},
{
"cell_type": "markdown",
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"source": [
"First we import the necessary libraries"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import asyncio\n",
"\n",
"import cognee\n",
"from cognee.shared.logging_utils import get_logger, ERROR\n",
"from cognee.modules.pipelines import Task, run_tasks\n",
"from cognee.tasks.temporal_awareness import build_graph_with_temporal_awareness\n",
"from cognee.infrastructure.databases.relational import (\n",
" create_db_and_tables as create_relational_db_and_tables,\n",
")\n",
"from cognee.tasks.temporal_awareness.index_graphiti_objects import (\n",
" index_and_transform_graphiti_nodes_and_edges,\n",
")\n",
"from cognee.modules.retrieval.utils.brute_force_triplet_search import brute_force_triplet_search\n",
"from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever\n",
"from cognee.infrastructure.llm.prompts import read_query_prompt, render_prompt\n",
"from cognee.infrastructure.llm.get_llm_client import get_llm_client\n",
"from cognee.modules.users.methods import get_default_user"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Set environment variables"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-15T10:43:57.893763Z",
"start_time": "2025-01-15T10:43:57.891332Z"
}
},
"outputs": [],
"source": [
"import os\n",
"\n",
"# We ignore warnings for now\n",
"warnings.filterwarnings(\"ignore\")\n",
"\n",
"# API key for cognee\n",
"if \"LLM_API_KEY\" not in os.environ:\n",
" os.environ[\"LLM_API_KEY\"] = \"\"\n",
"\n",
"# API key for graphiti\n",
"if \"OPENAI_API_KEY\" not in os.environ:\n",
" os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
"GRAPH_DATABASE_PROVIDER = \"neo4j\"\n",
"GRAPH_DATABASE_USERNAME = \"neo4j\"\n",
"GRAPH_DATABASE_PASSWORD = \"pleaseletmein\"\n",
"GRAPH_DATABASE_URL = \"bolt://localhost:7687\"\n",
"\n",
"os.environ[\"GRAPH_DATABASE_PROVIDER\"] = GRAPH_DATABASE_PROVIDER\n",
"os.environ[\"GRAPH_DATABASE_USERNAME\"] = GRAPH_DATABASE_USERNAME\n",
"os.environ[\"GRAPH_DATABASE_PASSWORD\"] = GRAPH_DATABASE_PASSWORD\n",
"os.environ[\"GRAPH_DATABASE_URL\"] = GRAPH_DATABASE_URL\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Input texts with temporal information"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-15T10:43:57.928664Z",
"start_time": "2025-01-15T10:43:57.927105Z"
}
},
"outputs": [],
"source": [
"text_list = [\n",
" \"Kamala Harris is the Attorney General of California. She was previously \"\n",
" \"the district attorney for San Francisco.\",\n",
" \"As AG, Harris was in office from January 3, 2011 – January 3, 2017\",\n",
"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Running graphiti + transforming its graph into cognee's core system (graph transformation + vector embeddings)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-15T10:44:25.008501Z",
"start_time": "2025-01-15T10:43:57.932240Z"
}
},
"outputs": [],
"source": [
"await cognee.prune.prune_data()\n",
"await cognee.prune.prune_system(metadata=True)\n",
"await create_relational_db_and_tables()\n",
"\n",
"# Initialize default user\n",
"user = await get_default_user()\n",
"\n",
"for text in text_list:\n",
" await cognee.add(text)\n",
"\n",
"tasks = [\n",
" Task(build_graph_with_temporal_awareness, text_list=text_list),\n",
" ]\n",
"\n",
"pipeline = run_tasks(tasks, user=user)\n",
"\n",
"async for result in pipeline:\n",
" print(result)\n",
"\n",
"await index_and_transform_graphiti_nodes_and_edges()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Retrieving and generating answer from graphiti graph with cognee retriever"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-15T10:44:27.844438Z",
"start_time": "2025-01-15T10:44:25.013325Z"
}
},
"outputs": [],
"source": [
"# Step 1: Formulating the Query 🔍\n",
"query = \"When was Kamala Harris in office?\"\n",
"\n",
"# Step 2: Searching for Relevant Triplets 📊\n",
"triplets = await brute_force_triplet_search(\n",
" query=query,\n",
" top_k=3,\n",
" collections=[\"graphitinode_content\", \"graphitinode_name\", \"graphitinode_summary\"],\n",
")\n",
"\n",
"# Step 3: Preparing the Context for the LLM\n",
"retriever = GraphCompletionRetriever()\n",
"context = await retriever.resolve_edges_to_text(triplets)\n",
"\n",
"args = {\"question\": query, \"context\": context}\n",
"\n",
"# Step 4: Generating Prompts ✍️\n",
"user_prompt = render_prompt(\"graph_context_for_question.txt\", args)\n",
"system_prompt = read_query_prompt(\"answer_simple_question_restricted.txt\")\n",
"\n",
"# Step 5: Interacting with the LLM 🤖\n",
"llm_client = get_llm_client()\n",
"computed_answer = await llm_client.acreate_structured_output(\n",
" text_input=user_prompt, # Input prompt for the user context\n",
" system_prompt=system_prompt, # System-level instructions for the model\n",
" response_model=str,\n",
")\n",
"\n",
"# Step 6: Displaying the Computed Answer ✨\n",
"print(f\"💡 Answer: {computed_answer}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View file

@ -1,239 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Cognee GraphRAG with LlamaIndex Documents"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install llama-index-core\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Data\n",
"\n",
"We will use a sample news article dataset retrieved from Diffbot, which Tomaz has conveniently made available on GitHub for easy access.\n",
"\n",
"The dataset contains 2,500 samples; for ease of experimentation, we will use 5 of these samples, which include the `title` and `text` of news articles."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from llama_index.core import Document\n",
"\n",
"news = pd.read_csv(\n",
" \"https://raw.githubusercontent.com/tomasonjo/blog-datasets/main/news_articles.csv\"\n",
")[:5]\n",
"\n",
"news.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prepare documents as required by LlamaIndex"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"documents = [Document(text=f\"{row['title']}: {row['text']}\") for i, row in news.iterrows()]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Set environment variables"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"# Setting environment variables\n",
"if \"GRAPHISTRY_USERNAME\" not in os.environ:\n",
" os.environ[\"GRAPHISTRY_USERNAME\"] = \"\"\n",
"\n",
"if \"GRAPHISTRY_PASSWORD\" not in os.environ:\n",
" os.environ[\"GRAPHISTRY_PASSWORD\"] = \"\"\n",
"\n",
"if \"LLM_API_KEY\" not in os.environ:\n",
" os.environ[\"LLM_API_KEY\"] = \"\"\n",
"\n",
"# \"neo4j\" or \"networkx\"\n",
"os.environ[\"GRAPH_DATABASE_PROVIDER\"] = \"networkx\"\n",
"# Not needed if using networkx\n",
"# os.environ[\"GRAPH_DATABASE_URL\"]=\"\"\n",
"# os.environ[\"GRAPH_DATABASE_USERNAME\"]=\"\"\n",
"# os.environ[\"GRAPH_DATABASE_PASSWORD\"]=\"\"\n",
"\n",
"# \"pgvector\", \"qdrant\", \"weaviate\" or \"lancedb\"\n",
"os.environ[\"VECTOR_DB_PROVIDER\"] = \"lancedb\"\n",
"# Not needed if using \"lancedb\" or \"pgvector\"\n",
"# os.environ[\"VECTOR_DB_URL\"]=\"\"\n",
"# os.environ[\"VECTOR_DB_KEY\"]=\"\"\n",
"\n",
"# Relational Database provider \"sqlite\" or \"postgres\"\n",
"os.environ[\"DB_PROVIDER\"] = \"sqlite\"\n",
"\n",
"# Database name\n",
"os.environ[\"DB_NAME\"] = \"cognee_db\"\n",
"\n",
"# Postgres specific parameters (Only if Postgres or PGVector is used)\n",
"# os.environ[\"DB_HOST\"]=\"127.0.0.1\"\n",
"# os.environ[\"DB_PORT\"]=\"5432\"\n",
"# os.environ[\"DB_USERNAME\"]=\"cognee\"\n",
"# os.environ[\"DB_PASSWORD\"]=\"cognee\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Run Cognee with LlamaIndex Documents"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from typing import Union, BinaryIO\n",
"\n",
"from cognee.infrastructure.databases.vector.pgvector import (\n",
" create_db_and_tables as create_pgvector_db_and_tables,\n",
")\n",
"from cognee.infrastructure.databases.relational import (\n",
" create_db_and_tables as create_relational_db_and_tables,\n",
")\n",
"from cognee.modules.users.models import User\n",
"from cognee.modules.users.methods import get_default_user\n",
"from cognee.tasks.ingestion.ingest_data import ingest_data\n",
"import cognee\n",
"\n",
"# Create a clean slate for cognee -- reset data and system state\n",
"await cognee.prune.prune_data()\n",
"await cognee.prune.prune_system(metadata=True)\n",
"\n",
"\n",
"# Add the LlamaIndex documents, and make it available for cognify\n",
"async def add(\n",
" data: Union[BinaryIO, list[BinaryIO], str, list[str]],\n",
" dataset_name: str = \"main_dataset\",\n",
" user: User = None,\n",
"):\n",
" await create_relational_db_and_tables()\n",
" await create_pgvector_db_and_tables()\n",
"\n",
" if user is None:\n",
" user = await get_default_user()\n",
"\n",
" await ingest_data(data, dataset_name, user)\n",
"\n",
"\n",
"await add(documents)\n",
"\n",
"# Use LLMs and cognee to create knowledge graph\n",
"await cognee.cognify()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Query Cognee for summaries related to data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from cognee import SearchType\n",
"\n",
"# Query cognee for summaries\n",
"search_results = await cognee.search(\n",
" query_type=SearchType.SUMMARIES, query_text=\"What are the main news discussed in the document?\"\n",
")\n",
"# Display search results\n",
"print(\"\\n Summary of main news discussed:\\n\")\n",
"print(search_results[0][\"text\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Render Knowledge Graph generated from provided data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import graphistry\n",
"\n",
"from cognee.infrastructure.databases.graph import get_graph_engine\n",
"from cognee.shared.utils import render_graph\n",
"\n",
"# Get graph\n",
"graphistry.login(\n",
" username=os.getenv(\"GRAPHISTRY_USERNAME\"), password=os.getenv(\"GRAPHISTRY_PASSWORD\")\n",
")\n",
"graph_engine = await get_graph_engine()\n",
"\n",
"graph_url = await render_graph(graph_engine.graph)\n",
"print(graph_url)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long