refactor: remove notebooks (#905)
## Description

Removes the demo notebooks and the workflow jobs that ran them.

## DCO Affirmation

I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
parent 128f72078e
commit aa3163561d

5 changed files with 0 additions and 1026 deletions
.github/workflows/notebooks_tests.yml (vendored): 21 lines changed
@@ -11,30 +11,9 @@ jobs:
# notebook-location: notebooks/cognee_demo.ipynb
# secrets: inherit

  run-llama-index-integration:
    name: LlamaIndex Integration Notebook
    uses: ./.github/workflows/reusable_notebook.yml
    with:
      notebook-location: notebooks/llama_index_cognee_integration.ipynb
    secrets: inherit

  run-cognee-llama-index:
    name: Cognee LlamaIndex Notebook
    uses: ./.github/workflows/reusable_notebook.yml
    with:
      notebook-location: notebooks/cognee_llama_index.ipynb
    secrets: inherit

  run-cognee-multimedia:
    name: Cognee Multimedia Notebook
    uses: ./.github/workflows/reusable_notebook.yml
    with:
      notebook-location: notebooks/cognee_multimedia_demo.ipynb
    secrets: inherit

# run-graphrag-vs-rag:
# name: Graphrag vs Rag notebook
# uses: ./.github/workflows/reusable_notebook.yml
# with:
# notebook-location: notebooks/graphrag_vs_rag.ipynb
# secrets: inherit
@@ -1,225 +0,0 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Cognee Graphiti integration demo"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "vscode": {
     "languageId": "plaintext"
    }
   },
   "source": [
    "First we import the necessary libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import asyncio\n",
    "\n",
    "import cognee\n",
    "from cognee.shared.logging_utils import get_logger, ERROR\n",
    "from cognee.modules.pipelines import Task, run_tasks\n",
    "from cognee.tasks.temporal_awareness import build_graph_with_temporal_awareness\n",
    "from cognee.infrastructure.databases.relational import (\n",
    "    create_db_and_tables as create_relational_db_and_tables,\n",
    ")\n",
    "from cognee.tasks.temporal_awareness.index_graphiti_objects import (\n",
    "    index_and_transform_graphiti_nodes_and_edges,\n",
    ")\n",
    "from cognee.modules.retrieval.utils.brute_force_triplet_search import brute_force_triplet_search\n",
    "from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever\n",
    "from cognee.infrastructure.llm.prompts import read_query_prompt, render_prompt\n",
    "from cognee.infrastructure.llm.get_llm_client import get_llm_client\n",
    "from cognee.modules.users.methods import get_default_user"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Set environment variables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-15T10:43:57.893763Z",
     "start_time": "2025-01-15T10:43:57.891332Z"
    }
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import warnings\n",
    "\n",
    "# We ignore warnings for now\n",
    "warnings.filterwarnings(\"ignore\")\n",
    "\n",
    "# API key for cognee\n",
    "if \"LLM_API_KEY\" not in os.environ:\n",
    "    os.environ[\"LLM_API_KEY\"] = \"\"\n",
    "\n",
    "# API key for graphiti\n",
    "if \"OPENAI_API_KEY\" not in os.environ:\n",
    "    os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
    "\n",
    "GRAPH_DATABASE_PROVIDER = \"neo4j\"\n",
    "GRAPH_DATABASE_USERNAME = \"neo4j\"\n",
    "GRAPH_DATABASE_PASSWORD = \"pleaseletmein\"\n",
    "GRAPH_DATABASE_URL = \"bolt://localhost:7687\"\n",
    "\n",
    "os.environ[\"GRAPH_DATABASE_PROVIDER\"] = GRAPH_DATABASE_PROVIDER\n",
    "os.environ[\"GRAPH_DATABASE_USERNAME\"] = GRAPH_DATABASE_USERNAME\n",
    "os.environ[\"GRAPH_DATABASE_PASSWORD\"] = GRAPH_DATABASE_PASSWORD\n",
    "os.environ[\"GRAPH_DATABASE_URL\"] = GRAPH_DATABASE_URL\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Input texts with temporal information"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-15T10:43:57.928664Z",
     "start_time": "2025-01-15T10:43:57.927105Z"
    }
   },
   "outputs": [],
   "source": [
    "text_list = [\n",
    "    \"Kamala Harris is the Attorney General of California. She was previously \"\n",
    "    \"the district attorney for San Francisco.\",\n",
    "    \"As AG, Harris was in office from January 3, 2011 – January 3, 2017\",\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Running graphiti + transforming its graph into cognee's core system (graph transformation + vector embeddings)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-15T10:44:25.008501Z",
     "start_time": "2025-01-15T10:43:57.932240Z"
    }
   },
   "outputs": [],
   "source": [
    "await cognee.prune.prune_data()\n",
    "await cognee.prune.prune_system(metadata=True)\n",
    "await create_relational_db_and_tables()\n",
    "\n",
    "# Initialize default user\n",
    "user = await get_default_user()\n",
    "\n",
    "for text in text_list:\n",
    "    await cognee.add(text)\n",
    "\n",
    "tasks = [\n",
    "    Task(build_graph_with_temporal_awareness, text_list=text_list),\n",
    "]\n",
    "\n",
    "pipeline = run_tasks(tasks, user=user)\n",
    "\n",
    "async for result in pipeline:\n",
    "    print(result)\n",
    "\n",
    "await index_and_transform_graphiti_nodes_and_edges()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Retrieving and generating answer from graphiti graph with cognee retriever"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-15T10:44:27.844438Z",
     "start_time": "2025-01-15T10:44:25.013325Z"
    }
   },
   "outputs": [],
   "source": [
    "# Step 1: Formulating the Query 🔍\n",
    "query = \"When was Kamala Harris in office?\"\n",
    "\n",
    "# Step 2: Searching for Relevant Triplets 📊\n",
    "triplets = await brute_force_triplet_search(\n",
    "    query=query,\n",
    "    top_k=3,\n",
    "    collections=[\"graphitinode_content\", \"graphitinode_name\", \"graphitinode_summary\"],\n",
    ")\n",
    "\n",
    "# Step 3: Preparing the Context for the LLM\n",
    "retriever = GraphCompletionRetriever()\n",
    "context = await retriever.resolve_edges_to_text(triplets)\n",
    "\n",
    "args = {\"question\": query, \"context\": context}\n",
    "\n",
    "# Step 4: Generating Prompts ✍️\n",
    "user_prompt = render_prompt(\"graph_context_for_question.txt\", args)\n",
    "system_prompt = read_query_prompt(\"answer_simple_question_restricted.txt\")\n",
    "\n",
    "# Step 5: Interacting with the LLM 🤖\n",
    "llm_client = get_llm_client()\n",
    "computed_answer = await llm_client.acreate_structured_output(\n",
    "    text_input=user_prompt,  # Input prompt for the user context\n",
    "    system_prompt=system_prompt,  # System-level instructions for the model\n",
    "    response_model=str,\n",
    ")\n",
    "\n",
    "# Step 6: Displaying the Computed Answer ✨\n",
    "print(f\"💡 Answer: {computed_answer}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
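For quick reference, the Graphiti demo notebook removed above walks through one flow: reset cognee, build a temporally aware graph from a few sentences, index the Graphiti nodes and edges into cognee, then retrieve triplets as context for question answering. The sketch below condenses those cells into a standalone script. It reuses only imports and calls that appear in the notebook; the `main()` wrapper and `asyncio.run` entry point are additions so it can run outside Jupyter, and it assumes the notebook's environment variables (`LLM_API_KEY`, `OPENAI_API_KEY`, and the Neo4j `GRAPH_DATABASE_*` settings) are already exported.

```python
import asyncio

import cognee
from cognee.infrastructure.databases.relational import (
    create_db_and_tables as create_relational_db_and_tables,
)
from cognee.modules.pipelines import Task, run_tasks
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
from cognee.modules.retrieval.utils.brute_force_triplet_search import brute_force_triplet_search
from cognee.modules.users.methods import get_default_user
from cognee.tasks.temporal_awareness import build_graph_with_temporal_awareness
from cognee.tasks.temporal_awareness.index_graphiti_objects import (
    index_and_transform_graphiti_nodes_and_edges,
)

text_list = [
    "Kamala Harris is the Attorney General of California. She was previously "
    "the district attorney for San Francisco.",
    "As AG, Harris was in office from January 3, 2011 – January 3, 2017",
]


async def main():
    # Start from a clean slate, as the notebook does.
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    await create_relational_db_and_tables()

    user = await get_default_user()
    for text in text_list:
        await cognee.add(text)

    # Build the Graphiti graph, then index it into cognee's graph and vector stores.
    pipeline = run_tasks(
        [Task(build_graph_with_temporal_awareness, text_list=text_list)], user=user
    )
    async for result in pipeline:
        print(result)
    await index_and_transform_graphiti_nodes_and_edges()

    # Retrieve triplets and turn them into LLM-ready context; the notebook then
    # feeds this context to get_llm_client() to generate the final answer.
    triplets = await brute_force_triplet_search(
        query="When was Kamala Harris in office?",
        top_k=3,
        collections=["graphitinode_content", "graphitinode_name", "graphitinode_summary"],
    )
    context = await GraphCompletionRetriever().resolve_edges_to_text(triplets)
    print(context)


if __name__ == "__main__":
    asyncio.run(main())
```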
@@ -1,239 +0,0 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Cognee GraphRAG with LlamaIndex Documents"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install llama-index-core\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load Data\n",
    "\n",
    "We will use a sample news article dataset retrieved from Diffbot, which Tomaz has conveniently made available on GitHub for easy access.\n",
    "\n",
    "The dataset contains 2,500 samples; for ease of experimentation, we will use 5 of these samples, which include the `title` and `text` of news articles."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from llama_index.core import Document\n",
    "\n",
    "news = pd.read_csv(\n",
    "    \"https://raw.githubusercontent.com/tomasonjo/blog-datasets/main/news_articles.csv\"\n",
    ")[:5]\n",
    "\n",
    "news.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Prepare documents as required by LlamaIndex"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "documents = [Document(text=f\"{row['title']}: {row['text']}\") for i, row in news.iterrows()]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Set environment variables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Setting environment variables\n",
    "if \"GRAPHISTRY_USERNAME\" not in os.environ:\n",
    "    os.environ[\"GRAPHISTRY_USERNAME\"] = \"\"\n",
    "\n",
    "if \"GRAPHISTRY_PASSWORD\" not in os.environ:\n",
    "    os.environ[\"GRAPHISTRY_PASSWORD\"] = \"\"\n",
    "\n",
    "if \"LLM_API_KEY\" not in os.environ:\n",
    "    os.environ[\"LLM_API_KEY\"] = \"\"\n",
    "\n",
    "# \"neo4j\" or \"networkx\"\n",
    "os.environ[\"GRAPH_DATABASE_PROVIDER\"] = \"networkx\"\n",
    "# Not needed if using networkx\n",
    "# os.environ[\"GRAPH_DATABASE_URL\"]=\"\"\n",
    "# os.environ[\"GRAPH_DATABASE_USERNAME\"]=\"\"\n",
    "# os.environ[\"GRAPH_DATABASE_PASSWORD\"]=\"\"\n",
    "\n",
    "# \"pgvector\", \"qdrant\", \"weaviate\" or \"lancedb\"\n",
    "os.environ[\"VECTOR_DB_PROVIDER\"] = \"lancedb\"\n",
    "# Not needed if using \"lancedb\" or \"pgvector\"\n",
    "# os.environ[\"VECTOR_DB_URL\"]=\"\"\n",
    "# os.environ[\"VECTOR_DB_KEY\"]=\"\"\n",
    "\n",
    "# Relational Database provider \"sqlite\" or \"postgres\"\n",
    "os.environ[\"DB_PROVIDER\"] = \"sqlite\"\n",
    "\n",
    "# Database name\n",
    "os.environ[\"DB_NAME\"] = \"cognee_db\"\n",
    "\n",
    "# Postgres specific parameters (Only if Postgres or PGVector is used)\n",
    "# os.environ[\"DB_HOST\"]=\"127.0.0.1\"\n",
    "# os.environ[\"DB_PORT\"]=\"5432\"\n",
    "# os.environ[\"DB_USERNAME\"]=\"cognee\"\n",
    "# os.environ[\"DB_PASSWORD\"]=\"cognee\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Run Cognee with LlamaIndex Documents"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from typing import Union, BinaryIO\n",
    "\n",
    "from cognee.infrastructure.databases.vector.pgvector import (\n",
    "    create_db_and_tables as create_pgvector_db_and_tables,\n",
    ")\n",
    "from cognee.infrastructure.databases.relational import (\n",
    "    create_db_and_tables as create_relational_db_and_tables,\n",
    ")\n",
    "from cognee.modules.users.models import User\n",
    "from cognee.modules.users.methods import get_default_user\n",
    "from cognee.tasks.ingestion.ingest_data import ingest_data\n",
    "import cognee\n",
    "\n",
    "# Create a clean slate for cognee -- reset data and system state\n",
    "await cognee.prune.prune_data()\n",
    "await cognee.prune.prune_system(metadata=True)\n",
    "\n",
    "\n",
    "# Add the LlamaIndex documents, and make it available for cognify\n",
    "async def add(\n",
    "    data: Union[BinaryIO, list[BinaryIO], str, list[str]],\n",
    "    dataset_name: str = \"main_dataset\",\n",
    "    user: User = None,\n",
    "):\n",
    "    await create_relational_db_and_tables()\n",
    "    await create_pgvector_db_and_tables()\n",
    "\n",
    "    if user is None:\n",
    "        user = await get_default_user()\n",
    "\n",
    "    await ingest_data(data, dataset_name, user)\n",
    "\n",
    "\n",
    "await add(documents)\n",
    "\n",
    "# Use LLMs and cognee to create knowledge graph\n",
    "await cognee.cognify()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Query Cognee for summaries related to data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from cognee import SearchType\n",
    "\n",
    "# Query cognee for summaries\n",
    "search_results = await cognee.search(\n",
    "    query_type=SearchType.SUMMARIES, query_text=\"What are the main news discussed in the document?\"\n",
    ")\n",
    "# Display search results\n",
    "print(\"\\n Summary of main news discussed:\\n\")\n",
    "print(search_results[0][\"text\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Render Knowledge Graph generated from provided data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import graphistry\n",
    "\n",
    "from cognee.infrastructure.databases.graph import get_graph_engine\n",
    "from cognee.shared.utils import render_graph\n",
    "\n",
    "# Get graph\n",
    "graphistry.login(\n",
    "    username=os.getenv(\"GRAPHISTRY_USERNAME\"), password=os.getenv(\"GRAPHISTRY_PASSWORD\")\n",
    ")\n",
    "graph_engine = await get_graph_engine()\n",
    "\n",
    "graph_url = await render_graph(graph_engine.graph)\n",
    "print(graph_url)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
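The LlamaIndex integration notebook removed above follows a similar shape: load five rows of the news dataset, wrap each as a llama_index `Document`, ingest them into cognee, run `cognify`, and query for summaries. A condensed sketch follows, again using only calls that appear in the notebook; the `main()` wrapper and `asyncio.run` call are additions for running outside Jupyter, and the pgvector table-creation step is skipped on the assumption that the notebook's default sqlite plus lancedb configuration is in use.

```python
import asyncio

import pandas as pd
from llama_index.core import Document

import cognee
from cognee import SearchType
from cognee.infrastructure.databases.relational import (
    create_db_and_tables as create_relational_db_and_tables,
)
from cognee.modules.users.methods import get_default_user
from cognee.tasks.ingestion.ingest_data import ingest_data


async def main():
    # Build LlamaIndex Documents from a small slice of the news article dataset.
    news = pd.read_csv(
        "https://raw.githubusercontent.com/tomasonjo/blog-datasets/main/news_articles.csv"
    )[:5]
    documents = [Document(text=f"{row['title']}: {row['text']}") for _, row in news.iterrows()]

    # Reset cognee, ingest the documents, and build the knowledge graph.
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    await create_relational_db_and_tables()

    user = await get_default_user()
    await ingest_data(documents, "main_dataset", user)
    await cognee.cognify()

    # Query the generated summaries, as the notebook's search cell does.
    search_results = await cognee.search(
        query_type=SearchType.SUMMARIES,
        query_text="What are the main news discussed in the document?",
    )
    print(search_results[0]["text"])


if __name__ == "__main__":
    asyncio.run(main())
```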
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long