Demo for relational db with cognee (#620)
<!-- .github/pull_request_template.md --> ## Description This demo uses pydantic models and dlt to pull data from the Pokémon API and structure it into a relational format. By feeding this structured data into cognee, it makes searching across multiple tables easier and more intuitive, thanks to the relational model. ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit - **New Features** - Introduced a comprehensive Pokémon data processing pipeline, available as both a Python script and an interactive Jupyter Notebook. - Enabled asynchronous operations for efficient data collection and querying, including an integrated search functionality. - Improved error handling and data validation during the data fetching and processing stages for a smoother user experience. <!-- end of auto-generated comment: release notes by coderabbit.ai --> Co-authored-by: Vasilije <8619304+Vasilije1990@users.noreply.github.com>
This commit is contained in:
parent
62c84dde5e
commit
56427f287e
2 changed files with 726 additions and 0 deletions
190
examples/python/pokemon_datapoints_example.py
Normal file
190
examples/python/pokemon_datapoints_example.py
Normal file
|
|
@ -0,0 +1,190 @@
|
|||
# Standard library imports
|
||||
import os
|
||||
import json
|
||||
import asyncio
|
||||
import pathlib
|
||||
from uuid import uuid5, NAMESPACE_OID
|
||||
from typing import List, Optional
|
||||
from pathlib import Path
|
||||
|
||||
import dlt
|
||||
import requests
|
||||
import cognee
|
||||
from cognee.low_level import DataPoint, setup as cognee_setup
|
||||
from cognee.api.v1.search import SearchType
|
||||
from cognee.tasks.storage import add_data_points
|
||||
from cognee.modules.pipelines.tasks.Task import Task
|
||||
from cognee.modules.pipelines import run_tasks
|
||||
|
||||
|
||||
BASE_URL = "https://pokeapi.co/api/v2/"
|
||||
os.environ["BUCKET_URL"] = "./.data_storage"
|
||||
os.environ["DATA_WRITER__DISABLE_COMPRESSION"] = "true"
|
||||
|
||||
# Data Models
|
||||
class Abilities(DataPoint):
|
||||
name: str = "Abilities"
|
||||
metadata: dict = {"index_fields": ["name"]}
|
||||
|
||||
class PokemonAbility(DataPoint):
|
||||
name: str
|
||||
ability__name: str
|
||||
ability__url: str
|
||||
is_hidden: bool
|
||||
slot: int
|
||||
_dlt_load_id: str
|
||||
_dlt_id: str
|
||||
_dlt_parent_id: str
|
||||
_dlt_list_idx: str
|
||||
is_type: Abilities
|
||||
metadata: dict = {"index_fields": ["ability__name"]}
|
||||
|
||||
class Pokemons(DataPoint):
|
||||
name: str = "Pokemons"
|
||||
have: Abilities
|
||||
metadata: dict = {"index_fields": ["name"]}
|
||||
|
||||
class Pokemon(DataPoint):
|
||||
name: str
|
||||
base_experience: int
|
||||
height: int
|
||||
weight: int
|
||||
is_default: bool
|
||||
order: int
|
||||
location_area_encounters: str
|
||||
species__name: str
|
||||
species__url: str
|
||||
cries__latest: str
|
||||
cries__legacy: str
|
||||
sprites__front_default: str
|
||||
sprites__front_shiny: str
|
||||
sprites__back_default: Optional[str]
|
||||
sprites__back_shiny: Optional[str]
|
||||
_dlt_load_id: str
|
||||
_dlt_id: str
|
||||
is_type: Pokemons
|
||||
abilities: List[PokemonAbility]
|
||||
metadata: dict = {"index_fields": ["name"]}
|
||||
|
||||
# Data Collection Functions
|
||||
@dlt.resource(write_disposition="replace")
|
||||
def pokemon_list(limit: int = 50):
|
||||
response = requests.get(f"{BASE_URL}pokemon", params={"limit": limit})
|
||||
response.raise_for_status()
|
||||
yield response.json()["results"]
|
||||
|
||||
@dlt.transformer(data_from=pokemon_list)
|
||||
def pokemon_details(pokemons):
|
||||
"""Fetches detailed info for each Pokémon"""
|
||||
for pokemon in pokemons:
|
||||
response = requests.get(pokemon["url"])
|
||||
response.raise_for_status()
|
||||
yield response.json()
|
||||
|
||||
# Data Loading Functions
|
||||
def load_abilities_data(jsonl_abilities):
|
||||
abilities_root = Abilities()
|
||||
pokemon_abilities = []
|
||||
|
||||
for jsonl_ability in jsonl_abilities:
|
||||
with open(jsonl_ability, "r") as f:
|
||||
for line in f:
|
||||
ability = json.loads(line)
|
||||
ability["id"] = uuid5(NAMESPACE_OID, ability["_dlt_id"])
|
||||
ability["name"] = ability["ability__name"]
|
||||
ability["is_type"] = abilities_root
|
||||
pokemon_abilities.append(ability)
|
||||
|
||||
return abilities_root, pokemon_abilities
|
||||
|
||||
def load_pokemon_data(jsonl_pokemons, pokemon_abilities, pokemon_root):
|
||||
pokemons = []
|
||||
|
||||
for jsonl_pokemon in jsonl_pokemons:
|
||||
with open(jsonl_pokemon, "r") as f:
|
||||
for line in f:
|
||||
pokemon_data = json.loads(line)
|
||||
abilities = [
|
||||
ability for ability in pokemon_abilities
|
||||
if ability["_dlt_parent_id"] == pokemon_data["_dlt_id"]
|
||||
]
|
||||
pokemon_data["external_id"] = pokemon_data["id"]
|
||||
pokemon_data["id"] = uuid5(NAMESPACE_OID, str(pokemon_data["id"]))
|
||||
pokemon_data["abilities"] = [PokemonAbility(**ability) for ability in abilities]
|
||||
pokemon_data["is_type"] = pokemon_root
|
||||
pokemons.append(Pokemon(**pokemon_data))
|
||||
|
||||
return pokemons
|
||||
|
||||
# Main Application Logic
|
||||
async def setup_and_process_data():
|
||||
"""Setup configuration and process Pokemon data"""
|
||||
# Setup configuration
|
||||
data_directory_path = str(pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".data_storage")).resolve())
|
||||
cognee_directory_path = str(pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".cognee_system")).resolve())
|
||||
|
||||
cognee.config.data_root_directory(data_directory_path)
|
||||
cognee.config.system_root_directory(cognee_directory_path)
|
||||
|
||||
# Initialize pipeline and collect data
|
||||
pipeline = dlt.pipeline(
|
||||
pipeline_name="pokemon_pipeline",
|
||||
destination="filesystem",
|
||||
dataset_name="pokemon_data",
|
||||
)
|
||||
info = pipeline.run([pokemon_list, pokemon_details])
|
||||
print(info)
|
||||
|
||||
# Load and process data
|
||||
STORAGE_PATH = Path(".data_storage/pokemon_data/pokemon_details")
|
||||
jsonl_pokemons = sorted(STORAGE_PATH.glob("*.jsonl"))
|
||||
if not jsonl_pokemons:
|
||||
raise FileNotFoundError("No JSONL files found in the storage directory.")
|
||||
|
||||
ABILITIES_PATH = Path(".data_storage/pokemon_data/pokemon_details__abilities")
|
||||
jsonl_abilities = sorted(ABILITIES_PATH.glob("*.jsonl"))
|
||||
if not jsonl_abilities:
|
||||
raise FileNotFoundError("No JSONL files found in the storage directory.")
|
||||
|
||||
# Process data
|
||||
abilities_root, pokemon_abilities = load_abilities_data(jsonl_abilities)
|
||||
pokemon_root = Pokemons(have=abilities_root)
|
||||
pokemons = load_pokemon_data(jsonl_pokemons, pokemon_abilities, pokemon_root)
|
||||
|
||||
return pokemons
|
||||
|
||||
async def pokemon_cognify(pokemons):
|
||||
"""Process Pokemon data with Cognee and perform search"""
|
||||
# Setup and run Cognee tasks
|
||||
await cognee.prune.prune_data()
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
await cognee_setup()
|
||||
|
||||
tasks = [Task(add_data_points, task_config={"batch_size": 50})]
|
||||
results = run_tasks(
|
||||
tasks=tasks,
|
||||
data=pokemons,
|
||||
dataset_id=uuid5(NAMESPACE_OID, "Pokemon"),
|
||||
pipeline_name='pokemon_pipeline',
|
||||
)
|
||||
|
||||
async for result in results:
|
||||
print(result)
|
||||
print("Done")
|
||||
|
||||
# Perform search
|
||||
search_results = await cognee.search(
|
||||
query_type=SearchType.GRAPH_COMPLETION,
|
||||
query_text="pokemons?"
|
||||
)
|
||||
|
||||
print("Search results:")
|
||||
for result_text in search_results:
|
||||
print(result_text)
|
||||
|
||||
async def main():
|
||||
pokemons = await setup_and_process_data()
|
||||
await pokemon_cognify(pokemons)
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
536
notebooks/pokemon_datapoints_notebook.ipynb
Normal file
536
notebooks/pokemon_datapoints_notebook.ipynb
Normal file
|
|
@ -0,0 +1,536 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-03-04T11:58:00.193158Z",
|
||||
"start_time": "2025-03-04T11:58:00.190238Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"import nest_asyncio\n",
|
||||
"nest_asyncio.apply()"
|
||||
],
|
||||
"id": "2efba278d106bb5f",
|
||||
"outputs": [],
|
||||
"execution_count": 2
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"### Environment Configuration\n",
|
||||
"#### Setup required directories and environment variables.\n"
|
||||
],
|
||||
"id": "ccbb2bc23aa456ee"
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-03-04T11:59:33.879188Z",
|
||||
"start_time": "2025-03-04T11:59:33.873682Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"import pathlib\n",
|
||||
"import os\n",
|
||||
"import cognee\n",
|
||||
"\n",
|
||||
"notebook_dir = pathlib.Path().resolve()\n",
|
||||
"data_directory_path = str(notebook_dir / \".data_storage\")\n",
|
||||
"cognee_directory_path = str(notebook_dir / \".cognee_system\")\n",
|
||||
"\n",
|
||||
"cognee.config.data_root_directory(data_directory_path)\n",
|
||||
"cognee.config.system_root_directory(cognee_directory_path)\n",
|
||||
"\n",
|
||||
"BASE_URL = \"https://pokeapi.co/api/v2/\"\n",
|
||||
"os.environ[\"BUCKET_URL\"] = data_directory_path\n",
|
||||
"os.environ[\"DATA_WRITER__DISABLE_COMPRESSION\"] = \"true\"\n"
|
||||
],
|
||||
"id": "662d554f96f211d9",
|
||||
"outputs": [],
|
||||
"execution_count": 8
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Initialize DLT Pipeline\n",
|
||||
"### Create the DLT pipeline to fetch Pokémon data.\n"
|
||||
],
|
||||
"id": "36ae0be71f6e9167"
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-03-04T11:58:03.982939Z",
|
||||
"start_time": "2025-03-04T11:58:03.819676Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"import dlt\n",
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"pipeline = dlt.pipeline(\n",
|
||||
" pipeline_name=\"pokemon_pipeline\",\n",
|
||||
" destination=\"filesystem\",\n",
|
||||
" dataset_name=\"pokemon_data\",\n",
|
||||
")\n"
|
||||
],
|
||||
"id": "25101ae5f016ce0c",
|
||||
"outputs": [],
|
||||
"execution_count": 4
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Fetch Pokémon List\n",
|
||||
"### Retrieve a list of Pokémon from the API.\n"
|
||||
],
|
||||
"id": "9a87ce05a072c48b"
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-03-04T11:58:03.990076Z",
|
||||
"start_time": "2025-03-04T11:58:03.987199Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"@dlt.resource(write_disposition=\"replace\")\n",
|
||||
"def pokemon_list(limit: int = 50):\n",
|
||||
" import requests\n",
|
||||
" response = requests.get(f\"{BASE_URL}pokemon\", params={\"limit\": limit})\n",
|
||||
" response.raise_for_status()\n",
|
||||
" yield response.json()[\"results\"]\n"
|
||||
],
|
||||
"id": "3b6e60778c61e24a",
|
||||
"outputs": [],
|
||||
"execution_count": 5
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Fetch Pokémon Details\n",
|
||||
"### Fetch detailed information about each Pokémon.\n"
|
||||
],
|
||||
"id": "9952767846194e97"
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-03-04T11:58:03.996394Z",
|
||||
"start_time": "2025-03-04T11:58:03.994122Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"@dlt.transformer(data_from=pokemon_list)\n",
|
||||
"def pokemon_details(pokemons):\n",
|
||||
" \"\"\"Fetches detailed info for each Pokémon\"\"\"\n",
|
||||
" import requests\n",
|
||||
" for pokemon in pokemons:\n",
|
||||
" response = requests.get(pokemon[\"url\"])\n",
|
||||
" response.raise_for_status()\n",
|
||||
" yield response.json()\n"
|
||||
],
|
||||
"id": "79ec9fef12267485",
|
||||
"outputs": [],
|
||||
"execution_count": 6
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Run Data Pipeline\n",
|
||||
"### Execute the pipeline and store Pokémon data.\n"
|
||||
],
|
||||
"id": "41e05f660bf9e9d2"
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-03-04T11:59:41.571015Z",
|
||||
"start_time": "2025-03-04T11:59:36.840744Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"info = pipeline.run([pokemon_list, pokemon_details])\n",
|
||||
"print(info)\n"
|
||||
],
|
||||
"id": "20a3b2c7f404677f",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Pipeline pokemon_pipeline load step completed in 0.06 seconds\n",
|
||||
"1 load package(s) were loaded to destination filesystem and into dataset pokemon_data\n",
|
||||
"The filesystem destination used file:///Users/lazar/PycharmProjects/cognee/.data_storage location to store data\n",
|
||||
"Load package 1741089576.860229 is LOADED and contains no failed jobs\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 9
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Load Pokémon Abilities\n",
|
||||
"### Load Pokémon ability data from stored files.\n"
|
||||
],
|
||||
"id": "937f10b8d1037743"
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-03-04T11:59:44.377719Z",
|
||||
"start_time": "2025-03-04T11:59:44.363718Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"import json\n",
|
||||
"from cognee.low_level import DataPoint\n",
|
||||
"from uuid import uuid5, NAMESPACE_OID\n",
|
||||
"\n",
|
||||
"class Abilities(DataPoint):\n",
|
||||
" name: str = \"Abilities\"\n",
|
||||
" metadata: dict = {\"index_fields\": [\"name\"]}\n",
|
||||
"\n",
|
||||
"def load_abilities_data(jsonl_abilities):\n",
|
||||
" abilities_root = Abilities()\n",
|
||||
" pokemon_abilities = []\n",
|
||||
"\n",
|
||||
" for jsonl_ability in jsonl_abilities:\n",
|
||||
" with open(jsonl_ability, \"r\") as f:\n",
|
||||
" for line in f:\n",
|
||||
" ability = json.loads(line)\n",
|
||||
" ability[\"id\"] = uuid5(NAMESPACE_OID, ability[\"_dlt_id\"])\n",
|
||||
" ability[\"name\"] = ability[\"ability__name\"]\n",
|
||||
" ability[\"is_type\"] = abilities_root\n",
|
||||
" pokemon_abilities.append(ability)\n",
|
||||
"\n",
|
||||
" return abilities_root, pokemon_abilities\n"
|
||||
],
|
||||
"id": "be73050036439ea1",
|
||||
"outputs": [],
|
||||
"execution_count": 10
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Load Pokémon Data\n",
|
||||
"### Load Pokémon details and associate them with abilities.\n"
|
||||
],
|
||||
"id": "98c97f799f73df77"
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-03-04T11:59:46.251306Z",
|
||||
"start_time": "2025-03-04T11:59:46.238283Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"from typing import List, Optional\n",
|
||||
"\n",
|
||||
"class Pokemons(DataPoint):\n",
|
||||
" name: str = \"Pokemons\"\n",
|
||||
" have: Abilities\n",
|
||||
" metadata: dict = {\"index_fields\": [\"name\"]}\n",
|
||||
"\n",
|
||||
"class PokemonAbility(DataPoint):\n",
|
||||
" name: str\n",
|
||||
" ability__name: str\n",
|
||||
" ability__url: str\n",
|
||||
" is_hidden: bool\n",
|
||||
" slot: int\n",
|
||||
" _dlt_load_id: str\n",
|
||||
" _dlt_id: str\n",
|
||||
" _dlt_parent_id: str\n",
|
||||
" _dlt_list_idx: str\n",
|
||||
" is_type: Abilities\n",
|
||||
" metadata: dict = {\"index_fields\": [\"ability__name\"]}\n",
|
||||
"\n",
|
||||
"class Pokemon(DataPoint):\n",
|
||||
" name: str\n",
|
||||
" base_experience: int\n",
|
||||
" height: int\n",
|
||||
" weight: int\n",
|
||||
" is_default: bool\n",
|
||||
" order: int\n",
|
||||
" location_area_encounters: str\n",
|
||||
" species__name: str\n",
|
||||
" species__url: str\n",
|
||||
" cries__latest: str\n",
|
||||
" cries__legacy: str\n",
|
||||
" sprites__front_default: str\n",
|
||||
" sprites__front_shiny: str\n",
|
||||
" sprites__back_default: Optional[str]\n",
|
||||
" sprites__back_shiny: Optional[str]\n",
|
||||
" _dlt_load_id: str\n",
|
||||
" _dlt_id: str\n",
|
||||
" is_type: Pokemons\n",
|
||||
" abilities: List[PokemonAbility]\n",
|
||||
" metadata: dict = {\"index_fields\": [\"name\"]}\n",
|
||||
"\n",
|
||||
"def load_pokemon_data(jsonl_pokemons, pokemon_abilities, pokemon_root):\n",
|
||||
" pokemons = []\n",
|
||||
"\n",
|
||||
" for jsonl_pokemon in jsonl_pokemons:\n",
|
||||
" with open(jsonl_pokemon, \"r\") as f:\n",
|
||||
" for line in f:\n",
|
||||
" pokemon_data = json.loads(line)\n",
|
||||
" abilities = [\n",
|
||||
" ability for ability in pokemon_abilities\n",
|
||||
" if ability[\"_dlt_parent_id\"] == pokemon_data[\"_dlt_id\"]\n",
|
||||
" ]\n",
|
||||
" pokemon_data[\"external_id\"] = pokemon_data[\"id\"]\n",
|
||||
" pokemon_data[\"id\"] = uuid5(NAMESPACE_OID, str(pokemon_data[\"id\"]))\n",
|
||||
" pokemon_data[\"abilities\"] = [PokemonAbility(**ability) for ability in abilities]\n",
|
||||
" pokemon_data[\"is_type\"] = pokemon_root\n",
|
||||
" pokemons.append(Pokemon(**pokemon_data))\n",
|
||||
"\n",
|
||||
" return pokemons\n"
|
||||
],
|
||||
"id": "7862951248df0bf5",
|
||||
"outputs": [],
|
||||
"execution_count": 11
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Process Pokémon Data\n",
|
||||
"### Load and associate Pokémon abilities.\n"
|
||||
],
|
||||
"id": "676fa5a2b61c2107"
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-03-04T11:59:47.365226Z",
|
||||
"start_time": "2025-03-04T11:59:47.356722Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"STORAGE_PATH = Path(\".data_storage/pokemon_data/pokemon_details\")\n",
|
||||
"jsonl_pokemons = sorted(STORAGE_PATH.glob(\"*.jsonl\"))\n",
|
||||
"\n",
|
||||
"ABILITIES_PATH = Path(\".data_storage/pokemon_data/pokemon_details__abilities\")\n",
|
||||
"jsonl_abilities = sorted(ABILITIES_PATH.glob(\"*.jsonl\"))\n",
|
||||
"\n",
|
||||
"abilities_root, pokemon_abilities = load_abilities_data(jsonl_abilities)\n",
|
||||
"pokemon_root = Pokemons(have=abilities_root)\n",
|
||||
"pokemons = load_pokemon_data(jsonl_pokemons, pokemon_abilities, pokemon_root)\n"
|
||||
],
|
||||
"id": "ad14cdecdccd71bb",
|
||||
"outputs": [],
|
||||
"execution_count": 12
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Initialize Cognee\n",
|
||||
"### Setup Cognee for data processing.\n"
|
||||
],
|
||||
"id": "59dec67b2ae50f0f"
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-03-04T11:59:49.244577Z",
|
||||
"start_time": "2025-03-04T11:59:48.618261Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"import asyncio\n",
|
||||
"from cognee.low_level import setup as cognee_setup\n",
|
||||
"\n",
|
||||
"async def initialize_cognee():\n",
|
||||
" await cognee.prune.prune_data()\n",
|
||||
" await cognee.prune.prune_system(metadata=True)\n",
|
||||
" await cognee_setup()\n",
|
||||
"\n",
|
||||
"await initialize_cognee()\n"
|
||||
],
|
||||
"id": "d2e095ae576a02c1",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"INFO:cognee.infrastructure.databases.relational.sqlalchemy.SqlAlchemyAdapter:Database deleted successfully.INFO:cognee.infrastructure.databases.relational.sqlalchemy.SqlAlchemyAdapter:Database deleted successfully."
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 13
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Process Pokémon Data\n",
|
||||
"### Add Pokémon data points to Cognee.\n"
|
||||
],
|
||||
"id": "5f0b8090bc7b1fe6"
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-03-04T11:59:57.744035Z",
|
||||
"start_time": "2025-03-04T11:59:50.574033Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"from cognee.modules.pipelines.tasks.Task import Task\n",
|
||||
"from cognee.tasks.storage import add_data_points\n",
|
||||
"from cognee.modules.pipelines import run_tasks\n",
|
||||
"\n",
|
||||
"tasks = [Task(add_data_points, task_config={\"batch_size\": 50})]\n",
|
||||
"results = run_tasks(\n",
|
||||
" tasks=tasks,\n",
|
||||
" data=pokemons,\n",
|
||||
" dataset_id=uuid5(NAMESPACE_OID, \"Pokemon\"),\n",
|
||||
" pipeline_name='pokemon_pipeline',\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"async for result in results:\n",
|
||||
" print(result)\n",
|
||||
"print(\"Done\")\n"
|
||||
],
|
||||
"id": "ffa12fc1f5350d95",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"INFO:run_tasks(tasks: [Task], data):Pipeline run started: `fd2ed59d-b550-5b05-bbe6-7b708fe12483`INFO:run_tasks(tasks: [Task], data):Coroutine task started: `add_data_points`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<cognee.modules.pipelines.models.PipelineRun.PipelineRun object at 0x300bb3950>\n",
|
||||
"User d347ea85-e512-4cae-b9d7-496fe1745424 has registered.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/lazar/PycharmProjects/cognee/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py:79: SAWarning: This declarative base already contains a class with the same class name and module name as cognee.infrastructure.databases.vector.pgvector.PGVectorAdapter.PGVectorDataPoint, and will be replaced in the string-lookup table.\n",
|
||||
" class PGVectorDataPoint(Base):\n",
|
||||
"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"/Users/lazar/PycharmProjects/cognee/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py:113: SAWarning: This declarative base already contains a class with the same class name and module name as cognee.infrastructure.databases.vector.pgvector.PGVectorAdapter.PGVectorDataPoint, and will be replaced in the string-lookup table.\n",
|
||||
" class PGVectorDataPoint(Base):\n",
|
||||
"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 8, column: 16, offset: 335} for query: '\\n UNWIND $nodes AS node\\n MERGE (n {id: node.node_id})\\n ON CREATE SET n += node.properties, n.updated_at = timestamp()\\n ON MATCH SET n += node.properties, n.updated_at = timestamp()\\n WITH n, node.node_id AS label\\n CALL apoc.create.addLabels(n, [label]) YIELD node AS labeledNode\\n RETURN ID(labeledNode) AS internal_id, labeledNode.id AS nodeId\\n 'WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 1, column: 18, offset: 17} for query: 'MATCH (n) RETURN ID(n) AS id, labels(n) AS labels, properties(n) AS properties'WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 3, column: 16, offset: 43} for query: '\\n MATCH (n)-[r]->(m)\\n RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties\\n 'WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 3, column: 33, offset: 60} for query: '\\n MATCH (n)-[r]->(m)\\n RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties\\n 'INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:run_tasks(tasks: [Task], data):Coroutine task completed: `add_data_points`INFO:run_tasks(tasks: [Task], data):Pipeline run completed: `fd2ed59d-b550-5b05-bbe6-7b708fe12483`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<cognee.modules.pipelines.models.PipelineRun.PipelineRun object at 0x30016fd40>\n",
|
||||
"Done\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 14
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Search Pokémon Data\n",
|
||||
"### Execute a search query using Cognee.\n"
|
||||
],
|
||||
"id": "e0d98d9832a2797a"
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-03-04T12:00:02.878871Z",
|
||||
"start_time": "2025-03-04T11:59:59.571965Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"from cognee.api.v1.search import SearchType\n",
|
||||
"\n",
|
||||
"search_results = await cognee.search(\n",
|
||||
" query_type=SearchType.GRAPH_COMPLETION,\n",
|
||||
" query_text=\"pokemons?\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(\"Search results:\")\n",
|
||||
"for result_text in search_results:\n",
|
||||
" print(result_text)"
|
||||
],
|
||||
"id": "bb2476b6b0c2aff",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 1, column: 18, offset: 17} for query: 'MATCH (n) RETURN ID(n) AS id, labels(n) AS labels, properties(n) AS properties'WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 3, column: 16, offset: 43} for query: '\\n MATCH (n)-[r]->(m)\\n RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties\\n 'WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 3, column: 33, offset: 60} for query: '\\n MATCH (n)-[r]->(m)\\n RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties\\n 'INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\u001B[92m13:00:02 - LiteLLM:INFO\u001B[0m: utils.py:2784 - \n",
|
||||
"LiteLLM completion() model= gpt-4o-mini; provider = openaiINFO:LiteLLM:\n",
|
||||
"LiteLLM completion() model= gpt-4o-mini; provider = openai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Search results:\n",
|
||||
"The Pokemons mentioned are: golbat, jigglypuff, raichu, vulpix, and pikachu.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 15
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "code",
|
||||
"outputs": [],
|
||||
"execution_count": null,
|
||||
"source": "",
|
||||
"id": "a4c2d3e9c15b017"
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"version": "3.8.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue