Demo for relational db with cognee (#620)


## Description
This demo uses Pydantic models and dlt to pull data from the Pokémon API
and structure it into a relational format. Feeding this structured data
into cognee makes searching across multiple tables easier and more
intuitive, thanks to the relational model.
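
To make the relational angle concrete, here is a minimal, self-contained sketch (toy rows, not real PokeAPI data) of the parent/child join the demo performs on dlt's output tables:

```python
# Toy illustration of the join the demo performs on dlt's parent/child tables:
# dlt gives every parent row a _dlt_id and every unnested child row a
# _dlt_parent_id, which is exactly what load_pokemon_data() matches on.
parent_rows = [{"_dlt_id": "p1", "name": "pikachu"}]
child_rows = [{"_dlt_parent_id": "p1", "ability__name": "static"}]

for parent in parent_rows:
    abilities = [c for c in child_rows if c["_dlt_parent_id"] == parent["_dlt_id"]]
    print(parent["name"], "->", [a["ability__name"] for a in abilities])
# prints: pikachu -> ['static']
```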

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.


## Summary by CodeRabbit

- **New Features**
  - Introduced a comprehensive Pokémon data processing pipeline, available
    as both a Python script and an interactive Jupyter Notebook.
  - Enabled asynchronous operations for efficient data collection and
    querying, including integrated search functionality.
  - Improved error handling and data validation during the data fetching
    and processing stages for a smoother user experience.


Co-authored-by: Vasilije <8619304+Vasilije1990@users.noreply.github.com>
hibajamal 2025-03-08 20:33:42 +01:00, committed by GitHub
parent 62c84dde5e
commit 56427f287e
2 changed files with 726 additions and 0 deletions

@@ -0,0 +1,190 @@
# Standard library imports
import os
import json
import asyncio
import pathlib
from uuid import uuid5, NAMESPACE_OID
from typing import List, Optional
from pathlib import Path

# Third-party imports
import dlt
import requests

# Cognee imports
import cognee
from cognee.low_level import DataPoint, setup as cognee_setup
from cognee.api.v1.search import SearchType
from cognee.tasks.storage import add_data_points
from cognee.modules.pipelines.tasks.Task import Task
from cognee.modules.pipelines import run_tasks
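
# PokeAPI endpoint plus dlt filesystem-destination settings: BUCKET_URL points
# the pipeline output at a local folder, and disabling compression keeps the
# output as plain JSONL files that the loading functions below re-read.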
BASE_URL = "https://pokeapi.co/api/v2/"
os.environ["BUCKET_URL"] = "./.data_storage"
os.environ["DATA_WRITER__DISABLE_COMPRESSION"] = "true"

# Data Models
class Abilities(DataPoint):
    name: str = "Abilities"
    metadata: dict = {"index_fields": ["name"]}


class PokemonAbility(DataPoint):
    name: str
    ability__name: str
    ability__url: str
    is_hidden: bool
    slot: int
    _dlt_load_id: str
    _dlt_id: str
    _dlt_parent_id: str
    _dlt_list_idx: str
    is_type: Abilities
    metadata: dict = {"index_fields": ["ability__name"]}


class Pokemons(DataPoint):
    name: str = "Pokemons"
    have: Abilities
    metadata: dict = {"index_fields": ["name"]}


class Pokemon(DataPoint):
    name: str
    base_experience: int
    height: int
    weight: int
    is_default: bool
    order: int
    location_area_encounters: str
    species__name: str
    species__url: str
    cries__latest: str
    cries__legacy: str
    sprites__front_default: str
    sprites__front_shiny: str
    sprites__back_default: Optional[str]
    sprites__back_shiny: Optional[str]
    _dlt_load_id: str
    _dlt_id: str
    is_type: Pokemons
    abilities: List[PokemonAbility]
    metadata: dict = {"index_fields": ["name"]}
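
# The double-underscore fields (ability__name, species__url, ...) follow dlt's
# column-flattening convention, and _dlt_parent_id is the key dlt adds to child
# tables when it unnests the abilities list. The is_type fields tie each row to
# its root node so related records stay connected in cognee's graph.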

# Data Collection Functions
@dlt.resource(write_disposition="replace")
def pokemon_list(limit: int = 50):
    """Fetches the first `limit` Pokémon from the PokeAPI index."""
    response = requests.get(f"{BASE_URL}pokemon", params={"limit": limit})
    response.raise_for_status()
    yield response.json()["results"]


@dlt.transformer(data_from=pokemon_list)
def pokemon_details(pokemons):
    """Fetches detailed info for each Pokémon"""
    for pokemon in pokemons:
        response = requests.get(pokemon["url"])
        response.raise_for_status()
        yield response.json()
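
# `pokemon_details` is a dlt transformer fed by `pokemon_list`, so a single
# pipeline.run() call fans each index entry out into a detail request; the
# nested abilities arrays land in the child table `pokemon_details__abilities`.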

# Data Loading Functions
def load_abilities_data(jsonl_abilities):
    abilities_root = Abilities()
    pokemon_abilities = []

    for jsonl_ability in jsonl_abilities:
        with open(jsonl_ability, "r") as f:
            for line in f:
                ability = json.loads(line)
                # Derive a stable UUID from the dlt row id and link the row
                # to the shared root node.
                ability["id"] = uuid5(NAMESPACE_OID, ability["_dlt_id"])
                ability["name"] = ability["ability__name"]
                ability["is_type"] = abilities_root
                pokemon_abilities.append(ability)

    return abilities_root, pokemon_abilities


def load_pokemon_data(jsonl_pokemons, pokemon_abilities, pokemon_root):
    pokemons = []

    for jsonl_pokemon in jsonl_pokemons:
        with open(jsonl_pokemon, "r") as f:
            for line in f:
                pokemon_data = json.loads(line)
                # Re-attach child ability rows via dlt's parent-id column.
                abilities = [
                    ability for ability in pokemon_abilities
                    if ability["_dlt_parent_id"] == pokemon_data["_dlt_id"]
                ]
                pokemon_data["external_id"] = pokemon_data["id"]
                pokemon_data["id"] = uuid5(NAMESPACE_OID, str(pokemon_data["id"]))
                pokemon_data["abilities"] = [PokemonAbility(**ability) for ability in abilities]
                pokemon_data["is_type"] = pokemon_root
                pokemons.append(Pokemon(**pokemon_data))

    return pokemons

# Main Application Logic
async def setup_and_process_data():
    """Setup configuration and process Pokemon data"""
    # Setup configuration
    data_directory_path = str(pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".data_storage")).resolve())
    cognee_directory_path = str(pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".cognee_system")).resolve())
    cognee.config.data_root_directory(data_directory_path)
    cognee.config.system_root_directory(cognee_directory_path)

    # Initialize pipeline and collect data
    pipeline = dlt.pipeline(
        pipeline_name="pokemon_pipeline",
        destination="filesystem",
        dataset_name="pokemon_data",
    )
    info = pipeline.run([pokemon_list, pokemon_details])
    print(info)

    # Load and process data
    STORAGE_PATH = Path(".data_storage/pokemon_data/pokemon_details")
    jsonl_pokemons = sorted(STORAGE_PATH.glob("*.jsonl"))
    if not jsonl_pokemons:
        raise FileNotFoundError("No JSONL files found in the storage directory.")

    ABILITIES_PATH = Path(".data_storage/pokemon_data/pokemon_details__abilities")
    jsonl_abilities = sorted(ABILITIES_PATH.glob("*.jsonl"))
    if not jsonl_abilities:
        raise FileNotFoundError("No JSONL files found in the abilities directory.")

    # Process data
    abilities_root, pokemon_abilities = load_abilities_data(jsonl_abilities)
    pokemon_root = Pokemons(have=abilities_root)
    pokemons = load_pokemon_data(jsonl_pokemons, pokemon_abilities, pokemon_root)
    return pokemons


async def pokemon_cognify(pokemons):
    """Process Pokemon data with Cognee and perform search"""
    # Setup and run Cognee tasks
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    await cognee_setup()

    tasks = [Task(add_data_points, task_config={"batch_size": 50})]
    results = run_tasks(
        tasks=tasks,
        data=pokemons,
        dataset_id=uuid5(NAMESPACE_OID, "Pokemon"),
        pipeline_name="pokemon_pipeline",
    )
    async for result in results:
        print(result)
    print("Done")

    # Perform search
    search_results = await cognee.search(
        query_type=SearchType.GRAPH_COMPLETION,
        query_text="pokemons?"
    )
    print("Search results:")
    for result_text in search_results:
        print(result_text)


async def main():
    pokemons = await setup_and_process_data()
    await pokemon_cognify(pokemons)


if __name__ == "__main__":
    asyncio.run(main())

@@ -0,0 +1,536 @@
{
"cells": [
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-04T11:58:00.193158Z",
"start_time": "2025-03-04T11:58:00.190238Z"
}
},
"cell_type": "code",
"source": [
"import nest_asyncio\n",
"nest_asyncio.apply()"
],
"id": "2efba278d106bb5f",
"outputs": [],
"execution_count": 2
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
"### Environment Configuration\n",
"#### Setup required directories and environment variables.\n"
],
"id": "ccbb2bc23aa456ee"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-04T11:59:33.879188Z",
"start_time": "2025-03-04T11:59:33.873682Z"
}
},
"cell_type": "code",
"source": [
"import pathlib\n",
"import os\n",
"import cognee\n",
"\n",
"notebook_dir = pathlib.Path().resolve()\n",
"data_directory_path = str(notebook_dir / \".data_storage\")\n",
"cognee_directory_path = str(notebook_dir / \".cognee_system\")\n",
"\n",
"cognee.config.data_root_directory(data_directory_path)\n",
"cognee.config.system_root_directory(cognee_directory_path)\n",
"\n",
"BASE_URL = \"https://pokeapi.co/api/v2/\"\n",
"os.environ[\"BUCKET_URL\"] = data_directory_path\n",
"os.environ[\"DATA_WRITER__DISABLE_COMPRESSION\"] = \"true\"\n"
],
"id": "662d554f96f211d9",
"outputs": [],
"execution_count": 8
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
"## Initialize DLT Pipeline\n",
"### Create the DLT pipeline to fetch Pokémon data.\n"
],
"id": "36ae0be71f6e9167"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-04T11:58:03.982939Z",
"start_time": "2025-03-04T11:58:03.819676Z"
}
},
"cell_type": "code",
"source": [
"import dlt\n",
"from pathlib import Path\n",
"\n",
"pipeline = dlt.pipeline(\n",
" pipeline_name=\"pokemon_pipeline\",\n",
" destination=\"filesystem\",\n",
" dataset_name=\"pokemon_data\",\n",
")\n"
],
"id": "25101ae5f016ce0c",
"outputs": [],
"execution_count": 4
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
"## Fetch Pokémon List\n",
"### Retrieve a list of Pokémon from the API.\n"
],
"id": "9a87ce05a072c48b"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-04T11:58:03.990076Z",
"start_time": "2025-03-04T11:58:03.987199Z"
}
},
"cell_type": "code",
"source": [
"@dlt.resource(write_disposition=\"replace\")\n",
"def pokemon_list(limit: int = 50):\n",
" import requests\n",
" response = requests.get(f\"{BASE_URL}pokemon\", params={\"limit\": limit})\n",
" response.raise_for_status()\n",
" yield response.json()[\"results\"]\n"
],
"id": "3b6e60778c61e24a",
"outputs": [],
"execution_count": 5
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
"## Fetch Pokémon Details\n",
"### Fetch detailed information about each Pokémon.\n"
],
"id": "9952767846194e97"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-04T11:58:03.996394Z",
"start_time": "2025-03-04T11:58:03.994122Z"
}
},
"cell_type": "code",
"source": [
"@dlt.transformer(data_from=pokemon_list)\n",
"def pokemon_details(pokemons):\n",
" \"\"\"Fetches detailed info for each Pokémon\"\"\"\n",
" import requests\n",
" for pokemon in pokemons:\n",
" response = requests.get(pokemon[\"url\"])\n",
" response.raise_for_status()\n",
" yield response.json()\n"
],
"id": "79ec9fef12267485",
"outputs": [],
"execution_count": 6
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
"## Run Data Pipeline\n",
"### Execute the pipeline and store Pokémon data.\n"
],
"id": "41e05f660bf9e9d2"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-04T11:59:41.571015Z",
"start_time": "2025-03-04T11:59:36.840744Z"
}
},
"cell_type": "code",
"source": [
"info = pipeline.run([pokemon_list, pokemon_details])\n",
"print(info)\n"
],
"id": "20a3b2c7f404677f",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Pipeline pokemon_pipeline load step completed in 0.06 seconds\n",
"1 load package(s) were loaded to destination filesystem and into dataset pokemon_data\n",
"The filesystem destination used file:///Users/lazar/PycharmProjects/cognee/.data_storage location to store data\n",
"Load package 1741089576.860229 is LOADED and contains no failed jobs\n"
]
}
],
"execution_count": 9
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
"## Load Pokémon Abilities\n",
"### Load Pokémon ability data from stored files.\n"
],
"id": "937f10b8d1037743"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-04T11:59:44.377719Z",
"start_time": "2025-03-04T11:59:44.363718Z"
}
},
"cell_type": "code",
"source": [
"import json\n",
"from cognee.low_level import DataPoint\n",
"from uuid import uuid5, NAMESPACE_OID\n",
"\n",
"class Abilities(DataPoint):\n",
" name: str = \"Abilities\"\n",
" metadata: dict = {\"index_fields\": [\"name\"]}\n",
"\n",
"def load_abilities_data(jsonl_abilities):\n",
" abilities_root = Abilities()\n",
" pokemon_abilities = []\n",
"\n",
" for jsonl_ability in jsonl_abilities:\n",
" with open(jsonl_ability, \"r\") as f:\n",
" for line in f:\n",
" ability = json.loads(line)\n",
" ability[\"id\"] = uuid5(NAMESPACE_OID, ability[\"_dlt_id\"])\n",
" ability[\"name\"] = ability[\"ability__name\"]\n",
" ability[\"is_type\"] = abilities_root\n",
" pokemon_abilities.append(ability)\n",
"\n",
" return abilities_root, pokemon_abilities\n"
],
"id": "be73050036439ea1",
"outputs": [],
"execution_count": 10
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
"## Load Pokémon Data\n",
"### Load Pokémon details and associate them with abilities.\n"
],
"id": "98c97f799f73df77"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-04T11:59:46.251306Z",
"start_time": "2025-03-04T11:59:46.238283Z"
}
},
"cell_type": "code",
"source": [
"from typing import List, Optional\n",
"\n",
"class Pokemons(DataPoint):\n",
" name: str = \"Pokemons\"\n",
" have: Abilities\n",
" metadata: dict = {\"index_fields\": [\"name\"]}\n",
"\n",
"class PokemonAbility(DataPoint):\n",
" name: str\n",
" ability__name: str\n",
" ability__url: str\n",
" is_hidden: bool\n",
" slot: int\n",
" _dlt_load_id: str\n",
" _dlt_id: str\n",
" _dlt_parent_id: str\n",
" _dlt_list_idx: str\n",
" is_type: Abilities\n",
" metadata: dict = {\"index_fields\": [\"ability__name\"]}\n",
"\n",
"class Pokemon(DataPoint):\n",
" name: str\n",
" base_experience: int\n",
" height: int\n",
" weight: int\n",
" is_default: bool\n",
" order: int\n",
" location_area_encounters: str\n",
" species__name: str\n",
" species__url: str\n",
" cries__latest: str\n",
" cries__legacy: str\n",
" sprites__front_default: str\n",
" sprites__front_shiny: str\n",
" sprites__back_default: Optional[str]\n",
" sprites__back_shiny: Optional[str]\n",
" _dlt_load_id: str\n",
" _dlt_id: str\n",
" is_type: Pokemons\n",
" abilities: List[PokemonAbility]\n",
" metadata: dict = {\"index_fields\": [\"name\"]}\n",
"\n",
"def load_pokemon_data(jsonl_pokemons, pokemon_abilities, pokemon_root):\n",
" pokemons = []\n",
"\n",
" for jsonl_pokemon in jsonl_pokemons:\n",
" with open(jsonl_pokemon, \"r\") as f:\n",
" for line in f:\n",
" pokemon_data = json.loads(line)\n",
" abilities = [\n",
" ability for ability in pokemon_abilities\n",
" if ability[\"_dlt_parent_id\"] == pokemon_data[\"_dlt_id\"]\n",
" ]\n",
" pokemon_data[\"external_id\"] = pokemon_data[\"id\"]\n",
" pokemon_data[\"id\"] = uuid5(NAMESPACE_OID, str(pokemon_data[\"id\"]))\n",
" pokemon_data[\"abilities\"] = [PokemonAbility(**ability) for ability in abilities]\n",
" pokemon_data[\"is_type\"] = pokemon_root\n",
" pokemons.append(Pokemon(**pokemon_data))\n",
"\n",
" return pokemons\n"
],
"id": "7862951248df0bf5",
"outputs": [],
"execution_count": 11
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
"## Process Pokémon Data\n",
"### Load and associate Pokémon abilities.\n"
],
"id": "676fa5a2b61c2107"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-04T11:59:47.365226Z",
"start_time": "2025-03-04T11:59:47.356722Z"
}
},
"cell_type": "code",
"source": [
"STORAGE_PATH = Path(\".data_storage/pokemon_data/pokemon_details\")\n",
"jsonl_pokemons = sorted(STORAGE_PATH.glob(\"*.jsonl\"))\n",
"\n",
"ABILITIES_PATH = Path(\".data_storage/pokemon_data/pokemon_details__abilities\")\n",
"jsonl_abilities = sorted(ABILITIES_PATH.glob(\"*.jsonl\"))\n",
"\n",
"abilities_root, pokemon_abilities = load_abilities_data(jsonl_abilities)\n",
"pokemon_root = Pokemons(have=abilities_root)\n",
"pokemons = load_pokemon_data(jsonl_pokemons, pokemon_abilities, pokemon_root)\n"
],
"id": "ad14cdecdccd71bb",
"outputs": [],
"execution_count": 12
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
"## Initialize Cognee\n",
"### Setup Cognee for data processing.\n"
],
"id": "59dec67b2ae50f0f"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-04T11:59:49.244577Z",
"start_time": "2025-03-04T11:59:48.618261Z"
}
},
"cell_type": "code",
"source": [
"import asyncio\n",
"from cognee.low_level import setup as cognee_setup\n",
"\n",
"async def initialize_cognee():\n",
" await cognee.prune.prune_data()\n",
" await cognee.prune.prune_system(metadata=True)\n",
" await cognee_setup()\n",
"\n",
"await initialize_cognee()\n"
],
"id": "d2e095ae576a02c1",
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:cognee.infrastructure.databases.relational.sqlalchemy.SqlAlchemyAdapter:Database deleted successfully.INFO:cognee.infrastructure.databases.relational.sqlalchemy.SqlAlchemyAdapter:Database deleted successfully."
]
}
],
"execution_count": 13
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
"## Process Pokémon Data\n",
"### Add Pokémon data points to Cognee.\n"
],
"id": "5f0b8090bc7b1fe6"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-04T11:59:57.744035Z",
"start_time": "2025-03-04T11:59:50.574033Z"
}
},
"cell_type": "code",
"source": [
"from cognee.modules.pipelines.tasks.Task import Task\n",
"from cognee.tasks.storage import add_data_points\n",
"from cognee.modules.pipelines import run_tasks\n",
"\n",
"tasks = [Task(add_data_points, task_config={\"batch_size\": 50})]\n",
"results = run_tasks(\n",
" tasks=tasks,\n",
" data=pokemons,\n",
" dataset_id=uuid5(NAMESPACE_OID, \"Pokemon\"),\n",
" pipeline_name='pokemon_pipeline',\n",
")\n",
"\n",
"async for result in results:\n",
" print(result)\n",
"print(\"Done\")\n"
],
"id": "ffa12fc1f5350d95",
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:run_tasks(tasks: [Task], data):Pipeline run started: `fd2ed59d-b550-5b05-bbe6-7b708fe12483`INFO:run_tasks(tasks: [Task], data):Coroutine task started: `add_data_points`"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"<cognee.modules.pipelines.models.PipelineRun.PipelineRun object at 0x300bb3950>\n",
"User d347ea85-e512-4cae-b9d7-496fe1745424 has registered.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/lazar/PycharmProjects/cognee/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py:79: SAWarning: This declarative base already contains a class with the same class name and module name as cognee.infrastructure.databases.vector.pgvector.PGVectorAdapter.PGVectorDataPoint, and will be replaced in the string-lookup table.\n",
" class PGVectorDataPoint(Base):\n",
"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"/Users/lazar/PycharmProjects/cognee/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py:113: SAWarning: This declarative base already contains a class with the same class name and module name as cognee.infrastructure.databases.vector.pgvector.PGVectorAdapter.PGVectorDataPoint, and will be replaced in the string-lookup table.\n",
" class PGVectorDataPoint(Base):\n",
"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 8, column: 16, offset: 335} for query: '\\n UNWIND $nodes AS node\\n MERGE (n {id: node.node_id})\\n ON CREATE SET n += node.properties, n.updated_at = timestamp()\\n ON MATCH SET n += node.properties, n.updated_at = timestamp()\\n WITH n, node.node_id AS label\\n CALL apoc.create.addLabels(n, [label]) YIELD node AS labeledNode\\n RETURN ID(labeledNode) AS internal_id, labeledNode.id AS nodeId\\n 'WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 1, column: 18, offset: 17} for query: 'MATCH (n) RETURN ID(n) AS id, labels(n) AS labels, properties(n) AS properties'WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 3, column: 16, offset: 43} for query: '\\n MATCH (n)-[r]->(m)\\n RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties\\n 'WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 3, column: 33, offset: 60} for query: '\\n MATCH (n)-[r]->(m)\\n RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties\\n 'INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:run_tasks(tasks: [Task], data):Coroutine task completed: `add_data_points`INFO:run_tasks(tasks: [Task], data):Pipeline run completed: `fd2ed59d-b550-5b05-bbe6-7b708fe12483`"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"<cognee.modules.pipelines.models.PipelineRun.PipelineRun object at 0x30016fd40>\n",
"Done\n"
]
}
],
"execution_count": 14
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
"## Search Pokémon Data\n",
"### Execute a search query using Cognee.\n"
],
"id": "e0d98d9832a2797a"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-04T12:00:02.878871Z",
"start_time": "2025-03-04T11:59:59.571965Z"
}
},
"cell_type": "code",
"source": [
"from cognee.api.v1.search import SearchType\n",
"\n",
"search_results = await cognee.search(\n",
" query_type=SearchType.GRAPH_COMPLETION,\n",
" query_text=\"pokemons?\"\n",
")\n",
"\n",
"print(\"Search results:\")\n",
"for result_text in search_results:\n",
" print(result_text)"
],
"id": "bb2476b6b0c2aff",
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 1, column: 18, offset: 17} for query: 'MATCH (n) RETURN ID(n) AS id, labels(n) AS labels, properties(n) AS properties'WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 3, column: 16, offset: 43} for query: '\\n MATCH (n)-[r]->(m)\\n RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties\\n 'WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 3, column: 33, offset: 60} for query: '\\n MATCH (n)-[r]->(m)\\n RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties\\n 'INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\u001B[92m13:00:02 - LiteLLM:INFO\u001B[0m: utils.py:2784 - \n",
"LiteLLM completion() model= gpt-4o-mini; provider = openaiINFO:LiteLLM:\n",
"LiteLLM completion() model= gpt-4o-mini; provider = openai"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Search results:\n",
"The Pokemons mentioned are: golbat, jigglypuff, raichu, vulpix, and pikachu.\n"
]
}
],
"execution_count": 15
},
{
"metadata": {},
"cell_type": "code",
"outputs": [],
"execution_count": null,
"source": "",
"id": "a4c2d3e9c15b017"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}