From 56427f287ef85e1548e77d650e648a378c8410ab Mon Sep 17 00:00:00 2001
From: hibajamal <35984866+hibajamal@users.noreply.github.com>
Date: Sat, 8 Mar 2025 20:33:42 +0100
Subject: [PATCH] Demo for relational db with cognee (#620)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Description
This demo uses pydantic models and dlt to pull data from the Pokémon API and structure it into a relational format. Feeding this structured data into cognee makes searching across multiple tables easier and more intuitive, thanks to the relational model.

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.

## Summary by CodeRabbit

- **New Features**
  - Introduced a comprehensive Pokémon data processing pipeline, available as both a Python script and an interactive Jupyter Notebook.
  - Enabled asynchronous operations for efficient data collection and querying, including an integrated search functionality.
  - Improved error handling and data validation during the data fetching and processing stages for a smoother user experience.

Co-authored-by: Vasilije <8619304+Vasilije1990@users.noreply.github.com>
---
 examples/python/pokemon_datapoints_example.py | 262 +++++++++
 notebooks/pokemon_datapoints_notebook.ipynb   | 536 ++++++++++++++++++
 2 files changed, 798 insertions(+)
 create mode 100644 examples/python/pokemon_datapoints_example.py
 create mode 100644 notebooks/pokemon_datapoints_notebook.ipynb

diff --git a/examples/python/pokemon_datapoints_example.py b/examples/python/pokemon_datapoints_example.py
new file mode 100644
index 000000000..058492e63
--- /dev/null
+++ b/examples/python/pokemon_datapoints_example.py
@@ -0,0 +1,262 @@
+# Standard library imports
+import os
+import json
+import asyncio
+import pathlib
+from collections import defaultdict
+from uuid import uuid5, NAMESPACE_OID
+from typing import List, Optional
+from pathlib import Path
+
+import dlt
+import requests
+import cognee
+from cognee.low_level import DataPoint, setup as cognee_setup
+from cognee.api.v1.search import SearchType
+from cognee.tasks.storage import add_data_points
+from cognee.modules.pipelines.tasks.Task import Task
+from cognee.modules.pipelines import run_tasks
+
+
+BASE_URL = "https://pokeapi.co/api/v2/"
+# Seconds to wait on a PokéAPI response before failing instead of hanging forever.
+REQUEST_TIMEOUT = 30
+# Single source of truth for where dlt writes and where this script reads back.
+DATA_STORAGE_DIR = "./.data_storage"
+DATASET_NAME = "pokemon_data"
+os.environ["BUCKET_URL"] = DATA_STORAGE_DIR
+os.environ["DATA_WRITER__DISABLE_COMPRESSION"] = "true"
+
+
+# Data Models
+class Abilities(DataPoint):
+    """Shared root object that every ability references through ``is_type``."""
+
+    name: str = "Abilities"
+    metadata: dict = {"index_fields": ["name"]}
+
+
+class PokemonAbility(DataPoint):
+    """One row read from the ``pokemon_details__abilities`` JSONL files."""
+
+    name: str
+    ability__name: str
+    ability__url: str
+    is_hidden: bool
+    slot: int
+    _dlt_load_id: str
+    _dlt_id: str
+    _dlt_parent_id: str
+    _dlt_list_idx: str
+    is_type: Abilities
+    metadata: dict = {"index_fields": ["ability__name"]}
+
+
+class Pokemons(DataPoint):
+    """Shared root object that every Pokémon references through ``is_type``."""
+
+    name: str = "Pokemons"
+    have: Abilities
+    metadata: dict = {"index_fields": ["name"]}
+
+
+class Pokemon(DataPoint):
+    """One row read from the ``pokemon_details`` JSONL files, plus its abilities."""
+
+    name: str
+    base_experience: int
+    height: int
+    weight: int
+    is_default: bool
+    order: int
+    location_area_encounters: str
+    species__name: str
+    species__url: str
+    cries__latest: str
+    cries__legacy: str
+    sprites__front_default: str
+    sprites__front_shiny: str
+    sprites__back_default: Optional[str]
+    sprites__back_shiny: Optional[str]
+    _dlt_load_id: str
+    _dlt_id: str
+    is_type: Pokemons
+    abilities: List[PokemonAbility]
+    metadata: dict = {"index_fields": ["name"]}
+
+
+# Data Collection Functions
+@dlt.resource(write_disposition="replace")
+def pokemon_list(limit: int = 50):
+    """Yield the ``results`` list of the Pokémon listing, capped at ``limit``."""
+    response = requests.get(
+        f"{BASE_URL}pokemon", params={"limit": limit}, timeout=REQUEST_TIMEOUT
+    )
+    response.raise_for_status()
+    yield response.json()["results"]
+
+
+@dlt.transformer(data_from=pokemon_list)
+def pokemon_details(pokemons):
+    """Fetches detailed info for each Pokémon"""
+    # One session for the whole batch so the connection to the API is reused.
+    with requests.Session() as session:
+        for pokemon in pokemons:
+            response = session.get(pokemon["url"], timeout=REQUEST_TIMEOUT)
+            response.raise_for_status()
+            yield response.json()
+
+
+# Data Loading Functions
+def _read_jsonl(paths):
+    """Yield one decoded object per non-blank line of each JSONL file in ``paths``."""
+    for path in paths:
+        with open(path, "r", encoding="utf-8") as f:
+            for line in f:
+                if line.strip():
+                    yield json.loads(line)
+
+
+def _find_jsonl_files(table_name):
+    """Return the sorted JSONL files stored for ``table_name``.
+
+    Raises:
+        FileNotFoundError: If the table directory holds no ``*.jsonl`` file.
+    """
+    table_path = Path(DATA_STORAGE_DIR) / DATASET_NAME / table_name
+    jsonl_files = sorted(table_path.glob("*.jsonl"))
+    if not jsonl_files:
+        raise FileNotFoundError(f"No JSONL files found in {table_path}.")
+    return jsonl_files
+
+
+def load_abilities_data(jsonl_abilities):
+    """Read ability rows and attach them to a fresh ``Abilities`` root.
+
+    Args:
+        jsonl_abilities: Paths of the JSONL files holding ability rows.
+
+    Returns:
+        ``(abilities_root, pokemon_abilities)`` where the second item is a list
+        of plain dicts; they are turned into ``PokemonAbility`` objects in
+        ``load_pokemon_data``.
+    """
+    abilities_root = Abilities()
+    pokemon_abilities = []
+
+    for ability in _read_jsonl(jsonl_abilities):
+        ability["id"] = uuid5(NAMESPACE_OID, ability["_dlt_id"])
+        ability["name"] = ability["ability__name"]
+        ability["is_type"] = abilities_root
+        pokemon_abilities.append(ability)
+
+    return abilities_root, pokemon_abilities
+
+
+def load_pokemon_data(jsonl_pokemons, pokemon_abilities, pokemon_root):
+    """Build ``Pokemon`` objects and link each one to its abilities.
+
+    Args:
+        jsonl_pokemons: Paths of the JSONL files holding Pokémon rows.
+        pokemon_abilities: Ability dicts as returned by ``load_abilities_data``.
+        pokemon_root: ``Pokemons`` root assigned to every Pokémon's ``is_type``.
+
+    Returns:
+        List of ``Pokemon`` objects, in file order.
+    """
+    # Group abilities by parent row once instead of rescanning the whole list
+    # for every Pokémon.
+    abilities_by_parent = defaultdict(list)
+    for ability in pokemon_abilities:
+        abilities_by_parent[ability["_dlt_parent_id"]].append(ability)
+
+    pokemons = []
+    for pokemon_data in _read_jsonl(jsonl_pokemons):
+        abilities = abilities_by_parent.get(pokemon_data["_dlt_id"], [])
+        # Preserve the source id as external_id; "id" is replaced by a UUID derived from it.
+        pokemon_data["external_id"] = pokemon_data["id"]
+        pokemon_data["id"] = uuid5(NAMESPACE_OID, str(pokemon_data["id"]))
+        pokemon_data["abilities"] = [PokemonAbility(**ability) for ability in abilities]
+        pokemon_data["is_type"] = pokemon_root
+        pokemons.append(Pokemon(**pokemon_data))
+
+    return pokemons
+
+
+# Main Application Logic
+async def setup_and_process_data():
+    """Run the dlt pipeline and turn its output into ``Pokemon`` objects.
+
+    Returns:
+        List of ``Pokemon`` objects built from the files the pipeline wrote.
+
+    Raises:
+        FileNotFoundError: If the pipeline left no JSONL file for a table.
+    """
+    # Setup configuration
+    script_directory = pathlib.Path(__file__).parent
+    data_directory_path = str((script_directory / ".data_storage").resolve())
+    cognee_directory_path = str((script_directory / ".cognee_system").resolve())
+
+    cognee.config.data_root_directory(data_directory_path)
+    cognee.config.system_root_directory(cognee_directory_path)
+
+    # Initialize pipeline and collect data
+    pipeline = dlt.pipeline(
+        pipeline_name="pokemon_pipeline",
+        destination="filesystem",
+        dataset_name=DATASET_NAME,
+    )
+    info = pipeline.run([pokemon_list, pokemon_details])
+    print(info)
+
+    # Load and process data
+    jsonl_pokemons = _find_jsonl_files("pokemon_details")
+    jsonl_abilities = _find_jsonl_files("pokemon_details__abilities")
+
+    # Process data
+    abilities_root, pokemon_abilities = load_abilities_data(jsonl_abilities)
+    pokemon_root = Pokemons(have=abilities_root)
+    pokemons = load_pokemon_data(jsonl_pokemons, pokemon_abilities, pokemon_root)
+
+    return pokemons
+
+
+async def pokemon_cognify(pokemons):
+    """Process Pokemon data with Cognee and perform search"""
+    # Setup and run Cognee tasks
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+    await cognee_setup()
+
+    tasks = [Task(add_data_points, task_config={"batch_size": 50})]
+    results = run_tasks(
+        tasks=tasks,
+        data=pokemons,
+        dataset_id=uuid5(NAMESPACE_OID, "Pokemon"),
+        pipeline_name="pokemon_pipeline",
+    )
+
+    async for result in results:
+        print(result)
+    print("Done")
+
+    # Perform search
+    search_results = await cognee.search(
+        query_type=SearchType.GRAPH_COMPLETION,
+        query_text="pokemons?"
+    )
+
+    print("Search results:")
+    for result_text in search_results:
+        print(result_text)
+
+
+async def main():
+    """Collect the Pokémon data, then store it with cognee and run the search."""
+    pokemons = await setup_and_process_data()
+    await pokemon_cognify(pokemons)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/notebooks/pokemon_datapoints_notebook.ipynb b/notebooks/pokemon_datapoints_notebook.ipynb
new file mode 100644
index 000000000..9fbc34bc1
--- /dev/null
+++ b/notebooks/pokemon_datapoints_notebook.ipynb
@@ -0,0 +1,536 @@
+{
+ "cells": [
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-03-04T11:58:00.193158Z",
+     "start_time": "2025-03-04T11:58:00.190238Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "import nest_asyncio\n",
+    "nest_asyncio.apply()"
+   ],
+   "id": "2efba278d106bb5f",
+   "outputs": [],
+   "execution_count": 2
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": [
+    "### Environment Configuration\n",
+    "#### Setup required directories and environment variables.\n"
+   ],
+   "id": "ccbb2bc23aa456ee"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-03-04T11:59:33.879188Z",
+     "start_time": "2025-03-04T11:59:33.873682Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "import pathlib\n",
+    "import os\n",
+    "import cognee\n",
+    "\n",
+    "notebook_dir = pathlib.Path().resolve()\n",
+    "data_directory_path = str(notebook_dir / \".data_storage\")\n",
+    "cognee_directory_path = str(notebook_dir / \".cognee_system\")\n",
+    "\n",
+    "cognee.config.data_root_directory(data_directory_path)\n",
+    "cognee.config.system_root_directory(cognee_directory_path)\n",
+    "\n",
+    "BASE_URL = \"https://pokeapi.co/api/v2/\"\n",
+    "os.environ[\"BUCKET_URL\"] = data_directory_path\n",
+    "os.environ[\"DATA_WRITER__DISABLE_COMPRESSION\"] = \"true\"\n"
+   ],
+   "id": "662d554f96f211d9",
+   "outputs": [],
+   "execution_count": 8
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": [
+    "## Initialize DLT Pipeline\n",
+    "### Create the DLT pipeline to fetch Pokémon data.\n"
+   ],
+   "id":
"36ae0be71f6e9167" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-03-04T11:58:03.982939Z", + "start_time": "2025-03-04T11:58:03.819676Z" + } + }, + "cell_type": "code", + "source": [ + "import dlt\n", + "from pathlib import Path\n", + "\n", + "pipeline = dlt.pipeline(\n", + " pipeline_name=\"pokemon_pipeline\",\n", + " destination=\"filesystem\",\n", + " dataset_name=\"pokemon_data\",\n", + ")\n" + ], + "id": "25101ae5f016ce0c", + "outputs": [], + "execution_count": 4 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## Fetch Pokémon List\n", + "### Retrieve a list of Pokémon from the API.\n" + ], + "id": "9a87ce05a072c48b" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-03-04T11:58:03.990076Z", + "start_time": "2025-03-04T11:58:03.987199Z" + } + }, + "cell_type": "code", + "source": [ + "@dlt.resource(write_disposition=\"replace\")\n", + "def pokemon_list(limit: int = 50):\n", + " import requests\n", + " response = requests.get(f\"{BASE_URL}pokemon\", params={\"limit\": limit})\n", + " response.raise_for_status()\n", + " yield response.json()[\"results\"]\n" + ], + "id": "3b6e60778c61e24a", + "outputs": [], + "execution_count": 5 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## Fetch Pokémon Details\n", + "### Fetch detailed information about each Pokémon.\n" + ], + "id": "9952767846194e97" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-03-04T11:58:03.996394Z", + "start_time": "2025-03-04T11:58:03.994122Z" + } + }, + "cell_type": "code", + "source": [ + "@dlt.transformer(data_from=pokemon_list)\n", + "def pokemon_details(pokemons):\n", + " \"\"\"Fetches detailed info for each Pokémon\"\"\"\n", + " import requests\n", + " for pokemon in pokemons:\n", + " response = requests.get(pokemon[\"url\"])\n", + " response.raise_for_status()\n", + " yield response.json()\n" + ], + "id": "79ec9fef12267485", + "outputs": [], + "execution_count": 6 + }, + { + "metadata": {}, + 
"cell_type": "markdown", + "source": [ + "## Run Data Pipeline\n", + "### Execute the pipeline and store Pokémon data.\n" + ], + "id": "41e05f660bf9e9d2" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-03-04T11:59:41.571015Z", + "start_time": "2025-03-04T11:59:36.840744Z" + } + }, + "cell_type": "code", + "source": [ + "info = pipeline.run([pokemon_list, pokemon_details])\n", + "print(info)\n" + ], + "id": "20a3b2c7f404677f", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Pipeline pokemon_pipeline load step completed in 0.06 seconds\n", + "1 load package(s) were loaded to destination filesystem and into dataset pokemon_data\n", + "The filesystem destination used file:///Users/lazar/PycharmProjects/cognee/.data_storage location to store data\n", + "Load package 1741089576.860229 is LOADED and contains no failed jobs\n" + ] + } + ], + "execution_count": 9 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## Load Pokémon Abilities\n", + "### Load Pokémon ability data from stored files.\n" + ], + "id": "937f10b8d1037743" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-03-04T11:59:44.377719Z", + "start_time": "2025-03-04T11:59:44.363718Z" + } + }, + "cell_type": "code", + "source": [ + "import json\n", + "from cognee.low_level import DataPoint\n", + "from uuid import uuid5, NAMESPACE_OID\n", + "\n", + "class Abilities(DataPoint):\n", + " name: str = \"Abilities\"\n", + " metadata: dict = {\"index_fields\": [\"name\"]}\n", + "\n", + "def load_abilities_data(jsonl_abilities):\n", + " abilities_root = Abilities()\n", + " pokemon_abilities = []\n", + "\n", + " for jsonl_ability in jsonl_abilities:\n", + " with open(jsonl_ability, \"r\") as f:\n", + " for line in f:\n", + " ability = json.loads(line)\n", + " ability[\"id\"] = uuid5(NAMESPACE_OID, ability[\"_dlt_id\"])\n", + " ability[\"name\"] = ability[\"ability__name\"]\n", + " ability[\"is_type\"] = abilities_root\n", + " 
pokemon_abilities.append(ability)\n", + "\n", + " return abilities_root, pokemon_abilities\n" + ], + "id": "be73050036439ea1", + "outputs": [], + "execution_count": 10 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## Load Pokémon Data\n", + "### Load Pokémon details and associate them with abilities.\n" + ], + "id": "98c97f799f73df77" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-03-04T11:59:46.251306Z", + "start_time": "2025-03-04T11:59:46.238283Z" + } + }, + "cell_type": "code", + "source": [ + "from typing import List, Optional\n", + "\n", + "class Pokemons(DataPoint):\n", + " name: str = \"Pokemons\"\n", + " have: Abilities\n", + " metadata: dict = {\"index_fields\": [\"name\"]}\n", + "\n", + "class PokemonAbility(DataPoint):\n", + " name: str\n", + " ability__name: str\n", + " ability__url: str\n", + " is_hidden: bool\n", + " slot: int\n", + " _dlt_load_id: str\n", + " _dlt_id: str\n", + " _dlt_parent_id: str\n", + " _dlt_list_idx: str\n", + " is_type: Abilities\n", + " metadata: dict = {\"index_fields\": [\"ability__name\"]}\n", + "\n", + "class Pokemon(DataPoint):\n", + " name: str\n", + " base_experience: int\n", + " height: int\n", + " weight: int\n", + " is_default: bool\n", + " order: int\n", + " location_area_encounters: str\n", + " species__name: str\n", + " species__url: str\n", + " cries__latest: str\n", + " cries__legacy: str\n", + " sprites__front_default: str\n", + " sprites__front_shiny: str\n", + " sprites__back_default: Optional[str]\n", + " sprites__back_shiny: Optional[str]\n", + " _dlt_load_id: str\n", + " _dlt_id: str\n", + " is_type: Pokemons\n", + " abilities: List[PokemonAbility]\n", + " metadata: dict = {\"index_fields\": [\"name\"]}\n", + "\n", + "def load_pokemon_data(jsonl_pokemons, pokemon_abilities, pokemon_root):\n", + " pokemons = []\n", + "\n", + " for jsonl_pokemon in jsonl_pokemons:\n", + " with open(jsonl_pokemon, \"r\") as f:\n", + " for line in f:\n", + " pokemon_data = 
json.loads(line)\n", + " abilities = [\n", + " ability for ability in pokemon_abilities\n", + " if ability[\"_dlt_parent_id\"] == pokemon_data[\"_dlt_id\"]\n", + " ]\n", + " pokemon_data[\"external_id\"] = pokemon_data[\"id\"]\n", + " pokemon_data[\"id\"] = uuid5(NAMESPACE_OID, str(pokemon_data[\"id\"]))\n", + " pokemon_data[\"abilities\"] = [PokemonAbility(**ability) for ability in abilities]\n", + " pokemon_data[\"is_type\"] = pokemon_root\n", + " pokemons.append(Pokemon(**pokemon_data))\n", + "\n", + " return pokemons\n" + ], + "id": "7862951248df0bf5", + "outputs": [], + "execution_count": 11 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## Process Pokémon Data\n", + "### Load and associate Pokémon abilities.\n" + ], + "id": "676fa5a2b61c2107" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-03-04T11:59:47.365226Z", + "start_time": "2025-03-04T11:59:47.356722Z" + } + }, + "cell_type": "code", + "source": [ + "STORAGE_PATH = Path(\".data_storage/pokemon_data/pokemon_details\")\n", + "jsonl_pokemons = sorted(STORAGE_PATH.glob(\"*.jsonl\"))\n", + "\n", + "ABILITIES_PATH = Path(\".data_storage/pokemon_data/pokemon_details__abilities\")\n", + "jsonl_abilities = sorted(ABILITIES_PATH.glob(\"*.jsonl\"))\n", + "\n", + "abilities_root, pokemon_abilities = load_abilities_data(jsonl_abilities)\n", + "pokemon_root = Pokemons(have=abilities_root)\n", + "pokemons = load_pokemon_data(jsonl_pokemons, pokemon_abilities, pokemon_root)\n" + ], + "id": "ad14cdecdccd71bb", + "outputs": [], + "execution_count": 12 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## Initialize Cognee\n", + "### Setup Cognee for data processing.\n" + ], + "id": "59dec67b2ae50f0f" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-03-04T11:59:49.244577Z", + "start_time": "2025-03-04T11:59:48.618261Z" + } + }, + "cell_type": "code", + "source": [ + "import asyncio\n", + "from cognee.low_level import setup as cognee_setup\n", + 
"\n", + "async def initialize_cognee():\n", + " await cognee.prune.prune_data()\n", + " await cognee.prune.prune_system(metadata=True)\n", + " await cognee_setup()\n", + "\n", + "await initialize_cognee()\n" + ], + "id": "d2e095ae576a02c1", + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cognee.infrastructure.databases.relational.sqlalchemy.SqlAlchemyAdapter:Database deleted successfully.INFO:cognee.infrastructure.databases.relational.sqlalchemy.SqlAlchemyAdapter:Database deleted successfully." + ] + } + ], + "execution_count": 13 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## Process Pokémon Data\n", + "### Add Pokémon data points to Cognee.\n" + ], + "id": "5f0b8090bc7b1fe6" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-03-04T11:59:57.744035Z", + "start_time": "2025-03-04T11:59:50.574033Z" + } + }, + "cell_type": "code", + "source": [ + "from cognee.modules.pipelines.tasks.Task import Task\n", + "from cognee.tasks.storage import add_data_points\n", + "from cognee.modules.pipelines import run_tasks\n", + "\n", + "tasks = [Task(add_data_points, task_config={\"batch_size\": 50})]\n", + "results = run_tasks(\n", + " tasks=tasks,\n", + " data=pokemons,\n", + " dataset_id=uuid5(NAMESPACE_OID, \"Pokemon\"),\n", + " pipeline_name='pokemon_pipeline',\n", + ")\n", + "\n", + "async for result in results:\n", + " print(result)\n", + "print(\"Done\")\n" + ], + "id": "ffa12fc1f5350d95", + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:run_tasks(tasks: [Task], data):Pipeline run started: `fd2ed59d-b550-5b05-bbe6-7b708fe12483`INFO:run_tasks(tasks: [Task], data):Coroutine task started: `add_data_points`" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "User d347ea85-e512-4cae-b9d7-496fe1745424 has registered.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"/Users/lazar/PycharmProjects/cognee/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py:79: SAWarning: This declarative base already contains a class with the same class name and module name as cognee.infrastructure.databases.vector.pgvector.PGVectorAdapter.PGVectorDataPoint, and will be replaced in the string-lookup table.\n", + " class PGVectorDataPoint(Base):\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"/Users/lazar/PycharmProjects/cognee/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py:113: SAWarning: This declarative base already contains a class with the same class name and module name as cognee.infrastructure.databases.vector.pgvector.PGVectorAdapter.PGVectorDataPoint, and will be replaced in the string-lookup table.\n", + " class PGVectorDataPoint(Base):\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 8, column: 16, offset: 335} for query: '\\n UNWIND $nodes AS node\\n MERGE (n {id: node.node_id})\\n ON CREATE SET n += node.properties, n.updated_at = timestamp()\\n ON MATCH SET n += node.properties, n.updated_at = timestamp()\\n WITH n, node.node_id AS label\\n CALL apoc.create.addLabels(n, [label]) YIELD node AS labeledNode\\n RETURN ID(labeledNode) AS internal_id, labeledNode.id AS nodeId\\n 'WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: 
Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 1, column: 18, offset: 17} for query: 'MATCH (n) RETURN ID(n) AS id, labels(n) AS labels, properties(n) AS properties'WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 3, column: 16, offset: 43} for query: '\\n MATCH (n)-[r]->(m)\\n RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties\\n 'WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 3, column: 33, offset: 60} for query: '\\n MATCH (n)-[r]->(m)\\n RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties\\n 'INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:run_tasks(tasks: [Task], data):Coroutine task completed: `add_data_points`INFO:run_tasks(tasks: [Task], data):Pipeline run completed: `fd2ed59d-b550-5b05-bbe6-7b708fe12483`" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Done\n" + ] + } + ], + "execution_count": 14 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## Search Pokémon Data\n", + "### Execute a search query using Cognee.\n" + ], + "id": "e0d98d9832a2797a" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-03-04T12:00:02.878871Z", + 
"start_time": "2025-03-04T11:59:59.571965Z" + } + }, + "cell_type": "code", + "source": [ + "from cognee.api.v1.search import SearchType\n", + "\n", + "search_results = await cognee.search(\n", + " query_type=SearchType.GRAPH_COMPLETION,\n", + " query_text=\"pokemons?\"\n", + ")\n", + "\n", + "print(\"Search results:\")\n", + "for result_text in search_results:\n", + " print(result_text)" + ], + "id": "bb2476b6b0c2aff", + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST 
https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 1, column: 18, offset: 17} for query: 'MATCH (n) RETURN ID(n) AS id, labels(n) AS labels, properties(n) AS properties'WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 3, column: 16, offset: 43} for query: '\\n MATCH (n)-[r]->(m)\\n RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties\\n 'WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 3, column: 33, offset: 60} for query: '\\n MATCH (n)-[r]->(m)\\n RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties\\n 'INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\u001B[92m13:00:02 - LiteLLM:INFO\u001B[0m: utils.py:2784 - \n", + "LiteLLM completion() 
model= gpt-4o-mini; provider = openaiINFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-4o-mini; provider = openai" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Search results:\n", + "The Pokemons mentioned are: golbat, jigglypuff, raichu, vulpix, and pikachu.\n" + ] + } + ], + "execution_count": 15 + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": "", + "id": "a4c2d3e9c15b017" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}