{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Cognee GraphRAG with Multimedia files\n",
    "\n",
    "This notebook adds an audio file and an image to cognee, builds a knowledge graph from them with `cognify`, and then queries cognee for summaries of the ingested files."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "vscode": {
     "languageId": "plaintext"
    }
   },
   "source": [
    "## Load Data\n",
    "\n",
    "We will use a few sample multimedia files which we have on GitHub for easy access.\n",
    "\n",
    "The paths are resolved relative to the current working directory (`../examples/data/multimedia`), so start the notebook from the folder that contains it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-30T11:54:44.613431Z",
     "start_time": "2025-06-30T11:54:44.606687Z"
    }
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import pathlib\n",
    "\n",
    "# cognee knowledge graph will be created based on the text\n",
    "# and description of these files\n",
    "mp3_file_path = os.path.join(\n",
    "    os.path.abspath(\"\"),\n",
    "    \"../\",\n",
    "    \"examples/data/multimedia/text_to_speech.mp3\",\n",
    ")\n",
    "png_file_path = os.path.join(\n",
    "    os.path.abspath(\"\"),\n",
    "    \"../\",\n",
    "    \"examples/data/multimedia/example.png\",\n",
    ")\n",
    "\n",
    "# Fail fast when the notebook is started from another working directory:\n",
    "# both paths above are resolved relative to the current directory.\n",
    "for media_path in (mp3_file_path, png_file_path):\n",
    "    if not pathlib.Path(media_path).is_file():\n",
    "        raise FileNotFoundError(f\"Sample file not found: {media_path}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Set environment variables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-30T11:54:46.739157Z",
     "start_time": "2025-06-30T11:54:46.734808Z"
    }
   },
   "outputs": [],
   "source": [
    "# Keep an LLM_API_KEY that is already exported; otherwise fall back to an\n",
    "# empty placeholder (presumably to be replaced with a real key -- confirm).\n",
    "os.environ.setdefault(\"LLM_API_KEY\", \"\")\n",
    "\n",
    "# Graph database provider: \"kuzu\" (used here), \"neo4j\" or \"networkx\"\n",
    "os.environ[\"GRAPH_DATABASE_PROVIDER\"] = \"kuzu\"\n",
    "# Not needed if using networkx; also left unset for the \"kuzu\" setup used here\n",
    "# os.environ[\"GRAPH_DATABASE_URL\"]=\"\"\n",
    "# os.environ[\"GRAPH_DATABASE_USERNAME\"]=\"\"\n",
    "# os.environ[\"GRAPH_DATABASE_PASSWORD\"]=\"\"\n",
    "\n",
    "# Vector database provider: \"pgvector\", \"qdrant\", \"weaviate\" or \"lancedb\"\n",
    "os.environ[\"VECTOR_DB_PROVIDER\"] = \"lancedb\"\n",
    "# Not needed if using \"lancedb\" or \"pgvector\"\n",
    "# os.environ[\"VECTOR_DB_URL\"]=\"\"\n",
    "# os.environ[\"VECTOR_DB_KEY\"]=\"\"\n",
    "\n",
    "# Relational Database provider \"sqlite\" or \"postgres\"\n",
    "os.environ[\"DB_PROVIDER\"] = \"sqlite\"\n",
    "\n",
    "# Database name\n",
    "os.environ[\"DB_NAME\"] = \"cognee_db\"\n",
    "\n",
    "# Postgres specific parameters (Only if Postgres or PGVector is used)\n",
    "# os.environ[\"DB_HOST\"]=\"127.0.0.1\"\n",
    "# os.environ[\"DB_PORT\"]=\"5432\"\n",
    "# os.environ[\"DB_USERNAME\"]=\"cognee\"\n",
    "# os.environ[\"DB_PASSWORD\"]=\"cognee\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.3.5-local\n"
     ]
    }
   ],
   "source": [
    "# Import cognee only after the configuration cell above has run\n",
    "# (presumably the settings are read when the package loads -- TODO confirm).\n",
    "import cognee\n",
    "from cognee.api.v1.search import SearchType\n",
    "\n",
    "print(cognee.__version__)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Run Cognee with multimedia files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "User 03f552c1-331f-40b2-a99b-b3b05aa93e0d has registered.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{UUID('8f486d81-4723-5f3d-b37b-5e27d9967d33'): PipelineRunCompleted(status='PipelineRunCompleted', pipeline_run_id=UUID('1c237436-d3eb-5408-874d-91647cf2dcef'), dataset_id=UUID('8f486d81-4723-5f3d-b37b-5e27d9967d33'), dataset_name='main_dataset', payload=None, data_ingestion_info=[{'run_info': PipelineRunCompleted(status='PipelineRunCompleted', pipeline_run_id=UUID('1c237436-d3eb-5408-874d-91647cf2dcef'), dataset_id=UUID('8f486d81-4723-5f3d-b37b-5e27d9967d33'), dataset_name='main_dataset', payload=None, data_ingestion_info=None), 'data_id': UUID('56c22102-965d-592e-958c-c1ebebf0008f')}, {'run_info': PipelineRunCompleted(status='PipelineRunCompleted', pipeline_run_id=UUID('1c237436-d3eb-5408-874d-91647cf2dcef'), dataset_id=UUID('8f486d81-4723-5f3d-b37b-5e27d9967d33'), dataset_name='main_dataset', payload=None, data_ingestion_info=None), 'data_id': UUID('e26acfac-f1c2-5d9d-b95a-e970a75aedde')}])}"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Create a clean slate for cognee -- reset data and system state\n",
    "await cognee.prune.prune_data()\n",
    "await cognee.prune.prune_system(metadata=True)\n",
    "\n",
    "# Add multimedia files and make them available for cognify\n",
    "await cognee.add([mp3_file_path, png_file_path])\n",
    "\n",
    "# Create knowledge graph with cognee\n",
    "await cognee.cognify()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Query Cognee for summaries related to multimedia files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-30T11:44:56.372628Z",
     "start_time": "2025-06-30T11:44:55.978258Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'id': '766ac5d6-1a81-530e-a934-61e2bf505d9b', 'created_at': 1759869455990, 'updated_at': 1759869455990, 'ontology_valid': False, 'version': 1, 'topological_rank': 0, 'type': 'IndexSchema', 'text': 'A humorous take on programmers and light bulbs.'}\n",
      "{'id': '2862798a-0dfc-5994-a3ca-9f4329f42f06', 'created_at': 1759869455989, 'updated_at': 1759869455989, 'ontology_valid': False, 'version': 1, 'topological_rank': 0, 'type': 'IndexSchema', 'text': \"Programmers won't change a light bulb.\"}\n"
     ]
    }
   ],
   "source": [
    "# Query cognee for summaries of the data in the multimedia files\n",
    "search_results = await cognee.search(\n",
    "    query_type=SearchType.SUMMARIES,\n",
    "    query_text=\"What is in the multimedia files?\",\n",
    ")\n",
    "\n",
    "# Display search results (each one is printed on its own line)\n",
    "for summary in search_results:\n",
    "    print(summary)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Clean up\n",
    "\n",
    "Outside of GitHub Actions the next cell terminates the kernel process to release resources. The notebook UI will then report that the kernel died; this is expected."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Only exit in interactive mode, not during GitHub Actions.\n",
    "# os._exit(0) ends the kernel process immediately, so the notebook UI\n",
    "# reports a dead kernel afterwards -- that is expected here.\n",
    "if os.environ.get(\"GITHUB_ACTIONS\"):\n",
    "    print(\"Skipping kernel exit - running in GitHub Actions\")\n",
    "else:\n",
    "    print(\"Exiting kernel to clean up resources...\")\n",
    "    os._exit(0)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}