428 lines
27 KiB
Text
Vendored
428 lines
27 KiB
Text
Vendored
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Cognee GraphRAG with Multimedia files"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"vscode": {
|
|
"languageId": "plaintext"
|
|
}
|
|
},
|
|
"source": [
|
|
"## Load Data\n",
|
|
"\n",
|
|
"We will use a few sample multimedia files, which are available on GitHub for easy access."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-06-30T11:54:44.613431Z",
|
|
"start_time": "2025-06-30T11:54:44.606687Z"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import os\n",
|
|
"import pathlib\n",
|
|
"\n",
|
|
"# cognee knowledge graph will be created based on the text\n",
|
|
"# and description of these files\n",
|
|
"mp3_file_path = os.path.join(\n",
|
|
" os.path.abspath(\"\"),\n",
|
|
" \"../\",\n",
|
|
" \"examples/data/multimedia/text_to_speech.mp3\",\n",
|
|
")\n",
|
|
"png_file_path = os.path.join(\n",
|
|
" os.path.abspath(\"\"),\n",
|
|
" \"../\",\n",
|
|
" \"examples/data/multimedia/example.png\",\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Set environment variables"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-06-30T11:54:46.739157Z",
|
|
"start_time": "2025-06-30T11:54:46.734808Z"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import os\n",
|
|
"\n",
|
|
"if \"LLM_API_KEY\" not in os.environ:\n",
|
|
" os.environ[\"LLM_API_KEY\"] = \"\"\n",
|
|
"\n",
|
|
"# \"neo4j\", \"kuzu\" or \"networkx\"\n",
|
|
"os.environ[\"GRAPH_DATABASE_PROVIDER\"] = \"kuzu\"\n",
|
|
"# Not needed if using networkx or kuzu\n",
|
|
"# os.environ[\"GRAPH_DATABASE_URL\"]=\"\"\n",
|
|
"# os.environ[\"GRAPH_DATABASE_USERNAME\"]=\"\"\n",
|
|
"# os.environ[\"GRAPH_DATABASE_PASSWORD\"]=\"\"\n",
|
|
"\n",
|
|
"# \"pgvector\", \"qdrant\", \"weaviate\" or \"lancedb\"\n",
|
|
"os.environ[\"VECTOR_DB_PROVIDER\"] = \"lancedb\"\n",
|
|
"# Not needed if using \"lancedb\" or \"pgvector\"\n",
|
|
"# os.environ[\"VECTOR_DB_URL\"]=\"\"\n",
|
|
"# os.environ[\"VECTOR_DB_KEY\"]=\"\"\n",
|
|
"\n",
|
|
"# Relational Database provider \"sqlite\" or \"postgres\"\n",
|
|
"os.environ[\"DB_PROVIDER\"] = \"sqlite\"\n",
|
|
"\n",
|
|
"# Database name\n",
|
|
"os.environ[\"DB_NAME\"] = \"cognee_db\"\n",
|
|
"\n",
|
|
"# Postgres specific parameters (Only if Postgres or PGVector is used)\n",
|
|
"# os.environ[\"DB_HOST\"]=\"127.0.0.1\"\n",
|
|
"# os.environ[\"DB_PORT\"]=\"5432\"\n",
|
|
"# os.environ[\"DB_USERNAME\"]=\"cognee\"\n",
|
|
"# os.environ[\"DB_PASSWORD\"]=\"cognee\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Run Cognee with multimedia files"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:21:47.304571\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mDeleted Kuzu database files at /Users/daulet/Desktop/dev/cognee-claude/cognee/.cognee_system/databases/cognee_graph_kuzu\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:21:47.739751\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mDatabase deleted successfully.\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[1mLangfuse client is disabled since no public_key was provided as a parameter or environment variable 'LANGFUSE_PUBLIC_KEY'. See our docs: https://langfuse.com/docs/sdk/python/low-level-sdk#initialize-client\u001b[0m\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"User f5c66ce8-859b-44d4-941a-df6eee1f1d2a has registered.\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\u001b[92m14:21:48 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
|
|
"LiteLLM completion() model= gpt-5-mini; provider = openai\n",
|
|
"\n",
|
|
"\u001b[1m\n",
|
|
"LiteLLM completion() model= gpt-5-mini; provider = openai\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[1mEmbeddingRateLimiter initialized: enabled=False, requests_limit=60, interval_seconds=60\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:21:54.231053\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run started: `bb1e12db-8d3f-5e80-8615-2444eda4b32a`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:21:54.377156\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `resolve_data_directories`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:21:54.527056\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `ingest_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:21:54.687437\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRegistered loader: pypdf_loader\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.infrastructure.loaders.LoaderEngine\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:21:54.687879\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRegistered loader: text_loader\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.infrastructure.loaders.LoaderEngine\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:21:54.688276\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRegistered loader: image_loader\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.infrastructure.loaders.LoaderEngine\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:21:54.688542\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRegistered loader: audio_loader\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.infrastructure.loaders.LoaderEngine\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:21:54.688832\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRegistered loader: unstructured_loader\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.infrastructure.loaders.LoaderEngine\u001b[0m]\u001b[0m\n",
|
|
"\u001b[92m14:21:56 - LiteLLM:INFO\u001b[0m: utils.py:1274 - Wrapper: Completed Call, calling success_handler\n",
|
|
"\n",
|
|
"\u001b[1mWrapper: Completed Call, calling success_handler\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:21:56.382164\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `ingest_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:21:56.534718\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `resolve_data_directories`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:21:56.688121\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run completed: `bb1e12db-8d3f-5e80-8615-2444eda4b32a`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:21:56.848444\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run started: `bb1e12db-8d3f-5e80-8615-2444eda4b32a`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:21:57.015428\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `resolve_data_directories`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:21:57.159922\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `ingest_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
|
|
"\u001b[92m14:21:57 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
|
|
"LiteLLM completion() model= gpt-5-mini; provider = openai\n",
|
|
"\n",
|
|
"\u001b[1m\n",
|
|
"LiteLLM completion() model= gpt-5-mini; provider = openai\u001b[0m\n",
|
|
"\u001b[92m14:22:01 - LiteLLM:INFO\u001b[0m: utils.py:1274 - Wrapper: Completed Call, calling success_handler\n",
|
|
"\n",
|
|
"\u001b[1mWrapper: Completed Call, calling success_handler\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:22:01.502324\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `ingest_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:22:01.643327\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `resolve_data_directories`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:22:01.786439\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run completed: `bb1e12db-8d3f-5e80-8615-2444eda4b32a`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:22:01.951101\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOntology file 'None' not found. No owl ontology will be attached to the graph.\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:22:01.978540\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run started: `52948913-bf39-51ee-a535-e4f140f34c10`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:22:02.126845\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `classify_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:22:02.304535\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `check_permissions_on_dataset`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:22:02.471292\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAsync Generator task started: `extract_chunks_from_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:22:02.632964\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `extract_graph_from_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
|
|
"\u001b[92m14:22:02 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
|
|
"LiteLLM completion() model= gpt-5-mini; provider = openai\n",
|
|
"\n",
|
|
"\u001b[1m\n",
|
|
"LiteLLM completion() model= gpt-5-mini; provider = openai\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:22:29.524347\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'person' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:22:29.525119\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'programmer' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:22:29.525401\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'object' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:22:29.525697\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'light bulb' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:22:29.526076\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'concept' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:22:29.526395\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'hardware problem' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:22:29.526661\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'joke' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:22:29.526931\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'how many programmers does it take to change a light bulb? none, thats a hardware problem.' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:22:29.527282\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'none' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:22:31.777289\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `summarize_text`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
|
|
"\u001b[92m14:22:31 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
|
|
"LiteLLM completion() model= gpt-5-mini; provider = openai\n",
|
|
"\n",
|
|
"\u001b[1m\n",
|
|
"LiteLLM completion() model= gpt-5-mini; provider = openai\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:22:37.705311\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `add_data_points`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:22:40.603223\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `add_data_points`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:22:40.769398\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `summarize_text`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:22:40.984903\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `extract_graph_from_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:22:41.128272\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAsync Generator task completed: `extract_chunks_from_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:22:41.275155\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `check_permissions_on_dataset`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:22:41.436831\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `classify_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:22:41.606070\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run completed: `52948913-bf39-51ee-a535-e4f140f34c10`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:22:41.767152\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run started: `52948913-bf39-51ee-a535-e4f140f34c10`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:22:41.912031\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `classify_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:22:42.052884\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `check_permissions_on_dataset`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:22:42.218749\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAsync Generator task started: `extract_chunks_from_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:22:42.376103\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `extract_graph_from_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
|
|
"\u001b[92m14:22:42 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
|
|
"LiteLLM completion() model= gpt-5-mini; provider = openai\n",
|
|
"\n",
|
|
"\u001b[1m\n",
|
|
"LiteLLM completion() model= gpt-5-mini; provider = openai\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:23:24.700919\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'text' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:23:24.703407\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'how many programmers does it take to change a light bulb? none. thats a hardware problem.' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:23:24.703797\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'person' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:23:24.704185\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'programmers' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:23:24.704581\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'object' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:23:24.704897\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'light bulb' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:23:24.705215\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'concept' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:23:24.705511\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'none' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:23:24.705874\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'hardware problem' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:23:27.372108\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `summarize_text`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
|
|
"\u001b[92m14:23:27 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
|
|
"LiteLLM completion() model= gpt-5-mini; provider = openai\n",
|
|
"\n",
|
|
"\u001b[1m\n",
|
|
"LiteLLM completion() model= gpt-5-mini; provider = openai\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:23:33.517938\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `add_data_points`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:23:36.007176\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `add_data_points`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:23:36.181973\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `summarize_text`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:23:36.332093\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `extract_graph_from_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:23:36.475271\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAsync Generator task completed: `extract_chunks_from_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:23:36.717941\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `check_permissions_on_dataset`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:23:36.870258\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `classify_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:23:37.018440\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run completed: `52948913-bf39-51ee-a535-e4f140f34c10`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"{UUID('241b64d6-f023-5b87-9a8f-87056f0a442c'): PipelineRunCompleted(status='PipelineRunCompleted', pipeline_run_id=UUID('1cde937f-ae7a-5151-a20c-dc3567bee0a9'), dataset_id=UUID('241b64d6-f023-5b87-9a8f-87056f0a442c'), dataset_name='main_dataset', payload=None, data_ingestion_info=[{'run_info': PipelineRunCompleted(status='PipelineRunCompleted', pipeline_run_id=UUID('1cde937f-ae7a-5151-a20c-dc3567bee0a9'), dataset_id=UUID('241b64d6-f023-5b87-9a8f-87056f0a442c'), dataset_name='main_dataset', payload=None, data_ingestion_info=None), 'data_id': UUID('692741cd-46e5-5988-85e9-f3901d104b7e')}, {'run_info': PipelineRunCompleted(status='PipelineRunCompleted', pipeline_run_id=UUID('1cde937f-ae7a-5151-a20c-dc3567bee0a9'), dataset_id=UUID('241b64d6-f023-5b87-9a8f-87056f0a442c'), dataset_name='main_dataset', payload=None, data_ingestion_info=None), 'data_id': UUID('899de74a-1bef-5afd-a478-1ea944503514')}])}"
|
|
]
|
|
},
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"import cognee\n",
|
|
"\n",
|
|
"# Create a clean slate for cognee -- reset data and system state\n",
|
|
"await cognee.prune.prune_data()\n",
|
|
"await cognee.prune.prune_system(metadata=True)\n",
|
|
"\n",
|
|
"# Add multimedia files and make them available for cognify\n",
|
|
"await cognee.add([mp3_file_path, png_file_path])\n",
|
|
"\n",
|
|
"# Create knowledge graph with cognee\n",
|
|
"await cognee.cognify()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Query Cognee for summaries related to multimedia files"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-06-30T11:44:56.372628Z",
|
|
"start_time": "2025-06-30T11:44:55.978258Z"
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:23:56.768437\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mStarting completion generation for query: 'What is in the multimedia files?'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mSummariesRetriever\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:23:56.769790\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mStarting summary retrieval for query: 'What is in the multimedia files?'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mSummariesRetriever\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:23:57.168012\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFound 2 summaries from vector search\u001b[0m [\u001b[0m\u001b[1m\u001b[34mSummariesRetriever\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:23:57.168772\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mReturning 2 summary payloads \u001b[0m [\u001b[0m\u001b[1m\u001b[34mSummariesRetriever\u001b[0m]\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[2m2025-08-27T13:23:57.169214\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mReturning context with 2 item(s)\u001b[0m [\u001b[0m\u001b[1m\u001b[34mSummariesRetriever\u001b[0m]\u001b[0m\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"{'id': '686b9e03-4505-56ec-9295-94c53d4db004', 'created_at': 1756301013713, 'updated_at': 1756301013713, 'ontology_valid': False, 'version': 1, 'topological_rank': 0, 'type': 'IndexSchema', 'text': \"Programmer light-bulb joke: none — it's a hardware problem.\"}\n",
|
|
"{'id': '82343440-bd02-5149-ad3f-80289121146c', 'created_at': 1756300957894, 'updated_at': 1756300957894, 'ontology_valid': False, 'version': 1, 'topological_rank': 0, 'type': 'IndexSchema', 'text': \"Programmers don't change light bulbs — that's a hardware problem.\"}\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"from cognee.api.v1.search import SearchType\n",
|
|
"\n",
|
|
"# Query cognee for summaries of the data in the multimedia files\n",
|
|
"search_results = await cognee.search(\n",
|
|
" query_type=SearchType.SUMMARIES,\n",
|
|
" query_text=\"What is in the multimedia files?\",\n",
|
|
")\n",
|
|
"\n",
|
|
"# Display search results\n",
|
|
"for result_text in search_results:\n",
|
|
" print(result_text)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"ename": "",
|
|
"evalue": "",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
"\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
|
|
"\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
|
|
"\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
|
|
"\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"import os\n",
|
|
"os._exit(0)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": ".venv",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.12.7"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|