188 lines
5.2 KiB
Text
Vendored
188 lines
5.2 KiB
Text
Vendored
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Cognee GraphRAG with Multimedia files"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"vscode": {
|
||
"languageId": "plaintext"
|
||
}
|
||
},
|
||
"source": [
|
||
"## Load Data\n",
|
||
"\n",
|
||
"We will use a few sample multimedia files which we have on GitHub for easy access."
|
||
]
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-06-30T11:54:44.613431Z",
|
||
"start_time": "2025-06-30T11:54:44.606687Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"import os\n",
|
||
"import pathlib\n",
|
||
"\n",
|
||
"# cognee knowledge graph will be created based on the text\n",
|
||
"# and description of these files\n",
|
||
"mp3_file_path = os.path.join(\n",
|
||
" os.path.abspath(\"\"),\n",
|
||
" \"../\",\n",
|
||
" \"examples/data/multimedia/text_to_speech.mp3\",\n",
|
||
")\n",
|
||
"png_file_path = os.path.join(\n",
|
||
" os.path.abspath(\"\"),\n",
|
||
" \"../\",\n",
|
||
" \"examples/data/multimedia/example.png\",\n",
|
||
")"
|
||
],
|
||
"outputs": [],
|
||
"execution_count": 1
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Set environment variables"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-06-30T11:54:46.739157Z",
|
||
"start_time": "2025-06-30T11:54:46.734808Z"
|
||
}
|
||
},
|
||
"source": [
|
||
"import os\n",
|
||
"\n",
|
||
"if \"LLM_API_KEY\" not in os.environ:\n",
|
||
" os.environ[\"LLM_API_KEY\"] = \"\"\n",
|
||
"\n",
|
||
"# \"neo4j\" or \"networkx\"\n",
|
||
"os.environ[\"GRAPH_DATABASE_PROVIDER\"] = \"kuzu\"\n",
|
||
"# Not needed if using networkx\n",
|
||
"# os.environ[\"GRAPH_DATABASE_URL\"]=\"\"\n",
|
||
"# os.environ[\"GRAPH_DATABASE_USERNAME\"]=\"\"\n",
|
||
"# os.environ[\"GRAPH_DATABASE_PASSWORD\"]=\"\"\n",
|
||
"\n",
|
||
"# \"pgvector\", \"qdrant\", \"weaviate\" or \"lancedb\"\n",
|
||
"os.environ[\"VECTOR_DB_PROVIDER\"] = \"lancedb\"\n",
|
||
"# Not needed if using \"lancedb\" or \"pgvector\"\n",
|
||
"# os.environ[\"VECTOR_DB_URL\"]=\"\"\n",
|
||
"# os.environ[\"VECTOR_DB_KEY\"]=\"\"\n",
|
||
"\n",
|
||
"# Relational Database provider \"sqlite\" or \"postgres\"\n",
|
||
"os.environ[\"DB_PROVIDER\"] = \"sqlite\"\n",
|
||
"\n",
|
||
"# Database name\n",
|
||
"os.environ[\"DB_NAME\"] = \"cognee_db\"\n",
|
||
"\n",
|
||
"# Postgres specific parameters (Only if Postgres or PGVector is used)\n",
|
||
"# os.environ[\"DB_HOST\"]=\"127.0.0.1\"\n",
|
||
"# os.environ[\"DB_PORT\"]=\"5432\"\n",
|
||
"# os.environ[\"DB_USERNAME\"]=\"cognee\"\n",
|
||
"# os.environ[\"DB_PASSWORD\"]=\"cognee\""
|
||
],
|
||
"outputs": [],
|
||
"execution_count": 2
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Run Cognee with multimedia files"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"metadata": {},
|
||
"source": [
|
||
"import cognee\n",
|
||
"\n",
|
||
"# Create a clean slate for cognee -- reset data and system state\n",
|
||
"await cognee.prune.prune_data()\n",
|
||
"await cognee.prune.prune_system(metadata=True)\n",
|
||
"\n",
|
||
"# Add multimedia files and make them available for cognify\n",
|
||
"await cognee.add([mp3_file_path, png_file_path])\n",
|
||
"\n",
|
||
"# Create knowledge graph with cognee\n",
|
||
"await cognee.cognify()"
|
||
],
|
||
"outputs": [],
|
||
"execution_count": null
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Query Cognee for summaries related to multimedia files"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-06-30T11:44:56.372628Z",
|
||
"start_time": "2025-06-30T11:44:55.978258Z"
|
||
}
|
||
},
|
||
"source": [
|
||
"from cognee.api.v1.search import SearchType\n",
|
||
"\n",
|
||
"# Query cognee for summaries of the data in the multimedia files\n",
|
||
"search_results = await cognee.search(\n",
|
||
" query_type=SearchType.SUMMARIES,\n",
|
||
" query_text=\"What is in the multimedia files?\",\n",
|
||
")\n",
|
||
"\n",
|
||
"# Display search results\n",
|
||
"for result_text in search_results:\n",
|
||
" print(result_text)"
|
||
],
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"{'id': '3b530220-7e7c-52a2-8b62-ce5adce1a46c', 'created_at': 1751283883122, 'updated_at': 1751283883122, 'ontology_valid': False, 'version': 1, 'topological_rank': 0, 'type': 'IndexSchema', 'text': \"The joke queries the number of programmers required to change a light bulb and answers, 'None. That’s a hardware issue.' This humor highlights the divide between software and hardware challenges in programming.\"}\n",
|
||
"{'id': '128eb96e-fd36-53ef-ab6d-d4884ecbfee9', 'created_at': 1751283883122, 'updated_at': 1751283883122, 'ontology_valid': False, 'version': 1, 'topological_rank': 0, 'type': 'IndexSchema', 'text': \"Changing a light bulb doesn't require programmers.\"}\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 5
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": ".venv",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.9.6"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|