<!-- .github/pull_request_template.md -->

## Description
<!-- Provide a clear description of the changes in this PR -->

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.

---------

Co-authored-by: Igor Ilic <igorilic03@gmail.com>
171 lines
4.4 KiB
Text
171 lines
4.4 KiB
Text
{
|
|
"cells": [
|
|
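{
 "cell_type": "markdown",
 "metadata": {},
 "source": [
  "## Cognee GraphRAG with Multimedia files\n",
  "\n",
  "This notebook adds an audio file and an image to cognee, builds a knowledge graph from them, and then queries cognee for summaries of their content."
 ]
},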
|
|
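{
 "cell_type": "markdown",
 "metadata": {
  "vscode": {
   "languageId": "plaintext"
  }
 },
 "source": [
  "## Load Data\n",
  "\n",
  "We will use two sample multimedia files, an MP3 audio clip and a PNG image, which we keep on GitHub under `examples/data/multimedia/` for easy access. The paths below are built relative to the directory the notebook is run from, so run it from a local checkout in which `examples/` sits one level above that directory."
 ]
},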
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
  "import os\n",
  "import pathlib\n",
  "\n",
  "# cognee builds its knowledge graph from the text and description of these\n",
  "# two files. os.path.abspath(\"\") is the current working directory, so both\n",
  "# paths point into examples/data/multimedia one level above it.\n",
  "mp3_file_path, png_file_path = (\n",
  "    os.path.join(os.path.abspath(\"\"), \"../\", relative_path)\n",
  "    for relative_path in (\n",
  "        \"examples/data/multimedia/text_to_speech.mp3\",\n",
  "        \"examples/data/multimedia/example.png\",\n",
  "    )\n",
  ")"
 ]
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Set environment variables"
|
|
]
|
|
},
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
  "import os\n",
  "\n",
  "# Credentials: keep any value already present in the environment and fall\n",
  "# back to an empty placeholder otherwise.\n",
  "for credential_name in (\"GRAPHISTRY_USERNAME\", \"GRAPHISTRY_PASSWORD\", \"LLM_API_KEY\"):\n",
  "    os.environ.setdefault(credential_name, \"\")\n",
  "\n",
  "# Graph database provider: \"neo4j\" or \"networkx\"\n",
  "os.environ[\"GRAPH_DATABASE_PROVIDER\"] = \"networkx\"\n",
  "# Not needed if using networkx\n",
  "# os.environ[\"GRAPH_DATABASE_URL\"]=\"\"\n",
  "# os.environ[\"GRAPH_DATABASE_USERNAME\"]=\"\"\n",
  "# os.environ[\"GRAPH_DATABASE_PASSWORD\"]=\"\"\n",
  "\n",
  "# Vector database provider: \"pgvector\", \"qdrant\", \"weaviate\" or \"lancedb\"\n",
  "os.environ[\"VECTOR_DB_PROVIDER\"] = \"lancedb\"\n",
  "# Not needed if using \"lancedb\" or \"pgvector\"\n",
  "# os.environ[\"VECTOR_DB_URL\"]=\"\"\n",
  "# os.environ[\"VECTOR_DB_KEY\"]=\"\"\n",
  "\n",
  "# Relational database provider: \"sqlite\" or \"postgres\"\n",
  "os.environ[\"DB_PROVIDER\"] = \"sqlite\"\n",
  "\n",
  "# Relational database name\n",
  "os.environ[\"DB_NAME\"] = \"cognee_db\"\n",
  "\n",
  "# Postgres specific parameters (Only if Postgres or PGVector is used)\n",
  "# os.environ[\"DB_HOST\"]=\"127.0.0.1\"\n",
  "# os.environ[\"DB_PORT\"]=\"5432\"\n",
  "# os.environ[\"DB_USERNAME\"]=\"cognee\"\n",
  "# os.environ[\"DB_PASSWORD\"]=\"cognee\""
 ]
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Run Cognee with multimedia files"
|
|
]
|
|
},
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
  "import cognee\n",
  "\n",
  "# Start from a clean slate: reset cognee's data and system state\n",
  "await cognee.prune.prune_data()\n",
  "await cognee.prune.prune_system(metadata=True)\n",
  "\n",
  "# Add both multimedia files so they are available for cognify\n",
  "multimedia_files = [mp3_file_path, png_file_path]\n",
  "await cognee.add(multimedia_files)\n",
  "\n",
  "# Create the knowledge graph with cognee\n",
  "await cognee.cognify()"
 ]
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Query Cognee for summaries related to multimedia files"
|
|
]
|
|
},
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
  "from cognee.api.v1.search import SearchType\n",
  "\n",
  "# Ask cognee for summaries of the data in the multimedia files\n",
  "summary_query = \"What is in the multimedia files?\"\n",
  "search_results = await cognee.search(\n",
  "    query_type=SearchType.SUMMARIES,\n",
  "    query_text=summary_query,\n",
  ")\n",
  "\n",
  "# Print each returned result on its own line\n",
  "for summary in search_results:\n",
  "    print(summary)"
 ]
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": ".venv",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.9.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|