cognee/notebooks/dspy.ipynb
Vasilije bb679c2dd7
Improve processing, update networkx client, and Neo4j, and dspy (#69)
* Update cognify and the networkx client to prepare for running in Neo4j

* Fix for openai model

* Add the fix to the infra so that the models can be passed to the library. Enable llm_provider to be passed.

* Auto graph generation now works with neo4j

* Added fixes for both neo4j and networkx

* Explicitly name semantic node connections

* Added updated docs, readme, chunkers and updates to cognify

* Make docs build trigger only when changes on it happen

* Update docs, test git actions

* Separate cognify logic into tasks

* Introduce dspy knowledge graph extraction

---------
Co-authored-by: Boris Arzentar <borisarzentar@gmail.com>
2024-04-20 19:05:40 +02:00

174 lines
5.8 KiB
Text

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from dspy.datasets import HotPotQA\n",
"\n",
"# Load HotPotQA with fixed seeds and a 20-example dev split.\n",
"hotpot_dataset = HotPotQA(\n",
"    train_seed = 1,\n",
"    eval_seed = 2023,\n",
"    dev_size = 20,\n",
"    keep_details = True,\n",
")\n",
"\n",
"# The first dev example supplies the text used by the later cells.\n",
"example_data = hotpot_dataset.dev[0]\n",
"\n",
"# Entries inside one group are joined by a newline, groups by a blank line.\n",
"context_text = \"\\n\\n\".join(\n",
"    \"\\n\".join(sentence_group)\n",
"    for sentence_group in example_data.context[\"sentences\"]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from cognee.modules.cognify.extract_categories import ExtractCategories\n",
"\n",
"# Build the category extractor and run it on the context text from the first cell.\n",
"extract_categories = ExtractCategories()\n",
"categories = extract_categories(\n",
"    text = context_text,\n",
")\n",
"\n",
"print(categories)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from cognee.modules.cognify.extract_cognitive_layers import ExtractCognitiveLayers\n",
"from cognee.shared.data_models import TextContent\n",
"\n",
"extract_cognitive_layers = ExtractCognitiveLayers()\n",
"\n",
"# Use the value of the first subclass of the first extracted category.\n",
"category = categories[0].subclass[0].value\n",
"\n",
"# Extract the cognitive layers of the same context text for that category.\n",
"cognitive_layers = extract_cognitive_layers(\n",
"    text = context_text,\n",
"    category = category,\n",
")\n",
"\n",
"print(cognitive_layers)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import dspy\n",
"from uuid import uuid4\n",
"from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client, GraphDBType\n",
"from cognee.modules.cognify.generate_knowledge_graph import GenerateKnowledgeGraph\n",
"from cognee.shared.data_models import Node, Edge\n",
"\n",
"dspy.configure(trace=[])\n",
"\n",
"generate_knowledge_graph = GenerateKnowledgeGraph().activate_assertions()\n",
"\n",
"# A new uuid-based file name is generated every time this cell runs.\n",
"graph_file_name = str(uuid4()) + \".pkl\"\n",
"\n",
"graph_client = get_graph_client(GraphDBType.NETWORKX, graph_file_name)\n",
"\n",
"graph = generate_knowledge_graph(layer = 'Transportation Infrastructure', text = context_text)\n",
"\n",
"# Maps each entity type to the synthetic \"<entity_type> root\" node created for it.\n",
"root_node_per_category = {}\n",
"\n",
"for node in graph.nodes:\n",
"    # Nodes missing either a type or a name are skipped entirely.\n",
"    if node.entity_type is not None and node.entity_name is not None:\n",
"        graph_client.add_node(node.id, entity_name = node.entity_name, entity_type = node.entity_type)\n",
"\n",
"        # Create the root node the first time an entity type is seen.\n",
"        if node.entity_type not in root_node_per_category:\n",
"            root_node = Node(\n",
"                id = node.entity_type + \" root\",\n",
"                entity_name = node.entity_type,\n",
"                entity_type = node.entity_type + \" root\"\n",
"            )\n",
"            root_node_per_category[node.entity_type] = root_node\n",
"            graph_client.add_node(\n",
"                id = root_node.id,\n",
"                entity_name = root_node.entity_name,\n",
"                entity_type = root_node.entity_type\n",
"            )\n",
"\n",
"        # Link every kept node to the root node of its entity type.\n",
"        graph_client.add_edge(\n",
"            node.id,\n",
"            root_node_per_category[node.entity_type].id,\n",
"            relationship_name = \"is\"\n",
"        )\n",
"\n",
"# Copy over only the edges that have both endpoints and a relationship name.\n",
"for edge in graph.edges:\n",
"    if edge.source_node_id is not None and edge.target_node_id is not None and edge.relationship_name is not None:\n",
"        graph_client.add_edge(edge.source_node_id, edge.target_node_id, relationship_name = edge.relationship_name)\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Graph is visualized at: https://hub.graphistry.com/graph/graph.html?dataset=842a911115124473bbf23f2769dc3e96&type=arrow&viztoken=65c1d750-91fa-4e42-8696-6e8e000c34ae&usertag=993172cb-pygraphistry-0.33.5&splashAfter=1712859766&info=true\n"
]
}
],
"source": [
"import networkx as nx\n",
"import pandas as pd\n",
"import graphistry\n",
"from cognee.config import Config\n",
"from cognee.utils import render_graph\n",
"from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client, GraphDBType\n",
"\n",
"config = Config()\n",
"config.load()\n",
"\n",
"graphistry.register(\n",
"    api = 3,\n",
"    username = config.graphistry_username,\n",
"    password = config.graphistry_password\n",
")\n",
"\n",
"# NOTE(review): this file name is hard-coded and is not the uuid-based\n",
"# `graph_file_name` generated by the graph-building cell; confirm it points\n",
"# at the graph you intend to render.\n",
"graph_client = get_graph_client(GraphDBType.NETWORKX, \"32652370-04d9-418e-916d-3086aa41685c.pkl\")\n",
"graph = graph_client.graph\n",
"\n",
"edges = nx.to_pandas_edgelist(graph)\n",
"\n",
"# .get() yields None for a node that lacks an attribute instead of raising\n",
"# KeyError (presumably possible when an edge references an id that was never\n",
"# added as a node; verify against the graph client).\n",
"nodes_data = [{\n",
"    \"id\": node_id,\n",
"    \"entity_name\": node.get(\"entity_name\"),\n",
"    \"entity_type\": node.get(\"entity_type\")\n",
"} for (node_id, node) in graph.nodes(data = True)]\n",
"\n",
"nodes = pd.DataFrame(nodes_data)\n",
"\n",
"plotter = graphistry.edges(edges, source = \"source\", destination = \"target\").nodes(nodes, \"id\")\n",
"\n",
"# bind() returns a new plotter rather than modifying this one, so the result\n",
"# has to be kept for the title/label bindings to reach plot().\n",
"plotter = plotter.bind(\n",
"    edge_title = \"relationship_name\",\n",
"    edge_label = \"relationship_name\",\n",
"    point_title = \"entity_name\",\n",
"    point_label = \"entity_name\"\n",
")\n",
"url = plotter.plot(render = False, as_files = True)\n",
"print(f\"Graph is visualized at: {url}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}