* Update cognify and the networkx client to prepare for running in Neo4j * Fix for openai model * Add the fix to the infra so that the models can be passed to the library. Enable llm_provider to be passed. * Auto graph generation now works with neo4j * Added fixes for both neo4j and networkx * Explicitly name semantic node connections * Added updated docs, readme, chunkers and updates to cognify * Make docs build trigger only when the docs change * Update docs, test git actions * Separate cognify logic into tasks * Introduce dspy knowledge graph extraction --------- Co-authored-by: Boris Arzentar <borisarzentar@gmail.com>
174 lines
5.8 KiB
Text
174 lines
5.8 KiB
Text
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from dspy.datasets import HotPotQA\n",
|
|
"\n",
|
|
"hotpot_dataset = HotPotQA(train_seed = 1, eval_seed = 2023, dev_size = 20, keep_details = True)\n",
|
|
"example_data = hotpot_dataset.dev[0]\n",
|
|
"\n",
|
|
"context_text = \"\\n\\n\".join(\"\\n\".join(focused_context) for focused_context in example_data.context[\"sentences\"])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from cognee.modules.cognify.extract_categories import ExtractCategories\n",
|
|
"\n",
|
|
"extract_categories = ExtractCategories()\n",
|
|
"\n",
|
|
"categories = extract_categories(text = context_text)\n",
|
|
"print(categories)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from cognee.modules.cognify.extract_cognitive_layers import ExtractCognitiveLayers\n",
|
|
"from cognee.shared.data_models import TextContent\n",
|
|
"\n",
|
|
"extract_cognitive_layers = ExtractCognitiveLayers()\n",
|
|
"\n",
|
|
"category = categories[0].subclass[0].value\n",
|
|
"\n",
|
|
"cognitive_layers = extract_cognitive_layers(text = context_text, category = category)\n",
|
|
"\n",
|
|
"print(cognitive_layers)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import dspy\n",
|
|
"from uuid import uuid4\n",
|
|
"from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client, GraphDBType\n",
|
|
"from cognee.modules.cognify.generate_knowledge_graph import GenerateKnowledgeGraph\n",
|
|
"from cognee.shared.data_models import Node, Edge\n",
|
|
"\n",
|
|
"dspy.configure(trace=[])\n",
|
|
"\n",
|
|
"generate_knowledge_graph = GenerateKnowledgeGraph().activate_assertions()\n",
|
|
"\n",
|
|
"graph_file_name = str(uuid4()) + \".pkl\"\n",
|
|
"\n",
|
|
"graph_client = get_graph_client(GraphDBType.NETWORKX, graph_file_name)\n",
|
|
"\n",
|
|
"graph = generate_knowledge_graph(layer = 'Transportation Infrastructure', text = context_text)\n",
|
|
"\n",
|
|
"root_node_per_category = {}\n",
|
|
"\n",
|
|
"for node in graph.nodes:\n",
|
|
" if node.entity_type is not None and node.entity_name is not None:\n",
|
|
" graph_client.add_node(node.id, entity_name = node.entity_name, entity_type = node.entity_type)\n",
|
|
"\n",
|
|
" if node.entity_type not in root_node_per_category:\n",
|
|
" root_node = Node(\n",
|
|
" id = node.entity_type + \" root\",\n",
|
|
" entity_name = node.entity_type,\n",
|
|
" entity_type = node.entity_type + \" root\"\n",
|
|
" )\n",
|
|
" root_node_per_category[node.entity_type] = root_node\n",
|
|
" graph_client.add_node(\n",
|
|
" id = root_node.id,\n",
|
|
" entity_name = root_node.entity_name,\n",
|
|
" entity_type = root_node.entity_type\n",
|
|
" )\n",
|
|
"\n",
|
|
" graph_client.add_edge(\n",
|
|
" node.id,\n",
|
|
" root_node_per_category[node.entity_type].id,\n",
|
|
" relationship_name = \"is\"\n",
|
|
" )\n",
|
|
"\n",
|
|
"for edge in graph.edges:\n",
|
|
" if edge.source_node_id is not None and edge.target_node_id is not None and edge.relationship_name is not None:\n",
|
|
" graph_client.add_edge(edge.source_node_id, edge.target_node_id, relationship_name = edge.relationship_name)\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Graph is visualized at: https://hub.graphistry.com/graph/graph.html?dataset=842a911115124473bbf23f2769dc3e96&type=arrow&viztoken=65c1d750-91fa-4e42-8696-6e8e000c34ae&usertag=993172cb-pygraphistry-0.33.5&splashAfter=1712859766&info=true\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"import networkx as nx\n",
|
|
"import pandas as pd\n",
|
|
"import graphistry\n",
|
|
"from cognee.config import Config\n",
|
|
"from cognee.utils import render_graph\n",
|
|
"from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client, GraphDBType\n",
|
|
"\n",
|
|
"config = Config()\n",
|
|
"config.load()\n",
|
|
"\n",
|
|
"graphistry.register(\n",
|
|
" api = 3,\n",
|
|
" username = config.graphistry_username,\n",
|
|
" password = config.graphistry_password\n",
|
|
")\n",
|
|
"\n",
|
|
"graph_client = get_graph_client(GraphDBType.NETWORKX, \"32652370-04d9-418e-916d-3086aa41685c.pkl\")\n",
|
|
"graph = graph_client.graph\n",
|
|
"\n",
|
|
"edges = nx.to_pandas_edgelist(graph)\n",
|
|
"\n",
|
|
"nodes_data = [{\n",
|
|
" \"id\": node_id,\n",
|
|
" \"entity_name\": node[\"entity_name\"],\n",
|
|
" \"entity_type\": node[\"entity_type\"]\n",
|
|
"} for (node_id, node) in graph.nodes(data = True)]\n",
|
|
"\n",
|
|
"nodes = pd.DataFrame(nodes_data)\n",
|
|
"\n",
|
|
"plotter = graphistry.edges(edges, source = \"source\", destination = \"target\").nodes(nodes, \"id\")\n",
|
|
"\n",
|
|
"plotter = plotter.bind(edge_title = \"relationship_name\", edge_label = \"relationship_name\", point_title = \"entity_name\", point_label = \"entity_name\")\n",
|
|
"url = plotter.plot(render = False, as_files = True)\n",
|
|
"print(f\"Graph is visualized at: {url}\")"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": ".venv",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.11.8"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|