diff --git a/Demo_graph.ipynb b/Demo_graph.ipynb index d99d81349..5cea71c62 100644 --- a/Demo_graph.ipynb +++ b/Demo_graph.ipynb @@ -102,7 +102,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 1, "id": "8a8942b5-91d6-4746-b35d-00f58bc16d7b", "metadata": {}, "outputs": [], @@ -140,7 +140,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 2, "id": "14484e25-fae8-4306-b03f-dae91fe5d0aa", "metadata": {}, "outputs": [], @@ -160,7 +160,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 3, "id": "50d5afda-418f-436b-b467-004863193d4a", "metadata": {}, "outputs": [], @@ -334,7 +334,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 4, "id": "f97f11f1-4490-49ea-b193-1f858e72893b", "metadata": {}, "outputs": [], @@ -349,7 +349,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 5, "id": "84da594a-459e-4ec5-9a5c-3a6cc3ab98af", "metadata": {}, "outputs": [], @@ -359,7 +359,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 6, "id": "f56ae869-0dce-41f2-9db0-5f8d5eccba52", "metadata": {}, "outputs": [ @@ -367,7 +367,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'label': {'type': 'TEXT', 'subclass': []}}\n" + "{'label': {'type': 'TEXT', 'subclass': []}}\n" ] } ], @@ -377,7 +377,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 7, "id": "6e64e72f-d18b-4d21-85d6-55ed3621124a", "metadata": {}, "outputs": [], @@ -388,7 +388,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 8, "id": "cdeb3631-fb55-4580-a5e5-d2a193a44e79", "metadata": {}, "outputs": [ @@ -432,7 +432,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 9, "id": "fad0c4b0-cd61-4c3c-9964-47f019278060", "metadata": {}, "outputs": [], @@ -468,7 +468,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 10, "id": "709ec529-bb91-45cd-82cb-c122eb69fcd7", "metadata": {}, "outputs": [ @@ -477,10 +477,10 @@ "text/plain": [ "{'data_type': 'text',\n", " 'context_name': 'TEXT',\n", - " 'layer_name': 'News stories and blog posts'}" + " 'layer_name': 'Articles, essays, and reports'}" ] }, - "execution_count": 53, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -491,7 +491,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 11, "id": "06b483bf-2fa0-414f-8253-27ffe9a2881c", "metadata": {}, "outputs": [], @@ -501,7 +501,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 12, "id": "35461aff-fd80-4eb2-94b2-66c742db8e55", "metadata": {}, "outputs": [], @@ -511,7 +511,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 13, "id": "41d06ecb-83b9-4284-8d88-6a3f710cb457", "metadata": {}, "outputs": [ @@ -519,7 +519,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Extracted Layer Names: ['Contextual Layer', 'Thematic Layer', 'Sentimental Layer', 'Structural Layer', 'Temporal Layer', 'Interactivity Layer', 'Semantic Layer', 'Network Layer']\n" + "Extracted Layer Names: ['Structural Analysis', 'Thematic Analysis', 'Semantic Analysis', 'Sentiment Analysis', 'Referential Analysis', 'Lexical Richness', 'Authorship Style']\n" ] } ], @@ -531,7 +531,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 14, "id": "1a287a2a-2fb5-4ad3-a69e-80ed2e2ffa5a", "metadata": {}, "outputs": [ @@ -539,7 +539,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Extracted Layer Names: ['Semantic Content Layer', 'Entity and Relationship Layer', 'Sentiment and Emotion Layer', 'Temporal Layer', 'Geospatial Layer', 'Topical and Thematic Layer']\n" + "Extracted Layer Names: ['Thematic Layer', 'Semantic Layer', 'Structural Layer', 'Entity Layer', 'Sentiment Layer', 'Temporal Layer', 'Source Layer']\n" ] } ], @@ -551,7 +551,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 15, "id": "609b1287-e0bf-42a5-856a-f2e0d859ea8b", "metadata": {}, "outputs": [], @@ -561,7 +561,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 16, "id": "42dbf97d-79b9-4627-b307-b64ac22db4f7", "metadata": {}, "outputs": [ @@ -570,10 +570,10 @@ "text/plain": [ "{'data_type': 'text',\n", " 'context_name': 'TEXT',\n", - " 'layer_name': 'News stories and blog posts'}" + " 'layer_name': 'Articles, essays, and reports'}" ] }, - "execution_count": 59, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -584,7 +584,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 17, "id": "25adeeb7-cce2-4eac-8fb5-4ff47029d77d", "metadata": {}, "outputs": [], @@ -616,7 +616,165 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 18, + "id": "2c49c020-8fd0-4c8b-ae33-a593e50b2a6f", + "metadata": {}, + "outputs": [], + "source": [ + "import networkx as nx\n", + "from pydantic import BaseModel\n", + "from typing import Optional, Any, List, Dict\n", + "from datetime import datetime\n", + "\n", + "# Models for representing different entities\n", + "class Relationship(BaseModel):\n", + " type: str\n", + " properties: Optional[Dict[str, Any]] = None\n", + "\n", + "class DocumentType(BaseModel):\n", + " type_id: str\n", + " description: str\n", + " default_relationship: Relationship = Relationship(type='is_type')\n", + "\n", + "class Category(BaseModel):\n", + " category_id: str\n", + " name: str\n", + " default_relationship: Relationship = Relationship(type='categorized_as')\n", + "\n", + "class Document(BaseModel):\n", + " doc_id: str\n", + " title: str\n", + " summary: Optional[str] = None\n", + " content_id: Optional[str] = None\n", + " doc_type: Optional[DocumentType] = None\n", + " categories: List[Category] = []\n", + " default_relationship: Relationship = Relationship(type='has_document')\n", + "\n", + "class UserLocation(BaseModel):\n", + " location_id: str\n", + " description: str\n", + " default_relationship: Relationship = Relationship(type='located_in')\n", + "\n", + "class UserProperties(BaseModel):\n", + " custom_properties: Optional[Dict[str, Any]] = None\n", + " location: Optional[UserLocation] = None\n", + "\n", + "class GraphModel(BaseModel):\n", + " id: str\n", + " user_properties: UserProperties = UserProperties()\n", + " documents: List[Document] = []\n", + " default_fields: Optional[Dict[str, Any]] = {}\n", + "\n", + "def generate_node_id(instance: BaseModel) -> str:\n", + " for field in ['id', 'doc_id', 'location_id', 'type_id']:\n", + " if hasattr(instance, field):\n", + " return f\"{instance.__class__.__name__}:{getattr(instance, field)}\"\n", + " return f\"{instance.__class__.__name__}:default\"\n", + "\n", + "def add_node_and_edge(G, parent_id: Optional[str], node_id: str, node_data: dict, relationship_data: dict):\n", + " G.add_node(node_id, **node_data) # Add the current node with its data\n", + " if parent_id:\n", + " # Add an edge between the parent node and the current node with the correct relationship data\n", + " G.add_edge(parent_id, node_id, **relationship_data)\n", + "\n", + "def process_attribute(G, parent_id: Optional[str], attribute: str, value: Any):\n", + " if isinstance(value, BaseModel):\n", + " node_id = generate_node_id(value)\n", + " node_data = value.dict(exclude={'default_relationship'})\n", + " # Use the specified default relationship for the edge between the parent node and the current node\n", + " relationship_data = value.default_relationship.dict() if hasattr(value, 'default_relationship') else {}\n", + " add_node_and_edge(G, parent_id, node_id, node_data, relationship_data)\n", + "\n", + " # Recursively process nested attributes to ensure all nodes and relationships are added to the graph\n", + " for sub_attr, sub_val in value.__dict__.items(): # Access attributes and their values directly\n", + " process_attribute(G, node_id, sub_attr, sub_val)\n", + "\n", + " elif isinstance(value, list) and all(isinstance(item, BaseModel) for item in value):\n", + " # For lists of BaseModel instances, process each item in the list\n", + " for item in value:\n", + " process_attribute(G, parent_id, attribute, item)\n", + "\n", + "def create_dynamic(graph_model: BaseModel, existing_graph: Optional[nx.Graph] = None) -> nx.Graph:\n", + " G = existing_graph or nx.Graph()\n", + " root_id = generate_node_id(graph_model)\n", + " print(root_id)\n", + " G.add_node(root_id, **graph_model.dict(exclude={'default_relationship'}))\n", + "\n", + " for attribute_name, attribute_value in graph_model:\n", + " process_attribute(G, root_id, attribute_name, attribute_value)\n", + "\n", + " return G\n", + "\n", + "# Example usage with GraphModel instance\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "02dbe7f1-32cd-411a-8480-00f4fc342afc", + "metadata": {}, + "outputs": [], + "source": [ + "graph_model_instance = GraphModel(\n", + " id=\"user123\",\n", + " documents=[\n", + " Document(\n", + " doc_id=\"doc1\",\n", + " title=\"Document 1\",\n", + " summary=\"Summary of Document 1\",\n", + " content_id=\"content_id_for_doc1\", # Assuming external content storage ID\n", + " doc_type=DocumentType(type_id=\"PDF\", description=\"Portable Document Format\"),\n", + " categories=[\n", + " Category(category_id=\"finance\", name=\"Finance\", default_relationship=Relationship(type=\"belongs_to\")),\n", + " Category(category_id=\"tech\", name=\"Technology\", default_relationship=Relationship(type=\"belongs_to\"))\n", + " ],\n", + " default_relationship=Relationship(type='has_document')\n", + " ),\n", + " Document(\n", + " doc_id=\"doc2\",\n", + " title=\"Document 2\",\n", + " summary=\"Summary of Document 2\",\n", + " content_id=\"content_id_for_doc2\",\n", + " doc_type=DocumentType(type_id=\"TXT\", description=\"Text File\"),\n", + " categories=[\n", + " Category(category_id=\"health\", name=\"Health\", default_relationship=Relationship(type=\"belongs_to\")),\n", + " Category(category_id=\"wellness\", name=\"Wellness\", default_relationship=Relationship(type=\"belongs_to\"))\n", + " ],\n", + " default_relationship=Relationship(type='has_document')\n", + " )\n", + " ],\n", + " user_properties=UserProperties(\n", + " custom_properties={\"age\": \"30\"},\n", + " location=UserLocation(location_id=\"ny\", description=\"New York\", default_relationship=Relationship(type='located_in'))\n", + " ),\n", + " default_fields={\n", + " 'created_at': datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\"),\n", + " 'updated_at': datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n", + " }\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "062a317c-0dee-4ce9-959b-6f2ce50e652b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GraphModel:user123\n" + ] + } + ], + "source": [ + "R = create_dynamic(graph_model_instance)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, "id": "b59a52d7-d82b-4546-b0b1-a3d0f62a2a65", "metadata": {}, "outputs": [], @@ -626,24 +784,23 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 22, "id": "3e7b9fde-fcd5-4891-b43c-177d3877559d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['Contextual Layer',\n", - " 'Thematic Layer',\n", - " 'Sentimental Layer',\n", - " 'Structural Layer',\n", - " 'Temporal Layer',\n", - " 'Interactivity Layer',\n", - " 'Semantic Layer',\n", - " 'Network Layer']" + "['Structural Analysis',\n", + " 'Thematic Analysis',\n", + " 'Semantic Analysis',\n", + " 'Sentiment Analysis',\n", + " 'Referential Analysis',\n", + " 'Lexical Richness',\n", + " 'Authorship Style']" ] }, - "execution_count": 70, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -654,7 +811,7 @@ }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 23, "id": "f06edd84-c455-4034-a38b-3a7d2f746f42", "metadata": {}, "outputs": [], @@ -685,7 +842,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "a34971b4-d1fa-4db8-abbc-395bc70b0b49", "metadata": {}, "outputs": [], @@ -695,15 +852,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "id": "de3bdbb7-0b2b-46fa-a42f-3ca288c4d875", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "layer_2_graph = await async_graph_per_layer(input_article_one, cognitive_layers_two)" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "id": "088e92a0-06e7-4128-b9b8-eacd9e735cb4", "metadata": {}, "outputs": [], @@ -714,118 +873,7 @@ }, { "cell_type": "code", - "execution_count": 54, - "id": "142f4bd8-ec50-4715-ba58-981bda65116c", - "metadata": {}, - "outputs": [], - "source": [ - " # print(\"Nodes and their data:\")\n", - " # for node, data in U.nodes(data=True):\n", - " # print(node, data)\n", - "\n", - " # # Print edges with their data\n", - " # print(\"\\nEdges and their data:\")\n", - " # for source, target, data in U.edges(data=True):\n", - " # print(f\"{source} -> {target} {data}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "58644c64-7ef0-415f-8e41-e2edcf5fd15b", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c94056bd-8d32-48e2-9982-e9af06da3333", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dd3f0e55-9f9d-4804-9ad6-31afd2088ab5", - "metadata": {}, - "outputs": [], - "source": [ - "# Example usage\n", - "# user_id = 'user123'\n", - "# custom_user_properties = {\n", - "# 'username': 'exampleUser',\n", - "# 'email': 'user@example.com'\n", - "# }\n", - "\n", - "# additional_categories = {\n", - "# \"Natural Language Text\": [\"Articles, essays, and reports\", \"Books and manuscripts\"]\n", - "# }\n", - "\n", - "# G = await create_semantic_graph(user_id, custom_user_properties, transformed_dict_1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2cc7c3bb-7cc0-453b-beab-2983a703ccda", - "metadata": {}, - "outputs": [], - "source": [ - "# transformed_dict_1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b3160a1d-a6ea-40ce-a521-37ad26d31ffb", - "metadata": {}, - "outputs": [], - "source": [ - "# print(\"Nodes in the graph:\")\n", - "# print(G.nodes(data=True))\n", - "# print(\"\\nEdges in the graph:\")\n", - "# print(G.edges(data=True))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b254dfc9-ce85-4175-9d1e-c0f1ede67e3b", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a77b4f24-3046-4ab6-9ba1-c802096498df", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4dab2ff0-0d12-4a00-a4e4-fb901e701bd3", - "metadata": {}, - "outputs": [], - "source": [ - "# B = create_user_content_graph(user_id, custom_user_properties, transformed_dict_2, existing_graph=G)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "627d42fd-d2ce-4ccd-a2a1-2f7ac2f463cf", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 73, + "execution_count": 27, "id": "512f15be-0114-4c8c-9754-e82f2fa16344", "metadata": {}, "outputs": [ @@ -833,7 +881,7 @@ "data": { "text/html": [ "\n", - "