diff --git a/Demo_graph.ipynb b/Demo_graph.ipynb index 3cdd4fd5b..ca4148543 100644 --- a/Demo_graph.ipynb +++ b/Demo_graph.ipynb @@ -129,7 +129,13 @@ "aclient = instructor.patch(OpenAI())\n", "\n", "from typing import Optional, List\n", - "from pydantic import BaseModel, Field\n" + "from pydantic import BaseModel, Field\n", + "\n", + "from cognitive_architecture.modules.cognify.llm.classify_content import classify_into_categories\n", + "from cognitive_architecture.modules.cognify.llm.content_to_cog_layers import content_to_cog_layers\n", + "from cognitive_architecture.modules.cognify.llm.content_to_propositions import generate_graph\n", + "from cognitive_architecture.shared.data_models import DefaultContentPrediction, KnowledgeGraph, DefaultCognitiveLayer\n", + "\n" ] }, { @@ -341,40 +347,40 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "14ef9446-ec16-4657-9f83-a4c1c9ef2eba", "metadata": {}, "outputs": [], - "source": [ - "import os" - ] + "source": [] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 4, "id": "f97f11f1-4490-49ea-b193-1f858e72893b", "metadata": {}, "outputs": [], "source": [ + "import os\n", "from cognitive_architecture.modules.cognify.llm.classify_content import classify_into_categories\n", "from cognitive_architecture.modules.cognify.llm.content_to_cog_layers import content_to_cog_layers\n", "from cognitive_architecture.modules.cognify.llm.content_to_propositions import generate_graph\n", - "from cognitive_architecture.shared.data_models import ContentPrediction, KnowledgeGraph, CognitiveLayer" + "from cognitive_architecture.shared.data_models import DefaultContentPrediction, KnowledgeGraph, DefaultCognitiveLayer\n", + "# from cognitive_architecture.modules.cognify.graph import create_semantic_graph" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "84da594a-459e-4ec5-9a5c-3a6cc3ab98af", + "metadata": {}, + "outputs": [], + "source": [ + "required_layers_one = await 
classify_into_categories(input_article_one, \"classify_content.txt\", DefaultContentPrediction)" ] }, { "cell_type": "code", "execution_count": 6, - "id": "84da594a-459e-4ec5-9a5c-3a6cc3ab98af", - "metadata": {}, - "outputs": [], - "source": [ - "required_layers_one = await classify_into_categories(input_article_one, \"classify_content.txt\", ContentPrediction)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, "id": "f56ae869-0dce-41f2-9db0-5f8d5eccba52", "metadata": {}, "outputs": [ @@ -392,18 +398,18 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "id": "6e64e72f-d18b-4d21-85d6-55ed3621124a", "metadata": {}, "outputs": [], "source": [ "#note that you can provide your own Pydantic model that would represent your own categorisation\n", - "required_layers_two = await classify_into_categories(input_article_two, \"classify_content.txt\", ContentPrediction)" + "required_layers_two = await classify_into_categories(input_article_two, \"classify_content.txt\", DefaultContentPrediction)" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "id": "cdeb3631-fb55-4580-a5e5-d2a193a44e79", "metadata": {}, "outputs": [ @@ -411,7 +417,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'label': {'type': 'TEXT', 'subclass': []}}\n" + "{'label': {'type': 'TEXT', 'subclass': []}}\n" ] } ], @@ -439,7 +445,15 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, + "id": "1c04c116-8f2e-4957-887a-aaf71874e8c0", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 9, "id": "fad0c4b0-cd61-4c3c-9964-47f019278060", "metadata": {}, "outputs": [], @@ -448,6 +462,7 @@ " # Extract the first subclass from the list (assuming there could be more)\n", " subclass_enum = original['label']['subclass'][0]\n", "\n", + "\n", " # The data type is derived from 'type' and converted to lowercase\n", " data_type = original['label']['type'].lower()\n", " \n", @@ 
-460,8 +475,8 @@ " # Construct the new dictionary\n", " new_dict = {\n", " 'data_type': data_type,\n", - " 'context_name': context_name,\n", - " 'layer_name': layer_name\n", + " 'context_name': data_type.upper(), #llm context classification\n", + " 'layer_name': layer_name #llm layer classification\n", " }\n", "\n", " return new_dict\n", @@ -474,69 +489,50 @@ }, { "cell_type": "code", - "execution_count": 12, - "id": "06b483bf-2fa0-414f-8253-27ffe9a2881c", + "execution_count": 10, + "id": "709ec529-bb91-45cd-82cb-c122eb69fcd7", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "You are tasked with analyzing `text` files, especially in a multilayer network context for tasks such as analysis, categorization, and feature extraction. Various layers can be incorporated to capture the depth and breadth of information contained within the text.\n", - "\n", - "These layers can help in understanding the content, context, and characteristics of the `text`.\n", - "\n", - "Your objective is to extract meaningful layers of information that will contribute to constructing a detailed multilayer network or knowledge graph.\n", - "\n", - "Approach this task by considering the unique characteristics and inherent properties of the data at hand.\n", - "\n", - "VERY IMPORTANT: The context you are working in is `Articles, essays, and reports` and the specific domain you are extracting data on is `Articles, essays, and reports`.\n", - "\n", - "Guidelines for Layer Extraction:\n", - "Take into account: The content type, in this case, is: `Articles, essays, and reports`, should play a major role in how you decompose into layers.\n", - "\n", - "Based on your analysis, define and describe the layers you've identified, explaining their relevance and contribution to understanding the dataset. 
Your independent identification of layers will enable a nuanced and multifaceted representation of the data, enhancing applications in knowledge discovery, content analysis, and information retrieval.\n" - ] + "data": { + "text/plain": [ + "{'data_type': 'text',\n", + " 'context_name': 'TEXT',\n", + " 'layer_name': 'Articles, essays, and reports'}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "cognitive_layers_one = await content_to_cog_layers(\"generate_cog_layers.txt\", transformed_dict_1, response_model=CognitiveLayer)" + "transformed_dict_1" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "06b483bf-2fa0-414f-8253-27ffe9a2881c", + "metadata": {}, + "outputs": [], + "source": [ + "cognitive_layers_one = await content_to_cog_layers(\"generate_cog_layers.txt\", transformed_dict_1, response_model=DefaultCognitiveLayer)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "35461aff-fd80-4eb2-94b2-66c742db8e55", + "metadata": {}, + "outputs": [], + "source": [ + "cognitive_layers_two = await content_to_cog_layers(\"generate_cog_layers.txt\", transformed_dict_2, response_model=DefaultCognitiveLayer)" ] }, { "cell_type": "code", "execution_count": 13, - "id": "35461aff-fd80-4eb2-94b2-66c742db8e55", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "You are tasked with analyzing `text` files, especially in a multilayer network context for tasks such as analysis, categorization, and feature extraction. 
Various layers can be incorporated to capture the depth and breadth of information contained within the text.\n", - "\n", - "These layers can help in understanding the content, context, and characteristics of the `text`.\n", - "\n", - "Your objective is to extract meaningful layers of information that will contribute to constructing a detailed multilayer network or knowledge graph.\n", - "\n", - "Approach this task by considering the unique characteristics and inherent properties of the data at hand.\n", - "\n", - "VERY IMPORTANT: The context you are working in is `Personal narratives and stories` and the specific domain you are extracting data on is `Personal narratives and stories`.\n", - "\n", - "Guidelines for Layer Extraction:\n", - "Take into account: The content type, in this case, is: `Personal narratives and stories`, should play a major role in how you decompose into layers.\n", - "\n", - "Based on your analysis, define and describe the layers you've identified, explaining their relevance and contribution to understanding the dataset. 
Your independent identification of layers will enable a nuanced and multifaceted representation of the data, enhancing applications in knowledge discovery, content analysis, and information retrieval.\n" - ] - } - ], - "source": [ - "cognitive_layers_two = await content_to_cog_layers(\"generate_cog_layers.txt\", transformed_dict_2, response_model=CognitiveLayer)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, "id": "41d06ecb-83b9-4284-8d88-6a3f710cb457", "metadata": {}, "outputs": [ @@ -544,7 +540,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Extracted Layer Names: ['Structural Layer', 'Semantic Layer', 'Referential Layer', 'Lexical Layer', 'Narrative Layer', 'Contextual Layer', 'Intertextual Layer', 'Visual Layer', 'Interactional Layer', 'Statistical Layer']\n" + "Extracted Layer Names: ['Semantic Layer', 'Syntactic Layer', 'Referential Layer', 'Contextual Layer', 'Lexical Layer', 'Narrative Layer', 'Discourse Layer', 'Pragmatic Layer', 'Stylistic Layer']\n" ] } ], @@ -556,7 +552,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "id": "1a287a2a-2fb5-4ad3-a69e-80ed2e2ffa5a", "metadata": {}, "outputs": [ @@ -564,7 +560,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Extracted Layer Names: ['Narrative Structure', 'Character Identification', 'Emotional Trajectories', 'Thematic Elements', 'Cultural and Social Context', 'Temporal Aspects', 'Language and Style', 'Interpersonal Dynamics', 'Symbolism and Metaphors', 'Intertextuality']\n" + "Extracted Layer Names: ['Thematic Layer', 'Narrative Layer', 'Sentiment Layer', 'Semantic Layer', 'Temporal Layer', 'Geographic Layer', 'Source Credibility Layer', 'Interaction Layer', 'Multimedia Layer', 'Intertextuality Layer']\n" ] } ], @@ -576,7 +572,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "id": "609b1287-e0bf-42a5-856a-f2e0d859ea8b", "metadata": {}, "outputs": [], @@ -586,737 +582,89 @@ }, { "cell_type": "code", - 
"execution_count": 28, - "id": "f06edd84-c455-4034-a38b-3a7d2f746f42", + "execution_count": null, + "id": "42dbf97d-79b9-4627-b307-b64ac22db4f7", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "25adeeb7-cce2-4eac-8fb5-4ff47029d77d", + "metadata": {}, + "outputs": [], + "source": [ + "def add_classification_nodes(G, id, classification_data):\n", + "\n", + " context = classification_data['context_name']\n", + " layer = classification_data['layer_name']\n", + "\n", + " # Create the layer classification node ID using the context_name\n", + " layer_classification_node_id = f'LLM_LAYER_CLASSIFICATION:{context}:{id}'\n", + "\n", + " # Add the node to the graph, unpacking the node data from the dictionary\n", + " G.add_node(layer_classification_node_id, **classification_data)\n", + " \n", + " # Link this node to the corresponding document node\n", + " G.add_edge(id, layer_classification_node_id, relationship='classified_as')\n", + "\n", + " # Create the detailed classification node ID using the layer_name\n", + " detailed_classification_node_id = f'LLM_CLASSIFICATION:LAYER:{layer}:{id}'\n", + "\n", + " # Add the detailed classification node, reusing the same node data\n", + " G.add_node(detailed_classification_node_id, **classification_data)\n", + " \n", + " # Link the detailed classification node to the layer classification node\n", + " G.add_edge(layer_classification_node_id, detailed_classification_node_id, relationship='contains_analysis')\n", + " return G" + ] + }, + { + "cell_type": "code", + "execution_count": 262, + "id": "b59a52d7-d82b-4546-b0b1-a3d0f62a2a65", + "metadata": {}, + "outputs": [], + "source": [ + "U =add_classification_nodes(R, \"Document:doc1\",transformed_dict_1)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "3e7b9fde-fcd5-4891-b43c-177d3877559d", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "You are a 
top-tier algorithm\n", - "designed for extracting information in structured formats to build a knowledge graph.\n", - "- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.\n", - "- **Edges** represent relationships between concepts. They're akin to Wikipedia links.\n", - "- The aim is to achieve simplicity and clarity in the\n", - "knowledge graph, making it accessible for a vast audience.\n", - "YOU ARE ONLY EXTRACTING DATA FOR COGNITIVE LAYER `Structural Layer`\n", - "## 2. Labeling Nodes\n", - "- **Consistency**: Ensure you use basic or elementary types for node labels.\n", - " - For example, when you identify an entity representing a person,\n", - " always label it as **\"person\"**.\n", - " Avoid using more specific terms like \"mathematician\" or \"scientist\".\n", - " - Include event, entity, time, or action nodes to the category.\n", - " - Classify the memory type as episodic or semantic.\n", - "- **Node IDs**: Never utilize integers as node IDs.\n", - " Node IDs should be names or human-readable identifiers found in the text.\n", - "## 3. Handling Numerical Data and Dates\n", - "- Numerical data, like age or other related information,\n", - "should be incorporated as attributes or properties of the respective nodes.\n", - "- **No Separate Nodes for Dates/Numbers**:\n", - "Do not create separate nodes for dates or numerical values.\n", - " Always attach them as attributes or properties of nodes.\n", - "- **Property Format**: Properties must be in a key-value format.\n", - "- **Quotation Marks**: Never use escaped single or double quotes within property values.\n", - "- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.\n", - "## 4. 
Coreference Resolution\n", - "- **Maintain Entity Consistency**:\n", - "When extracting entities, it's vital to ensure consistency.\n", - "If an entity, such as \"John Doe\", is mentioned multiple times\n", - "in the text but is referred to by different names or pronouns (e.g., \"Joe\", \"he\"),\n", - "always use the most complete identifier for that entity throughout the knowledge graph.\n", - " In this example, use \"John Doe\" as the entity ID.\n", - "Remember, the knowledge graph should be coherent and easily understandable,\n", - " so maintaining consistency in entity references is crucial.\n", - "## 5. Strict Compliance\n", - "Adhere to the rules strictly. Non-compliance will result in termination\"\"\"\n", - "You are a top-tier algorithm\n", - "designed for extracting information in structured formats to build a knowledge graph.\n", - "- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.\n", - "- **Edges** represent relationships between concepts. They're akin to Wikipedia links.\n", - "- The aim is to achieve simplicity and clarity in the\n", - "knowledge graph, making it accessible for a vast audience.\n", - "YOU ARE ONLY EXTRACTING DATA FOR COGNITIVE LAYER `Semantic Layer`\n", - "## 2. Labeling Nodes\n", - "- **Consistency**: Ensure you use basic or elementary types for node labels.\n", - " - For example, when you identify an entity representing a person,\n", - " always label it as **\"person\"**.\n", - " Avoid using more specific terms like \"mathematician\" or \"scientist\".\n", - " - Include event, entity, time, or action nodes to the category.\n", - " - Classify the memory type as episodic or semantic.\n", - "- **Node IDs**: Never utilize integers as node IDs.\n", - " Node IDs should be names or human-readable identifiers found in the text.\n", - "## 3. 
Handling Numerical Data and Dates\n", - "- Numerical data, like age or other related information,\n", - "should be incorporated as attributes or properties of the respective nodes.\n", - "- **No Separate Nodes for Dates/Numbers**:\n", - "Do not create separate nodes for dates or numerical values.\n", - " Always attach them as attributes or properties of nodes.\n", - "- **Property Format**: Properties must be in a key-value format.\n", - "- **Quotation Marks**: Never use escaped single or double quotes within property values.\n", - "- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.\n", - "## 4. Coreference Resolution\n", - "- **Maintain Entity Consistency**:\n", - "When extracting entities, it's vital to ensure consistency.\n", - "If an entity, such as \"John Doe\", is mentioned multiple times\n", - "in the text but is referred to by different names or pronouns (e.g., \"Joe\", \"he\"),\n", - "always use the most complete identifier for that entity throughout the knowledge graph.\n", - " In this example, use \"John Doe\" as the entity ID.\n", - "Remember, the knowledge graph should be coherent and easily understandable,\n", - " so maintaining consistency in entity references is crucial.\n", - "## 5. Strict Compliance\n", - "Adhere to the rules strictly. Non-compliance will result in termination\"\"\"\n", - "You are a top-tier algorithm\n", - "designed for extracting information in structured formats to build a knowledge graph.\n", - "- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.\n", - "- **Edges** represent relationships between concepts. They're akin to Wikipedia links.\n", - "- The aim is to achieve simplicity and clarity in the\n", - "knowledge graph, making it accessible for a vast audience.\n", - "YOU ARE ONLY EXTRACTING DATA FOR COGNITIVE LAYER `Referential Layer`\n", - "## 2. 
Labeling Nodes\n", - "- **Consistency**: Ensure you use basic or elementary types for node labels.\n", - " - For example, when you identify an entity representing a person,\n", - " always label it as **\"person\"**.\n", - " Avoid using more specific terms like \"mathematician\" or \"scientist\".\n", - " - Include event, entity, time, or action nodes to the category.\n", - " - Classify the memory type as episodic or semantic.\n", - "- **Node IDs**: Never utilize integers as node IDs.\n", - " Node IDs should be names or human-readable identifiers found in the text.\n", - "## 3. Handling Numerical Data and Dates\n", - "- Numerical data, like age or other related information,\n", - "should be incorporated as attributes or properties of the respective nodes.\n", - "- **No Separate Nodes for Dates/Numbers**:\n", - "Do not create separate nodes for dates or numerical values.\n", - " Always attach them as attributes or properties of nodes.\n", - "- **Property Format**: Properties must be in a key-value format.\n", - "- **Quotation Marks**: Never use escaped single or double quotes within property values.\n", - "- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.\n", - "## 4. Coreference Resolution\n", - "- **Maintain Entity Consistency**:\n", - "When extracting entities, it's vital to ensure consistency.\n", - "If an entity, such as \"John Doe\", is mentioned multiple times\n", - "in the text but is referred to by different names or pronouns (e.g., \"Joe\", \"he\"),\n", - "always use the most complete identifier for that entity throughout the knowledge graph.\n", - " In this example, use \"John Doe\" as the entity ID.\n", - "Remember, the knowledge graph should be coherent and easily understandable,\n", - " so maintaining consistency in entity references is crucial.\n", - "## 5. Strict Compliance\n", - "Adhere to the rules strictly. 
Non-compliance will result in termination\"\"\"\n", - "You are a top-tier algorithm\n", - "designed for extracting information in structured formats to build a knowledge graph.\n", - "- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.\n", - "- **Edges** represent relationships between concepts. They're akin to Wikipedia links.\n", - "- The aim is to achieve simplicity and clarity in the\n", - "knowledge graph, making it accessible for a vast audience.\n", - "YOU ARE ONLY EXTRACTING DATA FOR COGNITIVE LAYER `Lexical Layer`\n", - "## 2. Labeling Nodes\n", - "- **Consistency**: Ensure you use basic or elementary types for node labels.\n", - " - For example, when you identify an entity representing a person,\n", - " always label it as **\"person\"**.\n", - " Avoid using more specific terms like \"mathematician\" or \"scientist\".\n", - " - Include event, entity, time, or action nodes to the category.\n", - " - Classify the memory type as episodic or semantic.\n", - "- **Node IDs**: Never utilize integers as node IDs.\n", - " Node IDs should be names or human-readable identifiers found in the text.\n", - "## 3. Handling Numerical Data and Dates\n", - "- Numerical data, like age or other related information,\n", - "should be incorporated as attributes or properties of the respective nodes.\n", - "- **No Separate Nodes for Dates/Numbers**:\n", - "Do not create separate nodes for dates or numerical values.\n", - " Always attach them as attributes or properties of nodes.\n", - "- **Property Format**: Properties must be in a key-value format.\n", - "- **Quotation Marks**: Never use escaped single or double quotes within property values.\n", - "- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.\n", - "## 4. 
Coreference Resolution\n", - "- **Maintain Entity Consistency**:\n", - "When extracting entities, it's vital to ensure consistency.\n", - "If an entity, such as \"John Doe\", is mentioned multiple times\n", - "in the text but is referred to by different names or pronouns (e.g., \"Joe\", \"he\"),\n", - "always use the most complete identifier for that entity throughout the knowledge graph.\n", - " In this example, use \"John Doe\" as the entity ID.\n", - "Remember, the knowledge graph should be coherent and easily understandable,\n", - " so maintaining consistency in entity references is crucial.\n", - "## 5. Strict Compliance\n", - "Adhere to the rules strictly. Non-compliance will result in termination\"\"\"\n", - "You are a top-tier algorithm\n", - "designed for extracting information in structured formats to build a knowledge graph.\n", - "- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.\n", - "- **Edges** represent relationships between concepts. They're akin to Wikipedia links.\n", - "- The aim is to achieve simplicity and clarity in the\n", - "knowledge graph, making it accessible for a vast audience.\n", - "YOU ARE ONLY EXTRACTING DATA FOR COGNITIVE LAYER `Narrative Layer`\n", - "## 2. Labeling Nodes\n", - "- **Consistency**: Ensure you use basic or elementary types for node labels.\n", - " - For example, when you identify an entity representing a person,\n", - " always label it as **\"person\"**.\n", - " Avoid using more specific terms like \"mathematician\" or \"scientist\".\n", - " - Include event, entity, time, or action nodes to the category.\n", - " - Classify the memory type as episodic or semantic.\n", - "- **Node IDs**: Never utilize integers as node IDs.\n", - " Node IDs should be names or human-readable identifiers found in the text.\n", - "## 3. 
Handling Numerical Data and Dates\n", - "- Numerical data, like age or other related information,\n", - "should be incorporated as attributes or properties of the respective nodes.\n", - "- **No Separate Nodes for Dates/Numbers**:\n", - "Do not create separate nodes for dates or numerical values.\n", - " Always attach them as attributes or properties of nodes.\n", - "- **Property Format**: Properties must be in a key-value format.\n", - "- **Quotation Marks**: Never use escaped single or double quotes within property values.\n", - "- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.\n", - "## 4. Coreference Resolution\n", - "- **Maintain Entity Consistency**:\n", - "When extracting entities, it's vital to ensure consistency.\n", - "If an entity, such as \"John Doe\", is mentioned multiple times\n", - "in the text but is referred to by different names or pronouns (e.g., \"Joe\", \"he\"),\n", - "always use the most complete identifier for that entity throughout the knowledge graph.\n", - " In this example, use \"John Doe\" as the entity ID.\n", - "Remember, the knowledge graph should be coherent and easily understandable,\n", - " so maintaining consistency in entity references is crucial.\n", - "## 5. Strict Compliance\n", - "Adhere to the rules strictly. Non-compliance will result in termination\"\"\"\n", - "You are a top-tier algorithm\n", - "designed for extracting information in structured formats to build a knowledge graph.\n", - "- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.\n", - "- **Edges** represent relationships between concepts. They're akin to Wikipedia links.\n", - "- The aim is to achieve simplicity and clarity in the\n", - "knowledge graph, making it accessible for a vast audience.\n", - "YOU ARE ONLY EXTRACTING DATA FOR COGNITIVE LAYER `Contextual Layer`\n", - "## 2. 
Labeling Nodes\n", - "- **Consistency**: Ensure you use basic or elementary types for node labels.\n", - " - For example, when you identify an entity representing a person,\n", - " always label it as **\"person\"**.\n", - " Avoid using more specific terms like \"mathematician\" or \"scientist\".\n", - " - Include event, entity, time, or action nodes to the category.\n", - " - Classify the memory type as episodic or semantic.\n", - "- **Node IDs**: Never utilize integers as node IDs.\n", - " Node IDs should be names or human-readable identifiers found in the text.\n", - "## 3. Handling Numerical Data and Dates\n", - "- Numerical data, like age or other related information,\n", - "should be incorporated as attributes or properties of the respective nodes.\n", - "- **No Separate Nodes for Dates/Numbers**:\n", - "Do not create separate nodes for dates or numerical values.\n", - " Always attach them as attributes or properties of nodes.\n", - "- **Property Format**: Properties must be in a key-value format.\n", - "- **Quotation Marks**: Never use escaped single or double quotes within property values.\n", - "- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.\n", - "## 4. Coreference Resolution\n", - "- **Maintain Entity Consistency**:\n", - "When extracting entities, it's vital to ensure consistency.\n", - "If an entity, such as \"John Doe\", is mentioned multiple times\n", - "in the text but is referred to by different names or pronouns (e.g., \"Joe\", \"he\"),\n", - "always use the most complete identifier for that entity throughout the knowledge graph.\n", - " In this example, use \"John Doe\" as the entity ID.\n", - "Remember, the knowledge graph should be coherent and easily understandable,\n", - " so maintaining consistency in entity references is crucial.\n", - "## 5. Strict Compliance\n", - "Adhere to the rules strictly. 
Non-compliance will result in termination\"\"\"\n", - "You are a top-tier algorithm\n", - "designed for extracting information in structured formats to build a knowledge graph.\n", - "- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.\n", - "- **Edges** represent relationships between concepts. They're akin to Wikipedia links.\n", - "- The aim is to achieve simplicity and clarity in the\n", - "knowledge graph, making it accessible for a vast audience.\n", - "YOU ARE ONLY EXTRACTING DATA FOR COGNITIVE LAYER `Intertextual Layer`\n", - "## 2. Labeling Nodes\n", - "- **Consistency**: Ensure you use basic or elementary types for node labels.\n", - " - For example, when you identify an entity representing a person,\n", - " always label it as **\"person\"**.\n", - " Avoid using more specific terms like \"mathematician\" or \"scientist\".\n", - " - Include event, entity, time, or action nodes to the category.\n", - " - Classify the memory type as episodic or semantic.\n", - "- **Node IDs**: Never utilize integers as node IDs.\n", - " Node IDs should be names or human-readable identifiers found in the text.\n", - "## 3. Handling Numerical Data and Dates\n", - "- Numerical data, like age or other related information,\n", - "should be incorporated as attributes or properties of the respective nodes.\n", - "- **No Separate Nodes for Dates/Numbers**:\n", - "Do not create separate nodes for dates or numerical values.\n", - " Always attach them as attributes or properties of nodes.\n", - "- **Property Format**: Properties must be in a key-value format.\n", - "- **Quotation Marks**: Never use escaped single or double quotes within property values.\n", - "- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.\n", - "## 4. 
Coreference Resolution\n", - "- **Maintain Entity Consistency**:\n", - "When extracting entities, it's vital to ensure consistency.\n", - "If an entity, such as \"John Doe\", is mentioned multiple times\n", - "in the text but is referred to by different names or pronouns (e.g., \"Joe\", \"he\"),\n", - "always use the most complete identifier for that entity throughout the knowledge graph.\n", - " In this example, use \"John Doe\" as the entity ID.\n", - "Remember, the knowledge graph should be coherent and easily understandable,\n", - " so maintaining consistency in entity references is crucial.\n", - "## 5. Strict Compliance\n", - "Adhere to the rules strictly. Non-compliance will result in termination\"\"\"\n", - "You are a top-tier algorithm\n", - "designed for extracting information in structured formats to build a knowledge graph.\n", - "- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.\n", - "- **Edges** represent relationships between concepts. They're akin to Wikipedia links.\n", - "- The aim is to achieve simplicity and clarity in the\n", - "knowledge graph, making it accessible for a vast audience.\n", - "YOU ARE ONLY EXTRACTING DATA FOR COGNITIVE LAYER `Visual Layer`\n", - "## 2. Labeling Nodes\n", - "- **Consistency**: Ensure you use basic or elementary types for node labels.\n", - " - For example, when you identify an entity representing a person,\n", - " always label it as **\"person\"**.\n", - " Avoid using more specific terms like \"mathematician\" or \"scientist\".\n", - " - Include event, entity, time, or action nodes to the category.\n", - " - Classify the memory type as episodic or semantic.\n", - "- **Node IDs**: Never utilize integers as node IDs.\n", - " Node IDs should be names or human-readable identifiers found in the text.\n", - "## 3. 
Handling Numerical Data and Dates\n", - "- Numerical data, like age or other related information,\n", - "should be incorporated as attributes or properties of the respective nodes.\n", - "- **No Separate Nodes for Dates/Numbers**:\n", - "Do not create separate nodes for dates or numerical values.\n", - " Always attach them as attributes or properties of nodes.\n", - "- **Property Format**: Properties must be in a key-value format.\n", - "- **Quotation Marks**: Never use escaped single or double quotes within property values.\n", - "- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.\n", - "## 4. Coreference Resolution\n", - "- **Maintain Entity Consistency**:\n", - "When extracting entities, it's vital to ensure consistency.\n", - "If an entity, such as \"John Doe\", is mentioned multiple times\n", - "in the text but is referred to by different names or pronouns (e.g., \"Joe\", \"he\"),\n", - "always use the most complete identifier for that entity throughout the knowledge graph.\n", - " In this example, use \"John Doe\" as the entity ID.\n", - "Remember, the knowledge graph should be coherent and easily understandable,\n", - " so maintaining consistency in entity references is crucial.\n", - "## 5. Strict Compliance\n", - "Adhere to the rules strictly. Non-compliance will result in termination\"\"\"\n", - "You are a top-tier algorithm\n", - "designed for extracting information in structured formats to build a knowledge graph.\n", - "- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.\n", - "- **Edges** represent relationships between concepts. They're akin to Wikipedia links.\n", - "- The aim is to achieve simplicity and clarity in the\n", - "knowledge graph, making it accessible for a vast audience.\n", - "YOU ARE ONLY EXTRACTING DATA FOR COGNITIVE LAYER `Interactional Layer`\n", - "## 2. 
Labeling Nodes\n", - "- **Consistency**: Ensure you use basic or elementary types for node labels.\n", - " - For example, when you identify an entity representing a person,\n", - " always label it as **\"person\"**.\n", - " Avoid using more specific terms like \"mathematician\" or \"scientist\".\n", - " - Include event, entity, time, or action nodes to the category.\n", - " - Classify the memory type as episodic or semantic.\n", - "- **Node IDs**: Never utilize integers as node IDs.\n", - " Node IDs should be names or human-readable identifiers found in the text.\n", - "## 3. Handling Numerical Data and Dates\n", - "- Numerical data, like age or other related information,\n", - "should be incorporated as attributes or properties of the respective nodes.\n", - "- **No Separate Nodes for Dates/Numbers**:\n", - "Do not create separate nodes for dates or numerical values.\n", - " Always attach them as attributes or properties of nodes.\n", - "- **Property Format**: Properties must be in a key-value format.\n", - "- **Quotation Marks**: Never use escaped single or double quotes within property values.\n", - "- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.\n", - "## 4. Coreference Resolution\n", - "- **Maintain Entity Consistency**:\n", - "When extracting entities, it's vital to ensure consistency.\n", - "If an entity, such as \"John Doe\", is mentioned multiple times\n", - "in the text but is referred to by different names or pronouns (e.g., \"Joe\", \"he\"),\n", - "always use the most complete identifier for that entity throughout the knowledge graph.\n", - " In this example, use \"John Doe\" as the entity ID.\n", - "Remember, the knowledge graph should be coherent and easily understandable,\n", - " so maintaining consistency in entity references is crucial.\n", - "## 5. Strict Compliance\n", - "Adhere to the rules strictly. 
Non-compliance will result in termination\"\"\"\n", - "You are a top-tier algorithm\n", - "designed for extracting information in structured formats to build a knowledge graph.\n", - "- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.\n", - "- **Edges** represent relationships between concepts. They're akin to Wikipedia links.\n", - "- The aim is to achieve simplicity and clarity in the\n", - "knowledge graph, making it accessible for a vast audience.\n", - "YOU ARE ONLY EXTRACTING DATA FOR COGNITIVE LAYER `Statistical Layer`\n", - "## 2. Labeling Nodes\n", - "- **Consistency**: Ensure you use basic or elementary types for node labels.\n", - " - For example, when you identify an entity representing a person,\n", - " always label it as **\"person\"**.\n", - " Avoid using more specific terms like \"mathematician\" or \"scientist\".\n", - " - Include event, entity, time, or action nodes to the category.\n", - " - Classify the memory type as episodic or semantic.\n", - "- **Node IDs**: Never utilize integers as node IDs.\n", - " Node IDs should be names or human-readable identifiers found in the text.\n", - "## 3. Handling Numerical Data and Dates\n", - "- Numerical data, like age or other related information,\n", - "should be incorporated as attributes or properties of the respective nodes.\n", - "- **No Separate Nodes for Dates/Numbers**:\n", - "Do not create separate nodes for dates or numerical values.\n", - " Always attach them as attributes or properties of nodes.\n", - "- **Property Format**: Properties must be in a key-value format.\n", - "- **Quotation Marks**: Never use escaped single or double quotes within property values.\n", - "- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.\n", - "## 4. 
Coreference Resolution\n", - "- **Maintain Entity Consistency**:\n", - "When extracting entities, it's vital to ensure consistency.\n", - "If an entity, such as \"John Doe\", is mentioned multiple times\n", - "in the text but is referred to by different names or pronouns (e.g., \"Joe\", \"he\"),\n", - "always use the most complete identifier for that entity throughout the knowledge graph.\n", - " In this example, use \"John Doe\" as the entity ID.\n", - "Remember, the knowledge graph should be coherent and easily understandable,\n", - " so maintaining consistency in entity references is crucial.\n", - "## 5. Strict Compliance\n", - "Adhere to the rules strictly. Non-compliance will result in termination\"\"\"\n", - "You are a top-tier algorithm\n", - "designed for extracting information in structured formats to build a knowledge graph.\n", - "- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.\n", - "- **Edges** represent relationships between concepts. They're akin to Wikipedia links.\n", - "- The aim is to achieve simplicity and clarity in the\n", - "knowledge graph, making it accessible for a vast audience.\n", - "YOU ARE ONLY EXTRACTING DATA FOR COGNITIVE LAYER `Narrative Structure`\n", - "## 2. Labeling Nodes\n", - "- **Consistency**: Ensure you use basic or elementary types for node labels.\n", - " - For example, when you identify an entity representing a person,\n", - " always label it as **\"person\"**.\n", - " Avoid using more specific terms like \"mathematician\" or \"scientist\".\n", - " - Include event, entity, time, or action nodes to the category.\n", - " - Classify the memory type as episodic or semantic.\n", - "- **Node IDs**: Never utilize integers as node IDs.\n", - " Node IDs should be names or human-readable identifiers found in the text.\n", - "## 3. 
Handling Numerical Data and Dates\n", - "- Numerical data, like age or other related information,\n", - "should be incorporated as attributes or properties of the respective nodes.\n", - "- **No Separate Nodes for Dates/Numbers**:\n", - "Do not create separate nodes for dates or numerical values.\n", - " Always attach them as attributes or properties of nodes.\n", - "- **Property Format**: Properties must be in a key-value format.\n", - "- **Quotation Marks**: Never use escaped single or double quotes within property values.\n", - "- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.\n", - "## 4. Coreference Resolution\n", - "- **Maintain Entity Consistency**:\n", - "When extracting entities, it's vital to ensure consistency.\n", - "If an entity, such as \"John Doe\", is mentioned multiple times\n", - "in the text but is referred to by different names or pronouns (e.g., \"Joe\", \"he\"),\n", - "always use the most complete identifier for that entity throughout the knowledge graph.\n", - " In this example, use \"John Doe\" as the entity ID.\n", - "Remember, the knowledge graph should be coherent and easily understandable,\n", - " so maintaining consistency in entity references is crucial.\n", - "## 5. Strict Compliance\n", - "Adhere to the rules strictly. Non-compliance will result in termination\"\"\"\n", - "You are a top-tier algorithm\n", - "designed for extracting information in structured formats to build a knowledge graph.\n", - "- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.\n", - "- **Edges** represent relationships between concepts. They're akin to Wikipedia links.\n", - "- The aim is to achieve simplicity and clarity in the\n", - "knowledge graph, making it accessible for a vast audience.\n", - "YOU ARE ONLY EXTRACTING DATA FOR COGNITIVE LAYER `Character Identification`\n", - "## 2. 
Labeling Nodes\n", - "- **Consistency**: Ensure you use basic or elementary types for node labels.\n", - " - For example, when you identify an entity representing a person,\n", - " always label it as **\"person\"**.\n", - " Avoid using more specific terms like \"mathematician\" or \"scientist\".\n", - " - Include event, entity, time, or action nodes to the category.\n", - " - Classify the memory type as episodic or semantic.\n", - "- **Node IDs**: Never utilize integers as node IDs.\n", - " Node IDs should be names or human-readable identifiers found in the text.\n", - "## 3. Handling Numerical Data and Dates\n", - "- Numerical data, like age or other related information,\n", - "should be incorporated as attributes or properties of the respective nodes.\n", - "- **No Separate Nodes for Dates/Numbers**:\n", - "Do not create separate nodes for dates or numerical values.\n", - " Always attach them as attributes or properties of nodes.\n", - "- **Property Format**: Properties must be in a key-value format.\n", - "- **Quotation Marks**: Never use escaped single or double quotes within property values.\n", - "- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.\n", - "## 4. Coreference Resolution\n", - "- **Maintain Entity Consistency**:\n", - "When extracting entities, it's vital to ensure consistency.\n", - "If an entity, such as \"John Doe\", is mentioned multiple times\n", - "in the text but is referred to by different names or pronouns (e.g., \"Joe\", \"he\"),\n", - "always use the most complete identifier for that entity throughout the knowledge graph.\n", - " In this example, use \"John Doe\" as the entity ID.\n", - "Remember, the knowledge graph should be coherent and easily understandable,\n", - " so maintaining consistency in entity references is crucial.\n", - "## 5. Strict Compliance\n", - "Adhere to the rules strictly. 
Non-compliance will result in termination\"\"\"\n", - "You are a top-tier algorithm\n", - "designed for extracting information in structured formats to build a knowledge graph.\n", - "- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.\n", - "- **Edges** represent relationships between concepts. They're akin to Wikipedia links.\n", - "- The aim is to achieve simplicity and clarity in the\n", - "knowledge graph, making it accessible for a vast audience.\n", - "YOU ARE ONLY EXTRACTING DATA FOR COGNITIVE LAYER `Emotional Trajectories`\n", - "## 2. Labeling Nodes\n", - "- **Consistency**: Ensure you use basic or elementary types for node labels.\n", - " - For example, when you identify an entity representing a person,\n", - " always label it as **\"person\"**.\n", - " Avoid using more specific terms like \"mathematician\" or \"scientist\".\n", - " - Include event, entity, time, or action nodes to the category.\n", - " - Classify the memory type as episodic or semantic.\n", - "- **Node IDs**: Never utilize integers as node IDs.\n", - " Node IDs should be names or human-readable identifiers found in the text.\n", - "## 3. Handling Numerical Data and Dates\n", - "- Numerical data, like age or other related information,\n", - "should be incorporated as attributes or properties of the respective nodes.\n", - "- **No Separate Nodes for Dates/Numbers**:\n", - "Do not create separate nodes for dates or numerical values.\n", - " Always attach them as attributes or properties of nodes.\n", - "- **Property Format**: Properties must be in a key-value format.\n", - "- **Quotation Marks**: Never use escaped single or double quotes within property values.\n", - "- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.\n", - "## 4. 
Coreference Resolution\n", - "- **Maintain Entity Consistency**:\n", - "When extracting entities, it's vital to ensure consistency.\n", - "If an entity, such as \"John Doe\", is mentioned multiple times\n", - "in the text but is referred to by different names or pronouns (e.g., \"Joe\", \"he\"),\n", - "always use the most complete identifier for that entity throughout the knowledge graph.\n", - " In this example, use \"John Doe\" as the entity ID.\n", - "Remember, the knowledge graph should be coherent and easily understandable,\n", - " so maintaining consistency in entity references is crucial.\n", - "## 5. Strict Compliance\n", - "Adhere to the rules strictly. Non-compliance will result in termination\"\"\"\n", - "You are a top-tier algorithm\n", - "designed for extracting information in structured formats to build a knowledge graph.\n", - "- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.\n", - "- **Edges** represent relationships between concepts. They're akin to Wikipedia links.\n", - "- The aim is to achieve simplicity and clarity in the\n", - "knowledge graph, making it accessible for a vast audience.\n", - "YOU ARE ONLY EXTRACTING DATA FOR COGNITIVE LAYER `Thematic Elements`\n", - "## 2. Labeling Nodes\n", - "- **Consistency**: Ensure you use basic or elementary types for node labels.\n", - " - For example, when you identify an entity representing a person,\n", - " always label it as **\"person\"**.\n", - " Avoid using more specific terms like \"mathematician\" or \"scientist\".\n", - " - Include event, entity, time, or action nodes to the category.\n", - " - Classify the memory type as episodic or semantic.\n", - "- **Node IDs**: Never utilize integers as node IDs.\n", - " Node IDs should be names or human-readable identifiers found in the text.\n", - "## 3. 
Handling Numerical Data and Dates\n", - "- Numerical data, like age or other related information,\n", - "should be incorporated as attributes or properties of the respective nodes.\n", - "- **No Separate Nodes for Dates/Numbers**:\n", - "Do not create separate nodes for dates or numerical values.\n", - " Always attach them as attributes or properties of nodes.\n", - "- **Property Format**: Properties must be in a key-value format.\n", - "- **Quotation Marks**: Never use escaped single or double quotes within property values.\n", - "- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.\n", - "## 4. Coreference Resolution\n", - "- **Maintain Entity Consistency**:\n", - "When extracting entities, it's vital to ensure consistency.\n", - "If an entity, such as \"John Doe\", is mentioned multiple times\n", - "in the text but is referred to by different names or pronouns (e.g., \"Joe\", \"he\"),\n", - "always use the most complete identifier for that entity throughout the knowledge graph.\n", - " In this example, use \"John Doe\" as the entity ID.\n", - "Remember, the knowledge graph should be coherent and easily understandable,\n", - " so maintaining consistency in entity references is crucial.\n", - "## 5. Strict Compliance\n", - "Adhere to the rules strictly. Non-compliance will result in termination\"\"\"\n", - "You are a top-tier algorithm\n", - "designed for extracting information in structured formats to build a knowledge graph.\n", - "- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.\n", - "- **Edges** represent relationships between concepts. They're akin to Wikipedia links.\n", - "- The aim is to achieve simplicity and clarity in the\n", - "knowledge graph, making it accessible for a vast audience.\n", - "YOU ARE ONLY EXTRACTING DATA FOR COGNITIVE LAYER `Cultural and Social Context`\n", - "## 2. 
Labeling Nodes\n", - "- **Consistency**: Ensure you use basic or elementary types for node labels.\n", - " - For example, when you identify an entity representing a person,\n", - " always label it as **\"person\"**.\n", - " Avoid using more specific terms like \"mathematician\" or \"scientist\".\n", - " - Include event, entity, time, or action nodes to the category.\n", - " - Classify the memory type as episodic or semantic.\n", - "- **Node IDs**: Never utilize integers as node IDs.\n", - " Node IDs should be names or human-readable identifiers found in the text.\n", - "## 3. Handling Numerical Data and Dates\n", - "- Numerical data, like age or other related information,\n", - "should be incorporated as attributes or properties of the respective nodes.\n", - "- **No Separate Nodes for Dates/Numbers**:\n", - "Do not create separate nodes for dates or numerical values.\n", - " Always attach them as attributes or properties of nodes.\n", - "- **Property Format**: Properties must be in a key-value format.\n", - "- **Quotation Marks**: Never use escaped single or double quotes within property values.\n", - "- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.\n", - "## 4. Coreference Resolution\n", - "- **Maintain Entity Consistency**:\n", - "When extracting entities, it's vital to ensure consistency.\n", - "If an entity, such as \"John Doe\", is mentioned multiple times\n", - "in the text but is referred to by different names or pronouns (e.g., \"Joe\", \"he\"),\n", - "always use the most complete identifier for that entity throughout the knowledge graph.\n", - " In this example, use \"John Doe\" as the entity ID.\n", - "Remember, the knowledge graph should be coherent and easily understandable,\n", - " so maintaining consistency in entity references is crucial.\n", - "## 5. Strict Compliance\n", - "Adhere to the rules strictly. 
Non-compliance will result in termination\"\"\"\n", - "You are a top-tier algorithm\n", - "designed for extracting information in structured formats to build a knowledge graph.\n", - "- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.\n", - "- **Edges** represent relationships between concepts. They're akin to Wikipedia links.\n", - "- The aim is to achieve simplicity and clarity in the\n", - "knowledge graph, making it accessible for a vast audience.\n", - "YOU ARE ONLY EXTRACTING DATA FOR COGNITIVE LAYER `Temporal Aspects`\n", - "## 2. Labeling Nodes\n", - "- **Consistency**: Ensure you use basic or elementary types for node labels.\n", - " - For example, when you identify an entity representing a person,\n", - " always label it as **\"person\"**.\n", - " Avoid using more specific terms like \"mathematician\" or \"scientist\".\n", - " - Include event, entity, time, or action nodes to the category.\n", - " - Classify the memory type as episodic or semantic.\n", - "- **Node IDs**: Never utilize integers as node IDs.\n", - " Node IDs should be names or human-readable identifiers found in the text.\n", - "## 3. Handling Numerical Data and Dates\n", - "- Numerical data, like age or other related information,\n", - "should be incorporated as attributes or properties of the respective nodes.\n", - "- **No Separate Nodes for Dates/Numbers**:\n", - "Do not create separate nodes for dates or numerical values.\n", - " Always attach them as attributes or properties of nodes.\n", - "- **Property Format**: Properties must be in a key-value format.\n", - "- **Quotation Marks**: Never use escaped single or double quotes within property values.\n", - "- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.\n", - "## 4. 
Coreference Resolution\n", - "- **Maintain Entity Consistency**:\n", - "When extracting entities, it's vital to ensure consistency.\n", - "If an entity, such as \"John Doe\", is mentioned multiple times\n", - "in the text but is referred to by different names or pronouns (e.g., \"Joe\", \"he\"),\n", - "always use the most complete identifier for that entity throughout the knowledge graph.\n", - " In this example, use \"John Doe\" as the entity ID.\n", - "Remember, the knowledge graph should be coherent and easily understandable,\n", - " so maintaining consistency in entity references is crucial.\n", - "## 5. Strict Compliance\n", - "Adhere to the rules strictly. Non-compliance will result in termination\"\"\"\n", - "You are a top-tier algorithm\n", - "designed for extracting information in structured formats to build a knowledge graph.\n", - "- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.\n", - "- **Edges** represent relationships between concepts. They're akin to Wikipedia links.\n", - "- The aim is to achieve simplicity and clarity in the\n", - "knowledge graph, making it accessible for a vast audience.\n", - "YOU ARE ONLY EXTRACTING DATA FOR COGNITIVE LAYER `Language and Style`\n", - "## 2. Labeling Nodes\n", - "- **Consistency**: Ensure you use basic or elementary types for node labels.\n", - " - For example, when you identify an entity representing a person,\n", - " always label it as **\"person\"**.\n", - " Avoid using more specific terms like \"mathematician\" or \"scientist\".\n", - " - Include event, entity, time, or action nodes to the category.\n", - " - Classify the memory type as episodic or semantic.\n", - "- **Node IDs**: Never utilize integers as node IDs.\n", - " Node IDs should be names or human-readable identifiers found in the text.\n", - "## 3. 
Handling Numerical Data and Dates\n", - "- Numerical data, like age or other related information,\n", - "should be incorporated as attributes or properties of the respective nodes.\n", - "- **No Separate Nodes for Dates/Numbers**:\n", - "Do not create separate nodes for dates or numerical values.\n", - " Always attach them as attributes or properties of nodes.\n", - "- **Property Format**: Properties must be in a key-value format.\n", - "- **Quotation Marks**: Never use escaped single or double quotes within property values.\n", - "- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.\n", - "## 4. Coreference Resolution\n", - "- **Maintain Entity Consistency**:\n", - "When extracting entities, it's vital to ensure consistency.\n", - "If an entity, such as \"John Doe\", is mentioned multiple times\n", - "in the text but is referred to by different names or pronouns (e.g., \"Joe\", \"he\"),\n", - "always use the most complete identifier for that entity throughout the knowledge graph.\n", - " In this example, use \"John Doe\" as the entity ID.\n", - "Remember, the knowledge graph should be coherent and easily understandable,\n", - " so maintaining consistency in entity references is crucial.\n", - "## 5. Strict Compliance\n", - "Adhere to the rules strictly. Non-compliance will result in termination\"\"\"\n", - "You are a top-tier algorithm\n", - "designed for extracting information in structured formats to build a knowledge graph.\n", - "- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.\n", - "- **Edges** represent relationships between concepts. They're akin to Wikipedia links.\n", - "- The aim is to achieve simplicity and clarity in the\n", - "knowledge graph, making it accessible for a vast audience.\n", - "YOU ARE ONLY EXTRACTING DATA FOR COGNITIVE LAYER `Interpersonal Dynamics`\n", - "## 2. 
Labeling Nodes\n", - "- **Consistency**: Ensure you use basic or elementary types for node labels.\n", - " - For example, when you identify an entity representing a person,\n", - " always label it as **\"person\"**.\n", - " Avoid using more specific terms like \"mathematician\" or \"scientist\".\n", - " - Include event, entity, time, or action nodes to the category.\n", - " - Classify the memory type as episodic or semantic.\n", - "- **Node IDs**: Never utilize integers as node IDs.\n", - " Node IDs should be names or human-readable identifiers found in the text.\n", - "## 3. Handling Numerical Data and Dates\n", - "- Numerical data, like age or other related information,\n", - "should be incorporated as attributes or properties of the respective nodes.\n", - "- **No Separate Nodes for Dates/Numbers**:\n", - "Do not create separate nodes for dates or numerical values.\n", - " Always attach them as attributes or properties of nodes.\n", - "- **Property Format**: Properties must be in a key-value format.\n", - "- **Quotation Marks**: Never use escaped single or double quotes within property values.\n", - "- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.\n", - "## 4. Coreference Resolution\n", - "- **Maintain Entity Consistency**:\n", - "When extracting entities, it's vital to ensure consistency.\n", - "If an entity, such as \"John Doe\", is mentioned multiple times\n", - "in the text but is referred to by different names or pronouns (e.g., \"Joe\", \"he\"),\n", - "always use the most complete identifier for that entity throughout the knowledge graph.\n", - " In this example, use \"John Doe\" as the entity ID.\n", - "Remember, the knowledge graph should be coherent and easily understandable,\n", - " so maintaining consistency in entity references is crucial.\n", - "## 5. Strict Compliance\n", - "Adhere to the rules strictly. 
Non-compliance will result in termination\"\"\"\n", - "You are a top-tier algorithm\n", - "designed for extracting information in structured formats to build a knowledge graph.\n", - "- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.\n", - "- **Edges** represent relationships between concepts. They're akin to Wikipedia links.\n", - "- The aim is to achieve simplicity and clarity in the\n", - "knowledge graph, making it accessible for a vast audience.\n", - "YOU ARE ONLY EXTRACTING DATA FOR COGNITIVE LAYER `Symbolism and Metaphors`\n", - "## 2. Labeling Nodes\n", - "- **Consistency**: Ensure you use basic or elementary types for node labels.\n", - " - For example, when you identify an entity representing a person,\n", - " always label it as **\"person\"**.\n", - " Avoid using more specific terms like \"mathematician\" or \"scientist\".\n", - " - Include event, entity, time, or action nodes to the category.\n", - " - Classify the memory type as episodic or semantic.\n", - "- **Node IDs**: Never utilize integers as node IDs.\n", - " Node IDs should be names or human-readable identifiers found in the text.\n", - "## 3. Handling Numerical Data and Dates\n", - "- Numerical data, like age or other related information,\n", - "should be incorporated as attributes or properties of the respective nodes.\n", - "- **No Separate Nodes for Dates/Numbers**:\n", - "Do not create separate nodes for dates or numerical values.\n", - " Always attach them as attributes or properties of nodes.\n", - "- **Property Format**: Properties must be in a key-value format.\n", - "- **Quotation Marks**: Never use escaped single or double quotes within property values.\n", - "- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.\n", - "## 4. 
Coreference Resolution\n", - "- **Maintain Entity Consistency**:\n", - "When extracting entities, it's vital to ensure consistency.\n", - "If an entity, such as \"John Doe\", is mentioned multiple times\n", - "in the text but is referred to by different names or pronouns (e.g., \"Joe\", \"he\"),\n", - "always use the most complete identifier for that entity throughout the knowledge graph.\n", - " In this example, use \"John Doe\" as the entity ID.\n", - "Remember, the knowledge graph should be coherent and easily understandable,\n", - " so maintaining consistency in entity references is crucial.\n", - "## 5. Strict Compliance\n", - "Adhere to the rules strictly. Non-compliance will result in termination\"\"\"\n", - "You are a top-tier algorithm\n", - "designed for extracting information in structured formats to build a knowledge graph.\n", - "- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.\n", - "- **Edges** represent relationships between concepts. They're akin to Wikipedia links.\n", - "- The aim is to achieve simplicity and clarity in the\n", - "knowledge graph, making it accessible for a vast audience.\n", - "YOU ARE ONLY EXTRACTING DATA FOR COGNITIVE LAYER `Intertextuality`\n", - "## 2. Labeling Nodes\n", - "- **Consistency**: Ensure you use basic or elementary types for node labels.\n", - " - For example, when you identify an entity representing a person,\n", - " always label it as **\"person\"**.\n", - " Avoid using more specific terms like \"mathematician\" or \"scientist\".\n", - " - Include event, entity, time, or action nodes to the category.\n", - " - Classify the memory type as episodic or semantic.\n", - "- **Node IDs**: Never utilize integers as node IDs.\n", - " Node IDs should be names or human-readable identifiers found in the text.\n", - "## 3. 
Handling Numerical Data and Dates\n", - "- Numerical data, like age or other related information,\n", - "should be incorporated as attributes or properties of the respective nodes.\n", - "- **No Separate Nodes for Dates/Numbers**:\n", - "Do not create separate nodes for dates or numerical values.\n", - " Always attach them as attributes or properties of nodes.\n", - "- **Property Format**: Properties must be in a key-value format.\n", - "- **Quotation Marks**: Never use escaped single or double quotes within property values.\n", - "- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.\n", - "## 4. Coreference Resolution\n", - "- **Maintain Entity Consistency**:\n", - "When extracting entities, it's vital to ensure consistency.\n", - "If an entity, such as \"John Doe\", is mentioned multiple times\n", - "in the text but is referred to by different names or pronouns (e.g., \"Joe\", \"he\"),\n", - "always use the most complete identifier for that entity throughout the knowledge graph.\n", - " In this example, use \"John Doe\" as the entity ID.\n", - "Remember, the knowledge graph should be coherent and easily understandable,\n", - " so maintaining consistency in entity references is crucial.\n", - "## 5. Strict Compliance\n", - "Adhere to the rules strictly. 
Non-compliance will result in termination\"\"\"\n" - ] + "data": { + "text/plain": [ + "['Structural Layer',\n", + " 'Semantic Layer',\n", + " 'Syntactic Layer',\n", + " 'Discourse Layer',\n", + " 'Pragmatic Layer',\n", + " 'Stylistic Layer',\n", + " 'Referential Layer',\n", + " 'Citation Layer',\n", + " 'Metadata Layer']" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" } ], + "source": [ + "cognitive_layers_one" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "f06edd84-c455-4034-a38b-3a7d2f746f42", + "metadata": {}, + "outputs": [], "source": [ "import nest_asyncio\n", "nest_asyncio.apply()\n", @@ -1344,154 +692,133 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "4a19cc82-b892-47f3-99db-b70edccefda5", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "15dc7863-0f4c-47ae-89ef-2656e8478249", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "04e24001-a42a-4f18-adc1-7a4a926515c4", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "58644c64-7ef0-415f-8e41-e2edcf5fd15b", + "execution_count": 30, + "id": "a34971b4-d1fa-4db8-abbc-395bc70b0b49", "metadata": {}, "outputs": [], "source": [ - "# import networkx as nx\n", - "# import uuid\n", - "# from datetime import datetime\n", - "\n", - "# def create_user_content_graph(user_id, custom_user_properties=None, required_layers=None, default_fields=None, existing_graph=None):\n", - "\n", - "# category_name = required_layers.dict()['name']\n", - "# subgroup_names = [subgroup['name'] for subgroup in required_layers.dict()['cognitive_subgroups']]\n", - "\n", - " \n", - "# # Construct the additional_categories structure\n", - "# additional_categories = {\n", - "# category_name: subgroup_names\n", - "# }\n", - "\n", - "# # Define default fields for all nodes if 
not provided\n", - "# if default_fields is None:\n", - "# default_fields = {\n", - "# 'created_at': datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\"),\n", - "# 'updated_at': datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n", - "# }\n", - "\n", - "# # Merge custom user properties with default properties; custom properties take precedence\n", - "# user_properties = {**default_fields, **(custom_user_properties or {})}\n", - "\n", - "# # Default content categories\n", - "# content_categories = {\n", - "# \"Temporal\": [\"Historical events\", \"Schedules and timelines\"],\n", - "# \"Positional\": [\"Geographical locations\", \"Spatial data\"],\n", - "# \"Propositions\": [\"Hypotheses and theories\", \"Claims and arguments\"],\n", - "# \"Personalization\": [\"User preferences\", \"User information\"]\n", - "# }\n", - "\n", - "# # Update content categories with any additional categories provided\n", - "# if additional_categories:\n", - "# content_categories.update(additional_categories)\n", - "\n", - "# G = existing_graph if existing_graph else nx.MultiDiGraph()\n", - "\n", - "# # Check if the user node already exists, if not, add the user node with properties\n", - "# if not G.has_node(user_id):\n", - "# G.add_node(user_id, **user_properties)\n", - "\n", - "# # Add or update content category nodes and their edges\n", - "# for category, subclasses in content_categories.items():\n", - "# category_properties = {**default_fields, 'type': 'category'}\n", - "\n", - "# # Add or update the category node\n", - "# if not G.has_node(category):\n", - "# G.add_node(category, **category_properties)\n", - "# G.add_edge(user_id, category, relationship='created')\n", - "\n", - "# # Add or update subclass nodes and their edges\n", - "# for subclass in subclasses:\n", - "# # Using both category and subclass names to ensure uniqueness within categories\n", - "# subclass_node_id = f\"{category}:{subclass}\"\n", - "\n", - "# # Check if subclass node exists before adding, based on node 
content\n", - "# if not any(subclass == data.get('content') for _, data in G.nodes(data=True)):\n", - "# subclass_properties = {**default_fields, 'type': 'subclass', 'content': subclass}\n", - "# G.add_node(subclass_node_id, **subclass_properties)\n", - "# G.add_edge(category, subclass_node_id, relationship='includes')\n", - "\n", - "# return G\n", - "\n", - "# # # Add content category nodes and their edges\n", - "# # for category, subclasses in content_categories.items():\n", - "# # category_properties = {**default_fields, 'type': 'category'}\n", - "# # G.add_node(category, **category_properties)\n", - "# # G.add_edge(user_id, category, relationship='created')\n", - "\n", - "# # # Add subclass nodes and their edges\n", - "# # for subclass in subclasses:\n", - "# # unique_id = str(uuid.uuid4())\n", - "# # subclass_node_id = f\"{subclass} - {unique_id}\"\n", - "# # subclass_properties = {**default_fields, 'type': 'subclass', 'content': subclass}\n", - "# # G.add_node(subclass_node_id, **subclass_properties)\n", - "# # G.add_edge(category, subclass_node_id, relationship='includes')\n", - "\n", - "# # return G\n", - "\n" + "import ast \n" ] }, { "cell_type": "code", - "execution_count": 264, - "id": "dd3f0e55-9f9d-4804-9ad6-31afd2088ab5", - "metadata": {}, - "outputs": [], - "source": [ - "# G = None" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2cc7c3bb-7cc0-453b-beab-2983a703ccda", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "199ef3ab-5e73-40d2-b531-6a402edf3f17", + "execution_count": 31, + "id": "088e92a0-06e7-4128-b9b8-eacd9e735cb4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Nodes in the graph:\n", - "[('user123', {'created_at': '2024-03-04 20:16:14', 'updated_at': '2024-03-04 20:16:14', 'username': 'exampleUser', 'email': 'user@example.com'}), ('Temporal', {'created_at': '2024-03-04 20:16:14', 'updated_at': '2024-03-04 
20:16:14', 'type': 'category'}), ('Temporal:Historical events', {'created_at': '2024-03-04 20:16:14', 'updated_at': '2024-03-04 20:16:14', 'type': 'subclass', 'content': 'Historical events'}), ('Temporal:Schedules and timelines', {'created_at': '2024-03-04 20:16:14', 'updated_at': '2024-03-04 20:16:14', 'type': 'subclass', 'content': 'Schedules and timelines'}), ('Positional', {'created_at': '2024-03-04 20:16:14', 'updated_at': '2024-03-04 20:16:14', 'type': 'category'}), ('Positional:Geographical locations', {'created_at': '2024-03-04 20:16:14', 'updated_at': '2024-03-04 20:16:14', 'type': 'subclass', 'content': 'Geographical locations'}), ('Positional:Spatial data', {'created_at': '2024-03-04 20:16:14', 'updated_at': '2024-03-04 20:16:14', 'type': 'subclass', 'content': 'Spatial data'}), ('Propositions', {'created_at': '2024-03-04 20:16:14', 'updated_at': '2024-03-04 20:16:14', 'type': 'category'}), ('Propositions:Hypotheses and theories', {'created_at': '2024-03-04 20:16:14', 'updated_at': '2024-03-04 20:16:14', 'type': 'subclass', 'content': 'Hypotheses and theories'}), ('Propositions:Claims and arguments', {'created_at': '2024-03-04 20:16:14', 'updated_at': '2024-03-04 20:16:14', 'type': 'subclass', 'content': 'Claims and arguments'}), ('Personalization', {'created_at': '2024-03-04 20:16:14', 'updated_at': '2024-03-04 20:16:14', 'type': 'category'}), ('Personalization:User preferences', {'created_at': '2024-03-04 20:16:14', 'updated_at': '2024-03-04 20:16:14', 'type': 'subclass', 'content': 'User preferences'}), ('Personalization:User information', {'created_at': '2024-03-04 20:16:14', 'updated_at': '2024-03-04 20:16:14', 'type': 'subclass', 'content': 'User information'}), ('Natural Language Text', {'created_at': '2024-03-04 20:16:14', 'updated_at': '2024-03-04 20:16:14', 'type': 'category'}), ('Natural Language Text:Articles, essays, and reports', {'created_at': '2024-03-04 20:16:14', 'updated_at': '2024-03-04 20:16:14', 'type': 'subclass', 'content': 
'Articles, essays, and reports'})]\n", - "\n", - "Edges in the graph:\n", - "[('user123', 'Temporal', {'relationship': 'created'}), ('user123', 'Positional', {'relationship': 'created'}), ('user123', 'Propositions', {'relationship': 'created'}), ('user123', 'Personalization', {'relationship': 'created'}), ('user123', 'Natural Language Text', {'relationship': 'created'}), ('Temporal', 'Temporal:Historical events', {'relationship': 'includes'}), ('Temporal', 'Temporal:Schedules and timelines', {'relationship': 'includes'}), ('Positional', 'Positional:Geographical locations', {'relationship': 'includes'}), ('Positional', 'Positional:Spatial data', {'relationship': 'includes'}), ('Propositions', 'Propositions:Hypotheses and theories', {'relationship': 'includes'}), ('Propositions', 'Propositions:Claims and arguments', {'relationship': 'includes'}), ('Personalization', 'Personalization:User preferences', {'relationship': 'includes'}), ('Personalization', 'Personalization:User information', {'relationship': 'includes'}), ('Natural Language Text', 'Natural Language Text:Articles, essays, and reports', {'relationship': 'includes'})]\n" + "Semantic Layer\n" ] } ], + "source": [ + "for n,y in layer_1_graph[0].items():\n", + " print(ast.literal_eval(n)['layer'])" + ] + }, + { + "cell_type": "code", + "execution_count": 261, + "id": "142f4bd8-ec50-4715-ba58-981bda65116c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Nodes and their data:\n", + "GraphModel:user123 {'id': 'user123', 'user_properties': {'custom_properties': {'age': '30'}, 'location': {'location_id': 'ny', 'description': 'New York', 'default_relationship': {'type': 'located_in', 'properties': None}}}, 'documents': [{'doc_id': 'doc1', 'title': 'Document 1', 'summary': 'Summary of Document 1', 'content_id': 'content_id_for_doc1', 'doc_type': {'type_id': 'PDF', 'description': 'Portable Document Format', 'default_relationship': {'type': 'is_type', 'properties': 
None}}, 'categories': [{'category_id': 'finance', 'name': 'Finance', 'default_relationship': {'type': 'belongs_to', 'properties': None}}, {'category_id': 'tech', 'name': 'Technology', 'default_relationship': {'type': 'belongs_to', 'properties': None}}], 'default_relationship': {'type': 'has_document', 'properties': None}}, {'doc_id': 'doc2', 'title': 'Document 2', 'summary': 'Summary of Document 2', 'content_id': 'content_id_for_doc2', 'doc_type': {'type_id': 'TXT', 'description': 'Text File', 'default_relationship': {'type': 'is_type', 'properties': None}}, 'categories': [{'category_id': 'health', 'name': 'Health', 'default_relationship': {'type': 'belongs_to', 'properties': None}}, {'category_id': 'wellness', 'name': 'Wellness', 'default_relationship': {'type': 'belongs_to', 'properties': None}}], 'default_relationship': {'type': 'has_document', 'properties': None}}], 'default_fields': {'created_at': '2024-03-09 12:30:06', 'updated_at': '2024-03-09 12:30:06'}}\n", + "UserProperties:default {'custom_properties': {'age': '30'}, 'location': {'location_id': 'ny', 'description': 'New York', 'default_relationship': {'type': 'located_in', 'properties': None}}}\n", + "UserLocation:ny {'location_id': 'ny', 'description': 'New York'}\n", + "Relationship:default {'type': 'has_document', 'properties': None}\n", + "Document:doc1 {'doc_id': 'doc1', 'title': 'Document 1', 'summary': 'Summary of Document 1', 'content_id': 'content_id_for_doc1', 'doc_type': {'type_id': 'PDF', 'description': 'Portable Document Format', 'default_relationship': {'type': 'is_type', 'properties': None}}, 'categories': [{'category_id': 'finance', 'name': 'Finance', 'default_relationship': {'type': 'belongs_to', 'properties': None}}, {'category_id': 'tech', 'name': 'Technology', 'default_relationship': {'type': 'belongs_to', 'properties': None}}]}\n", + "DocumentType:PDF {'type_id': 'PDF', 'description': 'Portable Document Format'}\n", + "Category:default {'category_id': 'wellness', 'name': 
'Wellness'}\n", + "Document:doc2 {'doc_id': 'doc2', 'title': 'Document 2', 'summary': 'Summary of Document 2', 'content_id': 'content_id_for_doc2', 'doc_type': {'type_id': 'TXT', 'description': 'Text File', 'default_relationship': {'type': 'is_type', 'properties': None}}, 'categories': [{'category_id': 'health', 'name': 'Health', 'default_relationship': {'type': 'belongs_to', 'properties': None}}, {'category_id': 'wellness', 'name': 'Wellness', 'default_relationship': {'type': 'belongs_to', 'properties': None}}]}\n", + "DocumentType:TXT {'type_id': 'TXT', 'description': 'Text File'}\n", + "LLM_LAYER_CLASSIFICATION:TEXT:123 {'data_type': 'text', 'context_name': 'TEXT', 'layer_name': 'News stories and blog posts'}\n", + "123 {}\n", + "LLM_CLASSIFICATION:LAYER:News stories and blog posts:123 {'data_type': 'text', 'context_name': 'TEXT', 'layer_name': 'News stories and blog posts'}\n", + "LLM_LAYER_CLASSIFICATION:TEXT:doc1 {'data_type': 'text', 'context_name': 'TEXT', 'layer_name': 'News stories and blog posts'}\n", + "doc1 {}\n", + "LLM_CLASSIFICATION:LAYER:News stories and blog posts:doc1 {'data_type': 'text', 'context_name': 'TEXT', 'layer_name': 'News stories and blog posts'}\n", + "\n", + "Edges and their data:\n", + "GraphModel:user123 -> UserProperties:default {}\n", + "GraphModel:user123 -> Document:doc1 {'type': 'has_document', 'properties': None}\n", + "GraphModel:user123 -> Document:doc2 {'type': 'has_document', 'properties': None}\n", + "UserProperties:default -> UserLocation:ny {'type': 'located_in', 'properties': None}\n", + "UserLocation:ny -> Relationship:default {}\n", + "Relationship:default -> DocumentType:PDF {}\n", + "Relationship:default -> Category:default {}\n", + "Relationship:default -> Document:doc1 {}\n", + "Relationship:default -> DocumentType:TXT {}\n", + "Relationship:default -> Document:doc2 {}\n", + "Document:doc1 -> DocumentType:PDF {'type': 'is_type', 'properties': None}\n", + "Document:doc1 -> Category:default {'type': 'belongs_to', 
'properties': None}\n", + "Category:default -> Document:doc2 {'type': 'belongs_to', 'properties': None}\n", + "Document:doc2 -> DocumentType:TXT {'type': 'is_type', 'properties': None}\n", + "LLM_LAYER_CLASSIFICATION:TEXT:123 -> 123 {'relationship': 'classified_as'}\n", + "LLM_LAYER_CLASSIFICATION:TEXT:123 -> LLM_CLASSIFICATION:LAYER:News stories and blog posts:123 {'relationship': 'contains_analysis'}\n", + "LLM_LAYER_CLASSIFICATION:TEXT:doc1 -> doc1 {'relationship': 'classified_as'}\n", + "LLM_LAYER_CLASSIFICATION:TEXT:doc1 -> LLM_CLASSIFICATION:LAYER:News stories and blog posts:doc1 {'relationship': 'contains_analysis'}\n" + ] + } + ], + "source": [ + " print(\"Nodes and their data:\")\n", + " for node, data in U.nodes(data=True):\n", + " print(node, data)\n", + "\n", + " # Print edges with their data\n", + " print(\"\\nEdges and their data:\")\n", + " for source, target, data in U.edges(data=True):\n", + " print(f\"{source} -> {target} {data}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 268, + "id": "58644c64-7ef0-415f-8e41-e2edcf5fd15b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[KnowledgeGraph(nodes=[Node(id=1, description='British society', category='society', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=2, description='Animals', category='entity', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=3, description='Pets', category='entity', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=4, description='Kate Fox', category='person', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=5, description='Dogs', category='animal', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=6, description='Public transport', category='transport', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=7, description='Dog owners', category='people', 
color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=8, description='Pandemic', category='event', color='blue', memory_type='episodic', created_at=None, summarized=None), Node(id=9, description='Increase in number of pet dogs in the UK from about 9 million to 13 million between 2019 and 2022', category='event', color='blue', memory_type='episodic', created_at=None, summarized=None), Node(id=10, description='Rise in number of dog attacks', category='event', color='blue', memory_type='episodic', created_at=None, summarized=None), Node(id=11, description='Designer dog breeds in fashion', category='trend', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=12, description='Pets treated as substitutes for children', category='trend', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=13, description='Total spend on pets in the UK has more than doubled in the past decade, reaching nearly £10bn last year', category='event', color='blue', memory_type='episodic', created_at=None, summarized=None), Node(id=14, description='Pet boutiques selling luxury pet products', category='business', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=15, description='Basic needs and desires of pets', category='concept', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=16, description='Dog-friendly establishments like restaurants, cinemas, churches', category='places', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=17, description='Behavioral problems in dogs', category='condition', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=18, description='Wellbeing of dogs', category='concept', color='blue', memory_type='semantic', created_at=None, summarized=None)], edges=[Edge(source=1, target=2, description=\"British society's fondness for animals\", color='blue', created_at=None, 
summarized=None), Edge(source=6, target=5, description='Dogs are welcomed on public transport in the UK', color='blue', created_at=None, summarized=None), Edge(source=3, target=4, description='Kate Fox observed pet keeping as a way of life in British society', color='blue', created_at=None, summarized=None), Edge(source=7, target=8, description='Dog ownership increased during the pandemic', color='blue', created_at=None, summarized=None), Edge(source=9, target=1, description='British society saw an increase in the number of pet dogs', color='blue', created_at=None, summarized=None), Edge(source=10, target=6, description='Increase in dog attacks may be correlated with dogs in public spaces', color='blue', created_at=None, summarized=None), Edge(source=12, target=1, description='Shift in British society to treat pets similarly to children', color='blue', created_at=None, summarized=None), Edge(source=13, target=7, description='Dog owners spending more on pets', color='blue', created_at=None, summarized=None), Edge(source=14, target=16, description='Pet boutiques and dog-friendly establishments part of the same trend', color='blue', created_at=None, summarized=None), Edge(source=17, target=18, description='Behavioral problems can affect dog wellbeing', color='blue', created_at=None, summarized=None), Edge(source=1, target=11, description=\"British society's interest in designer dog breeds\", color='blue', created_at=None, summarized=None)]),\n", + " KnowledgeGraph(nodes=[Node(id=1, description='Britons show a strong affection for animals, viewing pet-keeping as a way of life.', category='people', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=2, description='In British culture, pets are an outlet for emotions and serve as connectors with others.', category='concept', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=3, description='Dogs in the UK are seen as emotional outlets and receive welcoming 
treatment in public spaces.', category='animals', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=4, description='In the UK, dogs are encouraged to accompany owners on public transport.', category='entity', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=5, description='The COVID-19 pandemic led to a surge in dog ownership in the UK.', category='event', color='blue', memory_type='episodic', created_at=None, summarized=None), Node(id=6, description='Pet dog population in the UK rose from about 9 million to 13 million between 2019 and 2022.', category='data', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=7, description='Green spaces have been declining, impacting dog-friendly areas.', category='entity', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=8, description='Dog attacks in England and Wales increased by over a third between 2018 and 2022.', category='event', color='blue', memory_type='episodic', created_at=None, summarized=None), Node(id=9, description='Designer dog breeds in the UK often face health issues due to their physical features.', category='concept', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=10, description='A portion of pet owners in the UK enjoy keeping up with pet trends and products.', category='concept', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=11, description='Expenditure on pets in the UK has doubled in the past decade, with non-essential items becoming popular.', category='data', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=12, description='The health and wellbeing of dogs are often compromised due to human lifestyle and consumerism.', category='concept', color='blue', memory_type='semantic', created_at=None, summarized=None)], edges=[Edge(source=1, target=2, description='have a strong affection 
for', color='blue', created_at=None, summarized=None), Edge(source=2, target=3, description='are particularly significant as', color='blue', created_at=None, summarized=None), Edge(source=3, target=4, description='are encouraged on', color='blue', created_at=None, summarized=None), Edge(source=5, target=6, description='caused an increase in', color='blue', created_at=None, summarized=None), Edge(source=7, target=6, description='are contrasted with the rise in', color='blue', created_at=None, summarized=None), Edge(source=8, target=7, description='increase may be affected by the decline in', color='blue', created_at=None, summarized=None), Edge(source=9, target=10, description='are part of', color='blue', created_at=None, summarized=None), Edge(source=10, target=11, description='contribute to', color='blue', created_at=None, summarized=None), Edge(source=12, target=9, description='is impacted by the promotion of', color='blue', created_at=None, summarized=None)]),\n", + " KnowledgeGraph(nodes=[Node(id=1, description='Britons have a special relationship with animals, considering them an integral part of their lifestyle and an outlet for emotions and social engagement.', category='cultural practice', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=2, description='In the UK, dogs are allowed and encouraged to ride on public transport.', category='policy', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=3, description='Many UK establishments are pet-friendly, often displaying signs that humorously prioritize dogs over people.', category='social norm', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=4, description='The number of pet dogs in the UK surged from about nine million to 13 million between 2019 and 2022, partly influenced by the COVID-19 pandemic.', category='event', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=5, 
description=\"The Dogs Trust charity coined the slogan 'A dog is for life, not just for Christmas' to combat impulsive pet adoption.\", category='campaign', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=6, description='Dog attacks recorded by police in England and Wales have risen by more than a third from 2018 to 2022.', category='statistic', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=7, description=\"Living beings such as pets are increasingly being treated as commodities, evident in the popularity of 'designer' breeds and the aesthetic-driven breeding choices.\", category='trend', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=8, description='The total spending on pets in the UK has surpassed nearly £10bn, with a significant part going to non-essential items and services.', category='economic statistic', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=9, description='Modern lifestyles involve leaving dogs alone for extended periods, which can lead to behavioral problems.', category='social issue', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=10, description='There is a shift in pet parenting, with pets increasingly treated as child substitutes rather than valued for the qualities inherent to their species.', category='social change', color='blue', memory_type='semantic', created_at=None, summarized=None)], edges=[Edge(source=1, target=2, description='The relationship Britons have with animals extends to policies such as allowing dogs on public transport.', color='blue', created_at=None, summarized=None), Edge(source=2, target=3, description='The pet-friendly nature of public transport in the UK is echoed in the pet-friendly approach of many establishments.', color='blue', created_at=None, summarized=None), Edge(source=4, target=5, description=\"The increase in pet adoption during the 
pandemic highlights the relevance of Dogs Trust's long-standing slogan.\", color='blue', created_at=None, summarized=None), Edge(source=4, target=6, description='The surge in pet dog numbers during the pandemic correlates with an increase in dog attacks.', color='blue', created_at=None, summarized=None), Edge(source=7, target=8, description='The commodification of pets is mirrored in the increased expenditure on them.', color='blue', created_at=None, summarized=None), Edge(source=9, target=6, description='Behavioral problems due to modern lifestyle impacts on dogs may contribute to the rise in dog attacks.', color='blue', created_at=None, summarized=None), Edge(source=10, target=8, description='The shift in pet parenting attitudes towards treating pets as child substitutes drives up pet-related spending.', color='blue', created_at=None, summarized=None)]),\n", + " KnowledgeGraph(nodes=[Node(id=1, description='Britons have a notable fondness for keeping pets, particularly dogs which serve as outlets for affection and social interaction.', category='cultural behavior', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=2, description='The UK is accommodating to dogs in public spaces, with establishments often welcoming dogs and integrating them into social norms.', category='cultural practice', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=3, description='Pet ownership surged during the COVID-19 pandemic, with the number of pet dogs in the UK rising significantly.', category='event', color='blue', memory_type='episodic', created_at=None, summarized=None), Node(id=4, description='The Dogs Trust charity has long advocated for the lifelong commitment to pets, emphasizing that dogs are not just for temporary companionship.', category='organization message', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=5, description='The increase in dog ownership has led to various 
consequences, including a rise in dog attacks and dogs being listed for rehoming after the pandemic.', category='societal issue', color='blue', memory_type='episodic', created_at=None, summarized=None), Node(id=6, description='Pets are increasingly treated as commodities or status symbols, leading to ethical concerns and health issues for certain breeds.', category='ethical concern', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=7, description='The humanization of pets is on the rise, with dogs being treated more like human children and less like animals with their own specific needs.', category='behavioral trend', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=8, description='Consumerism related to pets has grown drastically in the UK, with a significant amount of spending on non-essential pet products and services.', category='economic trend', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=9, description='There is a disconnect between how people treat their pets and what pets actually need, manifesting in overlooked animal welfare and indulgence in human-like treats.', category='welfare issue', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=10, description='Pets often suffer from behavior problems due to inadequate exercise, social interaction, and consistent routines, which can be mitigated with better pet care practices.', category='animal behavior', color='blue', memory_type='semantic', created_at=None, summarized=None)], edges=[Edge(source=1, target=2, description='The cultural affinity for pets in the UK translates into dog-friendly public spaces and societal norms.', color='blue', created_at=None, summarized=None), Edge(source=3, target=1, description='The COVID-19 pandemic led to a marked increase in pet ownership in the UK, reflecting the cultural behavior of Britons towards pets.', color='blue', created_at=None, 
summarized=None), Edge(source=4, target=3, description='The Dogs Trust charity message echoes the need for long-term responsibility taken by new pet owners during the pandemic.', color='blue', created_at=None, summarized=None), Edge(source=5, target=3, description='The surge in pet ownership during the pandemic has resulted in societal challenges, including dog attacks and rehoming issues.', color='blue', created_at=None, summarized=None), Edge(source=6, target=7, description='The commodification of pets correlates with the trend of treating them as human substitutes, raising ethical and health concerns.', color='blue', created_at=None, summarized=None), Edge(source=8, target=7, description='Increased pet consumerism is evident in the humanization and indulgence of pets, leading to unnecessary spending on pet trends and products.', color='blue', created_at=None, summarized=None), Edge(source=9, target=8, description='The misalignment between human treatment of pets and their actual needs is mirrored in the increase in consumerism and pet indulgence.', color='blue', created_at=None, summarized=None), Edge(source=10, target=9, description='Behavior problems in pets often arise from the disconnect in pet care, highlighting the need for addressing their true needs and welfare.', color='blue', created_at=None, summarized=None)]),\n", + " KnowledgeGraph(nodes=[Node(id=1, description='Britons have always been a bit silly about animals', category='cultural trait', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=2, description='Keeping pets is an entire way of life in England', category='culture', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=3, description='Dogs serve as an acceptable outlet for British emotions', category='societal behavior', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=4, description='In the UK, dogs are permitted and encouraged on public transport', 
category='policy', color='blue', memory_type='episodic', created_at=None, summarized=None), Node(id=5, description='The number of pet dogs in the UK increased from about nine million to 13 million between 2019 and 2022', category='statistic', color='blue', memory_type='episodic', created_at=None, summarized=None), Node(id=6, description='Dog attacks in England and Wales rose by over a third between 2018 and 2022', category='statistic', color='blue', memory_type='episodic', created_at=None, summarized=None), Node(id=7, description='The pandemic led to an increase in dog ownership as people spent more time at home', category='event', color='blue', memory_type='episodic', created_at=None, summarized=None), Node(id=8, description='Pets are being treated as commodities in modern times', category='social issue', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=9, description='Certain dog breeds are chosen as a signifier of masculinity or for their aesthetic appeal', category='trend', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=10, description='Pets are increasingly treated as substitutes for children in Britain', category='social trend', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=11, description='The total spend on pets in the UK has more than doubled in the past decade, reaching nearly £10bn last year', category='economic statistic', color='blue', memory_type='episodic', created_at=None, summarized=None), Node(id=12, description='There is a rise in dog-friendly establishments like restaurants, cinemas, and churches', category='trend', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=13, description='Modern busy schedules lead to dogs suffering daily deprivation and behavioral issues', category='societal issue', color='blue', memory_type='semantic', created_at=None, summarized=None)], edges=[Edge(source=1, target=3, 
description='reflects', color='blue', created_at=None, summarized=None), Edge(source=2, target=4, description='leads to', color='blue', created_at=None, summarized=None), Edge(source=7, target=5, description='caused', color='blue', created_at=None, summarized=None), Edge(source=8, target=9, description='associated with', color='blue', created_at=None, summarized=None), Edge(source=10, target=11, description='contributes to', color='blue', created_at=None, summarized=None), Edge(source=13, target=6, description='leads to', color='blue', created_at=None, summarized=None), Edge(source=6, target=12, description='in contrast to', color='blue', created_at=None, summarized=None)]),\n", + " KnowledgeGraph(nodes=[Node(id=1, description='Keeping pets is seen as an entire way of life in Britain, with dogs as an acceptable outlet for emotions', category='cultural practice', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=2, description='Dogs in the UK are accommodated in society and considered friends, with allowances in public spaces', category='social norm', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=3, description='The pandemic led to a rise in pet dogs from about 9 million to 13 million between 2019 and 2022', category='event', color='blue', memory_type='episodic', created_at=None, summarized=None), Node(id=4, description=\"The Dogs Trust charity coined the slogan 'A dog is for life, not just for Christmas' in 1978\", category='campaign', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=5, description='Green spaces have been declining, affecting dog-friendly areas', category='environment change', color='blue', memory_type='episodic', created_at=None, summarized=None), Node(id=6, description='Dog attacks recorded by police in England and Wales rose by over a third between 2018 and 2022', category='statistic', color='blue', memory_type='episodic', created_at=None, 
summarized=None), Node(id=7, description=\"Post-pandemic adjustments lead to increase in dogs being rehomed as they no longer fit owners' lifestyles\", category='social issue', color='blue', memory_type='episodic', created_at=None, summarized=None), Node(id=8, description=\"Living beings are being downgraded to commodities, influenced by 'designer' breeds in fashion and pets treated as children substitutes\", category='cultural shift', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=9, description='Modern human lifestyles often do not align with the needs of dogs, leading to behavioral problems', category='social problem', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=10, description='Pet spending in the UK reached nearly £10bn with a trend towards nonessential indulgences for pets', category='economic trend', color='blue', memory_type='episodic', created_at=None, summarized=None), Node(id=11, description=\"The rise of pet 'boutiques' and dog-friendly businesses is an indicator of gentrification\", category='social phenomenon', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=12, description='There is a trend of overindulging pets with nonessential items and services', category='social behavior', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=13, description='Dogs have basic needs that are often overshadowed by human projection of indulgences', category='animal welfare', color='blue', memory_type='semantic', created_at=None, summarized=None), Node(id=14, description='Proper pet care requires sacrifices and prioritizing the wellbeing of pets', category='ethical principle', color='blue', memory_type='semantic', created_at=None, summarized=None)], edges=[Edge(source=1, target=2, description='reflects', color='blue', created_at=None, summarized=None), Edge(source=2, target=3, description='facilitated the increase in', color='blue', 
created_at=None, summarized=None), Edge(source=3, target=6, description='contributed to the', color='blue', created_at=None, summarized=None), Edge(source=8, target=10, description='leads to growth in', color='blue', created_at=None, summarized=None), Edge(source=9, target=7, description='resulting in', color='blue', created_at=None, summarized=None), Edge(source=12, target=10, description='drives', color='blue', created_at=None, summarized=None), Edge(source=14, target=13, description='based on understanding', color='blue', created_at=None, summarized=None)])]" + ] + }, + "execution_count": 268, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "c94056bd-8d32-48e2-9982-e9af06da3333", + "metadata": {}, + "outputs": [], + "source": [ + "# from cognitive_architecture.modules.cognify.graph.create_semantic_graph import create_semantic_graph" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "dd3f0e55-9f9d-4804-9ad6-31afd2088ab5", + "metadata": {}, + "outputs": [], "source": [ "# Example usage\n", "user_id = 'user123'\n", @@ -1504,34 +831,55 @@ "# \"Natural Language Text\": [\"Articles, essays, and reports\", \"Books and manuscripts\"]\n", "# }\n", "\n", - "G = create_user_content_graph(user_id, custom_user_properties, required_layers_one)\n", - "\n", - "# Accessing the graph\n", - "print(\"Nodes in the graph:\")\n", - "print(G.nodes(data=True))\n", - "print(\"\\nEdges in the graph:\")\n", - "print(G.edges(data=True))" + "# G = await create_semantic_graph(user_id, custom_user_properties, transformed_dict_1)" ] }, { "cell_type": "code", - "execution_count": 25, - "id": "b3160a1d-a6ea-40ce-a521-37ad26d31ffb", + "execution_count": 233, + "id": "2cc7c3bb-7cc0-453b-beab-2983a703ccda", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "CognitiveCategory(name='Natural Language Text', cognitive_subgroups=[CognitiveLayerSubgroup(id=1, name='Articles, essays, and 
reports', data_type='TEXT')])" + "{'data_type': 'text',\n", + " 'context_name': 'TEXT',\n", + " 'layer_name': 'News stories and blog posts'}" ] }, - "execution_count": 25, + "execution_count": 233, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "required_layers_one" + "transformed_dict_1" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "b3160a1d-a6ea-40ce-a521-37ad26d31ffb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Nodes in the graph:\n", + "[('user123', {'created_at': '2024-03-08 14:41:03', 'updated_at': '2024-03-08 14:41:03', 'username': 'exampleUser', 'email': 'user@example.com', 'exist': True}), ('Temporal', {'created_at': '2024-03-08 14:41:03', 'updated_at': '2024-03-08 14:41:03', 'type': 'category', 'exist': True}), ('Temporal:Historical events', {'created_at': '2024-03-08 14:41:03', 'updated_at': '2024-03-08 14:41:03', 'type': 'subclass', 'content': 'Historical events', 'exist': True}), ('Temporal:Schedules and timelines', {'created_at': '2024-03-08 14:41:03', 'updated_at': '2024-03-08 14:41:03', 'type': 'subclass', 'content': 'Schedules and timelines', 'exist': True}), ('Positional', {'created_at': '2024-03-08 14:41:03', 'updated_at': '2024-03-08 14:41:03', 'type': 'category', 'exist': True}), ('Positional:Geographical locations', {'created_at': '2024-03-08 14:41:03', 'updated_at': '2024-03-08 14:41:03', 'type': 'subclass', 'content': 'Geographical locations', 'exist': True}), ('Positional:Spatial data', {'created_at': '2024-03-08 14:41:03', 'updated_at': '2024-03-08 14:41:03', 'type': 'subclass', 'content': 'Spatial data', 'exist': True}), ('Propositions', {'created_at': '2024-03-08 14:41:03', 'updated_at': '2024-03-08 14:41:03', 'type': 'category', 'exist': True}), ('Propositions:Hypotheses and theories', {'created_at': '2024-03-08 14:41:03', 'updated_at': '2024-03-08 14:41:03', 'type': 'subclass', 'content': 'Hypotheses and theories', 'exist': True}), 
('Propositions:Claims and arguments', {'created_at': '2024-03-08 14:41:03', 'updated_at': '2024-03-08 14:41:03', 'type': 'subclass', 'content': 'Claims and arguments', 'exist': True}), ('Personalization', {'created_at': '2024-03-08 14:41:03', 'updated_at': '2024-03-08 14:41:03', 'type': 'category', 'exist': True}), ('Personalization:User preferences', {'created_at': '2024-03-08 14:41:03', 'updated_at': '2024-03-08 14:41:03', 'type': 'subclass', 'content': 'User preferences', 'exist': True}), ('Personalization:User information', {'created_at': '2024-03-08 14:41:03', 'updated_at': '2024-03-08 14:41:03', 'type': 'subclass', 'content': 'User information', 'exist': True}), ('News Stories', {'created_at': '2024-03-08 14:41:03', 'updated_at': '2024-03-08 14:41:03', 'type': 'category', 'exist': True}), ('News Stories:News stories and blog posts', {'created_at': '2024-03-08 14:41:03', 'updated_at': '2024-03-08 14:41:03', 'type': 'subclass', 'content': 'News stories and blog posts', 'exist': True})]\n", + "\n", + "Edges in the graph:\n", + "[('user123', 'Temporal', {'relationship': 'created'}), ('user123', 'Positional', {'relationship': 'created'}), ('user123', 'Propositions', {'relationship': 'created'}), ('user123', 'Personalization', {'relationship': 'created'}), ('user123', 'News Stories', {'relationship': 'created'}), ('Temporal', 'Temporal:Historical events', {'relationship': 'includes'}), ('Temporal', 'Temporal:Schedules and timelines', {'relationship': 'includes'}), ('Positional', 'Positional:Geographical locations', {'relationship': 'includes'}), ('Positional', 'Positional:Spatial data', {'relationship': 'includes'}), ('Propositions', 'Propositions:Hypotheses and theories', {'relationship': 'includes'}), ('Propositions', 'Propositions:Claims and arguments', {'relationship': 'includes'}), ('Personalization', 'Personalization:User preferences', {'relationship': 'includes'}), ('Personalization', 'Personalization:User information', {'relationship': 'includes'}), ('News 
Stories', 'News Stories:News stories and blog posts', {'relationship': 'includes'})]\n" + ] + } + ], + "source": [ + "print(\"Nodes in the graph:\")\n", + "print(G.nodes(data=True))\n", + "print(\"\\nEdges in the graph:\")\n", + "print(G.edges(data=True))" ] }, { @@ -1552,17 +900,17 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 32, "id": "4dab2ff0-0d12-4a00-a4e4-fb901e701bd3", "metadata": {}, "outputs": [], "source": [ - "B = create_user_content_graph(user_id, custom_user_properties, required_layers_two, existing_graph=G)" + "B = create_user_content_graph(user_id, custom_user_properties, transformed_dict_2, existing_graph=G)" ] }, { "cell_type": "code", - "execution_count": 269, + "execution_count": 33, "id": "627d42fd-d2ce-4ccd-a2a1-2f7ac2f463cf", "metadata": {}, "outputs": [ @@ -1570,7 +918,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "MultiDiGraph with 16 nodes and 15 edges\n" + "MultiDiGraph with 15 nodes and 14 edges\n" ] } ], @@ -1580,7 +928,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 34, "id": "512f15be-0114-4c8c-9754-e82f2fa16344", "metadata": {}, "outputs": [ @@ -1588,7 +936,7 @@ "data": { "text/html": [ "\n", - " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import graphistry\n", + "import pandas as pd\n", + "\n", + "# Assuming Graphistry is already configured with API key\n", + "# graphistry.register(api=3, username='your_username', password='your_password')\n", + "\n", + "# Convert NetworkX graph to a Pandas DataFrame\n", + "edges = nx.to_pandas_edgelist(R)\n", + "graphistry.register(api=3, username='Vasilije1990', password='Q@HLdgv5SMUsGxy') \n", + "\n", + "# Visualize the graph\n", + "graphistry.edges(edges, 'source', 'target').plot()" + ] + }, + { + "cell_type": "code", + "execution_count": 217, + "id": "4ed998eb-34e7-40f0-b638-80f36fb233e5", + 
"metadata": {}, + "outputs": [], "source": [] }, + { + "cell_type": "code", + "execution_count": 206, + "id": "8887b4a7-9c0e-474e-b0e2-8545e904e58a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Node 'Relationship:default' has been removed from the graph.\n" + ] + } + ], + "source": [ + "def delete_node(G, node_id: str):\n", + " \"\"\"\n", + " Deletes a node and its associated edges from the graph.\n", + "\n", + " Parameters:\n", + " - G: The graph from which the node will be removed (NetworkX graph).\n", + " - node_id: The ID of the node to be removed.\n", + " \"\"\"\n", + " # Check if the node exists in the graph\n", + " if G.has_node(node_id):\n", + " # Remove the node and its associated edges\n", + " G.remove_node(node_id)\n", + " print(f\"Node '{node_id}' has been removed from the graph.\")\n", + " else:\n", + " print(f\"Node '{node_id}' not found in the graph.\")\n", + " return G\n", + "\n", + "# Example usage:\n", + "# Assume G is your NetworkX graph\n", + "R = delete_node(R, \"Relationship:default\")" + ] + }, + { + "cell_type": "code", + "execution_count": 208, + "id": "ca9cf69d-e56a-45e3-9812-f862c0f138c5", + "metadata": {}, + "outputs": [], + "source": [ + "from pydantic import BaseModel\n", + "from typing import List, Optional, Dict, Any\n", + "\n", + "class Relationship(BaseModel):\n", + " type: str\n", + " properties: Optional[Dict[str, Any]] = None\n", + "\n", + "class Task(BaseModel):\n", + " task_id: str\n", + " name: str\n", + " description: Optional[str] = None\n", + " subtasks: List['Task'] = []\n", + " default_relationship: Relationship = Relationship(type='part_of')\n", + "\n", + "Task.update_forward_refs()\n", + "\n", + "class ProjectType(BaseModel):\n", + " type_id: str\n", + " name: str\n", + " default_relationship: Relationship = Relationship(type='is_project_type')\n", + "\n", + "class Project(BaseModel):\n", + " project_id: str\n", + " title: str\n", + " summary: Optional[str] = 
None\n", + " project_type: ProjectType\n", + " tasks: List[Task]\n", + " default_relationship: Relationship = Relationship(type='contains_project')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 209, + "id": "05dd25bc-05c9-4b28-81c5-c7878c6a7a1a", + "metadata": {}, + "outputs": [], + "source": [ + "# Instantiate subtasks\n", + "subtask1 = Task(\n", + " task_id=\"subtask1\",\n", + " name=\"Subtask 1\",\n", + " description=\"This is a subtask\",\n", + " default_relationship=Relationship(type=\"subtask_of\")\n", + ")\n", + "\n", + "subtask2 = Task(\n", + " task_id=\"subtask2\",\n", + " name=\"Subtask 2\",\n", + " description=\"This is another subtask\",\n", + " default_relationship=Relationship(type=\"subtask_of\")\n", + ")\n", + "\n", + "# Instantiate tasks with subtasks\n", + "task1 = Task(\n", + " task_id=\"task1\",\n", + " name=\"Task 1\",\n", + " description=\"This is the first main task\",\n", + " subtasks=[subtask1, subtask2],\n", + " default_relationship=Relationship(type=\"task_of\")\n", + ")\n", + "\n", + "task2 = Task(\n", + " task_id=\"task2\",\n", + " name=\"Task 2\",\n", + " description=\"This is the second main task\",\n", + " default_relationship=Relationship(type=\"task_of\")\n", + ")\n", + "\n", + "# Instantiate a project type\n", + "project_type = ProjectType(\n", + " type_id=\"type1\",\n", + " name=\"Software Development\",\n", + " default_relationship=Relationship(type=\"type_of_project\")\n", + ")\n", + "\n", + "# Instantiate a project with tasks and a project type\n", + "project = Project(\n", + " project_id=\"project1\",\n", + " title=\"New Software Development Project\",\n", + " summary=\"This project involves developing a new software application.\",\n", + " project_type=project_type,\n", + " tasks=[task1, task2],\n", + " default_relationship=Relationship(type=\"contains\")\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 210, + "id": "b4bf969f-5677-40fc-b8fe-cd3cc12ad809", + "metadata": {}, + "outputs": [], + 
"source": [ + "import networkx as nx\n", + "\n", + "# Assuming `create_dynamic` function is defined as you provided and `generate_node_id` is implemented\n", + "\n", + "# Create a graph from the project instance\n", + "graph = create_dynamic(project)\n", + "\n", + "# You can now use the graph for various analyses, visualization, etc.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 211, + "id": "4f678734-e615-4ac9-a1a7-3bed128d3df3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Nodes and their data:\n", + "Project:default {'project_id': 'project1', 'title': 'New Software Development Project', 'summary': 'This project involves developing a new software application.', 'project_type': {'type_id': 'type1', 'name': 'Software Development', 'default_relationship': {'type': 'type_of_project', 'properties': None}}, 'tasks': [{'task_id': 'task1', 'name': 'Task 1', 'description': 'This is the first main task', 'subtasks': [{'task_id': 'subtask1', 'name': 'Subtask 1', 'description': 'This is a subtask', 'subtasks': [], 'default_relationship': {'type': 'subtask_of', 'properties': None}}, {'task_id': 'subtask2', 'name': 'Subtask 2', 'description': 'This is another subtask', 'subtasks': [], 'default_relationship': {'type': 'subtask_of', 'properties': None}}], 'default_relationship': {'type': 'task_of', 'properties': None}}, {'task_id': 'task2', 'name': 'Task 2', 'description': 'This is the second main task', 'subtasks': [], 'default_relationship': {'type': 'task_of', 'properties': None}}]}\n", + "ProjectType:type1 {'type_id': 'type1', 'name': 'Software Development'}\n", + "Relationship:default {'type': 'contains', 'properties': None}\n", + "Task:default {'task_id': 'task2', 'name': 'Task 2', 'description': 'This is the second main task', 'subtasks': []}\n", + "\n", + "Edges and their data:\n", + "Project:default -> ProjectType:type1 {'type': 'type_of_project', 'properties': None}\n", + "Project:default -> 
Task:default {'type': 'task_of', 'properties': None}\n", + "Project:default -> Relationship:default {}\n", + "ProjectType:type1 -> Relationship:default {}\n", + "Relationship:default -> Task:default {}\n", + "Task:default -> Task:default {'type': 'subtask_of', 'properties': None}\n" + ] + } + ], + "source": [ + " print(\"Nodes and their data:\")\n", + " for node, data in graph.nodes(data=True):\n", + " print(node, data)\n", + "\n", + " # Print edges with their data\n", + " print(\"\\nEdges and their data:\")\n", + " for source, target, data in graph.edges(data=True):\n", + " print(f\"{source} -> {target} {data}\")" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "675e2037-65a8-4f97-974a-1bfc8789ea78", + "id": "221728b7-4a08-427f-bb35-9db9fe5a4f3f", "metadata": {}, "outputs": [], "source": [] diff --git a/cognitive_architecture/config.py b/cognitive_architecture/config.py index 0ca37d6d6..6139d99a3 100644 --- a/cognitive_architecture/config.py +++ b/cognitive_architecture/config.py @@ -43,7 +43,7 @@ class Config: graph_filename = os.getenv("GRAPH_NAME", "cognee_graph.pkl") # Model parameters - model: str = "gpt-4-1106-preview" + model: str = "gpt-4-0125-preview" model_endpoint: str = "openai" openai_key: Optional[str] = os.getenv("OPENAI_API_KEY") openai_temperature: float = float(os.getenv("OPENAI_TEMPERATURE", 0.0)) diff --git a/cognitive_architecture/infrastructure/databases/graph/get_graph_client.py b/cognitive_architecture/infrastructure/databases/graph/get_graph_client.py index d6a4559a7..5ae1204db 100644 --- a/cognitive_architecture/infrastructure/databases/graph/get_graph_client.py +++ b/cognitive_architecture/infrastructure/databases/graph/get_graph_client.py @@ -3,23 +3,20 @@ from typing import Type from cognitive_architecture.config import Config from .graph_db_interface import GraphDBInterface from .networkx.adapter import NetworXAdapter -# Assuming Neo4jAdapter is defined somewhere # from .neo4j.adapter import Neo4jAdapter from enum 
import Enum, auto - +from cognitive_architecture.shared.data_models import GraphDBType config = Config() config.load() -class GraphDBType(Enum): - NETWORKX = auto() - NEO4J = auto() -def get_graph_client(graph_type: GraphDBType, graph_filename: str) -> Type[GraphDBInterface]: + +def get_graph_client(graph_type: GraphDBType, graph_filename: str=None) -> GraphDBInterface : """Factory function to get the appropriate graph client based on the graph type.""" - if graph_filename is not None: - config.graph_filename = graph_filename + if graph_filename is None: + graph_filename= config.graph_filename if graph_type == GraphDBType.NETWORKX: - return NetworXAdapter(filename = config.graph_filename) # Adjust as needed for NetworkX adapter configuration + return NetworXAdapter(filename = graph_filename) elif graph_type == GraphDBType.NEO4J: # return Neo4jAdapter(config.neo4j_config) # Uncomment and adjust as needed for Neo4j adapter configuration raise NotImplementedError("Neo4j adapter is not implemented yet.") diff --git a/cognitive_architecture/infrastructure/databases/graph/graph_db_interface.py b/cognitive_architecture/infrastructure/databases/graph/graph_db_interface.py index 111292bbf..bfdbd00b9 100644 --- a/cognitive_architecture/infrastructure/databases/graph/graph_db_interface.py +++ b/cognitive_architecture/infrastructure/databases/graph/graph_db_interface.py @@ -7,33 +7,56 @@ class GraphDBInterface(Protocol): """ Save and Load Graphs """ @abstractmethod - async def save_graph( + async def graph(self): + raise NotImplementedError + + @abstractmethod + async def save_graph_to_file( self, - path: str + file_path: str = None ): raise NotImplementedError @abstractmethod - async def load_graph( + async def load_graph_from_file( self, - path: str + file_path: str = None ): raise NotImplementedError @abstractmethod - async def delete_graph( + async def delete_graph_from_file( self, - path: str + path: str = None ): raise NotImplementedError """ CRUD operations on graph 
nodes """ @abstractmethod - async def create(self, - user_id:str, - custom_user_properties:str, - required_layers:list, - default_fields:dict + + async def add_node( + self, + id: str, + **kwargs ): raise NotImplementedError + @abstractmethod + async def delete_node( + self, + id: str + ): raise NotImplementedError + + + """ CRUD operations on graph edges """ + + + @abstractmethod + async def add_edge( + self, + from_node: str, + to_node: str, + **kwargs + ): raise NotImplementedError + + # @abstractmethod # async def create_vector_index( # self, @@ -48,13 +71,13 @@ class GraphDBInterface(Protocol): # vector_index_config: object # ): raise NotImplementedError - """ Data points """ - @abstractmethod - async def create_data_points( - self, - collection_name: str, - data_points: List[any] - ): raise NotImplementedError + # """ Data points """ + # @abstractmethod + # async def create_data_points( + # self, + # collection_name: str, + # data_points: List[any] + # ): raise NotImplementedError # @abstractmethod # async def get_data_point( diff --git a/cognitive_architecture/infrastructure/databases/graph/networkx/adapter.py b/cognitive_architecture/infrastructure/databases/graph/networkx/adapter.py index 6b888ecc7..b00ee4408 100644 --- a/cognitive_architecture/infrastructure/databases/graph/networkx/adapter.py +++ b/cognitive_architecture/infrastructure/databases/graph/networkx/adapter.py @@ -1,91 +1,177 @@ +"""Adapter for NetworkX graph database.""" + +import json +import os import pickle from datetime import datetime +from typing import Optional, Dict, Any +import aiofiles.os import aiofiles import networkx as nx from cognitive_architecture.infrastructure.databases.graph.graph_db_interface import GraphDBInterface import logging class NetworXAdapter(GraphDBInterface): + _instance = None # Class variable to store the singleton instance + + def __new__(cls, *args, **kwargs): + if cls._instance is None: + cls._instance = super(NetworXAdapter, cls).__new__(cls) + return 
cls._instance def __init__(self, filename="cognee_graph.pkl"): self.filename = filename self.graph = nx.MultiDiGraph() + async def graph(self): + return self.graph + # G = await client.load_graph_from_file() + # if G is None: + # G = client.graph # Directly access the graph attribute without calling it + # return G - async def save_graph(self, path: str): - """Asynchronously save the graph to a file.""" - if path is not None: - path = self.filename + + async def add_node(self, id: str, **kwargs) -> None: + """Asynchronously add a node to the graph if it doesn't already exist, with given properties.""" + if not self.graph.has_node(id): + self.graph.add_node(id, **kwargs) + await self.save_graph_to_file(self.filename) + + async def add_edge(self, from_node: str, to_node: str, **kwargs ) -> None: + """Asynchronously add an edge between two nodes with optional properties.""" + # properties = properties or {} + self.graph.add_edge(from_node, to_node, **kwargs) + await self.save_graph_to_file(self.filename) + + async def delete_node(self, id: str) -> None: + """Asynchronously delete a node from the graph if it exists.""" + if self.graph.has_node(id): + self.graph.remove_node(id) + await self.save_graph_to_file(self.filename) + + + async def save_graph_to_file(self, file_path: str=None) -> None: + """Asynchronously save the graph to a file in JSON format.""" + if not file_path: + file_path = self.filename + graph_data = nx.readwrite.json_graph.node_link_data(self.graph) + async with aiofiles.open(file_path, 'w') as file: + await file.write(json.dumps(graph_data)) + + async def load_graph_from_file(self, file_path: str = None): + """Asynchronously load the graph from a file in JSON format.""" + if not file_path: + file_path = self.filename try: - async with aiofiles.open(path, "wb") as f: - await f.write(pickle.dumps(self.graph)) - logging.info("Graph saved successfully.") + if os.path.exists(file_path): + async with aiofiles.open(file_path, 'r') as file: + graph_data = 
json.loads(await file.read()) + self.graph = nx.readwrite.json_graph.node_link_graph(graph_data) + return self.graph + else: + # Log that the file does not exist and an empty graph is initialized + logging.warning(f"File {file_path} not found. Initializing an empty graph.") + self.graph = nx.MultiDiGraph() # Use MultiDiGraph to keep it consistent with __init__ + return self.graph except Exception as e: - logging.error(f"Failed to save graph: {e}") + logging.error(f"Failed to load graph from {file_path}: {e}") + # Consider initializing an empty graph in case of error + self.graph = nx.MultiDiGraph() + return self.graph - async def load_graph(self, path: str): - if path is not None: - path = self.filename + async def delete_graph_from_file(self, path: str = None): + """Asynchronously delete the graph file from the filesystem.""" + if path is None: + path = self.filename # Assuming self.filename is defined elsewhere and holds the default graph file path try: - async with aiofiles.open(path, "rb") as f: - data = await f.read() - self.graph = pickle.loads(data) - logging.info("Graph loaded successfully.") - except Exception as e: - logging.error(f"Failed to load graph: {e}") - - async def delete_graph(self, path: str): - if path is not None: - path = self.filename - try: - async with aiofiles.open(path, "wb") as f: - await f.write(pickle.dumps(self.graph)) + await aiofiles.os.remove(path) # Asynchronously remove the file logging.info("Graph deleted successfully.") except Exception as e: logging.error(f"Failed to delete graph: {e}") - async def create(self, user_id, custom_user_properties=None, required_layers=None, default_fields=None): - """Asynchronously create or update a user content graph based on given parameters.""" - # Assume required_layers is a dictionary-like object; use more robust validation in production - category_name = required_layers['name'] - subgroup_names = [subgroup['name'] for subgroup in required_layers['cognitive_subgroups']] - - # Construct the 
additional_categories structure - additional_categories = {category_name: subgroup_names} - - # Define default fields for all nodes if not provided - if default_fields is None: - default_fields = { - 'created_at': datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - 'updated_at': datetime.now().strftime("%Y-%m-%d %H:%M:%S") - } - - # Merge custom user properties with default fields; custom properties take precedence - user_properties = {**default_fields, **(custom_user_properties or {})} - - # Default content categories and update with any additional categories provided - content_categories = { - "Temporal": ["Historical events", "Schedules and timelines"], - "Positional": ["Geographical locations", "Spatial data"], - "Propositions": ["Hypotheses and theories", "Claims and arguments"], - "Personalization": ["User preferences", "User information"] - } - content_categories.update(additional_categories) - - # Ensure the user node exists with properties - self.graph.add_node(user_id, **user_properties, exist=True) - - # Add or update content category nodes and their edges - for category, subclasses in content_categories.items(): - category_properties = {**default_fields, 'type': 'category'} - self.graph.add_node(category, **category_properties, exist=True) - self.graph.add_edge(user_id, category, relationship='created') - - # Add or update subclass nodes and their edges - for subclass in subclasses: - subclass_node_id = f"{category}:{subclass}" - subclass_properties = {**default_fields, 'type': 'subclass', 'content': subclass} - self.graph.add_node(subclass_node_id, **subclass_properties, exist=True) - self.graph.add_edge(category, subclass_node_id, relationship='includes') - - # Save the graph asynchronously after modifications - await self.save_graph() \ No newline at end of file + # async def create(self, user_id, custom_user_properties=None, required_layers=None, default_fields=None, existing_graph=None): + # """Asynchronously create or update a user content graph based 
on given parameters.""" + # # Assume required_layers is a dictionary-like object; use more robust validation in production + # category_name = required_layers['context_name'] + # subgroup_names = [required_layers['layer_name']] + # + # # Construct the additional_categories structure + # additional_categories = {category_name: subgroup_names} + # + # # Define default fields for all nodes if not provided + # if default_fields is None: + # default_fields = { + # 'created_at': datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + # 'updated_at': datetime.now().strftime("%Y-%m-%d %H:%M:%S") + # } + # + # # Merge custom user properties with default fields; custom properties take precedence + # user_properties = {**default_fields, **(custom_user_properties or {})} + # + # # Default content categories and update with any additional categories provided + # content_categories = { + # "Temporal": ["Historical events", "Schedules and timelines"], + # "Positional": ["Geographical locations", "Spatial data"], + # "Propositions": ["Hypotheses and theories", "Claims and arguments"], + # "Personalization": ["User preferences", "User information"] + # } + # + # content_categories = { + # "Temporal": ["Historical events", "Schedules and timelines"], + # "Positional": ["Geographical locations", "Spatial data"], + # "Propositions": ["Hypotheses and theories", "Claims and arguments"], + # "Personalization": ["User preferences", "User information"] + # } + # + # # Update content categories with any additional categories provided + # if additional_categories: + # content_categories.update(additional_categories) + # + # G = existing_graph if existing_graph else self.graph + # + # # Check if the user node already exists, if not, add the user node with properties + # if not G.has_node(user_id): + # G.add_node(user_id, **user_properties) + # + # # Add or update content category nodes and their edges + # for category, subclasses in content_categories.items(): + # category_properties = 
{**default_fields, 'type': 'category'} + # + # # Add or update the category node + # if not G.has_node(category): + # G.add_node(category, **category_properties) + # G.add_edge(user_id, category, relationship='created') + # + # # Add or update subclass nodes and their edges + # for subclass in subclasses: + # # Using both category and subclass names to ensure uniqueness within categories + # subclass_node_id = f"{category}:{subclass}" + # + # # Check if subclass node exists before adding, based on node content + # if not any(subclass == data.get('content') for _, data in G.nodes(data=True)): + # subclass_properties = {**default_fields, 'type': 'subclass', 'content': subclass} + # G.add_node(subclass_node_id, **subclass_properties) + # G.add_edge(category, subclass_node_id, relationship='includes') + # + # return G + # content_categories.update(additional_categories) + # + # # Ensure the user node exists with properties + # self.graph.add_node(user_id, **user_properties, exist=True) + # + # # Add or update content category nodes and their edges + # for category, subclasses in content_categories.items(): + # category_properties = {**default_fields, 'type': 'category'} + # self.graph.add_node(category, **category_properties, exist=True) + # self.graph.add_edge(user_id, category, relationship='created') + # + # # Add or update subclass nodes and their edges + # for subclass in subclasses: + # subclass_node_id = f"{category}:{subclass}" + # subclass_properties = {**default_fields, 'type': 'subclass', 'content': subclass} + # self.graph.add_node(subclass_node_id, **subclass_properties, exist=True) + # self.graph.add_edge(category, subclass_node_id, relationship='includes') + # + # # Save the graph asynchronously after modifications + # # await self.save_graph() + # + # return self.graph \ No newline at end of file diff --git a/cognitive_architecture/modules/cognify/graph/create_semantic_graph.py b/cognitive_architecture/modules/cognify/graph/create_semantic_graph.py 
""" This module is responsible for creating a semantic graph """
import logging
from datetime import datetime
from enum import Enum, auto
from typing import Type, Optional, Any
from pydantic import BaseModel
from cognitive_architecture.infrastructure.databases.graph.get_graph_client import get_graph_client
from cognitive_architecture.shared.data_models import GraphDBType, DefaultGraphModel, Document, DocumentType, Category, Relationship, UserProperties, UserLocation

logger = logging.getLogger(__name__)

# Identifier attributes probed, in priority order, when deriving a node id.
# 'category_id' is included so that distinct Category instances do not all
# collapse onto the single fallback id "Category:default".
_ID_FIELDS = ('id', 'doc_id', 'location_id', 'type_id', 'category_id')

# Attribute that carries edge metadata; it is never materialised as a node.
_RELATIONSHIP_FIELD = 'default_relationship'


async def generate_node_id(instance: BaseModel) -> str:
    """Return a graph node id of the form ``"<ClassName>:<identifier>"``.

    The first attribute of ``_ID_FIELDS`` present on ``instance`` supplies the
    identifier. When none is present the id is ``"<ClassName>:default"``.
    """
    class_name = instance.__class__.__name__
    for field in _ID_FIELDS:
        if hasattr(instance, field):
            return f"{class_name}:{getattr(instance, field)}"
    return f"{class_name}:default"


async def add_node_and_edge(client, parent_id: Optional[str], node_id: str, node_data: dict, relationship_data: dict):
    """Add ``node_id`` to the graph and, if a parent is given, link it to the parent.

    Args:
        client: Graph client; must provide awaitable ``add_node`` and ``add_edge``.
        parent_id: Id of the parent node; when falsy no edge is created.
        node_id: Id of the node to add.
        node_data: Passed to ``client.add_node`` as keyword arguments.
        relationship_data: Passed to ``client.add_edge`` as keyword arguments.
    """
    await client.add_node(node_id, **node_data)
    if parent_id:
        # Edge runs from the parent to the freshly added node.
        await client.add_edge(parent_id, node_id, **relationship_data)


async def process_attribute(G, parent_id: Optional[str], attribute: str, value: Any):
    """Recursively add ``value`` (a model or a list of models) to the graph.

    A ``BaseModel`` value becomes a node linked to ``parent_id``; its
    ``default_relationship`` (when present) supplies the edge data. Its own
    attributes are then processed with the new node as parent. A list whose
    items are all ``BaseModel`` instances is processed item by item under the
    same parent. Any other value is ignored.

    Args:
        G: Graph client forwarded to ``add_node_and_edge``.
        parent_id: Id of the node the value hangs off.
        attribute: Name of the attribute holding ``value``.
        value: The attribute value to process.
    """
    if isinstance(value, BaseModel):
        node_id = await generate_node_id(value)
        node_data = value.dict(exclude={_RELATIONSHIP_FIELD})
        if hasattr(value, _RELATIONSHIP_FIELD):
            relationship_data = value.default_relationship.dict()
        else:
            relationship_data = {}
        await add_node_and_edge(G, parent_id, node_id, node_data, relationship_data)

        for sub_attr, sub_val in value.__dict__.items():
            # The relationship has already been consumed as edge data above;
            # recursing into it would create a bogus "Relationship:default"
            # node attached to every entity.
            if sub_attr == _RELATIONSHIP_FIELD:
                continue
            await process_attribute(G, node_id, sub_attr, sub_val)

    elif isinstance(value, list) and all(isinstance(item, BaseModel) for item in value):
        for item in value:
            await process_attribute(G, parent_id, attribute, item)


async def create_dynamic(graph_model, client):
    """Populate ``client`` with the nodes and edges described by ``graph_model``.

    Calls ``client.load_graph_from_file()`` first, adds the root node for
    ``graph_model`` and then processes each of its attributes recursively.

    Returns:
        The same ``client`` that was passed in.
    """
    await client.load_graph_from_file()
    root_id = await generate_node_id(graph_model)
    # 'id' is already encoded in root_id, so it is left out of the node data.
    node_data = graph_model.dict(exclude={_RELATIONSHIP_FIELD, 'id'})
    logger.debug("Root node %s data: %s", root_id, node_data)
    await client.add_node(root_id, **node_data)

    for attribute_name, attribute_value in graph_model:
        await process_attribute(client, root_id, attribute_name, attribute_value)

    return client


def _build_sample_graph_model() -> DefaultGraphModel:
    """Build the hard-coded example graph model (one user, two documents)."""
    # Single timestamp so 'created_at' and 'updated_at' always agree.
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    return DefaultGraphModel(
        id="user123",
        documents=[
            Document(
                doc_id="doc1",
                title="Document 1",
                summary="Summary of Document 1",
                content_id="content_id_for_doc1",  # Assuming external content storage ID
                doc_type=DocumentType(type_id="PDF", description="Portable Document Format"),
                categories=[
                    Category(category_id="finance", name="Finance",
                             default_relationship=Relationship(type="belongs_to")),
                    Category(category_id="tech", name="Technology",
                             default_relationship=Relationship(type="belongs_to")),
                ],
                default_relationship=Relationship(type='has_document'),
            ),
            Document(
                doc_id="doc2",
                title="Document 2",
                summary="Summary of Document 2",
                content_id="content_id_for_doc2",
                doc_type=DocumentType(type_id="TXT", description="Text File"),
                categories=[
                    Category(category_id="health", name="Health",
                             default_relationship=Relationship(type="belongs_to")),
                    Category(category_id="wellness", name="Wellness",
                             default_relationship=Relationship(type="belongs_to")),
                ],
                default_relationship=Relationship(type='has_document'),
            ),
        ],
        user_properties=UserProperties(
            custom_properties={"age": "30"},
            location=UserLocation(location_id="ny", description="New York",
                                  default_relationship=Relationship(type='located_in')),
        ),
        default_fields={
            'created_at': timestamp,
            'updated_at': timestamp,
        },
    )


async def create_semantic_graph(
):
    """Build the example semantic graph on a NetworkX-type graph client.

    Returns:
        The graph client returned by ``create_dynamic`` after population.
    """
    graph_type = GraphDBType.NETWORKX

    # Call the get_graph_client function with the selected graph type
    graph_client = get_graph_client(graph_type)
    logger.debug("Using graph client: %s", graph_client)

    # create_dynamic loads the persisted graph itself, so no separate
    # load_graph_from_file() call is made here.
    graph_model_instance = _build_sample_graph_model()
    return await create_dynamic(graph_model_instance, graph_client)


if __name__ == "__main__":
    import asyncio

    asyncio.run(create_semantic_graph())
a/cognitive_architecture/modules/cognify/llm/classify_content.py +++ b/cognitive_architecture/modules/cognify/llm/classify_content.py @@ -2,7 +2,7 @@ from pydantic import BaseModel from typing import Type from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client -from cognitive_architecture.shared.data_models import ContentPrediction +from cognitive_architecture.shared.data_models import DefaultContentPrediction from cognitive_architecture.utils import read_query_prompt async def classify_into_categories(text_input: str, system_prompt_path:str, response_model: Type[BaseModel]): diff --git a/cognitive_architecture/modules/cognify/llm/content_to_cog_layers.py b/cognitive_architecture/modules/cognify/llm/content_to_cog_layers.py index f3edd711a..acd54afae 100644 --- a/cognitive_architecture/modules/cognify/llm/content_to_cog_layers.py +++ b/cognitive_architecture/modules/cognify/llm/content_to_cog_layers.py @@ -2,7 +2,7 @@ from typing import Type from pydantic import BaseModel from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client -from cognitive_architecture.shared.data_models import CognitiveLayer +from cognitive_architecture.shared.data_models import DefaultCognitiveLayer from cognitive_architecture.utils import async_render_template async def content_to_cog_layers(filename: str,context, response_model: Type[BaseModel]): diff --git a/cognitive_architecture/modules/cognify/llm/content_to_propositions.py b/cognitive_architecture/modules/cognify/llm/content_to_propositions.py index a90cec760..9c66f1e72 100644 --- a/cognitive_architecture/modules/cognify/llm/content_to_propositions.py +++ b/cognitive_architecture/modules/cognify/llm/content_to_propositions.py @@ -1,4 +1,5 @@ """ This module is responsible for converting content to cognitive layers. 
""" +import json from typing import Type from pydantic import BaseModel from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client @@ -10,7 +11,13 @@ async def generate_graph(text_input:str, filename: str,context, response_model: llm_client = get_llm_client() formatted_text_input = await async_render_template(filename, context) - return await llm_client.acreate_structured_output(text_input,formatted_text_input, response_model) + output = await llm_client.acreate_structured_output(text_input, formatted_text_input, response_model) + + + context_key = json.dumps(context, sort_keys=True) + + # Returning a dictionary with context as the key and the awaited output as its value + return {context_key: output} if __name__ == "__main__": diff --git a/cognitive_architecture/shared/data_models.py b/cognitive_architecture/shared/data_models.py index 99b668cb3..bb2a8936e 100644 --- a/cognitive_architecture/shared/data_models.py +++ b/cognitive_architecture/shared/data_models.py @@ -1,6 +1,6 @@ """Data models for the cognitive architecture.""" -from enum import Enum -from typing import Optional, List, Union +from enum import Enum, auto +from typing import Optional, List, Union, Dict, Any from pydantic import BaseModel, Field @@ -161,7 +161,7 @@ class ProceduralContent(ContentType): type:str = "PROCEDURAL" subclass: List[ProceduralSubclass] -class ContentPrediction(BaseModel): +class DefaultContentPrediction(BaseModel): """Class for a single class label prediction.""" label: Union[TextContent, AudioContent, ImageContent, VideoContent, MultimediaContent, Model3DContent, ProceduralContent] @@ -174,8 +174,53 @@ class CognitiveLayerSubgroup(BaseModel): description: str -class CognitiveLayer(BaseModel): +class DefaultCognitiveLayer(BaseModel): """Cognitive layer""" category_name:str cognitive_layers: List[CognitiveLayerSubgroup] = Field(..., default_factory=list) + +class GraphDBType(Enum): + NETWORKX = auto() + NEO4J = auto() + + +# Models for representing 
# Entity models for the user graph. A model's ``default_relationship`` holds
# the relationship metadata associated with that entity.
class Relationship(BaseModel):
    """A relationship type with optional free-form properties."""
    type: str
    properties: Optional[Dict[str, Any]] = None


class DocumentType(BaseModel):
    """Type of a document, identified by ``type_id``."""
    type_id: str
    description: str
    default_relationship: Relationship = Relationship(type='is_type')


class Category(BaseModel):
    """Category of a document, identified by ``category_id``."""
    category_id: str
    name: str
    default_relationship: Relationship = Relationship(type='categorized_as')


class Document(BaseModel):
    """A document with optional summary, content reference, type and categories."""
    doc_id: str
    title: str
    summary: Optional[str] = None
    content_id: Optional[str] = None
    doc_type: Optional[DocumentType] = None
    # default_factory keeps the container default explicit and per-instance,
    # matching the Field(..., default_factory=list) usage elsewhere in this file.
    categories: List[Category] = Field(default_factory=list)
    default_relationship: Relationship = Relationship(type='has_document')


class UserLocation(BaseModel):
    """Location of a user, identified by ``location_id``."""
    location_id: str
    description: str
    default_relationship: Relationship = Relationship(type='located_in')


class UserProperties(BaseModel):
    """Optional custom properties and location attached to a user."""
    custom_properties: Optional[Dict[str, Any]] = None
    location: Optional[UserLocation] = None


class DefaultGraphModel(BaseModel):
    """Root model: a user id with its properties, documents and default fields."""
    id: str
    # Built lazily per instance instead of once at class-definition time.
    user_properties: UserProperties = Field(default_factory=UserProperties)
    documents: List[Document] = Field(default_factory=list)
    default_fields: Optional[Dict[str, Any]] = Field(default_factory=dict)