Fixes to LLM calls and general logic updates

This commit is contained in:
Vasilije 2024-03-07 15:37:17 +01:00
parent 06f701eb23
commit 5a4935f0dc
11 changed files with 118 additions and 59 deletions

View file

@@ -366,44 +366,20 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"id": "14ef9446-ec16-4657-9f83-a4c1c9ef2eba",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/Users/vasa/Projects/cognee\n"
]
}
],
"source": [
"import os\n",
"print(os.getcwd())"
]
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"id": "f97f11f1-4490-49ea-b193-1f858e72893b",
"metadata": {},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'cognitive_architecture.modules.cognify.create_vector_memory'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[3], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcognitive_architecture\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodules\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcognify\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mllm\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mclassify_content\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m content_to_cog_layers\n",
"File \u001b[0;32m~/Projects/cognee/cognitive_architecture/modules/cognify/__init__.py:1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcreate_vector_memory\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m create_vector_memory\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'cognitive_architecture.modules.cognify.create_vector_memory'"
]
}
],
"outputs": [],
"source": [
"from cognitive_architecture.modules.cognify.llm.classify_content import content_to_cog_layers"
"from cognitive_architecture.modules.cognify.llm.classify_content import classify_into_categories"
]
},
{

View file

@@ -29,7 +29,12 @@ class LLMInterface(Protocol):
@abstractmethod
async def acreate_structured_output(self,
text_input: str,
system_prompt_path: str,
system_prompt: str,
response_model: Type[BaseModel]) -> BaseModel:
"""To get structured output, import/call this function"""
raise NotImplementedError
@abstractmethod
def show_prompt(self, text_input: str, system_prompt_path: str) -> str:
"""To get structured output, import/call this function"""
raise NotImplementedError
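
A minimal sketch of calling the updated interface; UserClassification here is a hypothetical response model, and get_llm_client() is assumed to return an LLMInterface implementation:

import asyncio
from pydantic import BaseModel
from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client

# Hypothetical response model, for illustration only.
class UserClassification(BaseModel):
    category: str
    confidence: float

async def main():
    llm_client = get_llm_client()
    # The interface now takes the prompt text itself, not a path to a prompt file.
    result = await llm_client.acreate_structured_output(
        text_input="Quarterly revenue grew 12% year over year.",
        system_prompt="Classify the text into a category with a confidence score.",
        response_model=UserClassification,
    )
    print(result)

asyncio.run(main())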

View file

@@ -180,9 +180,9 @@ class OpenAIAdapter(LLMInterface):
return embeddings
async def acreate_structured_output(self, text_input: str, system_prompt_path: str, response_model: Type[BaseModel]) -> BaseModel:
async def acreate_structured_output(self, text_input: str, system_prompt: str, response_model: Type[BaseModel]) -> BaseModel:
"""Generate a response from a user query."""
system_prompt = read_query_prompt(system_prompt_path)
return await self.aclient.chat.completions.create(
model=self.model,
@@ -196,3 +196,14 @@ class OpenAIAdapter(LLMInterface):
],
response_model=response_model,
)
def show_prompt(self, text_input: str, system_prompt_path: str) -> str:
"""Format and display the prompt for a user query."""
if not text_input:
text_input = "No user input provided."
if not system_prompt_path:
raise ValueError("No system prompt path provided.")
system_prompt = read_query_prompt(system_prompt_path)
formatted_prompt = f"""System Prompt:\n{system_prompt}\n\nUser Input:\n{text_input}\n"""
return formatted_prompt
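
A usage sketch for the new show_prompt helper; the constructor arguments are elided and the printed output is illustrative, assuming classify_content.txt resolves under the prompts directory:

adapter = OpenAIAdapter(...)  # constructor arguments elided
preview = adapter.show_prompt(
    text_input="Russia summons US ambassador in Moscow.",
    system_prompt_path="classify_content.txt",
)
print(preview)
# System Prompt:
# <contents of classify_content.txt>
#
# User Input:
# Russia summons US ambassador in Moscow.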

View file

@@ -1,8 +1,14 @@
You are tasked with analyzing a {{data_type}} files, especially in a multilayer network context for tasks such as analysis, categorization, and feature extraction, various layers can be incorporated to capture the depth and breadth of information contained within the {{data_type}}
These layers can help in understanding the content, context, and characteristics of the {{data_type}}
Your objective is to extract meaningful layers of information that will contribute to constructing a detailed multilayer network or knowledge graph.
Approach this task by considering the unique characteristics and inherent properties of the data at hand.
VERY IMPORTANT: The context you are working in is {required_layers.dict()['name']} and specific domain you are extracting data on is {{layer_name}}
Guidelines for Layer Extraction:
Take into account: The content type that in this case is: {{layer_name}} should play a major role in how you decompose into layers.
Based on your analysis, define and describe the layers you've identified, explaining their relevance and contribution to understanding the dataset. Your independent identification of layers will enable a nuanced and multifaceted representation of the data, enhancing applications in knowledge discovery, content analysis, and information retrieval.
You are tasked with analyzing `{{ data_type }}` files, especially in a multilayer network context for tasks such as analysis, categorization, and feature extraction. Various layers can be incorporated to capture the depth and breadth of information contained within the {{ data_type }}.
These layers can help in understanding the content, context, and characteristics of the `{{ data_type }}`.
Your objective is to extract meaningful layers of information that will contribute to constructing a detailed multilayer network or knowledge graph.
Approach this task by considering the unique characteristics and inherent properties of the data at hand.
VERY IMPORTANT: The context you are working in is `{{ context_name }}` and the specific domain you are extracting data on is `{{ layer_name }}`.
Guidelines for Layer Extraction:
Take into account that the content type, in this case `{{ layer_name }}`, should play a major role in how you decompose into layers.
Based on your analysis, define and describe the layers you've identified, explaining their relevance and contribution to understanding the dataset. Your independent identification of layers will enable a nuanced and multifaceted representation of the data, enhancing applications in knowledge discovery, content analysis, and information retrieval.
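
To see how the {{ ... }} placeholders resolve, the template can be rendered with the same context that generate_cog_layers.py below passes; a sketch using Jinja2 directly:

from jinja2 import Environment, FileSystemLoader, select_autoescape

env = Environment(
    loader=FileSystemLoader("cognitive_architecture/infrastructure/llm/prompts"),
    autoescape=select_autoescape(["html", "xml", "txt"]),
)
template = env.get_template("generate_cog_layers.txt")
print(template.render({
    "data_type": "text",
    "context_name": "Scientific Research",
    "layer_name": "Content Layer",
}))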

View file

@@ -4,7 +4,7 @@ designed for extracting information in structured formats to build a knowledge graph
- **Edges** represent relationships between concepts. They're akin to Wikipedia links.
- The aim is to achieve simplicity and clarity in the
knowledge graph, making it accessible for a vast audience.
YOU ARE ONLY EXTRACTING DATA FOR COGNITIVE LAYER {{layer}}
YOU ARE ONLY EXTRACTING DATA FOR COGNITIVE LAYER `{{layer}}`
## 2. Labeling Nodes
- **Consistency**: Ensure you use basic or elementary types for node labels.
- For example, when you identify an entity representing a person,
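
To make the labeling rule concrete: prefer a generic label such as Person over a narrower one such as Politician. The shapes below are an illustration only; the real schema is the KnowledgeGraph model in shared.data_models:

# Illustration of the node/edge shape the prompt steers toward (assumed,
# not the actual KnowledgeGraph schema).
example_nodes = [
    {"id": "person_1", "label": "Person", "name": "Angela Merkel"},
    {"id": "org_1", "label": "Organization", "name": "European Union"},
]
example_edges = [
    {"source": "person_1", "target": "org_1", "relationship": "member_of"},
]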

View file

@@ -1 +1 @@
from .create_vector_memory import create_vector_memory

View file

@@ -1 +0,0 @@
from .create_vector_memory import create_vector_memory

View file

@@ -2,15 +2,18 @@ from pydantic import BaseModel
from typing import Type
from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client
from cognitive_architecture.shared.data_models import ContentPrediction
from cognitive_architecture.utils import read_query_prompt
async def content_to_cog_layers(text_input: str,system_prompt_path:str, response_model: Type[BaseModel]):
async def classify_into_categories(text_input: str, system_prompt_path: str, response_model: Type[BaseModel]):
llm_client = get_llm_client()
system_prompt = await read_query_prompt(system_prompt_path)
# data_points = list()
# for point in map(create_data_point, payload):
# data_points.append(await point)
return await llm_client.acreate_structured_output(text_input,system_prompt_path, response_model)
return await llm_client.acreate_structured_output(text_input, system_prompt, response_model)
@@ -19,7 +22,7 @@ async def content_to_cog_layers(text_input: str,system_prompt_path:str, response
if __name__ == "__main__":
import asyncio
asyncio.run(content_to_cog_layers("""Russia summons US ambassador in Moscow and says it will expel diplomats who meddle in its internal affairs
asyncio.run(classify_into_categories("""Russia summons US ambassador in Moscow and says it will expel diplomats who meddle in its internal affairs
The Russian foreign ministry said on Thursday it had summoned the US ambassador in Moscow and warned her against attempts to interfere in the internal affairs of the Russian Federation, reports Reuters.
Ahead of a March presidential election, it said in a statement that such behaviour would be firmly and resolutely suppressed, up to and including the expulsion as persona non grata of US embassy staff involved in such actions.""", "classify_content.txt", ContentPrediction))
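
ContentPrediction is imported from shared.data_models and its fields are not shown in this diff; a shape along the following lines is assumed purely to illustrate the structured-output contract:

from pydantic import BaseModel

# Hypothetical sketch; the real ContentPrediction lives in shared.data_models.
class ContentPredictionSketch(BaseModel):
    label: str      # e.g. "News stories"
    sublabel: str   # e.g. "Politics"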

View file

@@ -2,19 +2,27 @@ from typing import Type
from pydantic import BaseModel
from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client
from cognitive_architecture.shared.data_models import CognitiveLayer
from cognitive_architecture.utils import async_render_template
async def content_to_cog_layers(text_input: str,system_prompt_path:str, response_model: Type[BaseModel]):
async def content_to_cog_layers(filename: str, context: dict, response_model: Type[BaseModel]):
llm_client = get_llm_client()
formatted_text_input = await async_render_template(filename, context)
# data_points = list()
# for point in map(create_data_point, payload):
# data_points.append(await point)
return await llm_client.acreate_structured_output(text_input,system_prompt_path, response_model)
# The rendered template serves as both the text input and the system prompt.
return await llm_client.acreate_structured_output(formatted_text_input, formatted_text_input, response_model)
if __name__ == "__main__":
content_to_cog_layers("test", "test", response_model=CognitiveLayer)
import asyncio
asyncio.run(content_to_cog_layers("generate_cog_layers.txt", {
'data_type': 'text',
'context_name': 'Scientific Research',
'layer_name': 'Content Layer'
}, response_model=CognitiveLayer))
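
Since the rendered template already carries the full instructions, the same string fills both the text_input and system_prompt slots of acreate_structured_output; a condensed trace of the call above (inside an async context), assuming the __main__ values:

rendered = await async_render_template("generate_cog_layers.txt", {
    "data_type": "text",
    "context_name": "Scientific Research",
    "layer_name": "Content Layer",
})
# The same rendered string is passed as both the user text and the system prompt.
result = await llm_client.acreate_structured_output(rendered, rendered, CognitiveLayer)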

View file

@@ -3,14 +3,19 @@ from typing import Type
from pydantic import BaseModel
from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client
from cognitive_architecture.shared.data_models import KnowledgeGraph
from cognitive_architecture.utils import async_render_template
async def generate_graph(filename: str, context: dict, response_model: Type[BaseModel]):
async def generate_graph(text_input:str,system_prompt_path:str, response_model: Type[BaseModel]):
doc_path = "cognitive_architecture/infrastructure/llm/prompts/generate_graph_prompt.txt"
llm_client = get_llm_client()
return await llm_client.generate_graph(text_input,system_prompt_path, response_model)
formatted_text_input = await async_render_template(filename, context)
return await llm_client.acreate_structured_output(formatted_text_input, formatted_text_input, response_model)
if __name__ == "__main__":
generate_graph("test", "test", response_model=KnowledgeGraph)
import asyncio
asyncio.run(generate_graph("generate_graph_prompt.txt", {
'layer': 'text'
}, response_model=KnowledgeGraph))

View file

@@ -5,7 +5,7 @@ import random
import string
import uuid
from pathlib import Path
from jinja2 import Environment, FileSystemLoader, select_autoescape
from graphviz import Digraph
from sqlalchemy import or_
from sqlalchemy.orm import contains_eager
@@ -285,8 +285,11 @@ async def get_memory_name_by_doc_id(session: AsyncSession, docs_id: str):
return None
def read_query_prompt(filename: str) -> str:
"""Read a query prompt from a file."""
async def read_query_prompt(filename: str) -> str:
"""Read a query prompt from a file.
:param filename: The name of the file to read.
:return: The content of the file as a string.
"""
script_directory = Path(__file__).parent
# Set the base directory relative to the script's directory
@@ -300,4 +303,47 @@ def read_query_prompt(filename: str) -> str:
logging.error(f"File not found: {file_path.absolute()}")
except Exception as e:
logging.error(f"An error of type {type(e).__name__} occurred while reading file: {file_path.absolute()}. Error message: {e}")
return None
return None
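# Sketch: read_query_prompt is now a coroutine function, so call sites await it
# or drive it with asyncio.run, as classify_content.py above now does:
#
#   import asyncio
#   from cognitive_architecture.utils import read_query_prompt
#
#   prompt_text = asyncio.run(read_query_prompt("classify_content.txt"))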
async def print_file_content(file_path):
# Create a Path object for the file path
path = Path(file_path)
# Check if the file exists
if path.is_file():
# Open and read the file, then print its content
with path.open('r') as file:
print(file.read())
else:
# Print an error message if the file does not exist
print(f"The file '{file_path}' does not exist.")
async def async_render_template(filename: str, context: dict) -> str:
"""Render a Jinja2 template asynchronously.
:param filename: The name of the template file to render.
:param context: The context to render the template with.
:return: The rendered template as a string."""
# Initialize the Jinja2 environment to load templates from the filesystem
script_directory = Path(__file__).parent
# Set the base directory relative to the script's directory
base_directory = script_directory.parent / "cognitive_architecture/infrastructure/llm/prompts"
env = Environment(
loader=FileSystemLoader(base_directory),
autoescape=select_autoescape(['html', 'xml', 'txt'])
)
# Load the template by name
template = env.get_template(filename)
# Render the template with the provided context
rendered_template = template.render(context)
return rendered_template
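
async_render_template is declared async, but the render() call itself runs synchronously; Jinja2 also supports native async rendering via enable_async=True. A sketch of that variant, assuming the same prompts directory layout:

from pathlib import Path
from jinja2 import Environment, FileSystemLoader, select_autoescape

async def render_template_native_async(filename: str, context: dict) -> str:
    base_directory = Path(__file__).parent.parent / "cognitive_architecture/infrastructure/llm/prompts"
    env = Environment(
        loader=FileSystemLoader(base_directory),
        autoescape=select_autoescape(["html", "xml", "txt"]),
        enable_async=True,  # makes template.render_async available
    )
    template = env.get_template(filename)
    # Unlike render(), render_async must be awaited.
    return await template.render_async(context)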