Fixes to LLM calls and general logic updates

This commit is contained in:
Vasilije 2024-03-07 15:37:17 +01:00
parent 06f701eb23
commit 5a4935f0dc
11 changed files with 118 additions and 59 deletions

View file

@@ -366,44 +366,20 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"id": "14ef9446-ec16-4657-9f83-a4c1c9ef2eba",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/Users/vasa/Projects/cognee\n"
]
}
],
"source": [
"import os\n",
"print(os.getcwd())"
]
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"id": "f97f11f1-4490-49ea-b193-1f858e72893b",
"metadata": {},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'cognitive_architecture.modules.cognify.create_vector_memory'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[3], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcognitive_architecture\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodules\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcognify\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mllm\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mclassify_content\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m content_to_cog_layers\n",
"File \u001b[0;32m~/Projects/cognee/cognitive_architecture/modules/cognify/__init__.py:1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcreate_vector_memory\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m create_vector_memory\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'cognitive_architecture.modules.cognify.create_vector_memory'"
]
}
],
"outputs": [],
"source": [
"from cognitive_architecture.modules.cognify.llm.classify_content import content_to_cog_layers"
"from cognitive_architecture.modules.cognify.llm.classify_content import classify_into_categories"
]
},
{

View file

@@ -29,7 +29,12 @@ class LLMInterface(Protocol):
@abstractmethod
async def acreate_structured_output(self,
text_input: str,
system_prompt_path: str,
system_prompt: str,
response_model: Type[BaseModel]) -> BaseModel:
"""To get structured output, import/call this function"""
raise NotImplementedError
@abstractmethod
def show_prompt(self, text_input: str, system_prompt_path: str) -> str:
"""To get structured output, import/call this function"""
raise NotImplementedError
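
A minimal sketch of calling the updated interface; UserClassification here is a hypothetical response model, and get_llm_client() is assumed to return an LLMInterface implementation:

import asyncio
from pydantic import BaseModel
from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client

# Hypothetical response model, for illustration only.
class UserClassification(BaseModel):
    category: str
    confidence: float

async def main():
    llm_client = get_llm_client()
    # The interface now takes the prompt text itself, not a path to a prompt file.
    result = await llm_client.acreate_structured_output(
        text_input="Quarterly revenue grew 12% year over year.",
        system_prompt="Classify the text into a category with a confidence score.",
        response_model=UserClassification,
    )
    print(result)

asyncio.run(main())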

View file

@@ -180,9 +180,9 @@ class OpenAIAdapter(LLMInterface):
return embeddings
async def acreate_structured_output(self, text_input: str, system_prompt_path: str, response_model: Type[BaseModel]) -> BaseModel:
async def acreate_structured_output(self, text_input: str, system_prompt: str, response_model: Type[BaseModel]) -> BaseModel:
"""Generate a response from a user query."""
system_prompt = read_query_prompt(system_prompt_path)
return await self.aclient.chat.completions.create(
model=self.model,
@@ -196,3 +196,14 @@ class OpenAIAdapter(LLMInterface):
],
response_model=response_model,
)
def show_prompt(self, text_input: str, system_prompt_path: str) -> str:
"""Format and display the prompt for a user query."""
if not text_input:
text_input = "No user input provided."
if not system_prompt_path:
raise ValueError("No system prompt path provided.")
system_prompt = read_query_prompt(system_prompt_path)
formatted_prompt = f"""System Prompt:\n{system_prompt}\n\nUser Input:\n{text_input}\n"""
return formatted_prompt
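
A usage sketch for the new show_prompt helper; the constructor arguments are elided and the printed output is illustrative, assuming classify_content.txt resolves under the prompts directory:

adapter = OpenAIAdapter(...)  # constructor arguments elided
preview = adapter.show_prompt(
    text_input="Russia summons US ambassador in Moscow.",
    system_prompt_path="classify_content.txt",
)
print(preview)
# System Prompt:
# <contents of classify_content.txt>
#
# User Input:
# Russia summons US ambassador in Moscow.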

View file

@@ -1,8 +1,14 @@
You are tasked with analyzing a {{data_type}} files, especially in a multilayer network context for tasks such as analysis, categorization, and feature extraction, various layers can be incorporated to capture the depth and breadth of information contained within the {{data_type}}
These layers can help in understanding the content, context, and characteristics of the {{data_type}}
Your objective is to extract meaningful layers of information that will contribute to constructing a detailed multilayer network or knowledge graph.
Approach this task by considering the unique characteristics and inherent properties of the data at hand.
VERY IMPORTANT: The context you are working in is {required_layers.dict()['name']} and specific domain you are extracting data on is {{layer_name}}
Guidelines for Layer Extraction:
Take into account: The content type that in this case is: {{layer_name}} should play a major role in how you decompose into layers.
Based on your analysis, define and describe the layers you've identified, explaining their relevance and contribution to understanding the dataset. Your independent identification of layers will enable a nuanced and multifaceted representation of the data, enhancing applications in knowledge discovery, content analysis, and information retrieval.
You are tasked with analyzing `{{ data_type }}` files, especially in a multilayer network context for tasks such as analysis, categorization, and feature extraction. Various layers can be incorporated to capture the depth and breadth of information contained within the {{ data_type }}.
These layers can help in understanding the content, context, and characteristics of the `{{ data_type }}`.
Your objective is to extract meaningful layers of information that will contribute to constructing a detailed multilayer network or knowledge graph.
Approach this task by considering the unique characteristics and inherent properties of the data at hand.
VERY IMPORTANT: The context you are working in is `{{ context_name }}` and the specific domain you are extracting data on is `{{ layer_name }}`.
Guidelines for Layer Extraction:
Take into account that the content type, in this case `{{ layer_name }}`, should play a major role in how you decompose into layers.
Based on your analysis, define and describe the layers you've identified, explaining their relevance and contribution to understanding the dataset. Your independent identification of layers will enable a nuanced and multifaceted representation of the data, enhancing applications in knowledge discovery, content analysis, and information retrieval.
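
To see how the {{ ... }} placeholders resolve, the template can be rendered with the same context that generate_cog_layers.py below passes; a sketch using Jinja2 directly:

from jinja2 import Environment, FileSystemLoader, select_autoescape

env = Environment(
    loader=FileSystemLoader("cognitive_architecture/infrastructure/llm/prompts"),
    autoescape=select_autoescape(["html", "xml", "txt"]),
)
template = env.get_template("generate_cog_layers.txt")
print(template.render({
    "data_type": "text",
    "context_name": "Scientific Research",
    "layer_name": "Content Layer",
}))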

View file

@@ -4,7 +4,7 @@ designed for extracting information in structured formats to build a knowledge graph
- **Edges** represent relationships between concepts. They're akin to Wikipedia links.
- The aim is to achieve simplicity and clarity in the
knowledge graph, making it accessible for a vast audience.
YOU ARE ONLY EXTRACTING DATA FOR COGNITIVE LAYER {{layer}}
YOU ARE ONLY EXTRACTING DATA FOR COGNITIVE LAYER `{{layer}}`
## 2. Labeling Nodes
- **Consistency**: Ensure you use basic or elementary types for node labels.
- For example, when you identify an entity representing a person,
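
To make the labeling rule concrete: prefer a generic label such as Person over a narrower one such as Politician. The shapes below are an illustration only; the real schema is the KnowledgeGraph model in shared.data_models:

# Illustration of the node/edge shape the prompt steers toward (assumed,
# not the actual KnowledgeGraph schema).
example_nodes = [
    {"id": "person_1", "label": "Person", "name": "Angela Merkel"},
    {"id": "org_1", "label": "Organization", "name": "European Union"},
]
example_edges = [
    {"source": "person_1", "target": "org_1", "relationship": "member_of"},
]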

View file

@@ -1 +1 @@
from .create_vector_memory import create_vector_memory

View file

@@ -1 +0,0 @@
from .create_vector_memory import create_vector_memory

View file

@@ -2,15 +2,18 @@ from pydantic import BaseModel
from typing import Type
from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client
from cognitive_architecture.shared.data_models import ContentPrediction
from cognitive_architecture.utils import read_query_prompt
async def content_to_cog_layers(text_input: str,system_prompt_path:str, response_model: Type[BaseModel]):
async def classify_into_categories(text_input: str, system_prompt_path: str, response_model: Type[BaseModel]):
llm_client = get_llm_client()
system_prompt = await read_query_prompt(system_prompt_path)
# data_points = list()
# for point in map(create_data_point, payload):
# data_points.append(await point)
return await llm_client.acreate_structured_output(text_input,system_prompt_path, response_model)
return await llm_client.acreate_structured_output(text_input, system_prompt, response_model)
@@ -19,7 +22,7 @@ async def content_to_cog_layers(text_input: str,system_prompt_path:str, response
if __name__ == "__main__":
import asyncio
asyncio.run(content_to_cog_layers("""Russia summons US ambassador in Moscow and says it will expel diplomats who meddle in its internal affairs
asyncio.run(classify_into_categories("""Russia summons US ambassador in Moscow and says it will expel diplomats who meddle in its internal affairs
The Russian foreign ministry said on Thursday it had summoned the US ambassador in Moscow and warned her against attempts to interfere in the internal affairs of the Russian Federation, reports Reuters.
Ahead of a March presidential election, it said in a statement that such behaviour would be firmly and resolutely suppressed, up to and including the expulsion as persona non grata of US embassy staff involved in such actions.""", "classify_content.txt", ContentPrediction))
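
ContentPrediction is imported from shared.data_models and its fields are not shown in this diff; a shape along the following lines is assumed purely to illustrate the structured-output contract:

from pydantic import BaseModel

# Hypothetical sketch; the real ContentPrediction lives in shared.data_models.
class ContentPredictionSketch(BaseModel):
    label: str      # e.g. "News stories"
    sublabel: str   # e.g. "Politics"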

View file

@@ -2,19 +2,27 @@ from typing import Type
from pydantic import BaseModel
from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client
from cognitive_architecture.shared.data_models import CognitiveLayer
from cognitive_architecture.utils import async_render_template
async def content_to_cog_layers(text_input: str,system_prompt_path:str, response_model: Type[BaseModel]):
async def content_to_cog_layers(filename: str, context: dict, response_model: Type[BaseModel]):
llm_client = get_llm_client()
formatted_text_input = await async_render_template(filename, context)
# data_points = list()
# for point in map(create_data_point, payload):
# data_points.append(await point)
return await llm_client.acreate_structured_output(text_input,system_prompt_path, response_model)
# The rendered template serves as both the text input and the system prompt.
return await llm_client.acreate_structured_output(formatted_text_input, formatted_text_input, response_model)
if __name__ == "__main__":
content_to_cog_layers("test", "test", response_model=CognitiveLayer)
import asyncio
asyncio.run(content_to_cog_layers("generate_cog_layers.txt", {
'data_type': 'text',
'context_name': 'Scientific Research',
'layer_name': 'Content Layer'
}, response_model=CognitiveLayer))
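
Since the rendered template already carries the full instructions, the same string fills both the text_input and system_prompt slots of acreate_structured_output; a condensed trace of the call above (inside an async context), assuming the __main__ values:

rendered = await async_render_template("generate_cog_layers.txt", {
    "data_type": "text",
    "context_name": "Scientific Research",
    "layer_name": "Content Layer",
})
# The same rendered string is passed as both the user text and the system prompt.
result = await llm_client.acreate_structured_output(rendered, rendered, CognitiveLayer)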

View file

@@ -3,14 +3,19 @@ from typing import Type
from pydantic import BaseModel
from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client
from cognitive_architecture.shared.data_models import KnowledgeGraph
from cognitive_architecture.utils import async_render_template
async def generate_graph(filename: str, context: dict, response_model: Type[BaseModel]):
async def generate_graph(text_input:str,system_prompt_path:str, response_model: Type[BaseModel]):
doc_path = "cognitive_architecture/infrastructure/llm/prompts/generate_graph_prompt.txt"
llm_client = get_llm_client()
return await llm_client.generate_graph(text_input,system_prompt_path, response_model)
formatted_text_input = await async_render_template(filename, context)
return await llm_client.acreate_structured_output(formatted_text_input, formatted_text_input, response_model)
if __name__ == "__main__":
generate_graph("test", "test", response_model=KnowledgeGraph)
import asyncio
asyncio.run(generate_graph("generate_graph_prompt.txt", {
'layer': 'text'
}, response_model=KnowledgeGraph))

View file

@@ -5,7 +5,7 @@ import random
import string
import uuid
from pathlib import Path
from jinja2 import Environment, FileSystemLoader, select_autoescape
from graphviz import Digraph
from sqlalchemy import or_
from sqlalchemy.orm import contains_eager
@@ -285,8 +285,11 @@ async def get_memory_name_by_doc_id(session: AsyncSession, docs_id: str):
return None
def read_query_prompt(filename: str) -> str:
"""Read a query prompt from a file."""
async def read_query_prompt(filename: str) -> str:
"""Read a query prompt from a file.
:param filename: The name of the file to read.
:return: The content of the file as a string.
"""
script_directory = Path(__file__).parent
# Set the base directory relative to the script's directory
@@ -300,4 +303,47 @@ def read_query_prompt(filename: str) -> str:
logging.error(f"File not found: {file_path.absolute()}")
except Exception as e:
logging.error(f"An error of type {type(e).__name__} occurred while reading file: {file_path.absolute()}. Error message: {e}")
return None
return None
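# Sketch: read_query_prompt is now a coroutine function, so call sites await it
# or drive it with asyncio.run, as classify_content.py above now does:
#
#   import asyncio
#   from cognitive_architecture.utils import read_query_prompt
#
#   prompt_text = asyncio.run(read_query_prompt("classify_content.txt"))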
async def print_file_content(file_path):
# Create a Path object for the file path
path = Path(file_path)
# Check if the file exists
if path.is_file():
# Open and read the file, then print its content
with path.open('r') as file:
print(file.read())
else:
# Print an error message if the file does not exist
print(f"The file '{file_path}' does not exist.")
async def async_render_template(filename: str, context: dict) -> str:
"""Render a Jinja2 template asynchronously.
:param filename: The name of the template file to render.
:param context: The context to render the template with.
:return: The rendered template as a string."""
# Initialize the Jinja2 environment to load templates from the filesystem
script_directory = Path(__file__).parent
# Set the base directory relative to the script's directory
base_directory = script_directory.parent / "cognitive_architecture/infrastructure/llm/prompts"
env = Environment(
loader=FileSystemLoader(base_directory),
autoescape=select_autoescape(['html', 'xml', 'txt'])
)
# Load the template by name
template = env.get_template(filename)
# Render the template with the provided context
rendered_template = template.render(context)
return rendered_template
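
async_render_template is declared async, but the render() call itself runs synchronously; Jinja2 also supports native async rendering via enable_async=True. A sketch of that variant, assuming the same prompts directory layout:

from pathlib import Path
from jinja2 import Environment, FileSystemLoader, select_autoescape

async def render_template_native_async(filename: str, context: dict) -> str:
    base_directory = Path(__file__).parent.parent / "cognitive_architecture/infrastructure/llm/prompts"
    env = Environment(
        loader=FileSystemLoader(base_directory),
        autoescape=select_autoescape(["html", "xml", "txt"]),
        enable_async=True,  # makes template.render_async available
    )
    template = env.get_template(filename)
    # Unlike render(), render_async must be awaited.
    return await template.render_async(context)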