DummyLLMAdapter WIP

This commit is contained in:
Leon Luithlen 2024-11-28 11:49:28 +01:00
parent 2408fd7a01
commit a2ff42332e
7 changed files with 59 additions and 65 deletions

View file

@@ -4,7 +4,6 @@ import instructor
from tenacity import retry, stop_after_attempt
import anthropic
from cognee.infrastructure.llm.llm_interface import LLMInterface
from cognee.infrastructure.llm.prompts import read_query_prompt
class AnthropicAdapter(LLMInterface):
@@ -38,17 +37,3 @@ class AnthropicAdapter(LLMInterface):
            }],
            response_model = response_model,
        )

    def show_prompt(self, text_input: str, system_prompt: str) -> str:
        """Format and display the prompt for a user query."""
        if not text_input:
            text_input = "No user input provided."
        if not system_prompt:
            raise ValueError("No system prompt path provided.")
        system_prompt = read_query_prompt(system_prompt)

        formatted_prompt = f"""System Prompt:\n{system_prompt}\n\nUser Input:\n{text_input}\n""" if system_prompt else None
        return formatted_prompt

View file

@@ -6,7 +6,6 @@ import instructor
from tenacity import retry, stop_after_attempt
import openai
from cognee.infrastructure.llm.llm_interface import LLMInterface
from cognee.infrastructure.llm.prompts import read_query_prompt
from cognee.shared.data_models import MonitoringTool
from cognee.base_config import get_base_config
from cognee.infrastructure.llm.config import get_llm_config
@@ -123,13 +122,3 @@ class GenericAPIAdapter(LLMInterface):
            response_model = response_model,
        )

    def show_prompt(self, text_input: str, system_prompt: str) -> str:
        """Format and display the prompt for a user query."""
        if not text_input:
            text_input = "No user input provided."
        if not system_prompt:
            raise ValueError("No system prompt path provided.")
        system_prompt = read_query_prompt(system_prompt)

        formatted_prompt = f"""System Prompt:\n{system_prompt}\n\nUser Input:\n{text_input}\n""" if system_prompt else None
        return formatted_prompt

View file

@@ -3,6 +3,8 @@
from typing import Type, Protocol
from abc import abstractmethod
from pydantic import BaseModel
from cognee.infrastructure.llm.prompts import read_query_prompt
class LLMInterface(Protocol):
""" LLM Interface """
@@ -16,5 +18,13 @@ class LLMInterface(Protocol):
    @abstractmethod
    def show_prompt(self, text_input: str, system_prompt: str) -> str:
        """To get structured output, import/call this function"""
        raise NotImplementedError
        """Format and display the prompt for a user query."""
        if not text_input:
            text_input = "No user input provided."
        if not system_prompt:
            raise ValueError("No system prompt path provided.")
        system_prompt = read_query_prompt(system_prompt)

        formatted_prompt = f"""System Prompt:\n{system_prompt}\n\nUser Input:\n{text_input}\n"""
        return formatted_prompt
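
Note: a minimal sketch of exercising the new shared default directly. Everything here is illustrative: "summarize.txt" is a hypothetical prompt file expected by read_query_prompt, and None stands in for an adapter instance since self is unused by the default body.

from cognee.infrastructure.llm.llm_interface import LLMInterface

# Calls the default implementation unbound; read_query_prompt loads the
# prompt text from the given file path before it is formatted.
formatted = LLMInterface.show_prompt(None, "What is a knowledge graph?", "summarize.txt")
print(formatted)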

View file

@@ -8,7 +8,6 @@ import instructor
from pydantic import BaseModel
from cognee.infrastructure.llm.llm_interface import LLMInterface
from cognee.infrastructure.llm.prompts import read_query_prompt
class OpenAIAdapter(LLMInterface):
name = "OpenAI"
@@ -121,14 +120,3 @@ class OpenAIAdapter(LLMInterface):
            max_tokens = 300,
            max_retries = 5,
        )

    def show_prompt(self, text_input: str, system_prompt: str) -> str:
        """Format and display the prompt for a user query."""
        if not text_input:
            text_input = "No user input provided."
        if not system_prompt:
            raise ValueError("No system prompt path provided.")
        system_prompt = read_query_prompt(system_prompt)

        formatted_prompt = f"""System Prompt:\n{system_prompt}\n\nUser Input:\n{text_input}\n""" if system_prompt else None
        return formatted_prompt

View file

@@ -1,14 +0,0 @@
You are tasked with analyzing `{{ data_type }}` files, especially in a multilayer network context for tasks such as analysis, categorization, and feature extraction. Various layers can be incorporated to capture the depth and breadth of information contained within the {{ data_type }}.
These layers can help in understanding the content, context, and characteristics of the `{{ data_type }}`.
Your objective is to extract meaningful layers of information that will contribute to constructing a detailed multilayer network or knowledge graph.
Approach this task by considering the unique characteristics and inherent properties of the data at hand.
VERY IMPORTANT: The context you are working in is `{{ category_name }}` and the specific domain you are extracting data on is `{{ category_name }}`.
Guidelines for Layer Extraction:
Take into account that the content type, in this case `{{ category_name }}`, should play a major role in how you decompose the data into layers.
Based on your analysis, define and describe the layers you've identified, explaining their relevance and contribution to understanding the dataset. Your independent identification of layers will enable a nuanced and multifaceted representation of the data, enhancing applications in knowledge discovery, content analysis, and information retrieval.

View file

@@ -1,11 +0,0 @@
from typing import Type, Dict
from pydantic import BaseModel
from cognee.infrastructure.llm.prompts import render_prompt
from cognee.infrastructure.llm.get_llm_client import get_llm_client
async def extract_cognitive_layers(content: str, category: Dict, response_model: Type[BaseModel]):
    llm_client = get_llm_client()
    system_prompt = render_prompt("generate_cog_layers.txt", category)

    return await llm_client.acreate_structured_output(content, system_prompt, response_model)
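
Note: the real call site of this removed task is not part of the diff; the following is only a sketch of how it was presumably invoked. CognitiveLayers is an illustrative stand-in for whatever response model the pipeline actually passed; the category keys match the Jinja variables in the deleted generate_cog_layers.txt.

from typing import List
from pydantic import BaseModel

class CognitiveLayers(BaseModel):  # illustrative stand-in, not the real model
    layers: List[str]

# category supplies the {{ data_type }} and {{ category_name }} template variables
layers = await extract_cognitive_layers(
    content = "Proteins fold into three-dimensional structures.",
    category = {"data_type": "text", "category_name": "Science"},
    response_model = CognitiveLayers,
)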

View file

@@ -0,0 +1,47 @@
import spacy
import textacy
from typing import Type
from uuid import uuid4
from pydantic import BaseModel

from cognee.infrastructure.llm.llm_interface import LLMInterface
from cognee.shared.data_models import SummarizedContent
from cognee.shared.data_models import KnowledgeGraph, Node, Edge


class DummyLLMAdapter(LLMInterface):
    nlp = spacy.load('en_core_web_sm')

    async def acreate_structured_output(self,
                                        text_input: str,
                                        system_prompt: str,
                                        response_model: Type[BaseModel]) -> BaseModel:
        # response_model is a class, not an instance, so check with issubclass
        if issubclass(response_model, SummarizedContent):
            return dummy_summarize_content(text_input)
        elif issubclass(response_model, KnowledgeGraph):
            return dummy_extract_knowledge_graph(text_input, self.nlp)
        else:
            raise NotImplementedError("Currently the dummy acreate_structured_output is only implemented for SummarizedContent and KnowledgeGraph")


def dummy_extract_knowledge_graph(text, nlp):
    """Build a naive knowledge graph from subject-verb-object triples."""
    doc = nlp(text)
    triples = list(textacy.extract.subject_verb_object_triples(doc))

    nodes = {}
    edges = []

    for triple in triples:
        # Each triple field is a list of tokens; join them into a single name.
        source = "_".join([str(e) for e in triple.subject])
        target = "_".join([str(e) for e in triple.object])
        nodes[source] = nodes.get(source, Node(id=str(uuid4()), name=source, type="object", description=""))
        nodes[target] = nodes.get(target, Node(id=str(uuid4()), name=target, type="object", description=""))
        edge_type = "_".join([str(e) for e in triple.verb])
        edges.append(Edge(source_node_id=nodes[source].id, target_node_id=nodes[target].id, relationship_name=edge_type))

    # KnowledgeGraph expects Node objects, so pass the dict values, not the keys.
    return KnowledgeGraph(nodes=list(nodes.values()), edges=edges)


def dummy_summarize_content(text):
    """Fake a summary by picking the longest unique words in the text."""
    words = [(word, len(word)) for word in set(text.split(" "))]
    words = sorted(words, key=lambda x: x[1], reverse=True)
    summary = " ".join([word for word, _ in words[:100]])
    description = " ".join([word for word, _ in words[:10]])
    return SummarizedContent(summary=summary, description=description)
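
Note: a quick sanity-check sketch for the new helpers. It assumes the en_core_web_sm spaCy model is installed (python -m spacy download en_core_web_sm); the sample sentences are illustrative. The module-level functions are exercised directly, so no adapter instance is needed.

import spacy

nlp = spacy.load('en_core_web_sm')

# Two SVO sentences yield two edges between three nodes.
graph = dummy_extract_knowledge_graph("Cats chase mice. Dogs chase cats.", nlp)
print([node.name for node in graph.nodes])
print([edge.relationship_name for edge in graph.edges])

# The dummy summary is just the longest unique words in the input.
print(dummy_summarize_content("Cats chase mice. Dogs chase cats.").summary)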