DummyLLMAdapter WIP

2024-11-28 11:49:28 +01:00 · 2024-11-28 11:49:28 +01:00 · a2ff42332e
commit a2ff42332e
parent 2408fd7a01
7 changed files with 59 additions and 65 deletions
--- a/cognee/infrastructure/llm/anthropic/adapter.py
+++ b/cognee/infrastructure/llm/anthropic/adapter.py
@ -4,7 +4,6 @@ import instructor
 from tenacity import retry, stop_after_attempt
 import anthropic
 from cognee.infrastructure.llm.llm_interface import LLMInterface
 from cognee.infrastructure.llm.prompts import read_query_prompt
 class AnthropicAdapter(LLMInterface):
@ -38,17 +37,3 @@ class AnthropicAdapter(LLMInterface):
            }],
            response_model = response_model,
        )
    def show_prompt(self, text_input: str, system_prompt: str) -> str:
        """Build a readable preview of the prompt for a user query.

        Args:
            text_input: The user's text. A placeholder sentence is used
                when this is empty or None.
            system_prompt: Value handed to ``read_query_prompt`` to obtain
                the system prompt text.

        Returns:
            The system prompt and user input combined into one string, or
            None when ``read_query_prompt`` returns a falsy value.

        Raises:
            ValueError: If ``system_prompt`` is empty or None.
        """
        if not system_prompt:
            raise ValueError("No system prompt path provided.")

        user_text = text_input or "No user input provided."
        prompt_text = read_query_prompt(system_prompt)
        if not prompt_text:
            return None

        return f"System Prompt:\n{prompt_text}\n\nUser Input:\n{user_text}\n"
--- a/cognee/infrastructure/llm/generic_llm_api/adapter.py
+++ b/cognee/infrastructure/llm/generic_llm_api/adapter.py
@ -6,7 +6,6 @@ import instructor
 from tenacity import retry, stop_after_attempt
 import openai
 from cognee.infrastructure.llm.llm_interface import LLMInterface
 from cognee.infrastructure.llm.prompts import read_query_prompt
 from cognee.shared.data_models import MonitoringTool
 from cognee.base_config import get_base_config
 from cognee.infrastructure.llm.config import get_llm_config
@ -123,13 +122,3 @@ class GenericAPIAdapter(LLMInterface):
            response_model = response_model,
        )
    def show_prompt(self, text_input: str, system_prompt: str) -> str:
        """Return a human-readable rendering of the prompt for a user query.

        Args:
            text_input: User-supplied text. When falsy, the sentence
                "No user input provided." is shown instead.
            system_prompt: Argument passed to ``read_query_prompt`` to load
                the system prompt text.

        Returns:
            A string containing the system prompt followed by the user
            input, or None if ``read_query_prompt`` yields a falsy value.

        Raises:
            ValueError: If ``system_prompt`` is falsy.
        """
        shown_input = text_input if text_input else "No user input provided."

        if not system_prompt:
            raise ValueError("No system prompt path provided.")

        loaded_prompt = read_query_prompt(system_prompt)
        if loaded_prompt:
            return f"System Prompt:\n{loaded_prompt}\n\nUser Input:\n{shown_input}\n"
        return None
--- a/cognee/infrastructure/llm/llm_interface.py
+++ b/cognee/infrastructure/llm/llm_interface.py
@ -3,6 +3,8 @@
 from typing import Type, Protocol
 from abc import abstractmethod
 from pydantic import BaseModel
 from cognee.infrastructure.llm.prompts import read_query_prompt
 class LLMInterface(Protocol):
    """ LLM Interface """
@ -16,5 +18,13 @@ class LLMInterface(Protocol):
    def show_prompt(self, text_input: str, system_prompt: str) -> str:
        """Format and display the prompt for a user query.

        This is a concrete default implementation. It is deliberately not
        decorated with ``@abstractmethod``: an abstract method would make
        every explicit subclass that does not override it impossible to
        instantiate.

        Args:
            text_input: The user's text. A placeholder sentence is used
                when this is empty or None.
            system_prompt: Value passed to ``read_query_prompt`` to obtain
                the system prompt text.

        Returns:
            The system prompt and the user input combined into one string.

        Raises:
            ValueError: If ``system_prompt`` is empty or None.
        """
        if not text_input:
            text_input = "No user input provided."
        if not system_prompt:
            raise ValueError("No system prompt path provided.")
        system_prompt = read_query_prompt(system_prompt)
        formatted_prompt = f"""System Prompt:\n{system_prompt}\n\nUser Input:\n{text_input}\n"""
        return formatted_prompt
--- a/cognee/infrastructure/llm/openai/adapter.py
+++ b/cognee/infrastructure/llm/openai/adapter.py
@ -8,7 +8,6 @@ import instructor
 from pydantic import BaseModel
 from cognee.infrastructure.llm.llm_interface import LLMInterface
 from cognee.infrastructure.llm.prompts import read_query_prompt
 class OpenAIAdapter(LLMInterface):
    name = "OpenAI"
@ -121,14 +120,3 @@ class OpenAIAdapter(LLMInterface):
            max_tokens = 300,
            max_retries = 5,
        )
    def show_prompt(self, text_input: str, system_prompt: str) -> str:
        """Assemble the prompt for a user query as display text.

        Args:
            text_input: Text from the user; replaced by a placeholder
                sentence when empty or None.
            system_prompt: Passed to ``read_query_prompt`` to fetch the
                system prompt text.

        Returns:
            The formatted prompt, or None when ``read_query_prompt``
            returns a falsy value.

        Raises:
            ValueError: If ``system_prompt`` is empty or None.
        """
        if not text_input:
            text_input = "No user input provided."

        if system_prompt:
            prompt_body = read_query_prompt(system_prompt)
        else:
            raise ValueError("No system prompt path provided.")

        if not prompt_body:
            return None
        return f"System Prompt:\n{prompt_body}\n\nUser Input:\n{text_input}\n"
--- a/cognee/infrastructure/llm/prompts/generate_cog_layers.txt
+++ b/cognee/infrastructure/llm/prompts/generate_cog_layers.txt
@ -1,14 +0,0 @@
 You are tasked with analyzing `{{ data_type }}` files, especially in a multilayer network context for tasks such as analysis, categorization, and feature extraction. Various layers can be incorporated to capture the depth and breadth of information contained within the {{ data_type }}.
 These layers can help in understanding the content, context, and characteristics of the `{{ data_type }}`.
 Your objective is to extract meaningful layers of information that will contribute to constructing a detailed multilayer network or knowledge graph.
 Approach this task by considering the unique characteristics and inherent properties of the data at hand.
 VERY IMPORTANT: The context you are working in is `{{ category_name }}` and the specific domain you are extracting data on is `{{ category_name }}`.
 Guidelines for Layer Extraction:
 Take into account that the content type, in this case `{{ category_name }}`, should play a major role in how you decompose the data into layers.
 Based on your analysis, define and describe the layers you've identified, explaining their relevance and contribution to understanding the dataset. Your independent identification of layers will enable a nuanced and multifaceted representation of the data, enhancing applications in knowledge discovery, content analysis, and information retrieval.
--- a/cognee/modules/data/extraction/extract_cognitive_layers.py
+++ b/cognee/modules/data/extraction/extract_cognitive_layers.py
@ -1,11 +0,0 @@
 from typing import Type, Dict
 from pydantic import BaseModel
 from cognee.infrastructure.llm.prompts import render_prompt
 from cognee.infrastructure.llm.get_llm_client import get_llm_client
 async def extract_cognitive_layers(content: str, category: Dict, response_model: Type[BaseModel]):
    """Request structured cognitive-layer output for ``content`` from the LLM client.

    Args:
        content: Text passed to the LLM client as the input.
        category: Mapping used to render the "generate_cog_layers.txt"
            prompt template.
        response_model: Pydantic model class passed to the client as the
            requested response model.

    Returns:
        Whatever the client's ``acreate_structured_output`` call returns.
    """
    client = get_llm_client()
    rendered_system_prompt = render_prompt("generate_cog_layers.txt", category)
    structured_output = await client.acreate_structured_output(
        content,
        rendered_system_prompt,
        response_model,
    )
    return structured_output
--- a/profiling/util/DummyLLMAdapter.py
+++ b/profiling/util/DummyLLMAdapter.py
@ -0,0 +1,47 @@
 import spacy
 import textacy
 from typing import Type
 from uuid import uuid4
 from pydantic import BaseModel
 from cognee.infrastructure.llm.llm_interface import LLMInterface
 from cognee.shared.data_models import SummarizedContent
 from cognee.shared.data_models import KnowledgeGraph, Node, Edge
 class DummyLLMAdapter(LLMInterface):
    """LLM adapter stand-in that builds structured output locally.

    Only ``SummarizedContent`` and ``KnowledgeGraph`` response models are
    supported; both are produced by the ``dummy_*`` helper functions in
    this module.
    """

    # spaCy pipeline, loaded once when the class is defined and shared by
    # all instances.
    nlp = spacy.load('en_core_web_sm')

    async def acreate_structured_output(self,
                                        text_input: str,
                                        system_prompt: str,
                                        response_model: Type[BaseModel]) -> BaseModel:
        """Return a dummy instance of ``response_model`` derived from ``text_input``.

        Args:
            text_input: Text to summarize or to extract a graph from.
            system_prompt: Accepted for interface compatibility; unused.
            response_model: The model class the caller wants back.

        Returns:
            A ``SummarizedContent`` or ``KnowledgeGraph`` instance.

        Raises:
            NotImplementedError: If ``response_model`` is neither of the
                two supported model classes.
        """
        # response_model is a class, not an instance, so isinstance() would
        # never match; compare by subclass relationship instead.
        is_model_class = isinstance(response_model, type)

        if is_model_class and issubclass(response_model, SummarizedContent):
            return dummy_summarize_content(text_input)

        if is_model_class and issubclass(response_model, KnowledgeGraph):
            # nlp is a class attribute; it is not visible as a bare name
            # inside the method body.
            return dummy_extract_knowledge_graph(text_input, self.nlp)

        raise NotImplementedError(
            "Currently dummy acreate_structured_output is only implemented "
            "for SummarizedContent and KnowledgeGraph"
        )
 def dummy_extract_knowledge_graph(text, nlp):
    """Build a ``KnowledgeGraph`` from subject-verb-object triples in ``text``.

    Args:
        text: Text to analyse.
        nlp: Callable applied to ``text`` to produce the document handed to
            ``textacy.extract.subject_verb_object_triples``.

    Returns:
        A ``KnowledgeGraph`` with one ``Node`` per distinct subject/object
        name and one ``Edge`` per extracted triple.
    """
    doc = nlp(text)

    nodes = {}
    edges = []

    for triple in textacy.extract.subject_verb_object_triples(doc):
        source = "_".join(str(token) for token in triple.subject)
        target = "_".join(str(token) for token in triple.object)

        # Create a node only the first time a name is seen so that repeated
        # mentions share one id.
        for name in (source, target):
            if name not in nodes:
                nodes[name] = Node(id=str(uuid4()), name=name, type="object", description="")

        edge_type = "_".join(str(token) for token in triple.verb)
        edges.append(Edge(
            source_node_id=nodes[source].id,
            target_node_id=nodes[target].id,
            relationship_name=edge_type,
        ))

    # The graph needs the Node objects (dict values), not their names (keys).
    return KnowledgeGraph(nodes=list(nodes.values()), edges=edges)
 def dummy_summarize_content(text):
    """Produce a deterministic placeholder summary from the longest words in ``text``.

    Args:
        text: Text to summarize.

    Returns:
        A ``SummarizedContent`` whose ``summary`` joins up to 100 distinct
        words and whose ``description`` joins up to 10, longest first.
    """
    # split() with no argument splits on any whitespace and drops empty
    # tokens. dict.fromkeys dedupes while keeping first-seen order, so words
    # of equal length come out in the same order on every run.
    distinct_words = list(dict.fromkeys(text.split()))

    # sorted() is stable, so equal-length words keep their first-seen order.
    longest_first = sorted(distinct_words, key=len, reverse=True)

    summary = " ".join(longest_first[:100])
    description = " ".join(longest_first[:10])
    return SummarizedContent(summary=summary, description=description)