from typing import Type
from uuid import uuid4

import spacy
import textacy
from pydantic import BaseModel

from cognee.infrastructure.llm.llm_interface import LLMInterface
from cognee.shared.data_models import SummarizedContent
from cognee.shared.data_models import KnowledgeGraph, Node, Edge


def _is_model_class(response_model, model_cls) -> bool:
    """Return True when *response_model* is the class *model_cls* or a subclass of it."""
    # ``issubclass`` raises TypeError for non-class arguments, so guard first.
    return isinstance(response_model, type) and issubclass(response_model, model_cls)


def _join_tokens(tokens) -> str:
    """Join the string form of every item in *tokens* with underscores."""
    return "_".join(str(token) for token in tokens)


class DummyLLMAdapter(LLMInterface):
    """LLM adapter stand-in that builds structured outputs locally.

    Instead of calling a language model, it dispatches on the requested
    response model to the rule-based helpers defined in this module.
    """

    # Loaded once, when the class body is executed, and shared by all instances.
    nlp = spacy.load("en_core_web_sm")

    async def acreate_structured_output(
        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
    ) -> BaseModel:
        """Build an instance of *response_model* from *text_input*.

        Args:
            text_input: Text to summarize or to extract a graph from.
            system_prompt: Accepted for interface compatibility; not used.
            response_model: The model class to produce. Only
                ``SummarizedContent`` and ``KnowledgeGraph`` (or subclasses)
                are supported.

        Returns:
            A ``SummarizedContent`` or ``KnowledgeGraph`` instance.

        Raises:
            NotImplementedError: If *response_model* is any other type.
        """
        # ``response_model`` is a class, not an instance, so it has to be
        # tested with ``issubclass`` rather than ``isinstance``.
        if _is_model_class(response_model, SummarizedContent):
            return dummy_summarize_content(text_input)

        if _is_model_class(response_model, KnowledgeGraph):
            # ``nlp`` is a class attribute; it is only reachable through
            # ``self`` (or the class), not as a bare name.
            return dummy_extract_knowledge_graph(text_input, self.nlp)

        raise NotImplementedError(
            "Currently dummy acreate_structured_output is only implemented "
            "for SummarizedContent and KnowledgeGraph"
        )


def dummy_extract_knowledge_graph(text, nlp):
    """Build a ``KnowledgeGraph`` from subject-verb-object triples in *text*.

    Args:
        text: Raw text to analyse.
        nlp: Callable that turns *text* into a document accepted by
            ``textacy.extract.subject_verb_object_triples``.

    Returns:
        A ``KnowledgeGraph`` with one ``Node`` per distinct subject/object
        name and one ``Edge`` per extracted triple, named after the verb.
    """
    doc = nlp(text)

    nodes = {}  # node name -> Node, so repeated names share a single node id
    edges = []

    for triple in textacy.extract.subject_verb_object_triples(doc):
        source = _join_tokens(triple.subject)
        target = _join_tokens(triple.object)

        # Create a node only the first time a name is seen; this avoids
        # constructing (and discarding) a Node and UUID for known names.
        for name in (source, target):
            if name not in nodes:
                nodes[name] = Node(
                    id=str(uuid4()), name=name, type="object", description=""
                )

        edges.append(
            Edge(
                source_node_id=nodes[source].id,
                target_node_id=nodes[target].id,
                relationship_name=_join_tokens(triple.verb),
            )
        )

    # Pass the Node objects themselves, not the name strings used as dict keys.
    return KnowledgeGraph(nodes=list(nodes.values()), edges=edges)


def dummy_summarize_content(text):
    """Build a ``SummarizedContent`` from the longest distinct words in *text*.

    The summary is the 100 longest distinct space-separated words joined by
    spaces; the description is the 10 longest.

    Args:
        text: Raw text to summarize.

    Returns:
        A ``SummarizedContent`` with ``summary`` and ``description`` set.
    """
    # Longest first; equal-length words are ordered alphabetically so the
    # result does not depend on set iteration order, which varies between
    # processes because of string hash randomization.
    words = sorted(set(text.split(" ")), key=lambda word: (-len(word), word))

    summary = " ".join(words[:100])
    description = " ".join(words[:10])
    return SummarizedContent(summary=summary, description=description)