From c5d132ed14cd0b312acc599c882b144090a818c9 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Thu, 14 Nov 2024 14:49:08 +0100 Subject: [PATCH 1/4] Delete cognee/modules/cognify/evaluate.py --- cognee/modules/cognify/evaluate.py | 65 ------------------------------ 1 file changed, 65 deletions(-) delete mode 100644 cognee/modules/cognify/evaluate.py diff --git a/cognee/modules/cognify/evaluate.py b/cognee/modules/cognify/evaluate.py deleted file mode 100644 index 194e71fe2..000000000 --- a/cognee/modules/cognify/evaluate.py +++ /dev/null @@ -1,65 +0,0 @@ -import dspy -from dspy.evaluate.evaluate import Evaluate -from dspy.primitives.example import Example -from cognee.modules.data.extraction.knowledge_graph.extract_knowledge_graph_module import ExtractKnowledgeGraph -from cognee.root_dir import get_absolute_path -from cognee.shared.data_models import Answer -from cognee.infrastructure.llm import get_llm_config -from cognee.infrastructure.llm.get_llm_client import get_llm_client -from cognee.modules.cognify.dataset import HotPotQA - -def evaluate(): - dataset = HotPotQA( - train_seed = 1, - train_size = 16, - eval_seed = 2023, - dev_size = 8, - test_size = 0, - keep_details = True, - ) - - #Evaluate - evaluate_examples = [ - Example( - base = None, - question = None, - context = "\r\n".join("".join(sentences) for sentences in example.context["sentences"]), - answer = example.answer, - ) for example in dataset.dev - ] - - devset = [example.with_inputs("context", "question") for example in evaluate_examples] - - evaluate_on_hotpotqa = Evaluate(devset = devset, num_threads = 1, display_progress = True, display_table = 5, max_tokens = 4096) - - llm_config = get_llm_config() - gpt4 = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096) - compiled_extract_knowledge_graph = ExtractKnowledgeGraph(lm = gpt4) - compiled_extract_knowledge_graph.load(get_absolute_path("./programs/extract_knowledge_graph/extract_knowledge_graph.json")) - - def evaluate_answer(example, graph_prediction, trace = None): - llm_client = get_llm_client() - - try: - answer_prediction = llm_client.create_structured_output( - text_input = example.question, - system_prompt = f"""Answer the question by looking at the provided knowledge graph. - Use only the graph to answer the question and be very brief. - This is the knowledge graph: - {graph_prediction.graph.model_dump(mode = "json")}""", - response_model = Answer, - ) - except: - return False - - return dsp.answer_match(example.answer, [answer_prediction.answer], frac = 0.8) or \ - dsp.passage_match([example.answer], [answer_prediction.answer]) - - gpt4 = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096) - dspy.settings.configure(lm = gpt4) - - evaluate_on_hotpotqa(compiled_extract_knowledge_graph, metric = evaluate_answer) - - -if __name__ == "__main__": - evaluate() From cf09a5ea37b777ebc91191e15695f8d3689d1a4b Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Thu, 14 Nov 2024 14:49:23 +0100 Subject: [PATCH 2/4] Delete cognee/modules/cognify/test.py --- cognee/modules/cognify/test.py | 89 ---------------------------------- 1 file changed, 89 deletions(-) delete mode 100644 cognee/modules/cognify/test.py diff --git a/cognee/modules/cognify/test.py b/cognee/modules/cognify/test.py deleted file mode 100644 index c952ae0d5..000000000 --- a/cognee/modules/cognify/test.py +++ /dev/null @@ -1,89 +0,0 @@ -import dspy -from cognee.modules.data.extraction.knowledge_graph.extract_knowledge_graph_module import ExtractKnowledgeGraph -from cognee.root_dir import get_absolute_path -from cognee.infrastructure.llm import get_llm_config - -def run(): - llm_config = get_llm_config() - gpt4 = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096) - compiled_extract_knowledge_graph = ExtractKnowledgeGraph(lm = gpt4) - compiled_extract_knowledge_graph.load(get_absolute_path("./programs/extract_knowledge_graph/extract_knowledge_graph.json")) - - text = """The 1985 FA Charity Shield (also known as the General Motors FA - Charity Shield for sponsorship reasons) was the 63rd FA Charity Shield, - an annual football match played between the winners of the previous - season's First Division and FA Cup competitions. The match was played on - 10 August 1985 at Wembley Stadium and contested by Everton, - who had won the 1984\u201385 First Division, and Manchester United, - who had won the 1984\u201385 FA Cup. Everton won 2\u20130 with goals from - Trevor Steven and Adrian Heath. Trevor Steven put Everton into the lead - when he swept home from six yards after a cross from the left in the first half. - The second goal came in the second half when Manchester United goalkeeper - Gary Bailey dropped a cross from the left to allow Adrian Heath to tip the - ball past him into the left corner of the net.\r\nThe 1995 FA Charity Shield - (also known as the Littlewoods FA Charity Shield for sponsorship reasons) was the - 73rd FA Charity Shield, an annual football match played between the winners of - the previous season's Premier League and FA Cup competitions. The match was - played on 13 August 1995 at Wembley Stadium and contested by Blackburn Rovers, - who had won the Premier League and FA Cup winners Everton. It was Blackburn's - second successive Charity Shield appearance, while Everton were appearing in - their eleventh and their first since 1987. Everton won the match 1\u20130 - with a goal from Vinny Samways when he caught Tim Flowers off his line and - lifted the ball over him from the left of the penalty area and into the right - corner of the net. Dave Watson lifted the trophy for Everton.\r\nThe 1972 FA - Charity Shield was contested between Manchester City and Aston Villa.\r\nThe - 1997 FA Charity Shield (known as the Littlewoods FA Charity Shield for - sponsorship reasons) was the 75th FA Charity Shield, an annual football match - played between the winners of the previous season's Premier League and - FA Cup competitions. The match was played on 3 August 1997 at Wembley Stadium - and contested by Manchester United, who had won the 1996\u201397 FA Premier League, - and Chelsea, who had won the 1996\u201397 FA Cup. Manchester United won the match - 4\u20132 on penalties after the match had finished at 1\u20131 after 90 minutes. - \r\nThe 1956 FA Charity Shield was the 34th FA Charity Shield, an annual football - match held between the winners of the previous season's Football League and - FA Cup competitions. The match was contested by Manchester United, who had won - the 1955\u201356 Football League, and Manchester City, who had won the - 1955\u201356 FA Cup, at Maine Road, Manchester, on 24 October 1956. Manchester - United won the match 1\u20130, Dennis Viollet scoring the winning goal. - Manchester United goalkeeper David Gaskell made his debut for the club during - the game, taking the place of injured goalkeeper Ray Wood, and, at the age of - 16 years and 19 days, became the youngest player ever to play for the club. - \r\nThe 1937 FA Charity Shield was the 24th FA Charity Shield, a football match - between the winners of the previous season's First Division and FA Cup competitions. - The match was contested by league champions Manchester City and FA Cup winners - Sunderland, and was played at Maine Road, the home ground of Manchester City. - Manchester City won the game, 2\u20130.\r\nThe 2000 FA Charity Shield (also known - as the One 2 One FA Charity Shield for sponsorship reasons) was the - 78th FA Charity Shield, an annual football match played between the winners - of the previous season's Premier League and FA Cup competitions. The match - was played between Manchester United, who won the 1999\u20132000 Premier League, - and Chelsea, who won the 1999\u20132000 FA Cup, and resulted in a 2\u20130 Chelsea win. - The goals were scored by Jimmy Floyd Hasselbaink and Mario Melchiot. Roy Keane - was sent off for a challenge on Gustavo Poyet and was the last person to be - sent off at the old Wembley Stadium.\r\nThe 2001 FA Charity Shield (also known - as the One 2 One FA Charity Shield for sponsorship reasons) was the 79th FA Charity Shield, - an annual football match played between the winners of the previous season's - Premier League and FA Cup. The match was contested between Liverpool, winners of - the 2000\u201301 FA Cup and Manchester United, who won the 2000\u201301 Premier - League on 12 August 2001. It was the first Shield match to be held at the - Millennium Stadium following the closure of Wembley Stadium for reconstruction. - \r\nAston Villa Football Club ( ; nicknamed Villa, The Villa, The Villans - and The Lions) is a professional football club in Aston, Birmingham, that plays - in the Championship, the second level of English football. Founded in 1874, - they have played at their current home ground, Villa Park, since 1897. Aston Villa - were one of the founder members of the Football League in 1888 and of the - Premier League in 1992.\r\nThe 1996 FA Charity Shield (also known as the - Littlewoods FA Charity Shield for sponsorship reasons) was the 74th FA Charity Shield, - an annual football match played between the winners of the previous season's Premier - League and FA Cup competitions. The match was played on 11 August 1996 at Wembley - Stadium and contested by Manchester United, who had won the Double of Premier League - and FA Cup in 1995\u201396, and Newcastle United, who had finished as runners-up - in the Premier League. Manchester United won the match 4\u20130 with goals from - Eric Cantona, Nicky Butt, David Beckham and Roy Keane.""" - - prediction = compiled_extract_knowledge_graph(context = text, question = "") - - print(prediction.graph) - -if __name__ == "__main__": - run() From 8e9040815fd605f429b06af0752fd3cae47acb6e Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Thu, 14 Nov 2024 14:49:34 +0100 Subject: [PATCH 3/4] Delete cognee/modules/cognify/train.py --- cognee/modules/cognify/train.py | 68 --------------------------------- 1 file changed, 68 deletions(-) delete mode 100644 cognee/modules/cognify/train.py diff --git a/cognee/modules/cognify/train.py b/cognee/modules/cognify/train.py deleted file mode 100644 index 5b2a058d0..000000000 --- a/cognee/modules/cognify/train.py +++ /dev/null @@ -1,68 +0,0 @@ -import dspy -from dspy.teleprompt import BootstrapFewShot -from dspy.primitives.example import Example -from cognee.modules.data.extraction.knowledge_graph.extract_knowledge_graph import ExtractKnowledgeGraph -from cognee.root_dir import get_absolute_path -from cognee.infrastructure.files.storage import LocalStorage -from cognee.shared.data_models import Answer -from cognee.infrastructure.llm.get_llm_client import get_llm_client -from cognee.modules.cognify.dataset import HotPotQA -from cognee.infrastructure.llm import get_llm_config - -def train(): - colbertv2_wiki17_abstracts = dspy.ColBERTv2(url = "http://20.102.90.50:2017/wiki17_abstracts") - - dspy.configure(rm = colbertv2_wiki17_abstracts) - - def evaluate_answer(example, graph_prediction, trace = None): - llm_client = get_llm_client() - - try: - answer_prediction = llm_client.create_structured_output( - text_input = example.question, - system_prompt = f"""Answer the question by looking at the provided knowledge graph. - Use only the graph to answer the question and be very brief. - This is the knowledge graph: - {graph_prediction.graph.model_dump(mode = "json")}""", - response_model = Answer, - ) - except: - return False - - return dsp.answer_match(example.answer, [answer_prediction.answer], frac = 0.8) or \ - dsp.passage_match([example.answer], [answer_prediction.answer]) - - optimizer = BootstrapFewShot(metric = evaluate_answer) - - dataset = HotPotQA( - train_seed = 1, - train_size = 16, - eval_seed = 2023, - dev_size = 8, - test_size = 0, - keep_details = True, - ) - - # Train - train_examples = [ - Example( - base = None, - question = example.question, - context = "\r\n".join("".join(sentences) for sentences in example.context["sentences"]), - answer = example.answer, - ) for example in dataset.train - ] - - trainset = [example.with_inputs("context", "question") for example in train_examples] - - llm_config = get_llm_config() - gpt4 = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096) - - compiled_extract_knowledge_graph = optimizer.compile(ExtractKnowledgeGraph(lm = gpt4), trainset = trainset) - - # Save program - LocalStorage.ensure_directory_exists(get_absolute_path("./programs/extract_knowledge_graph")) - compiled_extract_knowledge_graph.save(get_absolute_path("./programs/extract_knowledge_graph/extract_knowledge_graph.json")) - -if __name__ == "__main__": - train() From 36ada5974d8b6bd611141bf957b14bae0892d20b Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Thu, 14 Nov 2024 14:49:45 +0100 Subject: [PATCH 4/4] Delete cognee/modules/cognify/dataset.py --- cognee/modules/cognify/dataset.py | 84 ------------------------------- 1 file changed, 84 deletions(-) delete mode 100644 cognee/modules/cognify/dataset.py diff --git a/cognee/modules/cognify/dataset.py b/cognee/modules/cognify/dataset.py deleted file mode 100644 index 04684ac57..000000000 --- a/cognee/modules/cognify/dataset.py +++ /dev/null @@ -1,84 +0,0 @@ -import random - -from datasets import load_dataset - -from dspy.datasets.dataset import Dataset - - -class HotPotQA(Dataset): - def __init__(self, *args, only_hard_examples=True, keep_details='dev_titles', unofficial_dev=True, **kwargs) -> None: - super().__init__(*args, **kwargs) - assert only_hard_examples, "Care must be taken when adding support for easy examples." \ - "Dev must be all hard to match official dev, but training can be flexible." - - hf_official_train = load_dataset("hotpot_qa", 'fullwiki', split='train') - hf_official_dev = load_dataset("hotpot_qa", 'fullwiki', split='validation') - - official_train = [] - for raw_example in hf_official_train: - if raw_example['level'] == 'hard': - if keep_details is True: - keys = ['id', 'question', 'answer', 'type', 'supporting_facts', 'context'] - elif keep_details == 'dev_titles': - keys = ['question', 'answer', 'supporting_facts'] - else: - keys = ['question', 'answer'] - - example = {k: raw_example[k] for k in keys} - - if 'supporting_facts' in example: - example['gold_titles'] = set(example['supporting_facts']['title']) - del example['supporting_facts'] - - official_train.append(example) - - rng = random.Random(0) - rng.shuffle(official_train) - - self._train = official_train[:len(official_train)*75//100] - - if unofficial_dev: - self._dev = official_train[len(official_train)*75//100:] - else: - self._dev = None - - for example in self._train: - if keep_details == 'dev_titles': - del example['gold_titles'] - - test = [] - for raw_example in hf_official_dev: - assert raw_example['level'] == 'hard' - example = {k: raw_example[k] for k in ['id', 'question', 'answer', 'type', 'supporting_facts']} - if 'supporting_facts' in example: - example['gold_titles'] = set(example['supporting_facts']['title']) - del example['supporting_facts'] - test.append(example) - - self._test = test - - -if __name__ == '__main__': - from dsp.utils import dotdict - - data_args = dotdict(train_seed=1, train_size=16, eval_seed=2023, dev_size=200*5, test_size=0) - dataset = HotPotQA(**data_args) - - print(dataset) - print(dataset.train[0].question) - print(dataset.train[15].question) - - print(len(dataset.train), len(dataset.dev), len(dataset.test)) - - print(dataset.dev[0].question) - print(dataset.dev[340].question) - print(dataset.dev[937].question) - -""" -What was the population of the city where Woodward Avenue ends in 2010? -Where did the star , who is also an executive producer, of the Mick begin her carrer? -16 1000 0 -Both London and German have seen attacks during war, there was one specific type of attack that Germany called the blitz, what did London call a similar attack? -Pre-Madonna was a collection of demos by the singer who was a leading presence during the emergence of what network? -Alan Mills composed the classic folk song that tells the story of what? -"""