Merge pull request #215 from topoteretes/clean_dspy

Remove dspy logic that confuses
This commit is contained in:
Vasilije 2024-11-14 14:51:51 +01:00 committed by GitHub
commit 535d8281b4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 0 additions and 306 deletions

View file

@ -1,84 +0,0 @@
import random
from datasets import load_dataset
from dspy.datasets.dataset import Dataset
class HotPotQA(Dataset):
    """HotPotQA ``fullwiki`` dataset restricted to examples whose level is ``hard``.

    The hard examples of the official train split are shuffled with a fixed
    seed and divided 75% / 25% into ``_train`` and (optionally) ``_dev``. The
    official validation split is stored as ``_test``.
    """

    def __init__(self, *args, only_hard_examples=True, keep_details='dev_titles', unofficial_dev=True, **kwargs) -> None:
        """Load both splits and populate ``_train``, ``_dev`` and ``_test``.

        Args:
            *args: Forwarded to the base ``Dataset`` constructor.
            only_hard_examples: Must be truthy; anything else fails the assertion.
            keep_details: ``True`` keeps every field of a training example,
                ``'dev_titles'`` keeps question/answer and leaves ``gold_titles``
                only on the dev portion, any other value keeps just
                question/answer.
            unofficial_dev: When truthy, the last 25% of the shuffled training
                examples become ``_dev``; otherwise ``_dev`` is ``None``.
            **kwargs: Forwarded to the base ``Dataset`` constructor.
        """
        super().__init__(*args, **kwargs)
        assert only_hard_examples, (
            "Care must be taken when adding support for easy examples."
            "Dev must be all hard to match official dev, but training can be flexible."
        )

        hf_official_train = load_dataset("hotpot_qa", 'fullwiki', split='train')
        hf_official_dev = load_dataset("hotpot_qa", 'fullwiki', split='validation')

        # The set of fields to keep depends only on `keep_details`, so pick it once.
        if keep_details is True:
            train_fields = ['id', 'question', 'answer', 'type', 'supporting_facts', 'context']
        elif keep_details == 'dev_titles':
            train_fields = ['question', 'answer', 'supporting_facts']
        else:
            train_fields = ['question', 'answer']

        def build_example(raw, fields):
            # Copy the requested fields and replace `supporting_facts` (if
            # present) by the set of its titles under `gold_titles`.
            record = {field: raw[field] for field in fields}
            if 'supporting_facts' in record:
                record['gold_titles'] = set(record.pop('supporting_facts')['title'])
            return record

        hard_train = [
            build_example(raw, train_fields)
            for raw in hf_official_train
            if raw['level'] == 'hard'
        ]

        # Fixed seed keeps the train/dev split reproducible across runs.
        random.Random(0).shuffle(hard_train)

        cutoff = len(hard_train)*75//100
        self._train = hard_train[:cutoff]
        self._dev = hard_train[cutoff:] if unofficial_dev else None

        # In 'dev_titles' mode the titles stay on the dev examples only.
        if keep_details == 'dev_titles':
            for record in self._train:
                del record['gold_titles']

        test_fields = ['id', 'question', 'answer', 'type', 'supporting_facts']
        test = []
        for raw in hf_official_dev:
            assert raw['level'] == 'hard'
            test.append(build_example(raw, test_fields))

        self._test = test
if __name__ == '__main__':
    # Manual smoke test: build the dataset and print a few sample questions
    # together with the split sizes.
    from dsp.utils import dotdict

    split_config = dotdict(
        train_seed=1,
        train_size=16,
        eval_seed=2023,
        dev_size=200*5,
        test_size=0,
    )
    hotpot = HotPotQA(**split_config)

    print(hotpot)

    for position in (0, 15):
        print(hotpot.train[position].question)

    print(len(hotpot.train), len(hotpot.dev), len(hotpot.test))

    for position in (0, 340, 937):
        print(hotpot.dev[position].question)
"""
What was the population of the city where Woodward Avenue ends in 2010?
Where did the star , who is also an executive producer, of the Mick begin her carrer?
16 1000 0
Both London and German have seen attacks during war, there was one specific type of attack that Germany called the blitz, what did London call a similar attack?
Pre-Madonna was a collection of demos by the singer who was a leading presence during the emergence of what network?
Alan Mills composed the classic folk song that tells the story of what?
"""

View file

@ -1,65 +0,0 @@
import dspy
from dspy.evaluate.evaluate import Evaluate
from dspy.primitives.example import Example
from cognee.modules.data.extraction.knowledge_graph.extract_knowledge_graph_module import ExtractKnowledgeGraph
from cognee.root_dir import get_absolute_path
from cognee.shared.data_models import Answer
from cognee.infrastructure.llm import get_llm_config
from cognee.infrastructure.llm.get_llm_client import get_llm_client
from cognee.modules.cognify.dataset import HotPotQA
def evaluate():
    """Evaluate the saved knowledge-graph extraction program on HotPotQA dev examples.

    Builds a small dev set from ``HotPotQA``, loads the compiled
    ``ExtractKnowledgeGraph`` program from disk and scores it with
    ``evaluate_answer``: an LLM answers each question from the predicted graph
    and that answer is matched against the gold answer.
    """
    # The metric relies on `dsp.answer_match` / `dsp.passage_match`; import the
    # package here so those names resolve when the metric runs.
    import dsp

    dataset = HotPotQA(
        train_seed = 1,
        train_size = 16,
        eval_seed = 2023,
        dev_size = 8,
        test_size = 0,
        keep_details = True,
    )

    # Evaluate
    evaluate_examples = [
        Example(
            base = None,
            # The metric sends `example.question` to the LLM, so the real
            # question has to be carried on the example.
            question = example.question,
            context = "\r\n".join("".join(sentences) for sentences in example.context["sentences"]),
            answer = example.answer,
        ) for example in dataset.dev
    ]

    devset = [example.with_inputs("context", "question") for example in evaluate_examples]

    evaluate_on_hotpotqa = Evaluate(devset = devset, num_threads = 1, display_progress = True, display_table = 5, max_tokens = 4096)

    llm_config = get_llm_config()
    gpt4 = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096)

    compiled_extract_knowledge_graph = ExtractKnowledgeGraph(lm = gpt4)
    compiled_extract_knowledge_graph.load(get_absolute_path("./programs/extract_knowledge_graph/extract_knowledge_graph.json"))

    def evaluate_answer(example, graph_prediction, trace = None):
        """Return a truthy value when the graph-based answer matches the gold answer."""
        llm_client = get_llm_client()

        try:
            answer_prediction = llm_client.create_structured_output(
                text_input = example.question,
                system_prompt = (
                    "Answer the question by looking at the provided knowledge graph.\n"
                    "Use only the graph to answer the question and be very brief.\n"
                    "This is the knowledge graph:\n"
                    f"{graph_prediction.graph.model_dump(mode = 'json')}"
                ),
                response_model = Answer,
            )
        except Exception:
            # Best effort: a failed LLM call counts as a wrong answer, while
            # KeyboardInterrupt / SystemExit still propagate.
            return False

        return dsp.answer_match(example.answer, [answer_prediction.answer], frac = 0.8) or \
            dsp.passage_match([example.answer], [answer_prediction.answer])

    # Reuse the same LM instance as the global default.
    dspy.settings.configure(lm = gpt4)

    evaluate_on_hotpotqa(compiled_extract_knowledge_graph, metric = evaluate_answer)


if __name__ == "__main__":
    evaluate()

View file

@ -1,89 +0,0 @@
import dspy
from cognee.modules.data.extraction.knowledge_graph.extract_knowledge_graph_module import ExtractKnowledgeGraph
from cognee.root_dir import get_absolute_path
from cognee.infrastructure.llm import get_llm_config
def run():
    """Run the saved knowledge-graph extraction program on a fixed passage.

    Loads the compiled ``ExtractKnowledgeGraph`` program from disk, calls it
    with the sample text below as context and an empty question, and prints
    the ``graph`` attribute of the prediction.
    """
    sample_text = """The 1985 FA Charity Shield (also known as the General Motors FA
Charity Shield for sponsorship reasons) was the 63rd FA Charity Shield,
an annual football match played between the winners of the previous
season's First Division and FA Cup competitions. The match was played on
10 August 1985 at Wembley Stadium and contested by Everton,
who had won the 1984\u201385 First Division, and Manchester United,
who had won the 1984\u201385 FA Cup. Everton won 2\u20130 with goals from
Trevor Steven and Adrian Heath. Trevor Steven put Everton into the lead
when he swept home from six yards after a cross from the left in the first half.
The second goal came in the second half when Manchester United goalkeeper
Gary Bailey dropped a cross from the left to allow Adrian Heath to tip the
ball past him into the left corner of the net.\r\nThe 1995 FA Charity Shield
(also known as the Littlewoods FA Charity Shield for sponsorship reasons) was the
73rd FA Charity Shield, an annual football match played between the winners of
the previous season's Premier League and FA Cup competitions. The match was
played on 13 August 1995 at Wembley Stadium and contested by Blackburn Rovers,
who had won the Premier League and FA Cup winners Everton. It was Blackburn's
second successive Charity Shield appearance, while Everton were appearing in
their eleventh and their first since 1987. Everton won the match 1\u20130
with a goal from Vinny Samways when he caught Tim Flowers off his line and
lifted the ball over him from the left of the penalty area and into the right
corner of the net. Dave Watson lifted the trophy for Everton.\r\nThe 1972 FA
Charity Shield was contested between Manchester City and Aston Villa.\r\nThe
1997 FA Charity Shield (known as the Littlewoods FA Charity Shield for
sponsorship reasons) was the 75th FA Charity Shield, an annual football match
played between the winners of the previous season's Premier League and
FA Cup competitions. The match was played on 3 August 1997 at Wembley Stadium
and contested by Manchester United, who had won the 1996\u201397 FA Premier League,
and Chelsea, who had won the 1996\u201397 FA Cup. Manchester United won the match
4\u20132 on penalties after the match had finished at 1\u20131 after 90 minutes.
\r\nThe 1956 FA Charity Shield was the 34th FA Charity Shield, an annual football
match held between the winners of the previous season's Football League and
FA Cup competitions. The match was contested by Manchester United, who had won
the 1955\u201356 Football League, and Manchester City, who had won the
1955\u201356 FA Cup, at Maine Road, Manchester, on 24 October 1956. Manchester
United won the match 1\u20130, Dennis Viollet scoring the winning goal.
Manchester United goalkeeper David Gaskell made his debut for the club during
the game, taking the place of injured goalkeeper Ray Wood, and, at the age of
16 years and 19 days, became the youngest player ever to play for the club.
\r\nThe 1937 FA Charity Shield was the 24th FA Charity Shield, a football match
between the winners of the previous season's First Division and FA Cup competitions.
The match was contested by league champions Manchester City and FA Cup winners
Sunderland, and was played at Maine Road, the home ground of Manchester City.
Manchester City won the game, 2\u20130.\r\nThe 2000 FA Charity Shield (also known
as the One 2 One FA Charity Shield for sponsorship reasons) was the
78th FA Charity Shield, an annual football match played between the winners
of the previous season's Premier League and FA Cup competitions. The match
was played between Manchester United, who won the 1999\u20132000 Premier League,
and Chelsea, who won the 1999\u20132000 FA Cup, and resulted in a 2\u20130 Chelsea win.
The goals were scored by Jimmy Floyd Hasselbaink and Mario Melchiot. Roy Keane
was sent off for a challenge on Gustavo Poyet and was the last person to be
sent off at the old Wembley Stadium.\r\nThe 2001 FA Charity Shield (also known
as the One 2 One FA Charity Shield for sponsorship reasons) was the 79th FA Charity Shield,
an annual football match played between the winners of the previous season's
Premier League and FA Cup. The match was contested between Liverpool, winners of
the 2000\u201301 FA Cup and Manchester United, who won the 2000\u201301 Premier
League on 12 August 2001. It was the first Shield match to be held at the
Millennium Stadium following the closure of Wembley Stadium for reconstruction.
\r\nAston Villa Football Club ( ; nicknamed Villa, The Villa, The Villans
and The Lions) is a professional football club in Aston, Birmingham, that plays
in the Championship, the second level of English football. Founded in 1874,
they have played at their current home ground, Villa Park, since 1897. Aston Villa
were one of the founder members of the Football League in 1888 and of the
Premier League in 1992.\r\nThe 1996 FA Charity Shield (also known as the
Littlewoods FA Charity Shield for sponsorship reasons) was the 74th FA Charity Shield,
an annual football match played between the winners of the previous season's Premier
League and FA Cup competitions. The match was played on 11 August 1996 at Wembley
Stadium and contested by Manchester United, who had won the Double of Premier League
and FA Cup in 1995\u201396, and Newcastle United, who had finished as runners-up
in the Premier League. Manchester United won the match 4\u20130 with goals from
Eric Cantona, Nicky Butt, David Beckham and Roy Keane."""

    settings = get_llm_config()
    language_model = dspy.OpenAI(
        model = settings.llm_model,
        api_key = settings.llm_api_key,
        model_type = "chat",
        max_tokens = 4096,
    )

    # Restore the previously saved program state before running it.
    graph_extractor = ExtractKnowledgeGraph(lm = language_model)
    program_path = get_absolute_path("./programs/extract_knowledge_graph/extract_knowledge_graph.json")
    graph_extractor.load(program_path)

    result = graph_extractor(context = sample_text, question = "")

    print(result.graph)


if __name__ == "__main__":
    run()

View file

@ -1,68 +0,0 @@
import dspy
from dspy.teleprompt import BootstrapFewShot
from dspy.primitives.example import Example
from cognee.modules.data.extraction.knowledge_graph.extract_knowledge_graph import ExtractKnowledgeGraph
from cognee.root_dir import get_absolute_path
from cognee.infrastructure.files.storage import LocalStorage
from cognee.shared.data_models import Answer
from cognee.infrastructure.llm.get_llm_client import get_llm_client
from cognee.modules.cognify.dataset import HotPotQA
from cognee.infrastructure.llm import get_llm_config
def train():
    """Compile the knowledge-graph extraction program on HotPotQA and save it.

    Configures a ColBERTv2 retriever, builds a training set from ``HotPotQA``,
    optimizes ``ExtractKnowledgeGraph`` with ``BootstrapFewShot`` using
    ``evaluate_answer`` as the metric, and writes the compiled program to
    ``./programs/extract_knowledge_graph/extract_knowledge_graph.json``.
    """
    # The metric relies on `dsp.answer_match` / `dsp.passage_match`; import the
    # package here so those names resolve when the metric runs.
    import dsp

    # NOTE(review): the retriever endpoint is a hard-coded address.
    colbertv2_wiki17_abstracts = dspy.ColBERTv2(url = "http://20.102.90.50:2017/wiki17_abstracts")

    dspy.configure(rm = colbertv2_wiki17_abstracts)

    def evaluate_answer(example, graph_prediction, trace = None):
        """Return a truthy value when the graph-based answer matches the gold answer."""
        llm_client = get_llm_client()

        try:
            answer_prediction = llm_client.create_structured_output(
                text_input = example.question,
                system_prompt = (
                    "Answer the question by looking at the provided knowledge graph.\n"
                    "Use only the graph to answer the question and be very brief.\n"
                    "This is the knowledge graph:\n"
                    f"{graph_prediction.graph.model_dump(mode = 'json')}"
                ),
                response_model = Answer,
            )
        except Exception:
            # Best effort: a failed LLM call counts as a wrong answer, while
            # KeyboardInterrupt / SystemExit still propagate.
            return False

        return dsp.answer_match(example.answer, [answer_prediction.answer], frac = 0.8) or \
            dsp.passage_match([example.answer], [answer_prediction.answer])

    optimizer = BootstrapFewShot(metric = evaluate_answer)

    dataset = HotPotQA(
        train_seed = 1,
        train_size = 16,
        eval_seed = 2023,
        dev_size = 8,
        test_size = 0,
        keep_details = True,
    )

    # Train
    train_examples = [
        Example(
            base = None,
            question = example.question,
            context = "\r\n".join("".join(sentences) for sentences in example.context["sentences"]),
            answer = example.answer,
        ) for example in dataset.train
    ]

    trainset = [example.with_inputs("context", "question") for example in train_examples]

    llm_config = get_llm_config()
    gpt4 = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096)

    compiled_extract_knowledge_graph = optimizer.compile(ExtractKnowledgeGraph(lm = gpt4), trainset = trainset)

    # Save program
    LocalStorage.ensure_directory_exists(get_absolute_path("./programs/extract_knowledge_graph"))
    compiled_extract_knowledge_graph.save(get_absolute_path("./programs/extract_knowledge_graph/extract_knowledge_graph.json"))


if __name__ == "__main__":
    train()