Merge pull request #215 from topoteretes/clean_dspy
Remove dspy logic that confuses
This commit is contained in:
commit
535d8281b4
4 changed files with 0 additions and 306 deletions
|
|
@ -1,84 +0,0 @@
|
||||||
import random
|
|
||||||
|
|
||||||
from datasets import load_dataset
|
|
||||||
|
|
||||||
from dspy.datasets.dataset import Dataset
|
|
||||||
|
|
||||||
|
|
||||||
class HotPotQA(Dataset):
    """HotPotQA ("fullwiki") restricted to hard examples, exposed as a dspy ``Dataset``.

    The official Hugging Face training split is filtered to ``level == 'hard'``,
    shuffled with a fixed seed and cut 75/25 into ``_train`` and (optionally)
    ``_dev``. The official validation split becomes ``_test``.
    """

    def __init__(self, *args, only_hard_examples=True, keep_details='dev_titles', unofficial_dev=True, **kwargs) -> None:
        """Download HotPotQA and populate ``_train``, ``_dev`` and ``_test``.

        Args:
            *args: Forwarded unchanged to ``Dataset.__init__``.
            only_hard_examples: Must be truthy; easy examples are not supported.
            keep_details: Controls which fields each training-split example keeps.
                ``True`` keeps id, question, answer, type, context and the gold
                titles; ``'dev_titles'`` keeps question and answer, plus gold
                titles on the dev part only; any other value keeps only
                question and answer.
            unofficial_dev: When truthy, the last 25% of the shuffled training
                data is stored as ``_dev``; otherwise ``_dev`` is ``None``.
            **kwargs: Forwarded unchanged to ``Dataset.__init__``.

        Raises:
            AssertionError: If ``only_hard_examples`` is falsy, or if an
                official validation example is not marked ``'hard'``.
        """
        super().__init__(*args, **kwargs)

        # Kept as an assert so existing callers still see AssertionError.
        # The two sentences previously ran together without a separating space.
        assert only_hard_examples, (
            "Care must be taken when adding support for easy examples. "
            "Dev must be all hard to match official dev, but training can be flexible."
        )

        hf_official_train = load_dataset("hotpot_qa", 'fullwiki', split='train')
        hf_official_dev = load_dataset("hotpot_qa", 'fullwiki', split='validation')

        # The field selection depends only on keep_details, so decide it once
        # instead of once per example.
        if keep_details is True:
            keys = ['id', 'question', 'answer', 'type', 'supporting_facts', 'context']
        elif keep_details == 'dev_titles':
            keys = ['question', 'answer', 'supporting_facts']
        else:
            keys = ['question', 'answer']

        official_train = [
            self._project_example(raw_example, keys)
            for raw_example in hf_official_train
            if raw_example['level'] == 'hard'
        ]

        # Fixed seed keeps the train/dev partition reproducible across runs.
        rng = random.Random(0)
        rng.shuffle(official_train)

        split_at = len(official_train) * 75 // 100
        self._train = official_train[:split_at]
        self._dev = official_train[split_at:] if unofficial_dev else None

        # In 'dev_titles' mode the gold titles stay on the dev examples only.
        if keep_details == 'dev_titles':
            for example in self._train:
                del example['gold_titles']

        test_keys = ['id', 'question', 'answer', 'type', 'supporting_facts']
        test = []
        for raw_example in hf_official_dev:
            assert raw_example['level'] == 'hard'
            test.append(self._project_example(raw_example, test_keys))

        self._test = test

    @staticmethod
    def _project_example(raw_example, keys):
        """Copy ``keys`` out of ``raw_example``, replacing supporting facts by a set of gold titles."""
        example = {k: raw_example[k] for k in keys}

        if 'supporting_facts' in example:
            example['gold_titles'] = set(example['supporting_facts']['title'])
            del example['supporting_facts']

        return example
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Manual smoke test: build a small sample of the dataset and print a few
    # questions plus the split sizes.
    from dsp.utils import dotdict

    data_args = dotdict(
        train_seed=1,
        train_size=16,
        eval_seed=2023,
        dev_size=200*5,
        test_size=0,
    )
    dataset = HotPotQA(**data_args)

    print(dataset)
    for train_index in (0, 15):
        print(dataset.train[train_index].question)

    print(len(dataset.train), len(dataset.dev), len(dataset.test))

    for dev_index in (0, 340, 937):
        print(dataset.dev[dev_index].question)

# NOTE(review): the string below looks like a recorded sample of the demo's
# output (the printed dataset repr is not included) - confirm before relying on it.
"""
What was the population of the city where Woodward Avenue ends in 2010?
Where did the star , who is also an executive producer, of the Mick begin her carrer?
16 1000 0
Both London and German have seen attacks during war, there was one specific type of attack that Germany called the blitz, what did London call a similar attack?
Pre-Madonna was a collection of demos by the singer who was a leading presence during the emergence of what network?
Alan Mills composed the classic folk song that tells the story of what?
"""
|
|
||||||
|
|
@ -1,65 +0,0 @@
|
||||||
import dspy
|
|
||||||
from dspy.evaluate.evaluate import Evaluate
|
|
||||||
from dspy.primitives.example import Example
|
|
||||||
from cognee.modules.data.extraction.knowledge_graph.extract_knowledge_graph_module import ExtractKnowledgeGraph
|
|
||||||
from cognee.root_dir import get_absolute_path
|
|
||||||
from cognee.shared.data_models import Answer
|
|
||||||
from cognee.infrastructure.llm import get_llm_config
|
|
||||||
from cognee.infrastructure.llm.get_llm_client import get_llm_client
|
|
||||||
from cognee.modules.cognify.dataset import HotPotQA
|
|
||||||
|
|
||||||
def evaluate():
    """Score the saved knowledge-graph extraction program on HotPotQA dev examples.

    Builds dspy examples from the dev split, loads the compiled
    ``ExtractKnowledgeGraph`` program from disk and runs dspy's ``Evaluate``
    with a metric that asks an LLM to answer each question from the predicted
    graph only, then compares that answer with the gold answer.
    """
    # Imported locally: the metric below needs ``dsp`` but the module only
    # imports ``dspy``, so the original code failed with NameError.
    import dsp

    dataset = HotPotQA(
        train_seed = 1,
        train_size = 16,
        eval_seed = 2023,
        dev_size = 8,
        test_size = 0,
        keep_details = True,
    )

    # Evaluate
    evaluate_examples = [
        Example(
            base = None,
            # The metric sends example.question to the LLM, so it must carry
            # the real question (it was None before; train() already passes it).
            question = example.question,
            context = "\r\n".join("".join(sentences) for sentences in example.context["sentences"]),
            answer = example.answer,
        ) for example in dataset.dev
    ]

    devset = [example.with_inputs("context", "question") for example in evaluate_examples]

    evaluate_on_hotpotqa = Evaluate(devset = devset, num_threads = 1, display_progress = True, display_table = 5, max_tokens = 4096)

    llm_config = get_llm_config()
    # One client serves both the program and the global dspy settings; the
    # original built two identical clients.
    gpt4 = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096)
    compiled_extract_knowledge_graph = ExtractKnowledgeGraph(lm = gpt4)
    compiled_extract_knowledge_graph.load(get_absolute_path("./programs/extract_knowledge_graph/extract_knowledge_graph.json"))

    def evaluate_answer(example, graph_prediction, trace = None):
        """Return a truthy value when an answer derived from the graph matches the gold answer."""
        llm_client = get_llm_client()

        try:
            # Serialised inside the try so a missing or invalid graph counts
            # as a failed example, as before.
            graph_json = graph_prediction.graph.model_dump(mode = "json")
            answer_prediction = llm_client.create_structured_output(
                text_input = example.question,
                system_prompt = (
                    "Answer the question by looking at the provided knowledge graph.\n"
                    "Use only the graph to answer the question and be very brief.\n"
                    "This is the knowledge graph:\n"
                    f"{graph_json}"
                ),
                response_model = Answer,
            )
        except Exception:
            # Best effort: any failure scores the example as wrong.
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            return False

        return dsp.answer_match(example.answer, [answer_prediction.answer], frac = 0.8) or \
            dsp.passage_match([example.answer], [answer_prediction.answer])

    dspy.settings.configure(lm = gpt4)

    evaluate_on_hotpotqa(compiled_extract_knowledge_graph, metric = evaluate_answer)
|
|
||||||
|
|
||||||
|
|
||||||
# Run the evaluation only when this file is executed as a script.
if __name__ == "__main__":
    evaluate()
|
|
||||||
|
|
@ -1,89 +0,0 @@
|
||||||
import dspy
|
|
||||||
from cognee.modules.data.extraction.knowledge_graph.extract_knowledge_graph_module import ExtractKnowledgeGraph
|
|
||||||
from cognee.root_dir import get_absolute_path
|
|
||||||
from cognee.infrastructure.llm import get_llm_config
|
|
||||||
|
|
||||||
def run():
    """Load the saved extraction program and print the graph it builds for a sample text.

    The sample text is a fixed set of passages about FA Charity Shield matches
    and Aston Villa; the program is called with an empty question.
    """
    config = get_llm_config()
    language_model = dspy.OpenAI(
        model = config.llm_model,
        api_key = config.llm_api_key,
        model_type = "chat",
        max_tokens = 4096,
    )

    extractor = ExtractKnowledgeGraph(lm = language_model)
    program_path = get_absolute_path("./programs/extract_knowledge_graph/extract_knowledge_graph.json")
    extractor.load(program_path)

    # The literal is kept verbatim; its continuation lines are deliberately
    # flush-left so the string content does not gain indentation.
    sample_text = """The 1985 FA Charity Shield (also known as the General Motors FA
Charity Shield for sponsorship reasons) was the 63rd FA Charity Shield,
an annual football match played between the winners of the previous
season's First Division and FA Cup competitions. The match was played on
10 August 1985 at Wembley Stadium and contested by Everton,
who had won the 1984\u201385 First Division, and Manchester United,
who had won the 1984\u201385 FA Cup. Everton won 2\u20130 with goals from
Trevor Steven and Adrian Heath. Trevor Steven put Everton into the lead
when he swept home from six yards after a cross from the left in the first half.
The second goal came in the second half when Manchester United goalkeeper
Gary Bailey dropped a cross from the left to allow Adrian Heath to tip the
ball past him into the left corner of the net.\r\nThe 1995 FA Charity Shield
(also known as the Littlewoods FA Charity Shield for sponsorship reasons) was the
73rd FA Charity Shield, an annual football match played between the winners of
the previous season's Premier League and FA Cup competitions. The match was
played on 13 August 1995 at Wembley Stadium and contested by Blackburn Rovers,
who had won the Premier League and FA Cup winners Everton. It was Blackburn's
second successive Charity Shield appearance, while Everton were appearing in
their eleventh and their first since 1987. Everton won the match 1\u20130
with a goal from Vinny Samways when he caught Tim Flowers off his line and
lifted the ball over him from the left of the penalty area and into the right
corner of the net. Dave Watson lifted the trophy for Everton.\r\nThe 1972 FA
Charity Shield was contested between Manchester City and Aston Villa.\r\nThe
1997 FA Charity Shield (known as the Littlewoods FA Charity Shield for
sponsorship reasons) was the 75th FA Charity Shield, an annual football match
played between the winners of the previous season's Premier League and
FA Cup competitions. The match was played on 3 August 1997 at Wembley Stadium
and contested by Manchester United, who had won the 1996\u201397 FA Premier League,
and Chelsea, who had won the 1996\u201397 FA Cup. Manchester United won the match
4\u20132 on penalties after the match had finished at 1\u20131 after 90 minutes.
\r\nThe 1956 FA Charity Shield was the 34th FA Charity Shield, an annual football
match held between the winners of the previous season's Football League and
FA Cup competitions. The match was contested by Manchester United, who had won
the 1955\u201356 Football League, and Manchester City, who had won the
1955\u201356 FA Cup, at Maine Road, Manchester, on 24 October 1956. Manchester
United won the match 1\u20130, Dennis Viollet scoring the winning goal.
Manchester United goalkeeper David Gaskell made his debut for the club during
the game, taking the place of injured goalkeeper Ray Wood, and, at the age of
16 years and 19 days, became the youngest player ever to play for the club.
\r\nThe 1937 FA Charity Shield was the 24th FA Charity Shield, a football match
between the winners of the previous season's First Division and FA Cup competitions.
The match was contested by league champions Manchester City and FA Cup winners
Sunderland, and was played at Maine Road, the home ground of Manchester City.
Manchester City won the game, 2\u20130.\r\nThe 2000 FA Charity Shield (also known
as the One 2 One FA Charity Shield for sponsorship reasons) was the
78th FA Charity Shield, an annual football match played between the winners
of the previous season's Premier League and FA Cup competitions. The match
was played between Manchester United, who won the 1999\u20132000 Premier League,
and Chelsea, who won the 1999\u20132000 FA Cup, and resulted in a 2\u20130 Chelsea win.
The goals were scored by Jimmy Floyd Hasselbaink and Mario Melchiot. Roy Keane
was sent off for a challenge on Gustavo Poyet and was the last person to be
sent off at the old Wembley Stadium.\r\nThe 2001 FA Charity Shield (also known
as the One 2 One FA Charity Shield for sponsorship reasons) was the 79th FA Charity Shield,
an annual football match played between the winners of the previous season's
Premier League and FA Cup. The match was contested between Liverpool, winners of
the 2000\u201301 FA Cup and Manchester United, who won the 2000\u201301 Premier
League on 12 August 2001. It was the first Shield match to be held at the
Millennium Stadium following the closure of Wembley Stadium for reconstruction.
\r\nAston Villa Football Club ( ; nicknamed Villa, The Villa, The Villans
and The Lions) is a professional football club in Aston, Birmingham, that plays
in the Championship, the second level of English football. Founded in 1874,
they have played at their current home ground, Villa Park, since 1897. Aston Villa
were one of the founder members of the Football League in 1888 and of the
Premier League in 1992.\r\nThe 1996 FA Charity Shield (also known as the
Littlewoods FA Charity Shield for sponsorship reasons) was the 74th FA Charity Shield,
an annual football match played between the winners of the previous season's Premier
League and FA Cup competitions. The match was played on 11 August 1996 at Wembley
Stadium and contested by Manchester United, who had won the Double of Premier League
and FA Cup in 1995\u201396, and Newcastle United, who had finished as runners-up
in the Premier League. Manchester United won the match 4\u20130 with goals from
Eric Cantona, Nicky Butt, David Beckham and Roy Keane."""

    result = extractor(context = sample_text, question = "")

    print(result.graph)
|
|
||||||
|
|
||||||
# Run the sample extraction only when this file is executed as a script.
if __name__ == "__main__":
    run()
|
|
||||||
|
|
@ -1,68 +0,0 @@
|
||||||
import dspy
|
|
||||||
from dspy.teleprompt import BootstrapFewShot
|
|
||||||
from dspy.primitives.example import Example
|
|
||||||
from cognee.modules.data.extraction.knowledge_graph.extract_knowledge_graph import ExtractKnowledgeGraph
|
|
||||||
from cognee.root_dir import get_absolute_path
|
|
||||||
from cognee.infrastructure.files.storage import LocalStorage
|
|
||||||
from cognee.shared.data_models import Answer
|
|
||||||
from cognee.infrastructure.llm.get_llm_client import get_llm_client
|
|
||||||
from cognee.modules.cognify.dataset import HotPotQA
|
|
||||||
from cognee.infrastructure.llm import get_llm_config
|
|
||||||
|
|
||||||
def train():
    """Compile the knowledge-graph extraction program with BootstrapFewShot and save it.

    Training examples come from the HotPotQA train split. The optimisation
    metric asks an LLM to answer each question from the predicted graph only
    and compares that answer with the gold answer. The compiled program is
    written to ``programs/extract_knowledge_graph/extract_knowledge_graph.json``
    under the path returned by ``get_absolute_path``.
    """
    # Imported locally: the metric below needs ``dsp`` but the module only
    # imports ``dspy``, so the original code failed with NameError.
    import dsp

    colbertv2_wiki17_abstracts = dspy.ColBERTv2(url = "http://20.102.90.50:2017/wiki17_abstracts")

    dspy.configure(rm = colbertv2_wiki17_abstracts)

    def evaluate_answer(example, graph_prediction, trace = None):
        """Return a truthy value when an answer derived from the graph matches the gold answer."""
        llm_client = get_llm_client()

        try:
            # Serialised inside the try so a missing or invalid graph counts
            # as a failed example, as before.
            graph_json = graph_prediction.graph.model_dump(mode = "json")
            answer_prediction = llm_client.create_structured_output(
                text_input = example.question,
                system_prompt = (
                    "Answer the question by looking at the provided knowledge graph.\n"
                    "Use only the graph to answer the question and be very brief.\n"
                    "This is the knowledge graph:\n"
                    f"{graph_json}"
                ),
                response_model = Answer,
            )
        except Exception:
            # Best effort: any failure scores the example as wrong.
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            return False

        return dsp.answer_match(example.answer, [answer_prediction.answer], frac = 0.8) or \
            dsp.passage_match([example.answer], [answer_prediction.answer])

    optimizer = BootstrapFewShot(metric = evaluate_answer)

    dataset = HotPotQA(
        train_seed = 1,
        train_size = 16,
        eval_seed = 2023,
        dev_size = 8,
        test_size = 0,
        keep_details = True,
    )

    # Train
    train_examples = [
        Example(
            base = None,
            question = example.question,
            context = "\r\n".join("".join(sentences) for sentences in example.context["sentences"]),
            answer = example.answer,
        ) for example in dataset.train
    ]

    trainset = [example.with_inputs("context", "question") for example in train_examples]

    llm_config = get_llm_config()
    gpt4 = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096)

    compiled_extract_knowledge_graph = optimizer.compile(ExtractKnowledgeGraph(lm = gpt4), trainset = trainset)

    # Save program
    LocalStorage.ensure_directory_exists(get_absolute_path("./programs/extract_knowledge_graph"))
    compiled_extract_knowledge_graph.save(get_absolute_path("./programs/extract_knowledge_graph/extract_knowledge_graph.json"))
|
|
||||||
|
|
||||||
# Run the training pipeline only when this file is executed as a script.
if __name__ == "__main__":
    train()
|
|
||||||
Loading…
Add table
Reference in a new issue