Merge pull request #215 from topoteretes/clean_dspy
Remove dspy logic that confuses
Commit 535d8281b4
4 changed files with 0 additions and 306 deletions
@@ -1,84 +0,0 @@
import random

from datasets import load_dataset

from dspy.datasets.dataset import Dataset


class HotPotQA(Dataset):
    def __init__(self, *args, only_hard_examples=True, keep_details='dev_titles', unofficial_dev=True, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        assert only_hard_examples, "Care must be taken when adding support for easy examples." \
            "Dev must be all hard to match official dev, but training can be flexible."

        hf_official_train = load_dataset("hotpot_qa", 'fullwiki', split='train')
        hf_official_dev = load_dataset("hotpot_qa", 'fullwiki', split='validation')

        official_train = []
        for raw_example in hf_official_train:
            if raw_example['level'] == 'hard':
                if keep_details is True:
                    keys = ['id', 'question', 'answer', 'type', 'supporting_facts', 'context']
                elif keep_details == 'dev_titles':
                    keys = ['question', 'answer', 'supporting_facts']
                else:
                    keys = ['question', 'answer']

                example = {k: raw_example[k] for k in keys}

                if 'supporting_facts' in example:
                    example['gold_titles'] = set(example['supporting_facts']['title'])
                    del example['supporting_facts']

                official_train.append(example)

        rng = random.Random(0)
        rng.shuffle(official_train)

        self._train = official_train[:len(official_train)*75//100]

        if unofficial_dev:
            self._dev = official_train[len(official_train)*75//100:]
        else:
            self._dev = None

        for example in self._train:
            if keep_details == 'dev_titles':
                del example['gold_titles']

        test = []
        for raw_example in hf_official_dev:
            assert raw_example['level'] == 'hard'
            example = {k: raw_example[k] for k in ['id', 'question', 'answer', 'type', 'supporting_facts']}
            if 'supporting_facts' in example:
                example['gold_titles'] = set(example['supporting_facts']['title'])
                del example['supporting_facts']
            test.append(example)

        self._test = test


if __name__ == '__main__':
    from dsp.utils import dotdict

    data_args = dotdict(train_seed=1, train_size=16, eval_seed=2023, dev_size=200*5, test_size=0)
    dataset = HotPotQA(**data_args)

    print(dataset)
    print(dataset.train[0].question)
    print(dataset.train[15].question)

    print(len(dataset.train), len(dataset.dev), len(dataset.test))

    print(dataset.dev[0].question)
    print(dataset.dev[340].question)
    print(dataset.dev[937].question)

    """
    What was the population of the city where Woodward Avenue ends in 2010?
    Where did the star , who is also an executive producer, of the Mick begin her carrer?
    16 1000 0
    Both London and German have seen attacks during war, there was one specific type of attack that Germany called the blitz, what did London call a similar attack?
    Pre-Madonna was a collection of demos by the singer who was a leading presence during the emergence of what network?
    Alan Mills composed the classic folk song that tells the story of what?
    """
@@ -1,65 +0,0 @@
import dspy
from dspy.evaluate.evaluate import Evaluate
from dspy.primitives.example import Example
from cognee.modules.data.extraction.knowledge_graph.extract_knowledge_graph_module import ExtractKnowledgeGraph
from cognee.root_dir import get_absolute_path
from cognee.shared.data_models import Answer
from cognee.infrastructure.llm import get_llm_config
from cognee.infrastructure.llm.get_llm_client import get_llm_client
from cognee.modules.cognify.dataset import HotPotQA


def evaluate():
    dataset = HotPotQA(
        train_seed = 1,
        train_size = 16,
        eval_seed = 2023,
        dev_size = 8,
        test_size = 0,
        keep_details = True,
    )

    #Evaluate
    evaluate_examples = [
        Example(
            base = None,
            question = None,
            context = "\r\n".join("".join(sentences) for sentences in example.context["sentences"]),
            answer = example.answer,
        ) for example in dataset.dev
    ]

    devset = [example.with_inputs("context", "question") for example in evaluate_examples]

    evaluate_on_hotpotqa = Evaluate(devset = devset, num_threads = 1, display_progress = True, display_table = 5, max_tokens = 4096)

    llm_config = get_llm_config()
    gpt4 = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096)
    compiled_extract_knowledge_graph = ExtractKnowledgeGraph(lm = gpt4)
    compiled_extract_knowledge_graph.load(get_absolute_path("./programs/extract_knowledge_graph/extract_knowledge_graph.json"))

    def evaluate_answer(example, graph_prediction, trace = None):
        llm_client = get_llm_client()

        try:
            answer_prediction = llm_client.create_structured_output(
                text_input = example.question,
                system_prompt = f"""Answer the question by looking at the provided knowledge graph.
                    Use only the graph to answer the question and be very brief.
                    This is the knowledge graph:
                    {graph_prediction.graph.model_dump(mode = "json")}""",
                response_model = Answer,
            )
        except:
            return False

        return dsp.answer_match(example.answer, [answer_prediction.answer], frac = 0.8) or \
            dsp.passage_match([example.answer], [answer_prediction.answer])

    gpt4 = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096)
    dspy.settings.configure(lm = gpt4)

    evaluate_on_hotpotqa(compiled_extract_knowledge_graph, metric = evaluate_answer)


if __name__ == "__main__":
    evaluate()
@@ -1,89 +0,0 @@
import dspy
from cognee.modules.data.extraction.knowledge_graph.extract_knowledge_graph_module import ExtractKnowledgeGraph
from cognee.root_dir import get_absolute_path
from cognee.infrastructure.llm import get_llm_config


def run():
    llm_config = get_llm_config()
    gpt4 = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096)
    compiled_extract_knowledge_graph = ExtractKnowledgeGraph(lm = gpt4)
    compiled_extract_knowledge_graph.load(get_absolute_path("./programs/extract_knowledge_graph/extract_knowledge_graph.json"))

    text = """The 1985 FA Charity Shield (also known as the General Motors FA
    Charity Shield for sponsorship reasons) was the 63rd FA Charity Shield,
    an annual football match played between the winners of the previous
    season's First Division and FA Cup competitions. The match was played on
    10 August 1985 at Wembley Stadium and contested by Everton,
    who had won the 1984\u201385 First Division, and Manchester United,
    who had won the 1984\u201385 FA Cup. Everton won 2\u20130 with goals from
    Trevor Steven and Adrian Heath. Trevor Steven put Everton into the lead
    when he swept home from six yards after a cross from the left in the first half.
    The second goal came in the second half when Manchester United goalkeeper
    Gary Bailey dropped a cross from the left to allow Adrian Heath to tip the
    ball past him into the left corner of the net.\r\nThe 1995 FA Charity Shield
    (also known as the Littlewoods FA Charity Shield for sponsorship reasons) was the
    73rd FA Charity Shield, an annual football match played between the winners of
    the previous season's Premier League and FA Cup competitions. The match was
    played on 13 August 1995 at Wembley Stadium and contested by Blackburn Rovers,
    who had won the Premier League and FA Cup winners Everton. It was Blackburn's
    second successive Charity Shield appearance, while Everton were appearing in
    their eleventh and their first since 1987. Everton won the match 1\u20130
    with a goal from Vinny Samways when he caught Tim Flowers off his line and
    lifted the ball over him from the left of the penalty area and into the right
    corner of the net. Dave Watson lifted the trophy for Everton.\r\nThe 1972 FA
    Charity Shield was contested between Manchester City and Aston Villa.\r\nThe
    1997 FA Charity Shield (known as the Littlewoods FA Charity Shield for
    sponsorship reasons) was the 75th FA Charity Shield, an annual football match
    played between the winners of the previous season's Premier League and
    FA Cup competitions. The match was played on 3 August 1997 at Wembley Stadium
    and contested by Manchester United, who had won the 1996\u201397 FA Premier League,
    and Chelsea, who had won the 1996\u201397 FA Cup. Manchester United won the match
    4\u20132 on penalties after the match had finished at 1\u20131 after 90 minutes.
    \r\nThe 1956 FA Charity Shield was the 34th FA Charity Shield, an annual football
    match held between the winners of the previous season's Football League and
    FA Cup competitions. The match was contested by Manchester United, who had won
    the 1955\u201356 Football League, and Manchester City, who had won the
    1955\u201356 FA Cup, at Maine Road, Manchester, on 24 October 1956. Manchester
    United won the match 1\u20130, Dennis Viollet scoring the winning goal.
    Manchester United goalkeeper David Gaskell made his debut for the club during
    the game, taking the place of injured goalkeeper Ray Wood, and, at the age of
    16 years and 19 days, became the youngest player ever to play for the club.
    \r\nThe 1937 FA Charity Shield was the 24th FA Charity Shield, a football match
    between the winners of the previous season's First Division and FA Cup competitions.
    The match was contested by league champions Manchester City and FA Cup winners
    Sunderland, and was played at Maine Road, the home ground of Manchester City.
    Manchester City won the game, 2\u20130.\r\nThe 2000 FA Charity Shield (also known
    as the One 2 One FA Charity Shield for sponsorship reasons) was the
    78th FA Charity Shield, an annual football match played between the winners
    of the previous season's Premier League and FA Cup competitions. The match
    was played between Manchester United, who won the 1999\u20132000 Premier League,
    and Chelsea, who won the 1999\u20132000 FA Cup, and resulted in a 2\u20130 Chelsea win.
    The goals were scored by Jimmy Floyd Hasselbaink and Mario Melchiot. Roy Keane
    was sent off for a challenge on Gustavo Poyet and was the last person to be
    sent off at the old Wembley Stadium.\r\nThe 2001 FA Charity Shield (also known
    as the One 2 One FA Charity Shield for sponsorship reasons) was the 79th FA Charity Shield,
    an annual football match played between the winners of the previous season's
    Premier League and FA Cup. The match was contested between Liverpool, winners of
    the 2000\u201301 FA Cup and Manchester United, who won the 2000\u201301 Premier
    League on 12 August 2001. It was the first Shield match to be held at the
    Millennium Stadium following the closure of Wembley Stadium for reconstruction.
    \r\nAston Villa Football Club ( ; nicknamed Villa, The Villa, The Villans
    and The Lions) is a professional football club in Aston, Birmingham, that plays
    in the Championship, the second level of English football. Founded in 1874,
    they have played at their current home ground, Villa Park, since 1897. Aston Villa
    were one of the founder members of the Football League in 1888 and of the
    Premier League in 1992.\r\nThe 1996 FA Charity Shield (also known as the
    Littlewoods FA Charity Shield for sponsorship reasons) was the 74th FA Charity Shield,
    an annual football match played between the winners of the previous season's Premier
    League and FA Cup competitions. The match was played on 11 August 1996 at Wembley
    Stadium and contested by Manchester United, who had won the Double of Premier League
    and FA Cup in 1995\u201396, and Newcastle United, who had finished as runners-up
    in the Premier League. Manchester United won the match 4\u20130 with goals from
    Eric Cantona, Nicky Butt, David Beckham and Roy Keane."""

    prediction = compiled_extract_knowledge_graph(context = text, question = "")

    print(prediction.graph)


if __name__ == "__main__":
    run()
@@ -1,68 +0,0 @@
import dspy
from dspy.teleprompt import BootstrapFewShot
from dspy.primitives.example import Example
from cognee.modules.data.extraction.knowledge_graph.extract_knowledge_graph import ExtractKnowledgeGraph
from cognee.root_dir import get_absolute_path
from cognee.infrastructure.files.storage import LocalStorage
from cognee.shared.data_models import Answer
from cognee.infrastructure.llm.get_llm_client import get_llm_client
from cognee.modules.cognify.dataset import HotPotQA
from cognee.infrastructure.llm import get_llm_config


def train():
    colbertv2_wiki17_abstracts = dspy.ColBERTv2(url = "http://20.102.90.50:2017/wiki17_abstracts")

    dspy.configure(rm = colbertv2_wiki17_abstracts)

    def evaluate_answer(example, graph_prediction, trace = None):
        llm_client = get_llm_client()

        try:
            answer_prediction = llm_client.create_structured_output(
                text_input = example.question,
                system_prompt = f"""Answer the question by looking at the provided knowledge graph.
                    Use only the graph to answer the question and be very brief.
                    This is the knowledge graph:
                    {graph_prediction.graph.model_dump(mode = "json")}""",
                response_model = Answer,
            )
        except:
            return False

        return dsp.answer_match(example.answer, [answer_prediction.answer], frac = 0.8) or \
            dsp.passage_match([example.answer], [answer_prediction.answer])

    optimizer = BootstrapFewShot(metric = evaluate_answer)

    dataset = HotPotQA(
        train_seed = 1,
        train_size = 16,
        eval_seed = 2023,
        dev_size = 8,
        test_size = 0,
        keep_details = True,
    )

    # Train
    train_examples = [
        Example(
            base = None,
            question = example.question,
            context = "\r\n".join("".join(sentences) for sentences in example.context["sentences"]),
            answer = example.answer,
        ) for example in dataset.train
    ]

    trainset = [example.with_inputs("context", "question") for example in train_examples]

    llm_config = get_llm_config()
    gpt4 = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096)

    compiled_extract_knowledge_graph = optimizer.compile(ExtractKnowledgeGraph(lm = gpt4), trainset = trainset)

    # Save program
    LocalStorage.ensure_directory_exists(get_absolute_path("./programs/extract_knowledge_graph"))
    compiled_extract_knowledge_graph.save(get_absolute_path("./programs/extract_knowledge_graph/extract_knowledge_graph.json"))


if __name__ == "__main__":
    train()
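The training script above follows dspy's standard optimizer lifecycle: BootstrapFewShot compiles a program against a trainset using the metric, and the compiled program is serialized to JSON so that the other removed scripts can load it back with .load(). A minimal sketch of that cycle, assuming only that dspy is installed; MyProgram and my_metric are hypothetical placeholders, not code from this repository:

import dspy
from dspy.teleprompt import BootstrapFewShot

class MyProgram(dspy.Module):
    def __init__(self):
        super().__init__()
        # A single signature-based predictor stands in for a real pipeline.
        self.predict = dspy.Predict("question -> answer")

    def forward(self, question):
        return self.predict(question=question)

def my_metric(example, prediction, trace=None):
    # Same contract as evaluate_answer above: (example, prediction, trace) -> bool.
    return example.answer.lower() in prediction.answer.lower()

# optimizer = BootstrapFewShot(metric=my_metric)
# compiled = optimizer.compile(MyProgram(), trainset=trainset)  # bootstrap few-shot demos
# compiled.save("program.json")                                 # persist the compiled state
# reloaded = MyProgram(); reloaded.load("program.json")         # restore it in another script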