Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions lm_eval/tasks/gsm_symbolic/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from lm_eval.api.task import TaskConfig, get_task_dict
from lm_eval.tasks.gsm_symbolic.gsm_symbolic import *
from lm_eval.tasks.gsm_symbolic.gsm_symbolic_cot import *

# Task configuration for the plain (non chain-of-thought) GSM-Symbolic task.
# NOTE(review): the YAML configs in this package describe the same task with
# the keys `task` and `metric_list`; confirm that TaskConfig really accepts
# the `name`, `keywords` and `metrics` keyword arguments used here.
CONFIG = TaskConfig(
    name="gsm_symbolic",
    description="GSM-Symbolic math word problems dataset",
    keywords=[
        "math",
        "word problems",
        "symbolic reasoning",
    ],
    metrics=["exact_match"],
)

# Task configuration for the chain-of-thought variant of GSM-Symbolic.
# NOTE(review): the YAML configs in this package describe the same task with
# the keys `task` and `metric_list`; confirm that TaskConfig really accepts
# the `name`, `keywords` and `metrics` keyword arguments used here.
CONFIG_COT = TaskConfig(
    name="gsm_symbolic_cot",
    description="GSM-Symbolic math word problems dataset with Chain-of-Thought prompting",
    keywords=[
        "math",
        "word problems",
        "symbolic reasoning",
        "chain of thought",
    ],
    metrics=["exact_match"],
)
45 changes: 45 additions & 0 deletions lm_eval/tasks/gsm_symbolic/gsm_symbolic.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# GSM-Symbolic (dataset config `p1`) with plain "Question/Answer" prompting,
# scored by exact match on generated text.
tag:
  - math_word_problems
task: gsm_symbolic
dataset_path: apple/GSM-Symbolic
dataset_name: p1
output_type: generate_until
# All three splits point at `test`, so few-shot examples are drawn from the
# same split that is evaluated.
# NOTE(review): confirm the harness keeps the evaluated document out of its
# own few-shot context.
training_split: test
fewshot_split: test
test_split: test
# Prompt and target templates; `question` and `answer` are document fields.
doc_to_text: "Question: {{question}}\nAnswer:"
doc_to_target: "{{answer}}"
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: false
    # Patterns dropped before comparison:
    regexes_to_ignore:
      - ","            # commas
      - "\\$"          # literal dollar signs
      - "(?s).*#### "  # everything up to and including the last "#### " marker
      - "\\.$"         # a trailing period
generation_kwargs:
  # Stop strings: the start of a new question, or an end-of-sequence marker.
  until:
    - "Question:"
    - "</s>"
    - "<|end|>"
  do_sample: false
  temperature: 0.0
repeats: 1
num_fewshot: 5
filter_list:
  # Accept only a number that directly follows a "#### " marker.
  - name: strict-match
    filter:
      - function: regex
        regex_pattern: "#### (\\-?[0-9\\.\\,]+)"
      - function: take_first
  # Accept any number-like token found in the output.
  # NOTE(review): `group_select: -1` presumably picks the last match — confirm.
  - name: flexible-extract
    filter:
      - function: regex
        group_select: -1
        regex_pattern: "(-?[$0-9.,]{2,})|(-?[0-9]+)"
      - function: take_first
metadata:
  version: 1.0
46 changes: 46 additions & 0 deletions lm_eval/tasks/gsm_symbolic/gsm_symbolic_cot.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# GSM-Symbolic (dataset config `p1`) with a chain-of-thought prompt
# ("Let's think step by step."), scored by exact match on generated text.
tag:
  - math_word_problems
  - chain_of_thought
task: gsm_symbolic_cot
dataset_path: apple/GSM-Symbolic
dataset_name: p1
output_type: generate_until
# All three splits point at `test`, so few-shot examples are drawn from the
# same split that is evaluated.
# NOTE(review): confirm the harness keeps the evaluated document out of its
# own few-shot context.
training_split: test
fewshot_split: test
test_split: test
# Prompt and target templates; `question` and `answer` are document fields.
doc_to_text: "Question: {{question}}\nAnswer: Let's think step by step."
doc_to_target: "{{answer}}"
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: false
    # Patterns dropped before comparison:
    regexes_to_ignore:
      - ","            # commas
      - "\\$"          # literal dollar signs
      - "(?s).*#### "  # everything up to and including the last "#### " marker
      - "\\.$"         # a trailing period
generation_kwargs:
  # Stop strings: the start of a new question, or an end-of-sequence marker.
  until:
    - "Question:"
    - "</s>"
    - "<|end|>"
  do_sample: false
  temperature: 0.0
repeats: 1
num_fewshot: 5
filter_list:
  # Accept only a number that directly follows a "#### " marker.
  - name: strict-match
    filter:
      - function: regex
        regex_pattern: "#### (\\-?[0-9\\.\\,]+)"
      - function: take_first
  # Accept any number-like token found in the output.
  # NOTE(review): `group_select: -1` presumably picks the last match — confirm.
  - name: flexible-extract
    filter:
      - function: regex
        group_select: -1
        regex_pattern: "(-?[$0-9.,]{2,})|(-?[0-9]+)"
      - function: take_first
metadata:
  version: 1.0