Skip to content

Commit

Permalink
[Feature] Longbench dataset update
Browse files Browse the repository at this point in the history
  • Loading branch information
MaiziXiao authored Sep 6, 2024
1 parent 928d0cf commit 87ffa71
Show file tree
Hide file tree
Showing 64 changed files with 730 additions and 437 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,32 +7,37 @@
input_columns=['context', 'input'],
output_column='answers',
train_split='test',
test_split='test'
test_split='test',
)

# Inference config for LongBench 2wikimqa: zero-shot (no retrieved in-context
# examples) generative QA with a fixed prompt; output capped at 32 tokens
# since answers are short entity spans.
LongBench_2wikimqa_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(
                    role='HUMAN',
                    prompt='Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
                ),
            ],
        ),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=32),
)

# Evaluation config: token-level F1 over the model ('BOT') predictions.
LongBench_2wikimqa_eval_cfg = dict(
    evaluator=dict(type=LongBenchF1Evaluator), pred_role='BOT'
)

# Dataset entry wiring reader/infer/eval configs to the 2wikimqa subset
# hosted at the OpenCompass mirror path.
LongBench_2wikimqa_datasets = [
    dict(
        type=LongBench2wikimqaDataset,
        abbr='LongBench_2wikimqa',
        path='opencompass/Longbench',
        name='2wikimqa',
        reader_cfg=LongBench_2wikimqa_reader_cfg,
        infer_cfg=LongBench_2wikimqa_infer_cfg,
        eval_cfg=LongBench_2wikimqa_eval_cfg,
    )
]
Original file line number Diff line number Diff line change
Expand Up @@ -7,32 +7,37 @@
input_columns=['context', 'input'],
output_column='answers',
train_split='test',
test_split='test'
test_split='test',
)

# Inference config for LongBench DuReader (Chinese reading comprehension):
# zero-shot generation; 128 output tokens for free-form Chinese answers.
LongBench_dureader_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(
                    role='HUMAN',
                    prompt='请基于给定的文章回答下述问题。\n\n文章:{context}\n\n请基于上述文章回答下面的问题。\n\n问题:{input}\n回答:',
                ),
            ],
        ),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=128),
)

# Evaluation config: ROUGE scoring with Chinese tokenization ('zh').
LongBench_dureader_eval_cfg = dict(
    evaluator=dict(type=LongBenchRougeEvaluator, language='zh'), pred_role='BOT'
)

# Dataset entry wiring reader/infer/eval configs to the dureader subset.
LongBench_dureader_datasets = [
    dict(
        type=LongBenchdureaderDataset,
        abbr='LongBench_dureader',
        path='opencompass/Longbench',
        name='dureader',
        reader_cfg=LongBench_dureader_reader_cfg,
        infer_cfg=LongBench_dureader_infer_cfg,
        eval_cfg=LongBench_dureader_eval_cfg,
    )
]
Original file line number Diff line number Diff line change
Expand Up @@ -7,32 +7,37 @@
input_columns=['context'],
output_column='answers',
train_split='test',
test_split='test'
test_split='test',
)

# Inference config for LongBench gov_report (summarization): zero-shot;
# 512 output tokens to fit a one-page summary.
LongBench_gov_report_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(
                    role='HUMAN',
                    prompt='You are given a report by a government agency. Write a one-page summary of the report.\n\nReport:\n{context}\n\nNow, write a one-page summary of the report.\n\nSummary:',
                ),
            ],
        ),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=512),
)

# Evaluation config: ROUGE scoring (default English tokenization).
LongBench_gov_report_eval_cfg = dict(
    evaluator=dict(type=LongBenchRougeEvaluator), pred_role='BOT'
)

# Dataset entry wiring reader/infer/eval configs to the gov_report subset.
LongBench_gov_report_datasets = [
    dict(
        type=LongBenchgov_reportDataset,
        abbr='LongBench_gov_report',
        path='opencompass/Longbench',
        name='gov_report',
        reader_cfg=LongBench_gov_report_reader_cfg,
        infer_cfg=LongBench_gov_report_infer_cfg,
        eval_cfg=LongBench_gov_report_eval_cfg,
    )
]
Original file line number Diff line number Diff line change
Expand Up @@ -7,32 +7,37 @@
input_columns=['context', 'input'],
output_column='answers',
train_split='test',
test_split='test'
test_split='test',
)

# Inference config for LongBench hotpotqa (multi-hop QA): zero-shot
# generation; output capped at 32 tokens for short answers.
LongBench_hotpotqa_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(
                    role='HUMAN',
                    prompt='Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
                ),
            ],
        ),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=32),
)

# Evaluation config: token-level F1 over the model ('BOT') predictions.
LongBench_hotpotqa_eval_cfg = dict(
    evaluator=dict(type=LongBenchF1Evaluator), pred_role='BOT'
)

# Dataset entry wiring reader/infer/eval configs to the hotpotqa subset.
LongBench_hotpotqa_datasets = [
    dict(
        type=LongBenchhotpotqaDataset,
        abbr='LongBench_hotpotqa',
        path='opencompass/Longbench',
        name='hotpotqa',
        reader_cfg=LongBench_hotpotqa_reader_cfg,
        infer_cfg=LongBench_hotpotqa_infer_cfg,
        eval_cfg=LongBench_hotpotqa_eval_cfg,
    )
]
Original file line number Diff line number Diff line change
Expand Up @@ -7,32 +7,37 @@
input_columns=['context'],
output_column='answers',
train_split='test',
test_split='test'
test_split='test',
)

# Inference config for LongBench lcc (code completion): zero-shot;
# 64 output tokens — enough for the next line of code.
LongBench_lcc_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(
                    role='HUMAN',
                    prompt='Please complete the code given below. \n{context}Next line of code:\n',
                ),
            ],
        ),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=64),
)

# Evaluation config: code-similarity scoring of the 'BOT' predictions.
LongBench_lcc_eval_cfg = dict(
    evaluator=dict(type=LongBenchCodeSimEvaluator), pred_role='BOT'
)

# Dataset entry wiring reader/infer/eval configs to the lcc subset.
LongBench_lcc_datasets = [
    dict(
        type=LongBenchlccDataset,
        abbr='LongBench_lcc',
        path='opencompass/Longbench',
        name='lcc',
        reader_cfg=LongBench_lcc_reader_cfg,
        infer_cfg=LongBench_lcc_infer_cfg,
        eval_cfg=LongBench_lcc_eval_cfg,
    )
]
Original file line number Diff line number Diff line change
@@ -1,24 +1,33 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import LongBenchClassificationEvaluator, LongBenchlshtDataset, lsht_postprocess
from opencompass.datasets import (
LongBenchClassificationEvaluator,
LongBenchlshtDataset,
lsht_postprocess,
)

# Reader config for LongBench lsht (Chinese news classification).
# Both splits map to 'test' because LongBench ships evaluation data only.
LongBench_lsht_reader_cfg = dict(
    input_columns=['context', 'input'],
    output_column='all_labels',
    train_split='test',
    test_split='test',
)

# Inference config for LongBench lsht: zero-shot classification prompt
# (few-shot examples arrive inside {context}); 64 output tokens.
LongBench_lsht_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(
                    role='HUMAN',
                    prompt='请判断给定新闻的类别,下面是一些例子。\n\n{context}\n{input}',
                ),
            ],
        ),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=64),
)

LongBench_lsht_eval_cfg = dict(
Expand All @@ -31,9 +40,10 @@
dict(
type=LongBenchlshtDataset,
abbr='LongBench_lsht',
path='THUDM/LongBench',
path='opencompass/Longbench',
name='lsht',
reader_cfg=LongBench_lsht_reader_cfg,
infer_cfg=LongBench_lsht_infer_cfg,
eval_cfg=LongBench_lsht_eval_cfg)
eval_cfg=LongBench_lsht_eval_cfg,
)
]
Original file line number Diff line number Diff line change
Expand Up @@ -7,32 +7,37 @@
input_columns=['context'],
output_column='answers',
train_split='test',
test_split='test'
test_split='test',
)

# Inference config for LongBench multi_news (multi-document summarization):
# zero-shot; 512 output tokens for a one-page summary.
LongBench_multi_news_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(
                    role='HUMAN',
                    prompt='You are given several news passages. Write a one-page summary of all news. \n\nNews:\n{context}\n\nNow, write a one-page summary of all the news.\n\nSummary:\n',
                ),
            ],
        ),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=512),
)

# Evaluation config: ROUGE scoring (default English tokenization).
LongBench_multi_news_eval_cfg = dict(
    evaluator=dict(type=LongBenchRougeEvaluator), pred_role='BOT'
)

# Dataset entry wiring reader/infer/eval configs to the multi_news subset.
LongBench_multi_news_datasets = [
    dict(
        type=LongBenchmulti_newsDataset,
        abbr='LongBench_multi_news',
        path='opencompass/Longbench',
        name='multi_news',
        reader_cfg=LongBench_multi_news_reader_cfg,
        infer_cfg=LongBench_multi_news_infer_cfg,
        eval_cfg=LongBench_multi_news_eval_cfg,
    )
]
Original file line number Diff line number Diff line change
Expand Up @@ -7,32 +7,37 @@
input_columns=['context', 'input'],
output_column='answers',
train_split='test',
test_split='test'
test_split='test',
)

# Inference config for LongBench multifieldqa_en (single-doc QA): zero-shot
# generation; output capped at 64 tokens for brief answers.
LongBench_multifieldqa_en_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(
                    role='HUMAN',
                    prompt='Read the following text and answer briefly.\n\n{context}\n\nNow, answer the following question based on the above text, only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
                ),
            ],
        ),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=64),
)

# Evaluation config: token-level F1 over the model ('BOT') predictions.
LongBench_multifieldqa_en_eval_cfg = dict(
    evaluator=dict(type=LongBenchF1Evaluator), pred_role='BOT'
)

# Dataset entry wiring reader/infer/eval configs to the multifieldqa_en subset.
LongBench_multifieldqa_en_datasets = [
    dict(
        type=LongBenchmultifieldqa_enDataset,
        abbr='LongBench_multifieldqa_en',
        path='opencompass/Longbench',
        name='multifieldqa_en',
        reader_cfg=LongBench_multifieldqa_en_reader_cfg,
        infer_cfg=LongBench_multifieldqa_en_infer_cfg,
        eval_cfg=LongBench_multifieldqa_en_eval_cfg,
    )
]
Loading

0 comments on commit 87ffa71

Please sign in to comment.