forked from alibaba-damo-academy/MedEvalKit
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbenchmarks.py
More file actions
119 lines (93 loc) · 4.11 KB
/
benchmarks.py
File metadata and controls
119 lines (93 loc) · 4.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
from utils import (
Medbullets_op4,
Medbullets_op5,
MedXpertQA,
MMMU,
OmniMedVQA,
PATH_VQA,
PMC_VQA,
SLAKE,
SuperGPQA,
VQA_RAD,
HealthBench,
PubMedQA,
MedMCQA,
MedQA_USMLE,
MMLU,
CMB,
CMExam,
MedQA_MCMLE,
CMMLU,
IU_XRAY,
CheXpert_Plus,
MIMIC_CXR,
MedFrameQA,
Radrestruct,
)
#eval_MedQA_USMLE
def prepare_benchmark(model,eval_dataset,eval_dataset_path,eval_output_path):
# Hallu,Geometry3k
supported_dataset = ["MMMU-Medical-test","MMMU-Medical-val","PATH_VQA","PMC_VQA","VQA_RAD","SLAKE","MedQA_USMLE","MedMCQA","PubMedQA","OmniMedVQA","Medbullets_op4","Medbullets_op5","MedXpertQA-Text","MedXpertQA-MM","SuperGPQA""HealthBench","IU_XRAY","CheXpert_Plus","MIMIC_CXR","CMB","CMExam","CMMLU","MedQA_MCMLE","MedFrameQA"]
if eval_dataset in ["MMMU-Medical-test", "MMMU-Medical-val"]:
if eval_dataset_path:
eval_dataset_path = eval_dataset_path.replace(eval_dataset,"MMMU")
_ , subset , split = eval_dataset.split("-")
dataset = MMMU(model,eval_dataset_path,eval_output_path,split,subset)
elif eval_dataset == "PATH_VQA":
dataset = PATH_VQA(model,eval_dataset_path,eval_output_path)
elif eval_dataset == "PMC_VQA":
dataset = PMC_VQA(model,eval_dataset_path,eval_output_path)
elif eval_dataset == "VQA_RAD":
dataset = VQA_RAD(model,eval_dataset_path,eval_output_path)
elif eval_dataset == "SLAKE":
dataset = SLAKE(model,eval_dataset_path,eval_output_path)
elif eval_dataset == "OmniMedVQA":
dataset = OmniMedVQA(model,eval_dataset_path,eval_output_path)
elif eval_dataset == "Medbullets_op4":
dataset = Medbullets_op4(model,eval_dataset_path,eval_output_path)
elif eval_dataset == "Medbullets_op5":
dataset = Medbullets_op5(model,eval_dataset_path,eval_output_path)
elif eval_dataset in ["MedXpertQA-Text","MedXpertQA-MM"]:
if eval_dataset_path:
eval_dataset_path = eval_dataset_path.replace(eval_dataset,"MedXpertQA")
_,split = eval_dataset.split("-")
dataset = MedXpertQA(model,eval_dataset_path,eval_output_path,split)
elif eval_dataset == "SuperGPQA":
dataset = SuperGPQA(model,eval_dataset_path,eval_output_path)
elif eval_dataset == "HealthBench":
dataset =HealthBench(model,eval_dataset_path,eval_output_path)
elif eval_dataset == "PubMedQA":
dataset = PubMedQA(model,eval_dataset_path,eval_output_path)
elif eval_dataset == "MedMCQA":
dataset = MedMCQA(model,eval_dataset_path,eval_output_path)
elif eval_dataset == "MedQA_USMLE":
dataset = MedQA_USMLE(model,eval_dataset_path,eval_output_path)
elif eval_dataset in ["MMLU-medical","MMLU-all"]:
if eval_dataset_path:
eval_dataset_path = eval_dataset_path.replace(eval_dataset,"MMLU")
_,subject = eval_dataset.split("-")
dataset = MMLU(model,eval_dataset_path,eval_output_path,subject)
elif eval_dataset == "CMB":
dataset = CMB(model,eval_dataset_path,eval_output_path)
elif eval_dataset == "CMExam":
dataset = CMExam(model,eval_dataset_path,eval_output_path)
elif eval_dataset == "MedQA_MCMLE":
dataset = MedQA_MCMLE(model,eval_dataset_path,eval_output_path)
elif eval_dataset == "CMMLU":
dataset = CMMLU(model,eval_dataset_path,eval_output_path)
elif eval_dataset == "IU_XRAY":
dataset = IU_XRAY(model,eval_dataset_path,eval_output_path)
elif eval_dataset == "CheXpert_Plus":
dataset = CheXpert_Plus(model,eval_dataset_path,eval_output_path)
elif eval_dataset == "MIMIC_CXR":
dataset = MIMIC_CXR(model,eval_dataset_path,eval_output_path)
elif eval_dataset == "MedFrameQA":
dataset = MedFrameQA(model,eval_dataset_path,eval_output_path)
elif eval_dataset == "Radrestruct":
dataset = Radrestruct(model,eval_dataset_path,eval_output_path)
else:
print(f"unknown eval dataset {eval_dataset}, we only support {supported_dataset}")
dataset = None
return dataset
if __name__ == '__main__':
pass