-
Notifications
You must be signed in to change notification settings - Fork 0
/
aggregate_metrics
3 lines (3 loc) · 1.21 KB
/
aggregate_metrics
1
2
3
{"aggregate_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.mmlu_scenario.MMLUScenario", "args": {"subject": "philosophy"}}, "split": "valid"}, "overlap_protocol_spec": {"n": 13}}, "part": "input"}, "metric_scores": [1.0], "metric_protocol_spec": {"partial_overlap_spec": 0, "frequency_spec": {"filter_value": 0, "weighting": false}}}
{"aggregate_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.mmlu_scenario.MMLUScenario", "args": {"subject": "philosophy"}}, "split": "valid"}, "overlap_protocol_spec": {"n": 13}}, "part": "input"}, "metric_scores": [0.9545454545454546], "metric_protocol_spec": {"partial_overlap_spec": 1, "frequency_spec": {"filter_value": 0, "weighting": false}}}
{"aggregate_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.mmlu_scenario.MMLUScenario", "args": {"subject": "philosophy"}}, "split": "valid"}, "overlap_protocol_spec": {"n": 13}}, "part": "input"}, "metric_scores": [0.9705882352941176], "metric_protocol_spec": {"partial_overlap_spec": 2, "frequency_spec": {"filter_value": 0, "weighting": false}}}