-
Notifications
You must be signed in to change notification settings - Fork 0
/
sample_metrics_file
40 lines (40 loc) · 19.9 KB
/
sample_metrics_file
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.math_scenario.MATHScenario", "args": {"subject": "intermediate_algebra", "level": 1, "use_official_examples": false, "use_chain_of_thought": true}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "references", "instance_id": "id103"}, "overlap_metric": {"metric_score": 1, "metric_protocol_spec": {"partial_overlap_spec": 0, "frequency_spec": {"filter_value": 0, "weighting": false}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.math_scenario.MATHScenario", "args": {"subject": "intermediate_algebra", "level": 1, "use_official_examples": false, "use_chain_of_thought": true}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "references", "instance_id": "id103"}, "overlap_metric": {"metric_score": 0.007751937984496124, "metric_protocol_spec": {"partial_overlap_spec": 1, "frequency_spec": {"filter_value": 0, "weighting": false}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.math_scenario.MATHScenario", "args": {"subject": "intermediate_algebra", "level": 1, "use_official_examples": false, "use_chain_of_thought": true}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "references", "instance_id": "id103"}, "overlap_metric": {"metric_score": 0.003875968992248062, "metric_protocol_spec": {"partial_overlap_spec": 1, "frequency_spec": {"filter_value": 0, "weighting": true}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.math_scenario.MATHScenario", "args": {"subject": "intermediate_algebra", "level": 1, "use_official_examples": false, "use_chain_of_thought": true}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "references", "instance_id": "id103"}, "overlap_metric": {"metric_score": 0.09219858156028368, "metric_protocol_spec": {"partial_overlap_spec": 2, "frequency_spec": {"filter_value": 0, "weighting": false}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.math_scenario.MATHScenario", "args": {"subject": "intermediate_algebra", "level": 1, "use_official_examples": false, "use_chain_of_thought": true}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "references", "instance_id": "id103"}, "overlap_metric": {"metric_score": 0.04609929078014184, "metric_protocol_spec": {"partial_overlap_spec": 2, "frequency_spec": {"filter_value": 0, "weighting": true}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.math_scenario.MATHScenario", "args": {"subject": "intermediate_algebra", "level": 1, "use_official_examples": false, "use_chain_of_thought": true}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "references", "instance_id": "id103"}, "overlap_metric": {"metric_score": 1, "metric_protocol_spec": {"partial_overlap_spec": 0, "frequency_spec": {"filter_value": 10, "weighting": false}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.math_scenario.MATHScenario", "args": {"subject": "intermediate_algebra", "level": 1, "use_official_examples": false, "use_chain_of_thought": true}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "references", "instance_id": "id103"}, "overlap_metric": {"metric_score": 0.007751937984496124, "metric_protocol_spec": {"partial_overlap_spec": 1, "frequency_spec": {"filter_value": 10, "weighting": false}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.math_scenario.MATHScenario", "args": {"subject": "intermediate_algebra", "level": 1, "use_official_examples": false, "use_chain_of_thought": true}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "references", "instance_id": "id103"}, "overlap_metric": {"metric_score": 0.003875968992248062, "metric_protocol_spec": {"partial_overlap_spec": 1, "frequency_spec": {"filter_value": 10, "weighting": true}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.math_scenario.MATHScenario", "args": {"subject": "intermediate_algebra", "level": 1, "use_official_examples": false, "use_chain_of_thought": true}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "references", "instance_id": "id103"}, "overlap_metric": {"metric_score": 0.09219858156028368, "metric_protocol_spec": {"partial_overlap_spec": 2, "frequency_spec": {"filter_value": 10, "weighting": false}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.math_scenario.MATHScenario", "args": {"subject": "intermediate_algebra", "level": 1, "use_official_examples": false, "use_chain_of_thought": true}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "references", "instance_id": "id103"}, "overlap_metric": {"metric_score": 0.04609929078014184, "metric_protocol_spec": {"partial_overlap_spec": 2, "frequency_spec": {"filter_value": 10, "weighting": true}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.math_scenario.MATHScenario", "args": {"subject": "intermediate_algebra", "level": 1, "use_official_examples": false, "use_chain_of_thought": true}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "references", "instance_id": "id105"}, "overlap_metric": {"metric_score": 1, "metric_protocol_spec": {"partial_overlap_spec": 0, "frequency_spec": {"filter_value": 0, "weighting": false}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.math_scenario.MATHScenario", "args": {"subject": "intermediate_algebra", "level": 1, "use_official_examples": false, "use_chain_of_thought": true}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "references", "instance_id": "id105"}, "overlap_metric": {"metric_score": 0.375, "metric_protocol_spec": {"partial_overlap_spec": 1, "frequency_spec": {"filter_value": 0, "weighting": false}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.math_scenario.MATHScenario", "args": {"subject": "intermediate_algebra", "level": 1, "use_official_examples": false, "use_chain_of_thought": true}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "references", "instance_id": "id105"}, "overlap_metric": {"metric_score": 0.375, "metric_protocol_spec": {"partial_overlap_spec": 1, "frequency_spec": {"filter_value": 0, "weighting": true}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.math_scenario.MATHScenario", "args": {"subject": "intermediate_algebra", "level": 1, "use_official_examples": false, "use_chain_of_thought": true}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "references", "instance_id": "id105"}, "overlap_metric": {"metric_score": 0.6428571428571429, "metric_protocol_spec": {"partial_overlap_spec": 2, "frequency_spec": {"filter_value": 0, "weighting": false}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.math_scenario.MATHScenario", "args": {"subject": "intermediate_algebra", "level": 1, "use_official_examples": false, "use_chain_of_thought": true}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "references", "instance_id": "id105"}, "overlap_metric": {"metric_score": 0.6428571428571429, "metric_protocol_spec": {"partial_overlap_spec": 2, "frequency_spec": {"filter_value": 0, "weighting": true}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.math_scenario.MATHScenario", "args": {"subject": "intermediate_algebra", "level": 1, "use_official_examples": false, "use_chain_of_thought": true}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "references", "instance_id": "id105"}, "overlap_metric": {"metric_score": 1, "metric_protocol_spec": {"partial_overlap_spec": 0, "frequency_spec": {"filter_value": 10, "weighting": false}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.math_scenario.MATHScenario", "args": {"subject": "intermediate_algebra", "level": 1, "use_official_examples": false, "use_chain_of_thought": true}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "references", "instance_id": "id105"}, "overlap_metric": {"metric_score": 0.375, "metric_protocol_spec": {"partial_overlap_spec": 1, "frequency_spec": {"filter_value": 10, "weighting": false}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.math_scenario.MATHScenario", "args": {"subject": "intermediate_algebra", "level": 1, "use_official_examples": false, "use_chain_of_thought": true}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "references", "instance_id": "id105"}, "overlap_metric": {"metric_score": 0.375, "metric_protocol_spec": {"partial_overlap_spec": 1, "frequency_spec": {"filter_value": 10, "weighting": true}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.math_scenario.MATHScenario", "args": {"subject": "intermediate_algebra", "level": 1, "use_official_examples": false, "use_chain_of_thought": true}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "references", "instance_id": "id105"}, "overlap_metric": {"metric_score": 0.6428571428571429, "metric_protocol_spec": {"partial_overlap_spec": 2, "frequency_spec": {"filter_value": 10, "weighting": false}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.math_scenario.MATHScenario", "args": {"subject": "intermediate_algebra", "level": 1, "use_official_examples": false, "use_chain_of_thought": true}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "references", "instance_id": "id105"}, "overlap_metric": {"metric_score": 0.6428571428571429, "metric_protocol_spec": {"partial_overlap_spec": 2, "frequency_spec": {"filter_value": 10, "weighting": true}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.math_scenario.MATHScenario", "args": {"subject": "number_theory", "level": 1, "use_official_examples": true, "use_chain_of_thought": false}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "input", "instance_id": "id32"}, "overlap_metric": {"metric_score": 1, "metric_protocol_spec": {"partial_overlap_spec": 0, "frequency_spec": {"filter_value": 0, "weighting": false}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.math_scenario.MATHScenario", "args": {"subject": "number_theory", "level": 1, "use_official_examples": true, "use_chain_of_thought": false}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "input", "instance_id": "id32"}, "overlap_metric": {"metric_score": 0.2, "metric_protocol_spec": {"partial_overlap_spec": 1, "frequency_spec": {"filter_value": 0, "weighting": false}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.math_scenario.MATHScenario", "args": {"subject": "number_theory", "level": 1, "use_official_examples": true, "use_chain_of_thought": false}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "input", "instance_id": "id32"}, "overlap_metric": {"metric_score": 0.045, "metric_protocol_spec": {"partial_overlap_spec": 1, "frequency_spec": {"filter_value": 0, "weighting": true}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.math_scenario.MATHScenario", "args": {"subject": "number_theory", "level": 1, "use_official_examples": true, "use_chain_of_thought": false}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "input", "instance_id": "id32"}, "overlap_metric": {"metric_score": 0.6363636363636364, "metric_protocol_spec": {"partial_overlap_spec": 2, "frequency_spec": {"filter_value": 0, "weighting": false}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.math_scenario.MATHScenario", "args": {"subject": "number_theory", "level": 1, "use_official_examples": true, "use_chain_of_thought": false}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "input", "instance_id": "id32"}, "overlap_metric": {"metric_score": 0.1590909090909091, "metric_protocol_spec": {"partial_overlap_spec": 2, "frequency_spec": {"filter_value": 0, "weighting": true}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.math_scenario.MATHScenario", "args": {"subject": "number_theory", "level": 1, "use_official_examples": true, "use_chain_of_thought": false}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "input", "instance_id": "id32"}, "overlap_metric": {"metric_score": 1, "metric_protocol_spec": {"partial_overlap_spec": 0, "frequency_spec": {"filter_value": 10, "weighting": false}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.math_scenario.MATHScenario", "args": {"subject": "number_theory", "level": 1, "use_official_examples": true, "use_chain_of_thought": false}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "input", "instance_id": "id32"}, "overlap_metric": {"metric_score": 0.2, "metric_protocol_spec": {"partial_overlap_spec": 1, "frequency_spec": {"filter_value": 10, "weighting": false}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.math_scenario.MATHScenario", "args": {"subject": "number_theory", "level": 1, "use_official_examples": true, "use_chain_of_thought": false}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "input", "instance_id": "id32"}, "overlap_metric": {"metric_score": 0.045, "metric_protocol_spec": {"partial_overlap_spec": 1, "frequency_spec": {"filter_value": 10, "weighting": true}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.math_scenario.MATHScenario", "args": {"subject": "number_theory", "level": 1, "use_official_examples": true, "use_chain_of_thought": false}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "input", "instance_id": "id32"}, "overlap_metric": {"metric_score": 0.6363636363636364, "metric_protocol_spec": {"partial_overlap_spec": 2, "frequency_spec": {"filter_value": 10, "weighting": false}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.math_scenario.MATHScenario", "args": {"subject": "number_theory", "level": 1, "use_official_examples": true, "use_chain_of_thought": false}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "input", "instance_id": "id32"}, "overlap_metric": {"metric_score": 0.1590909090909091, "metric_protocol_spec": {"partial_overlap_spec": 2, "frequency_spec": {"filter_value": 10, "weighting": true}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.raft_scenario.RAFTScenario", "args": {"subset": "overruling"}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "input", "instance_id": "id24"}, "overlap_metric": {"metric_score": 1, "metric_protocol_spec": {"partial_overlap_spec": 0, "frequency_spec": {"filter_value": 0, "weighting": false}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.raft_scenario.RAFTScenario", "args": {"subset": "overruling"}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "input", "instance_id": "id24"}, "overlap_metric": {"metric_score": 0.11764705882352941, "metric_protocol_spec": {"partial_overlap_spec": 1, "frequency_spec": {"filter_value": 0, "weighting": false}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.raft_scenario.RAFTScenario", "args": {"subset": "overruling"}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "input", "instance_id": "id24"}, "overlap_metric": {"metric_score": 0.11764705882352941, "metric_protocol_spec": {"partial_overlap_spec": 1, "frequency_spec": {"filter_value": 0, "weighting": true}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.raft_scenario.RAFTScenario", "args": {"subset": "overruling"}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "input", "instance_id": "id24"}, "overlap_metric": {"metric_score": 0.4827586206896552, "metric_protocol_spec": {"partial_overlap_spec": 2, "frequency_spec": {"filter_value": 0, "weighting": false}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.raft_scenario.RAFTScenario", "args": {"subset": "overruling"}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "input", "instance_id": "id24"}, "overlap_metric": {"metric_score": 0.4827586206896552, "metric_protocol_spec": {"partial_overlap_spec": 2, "frequency_spec": {"filter_value": 0, "weighting": true}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.raft_scenario.RAFTScenario", "args": {"subset": "overruling"}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "input", "instance_id": "id24"}, "overlap_metric": {"metric_score": 1, "metric_protocol_spec": {"partial_overlap_spec": 0, "frequency_spec": {"filter_value": 10, "weighting": false}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.raft_scenario.RAFTScenario", "args": {"subset": "overruling"}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "input", "instance_id": "id24"}, "overlap_metric": {"metric_score": 0.11764705882352941, "metric_protocol_spec": {"partial_overlap_spec": 1, "frequency_spec": {"filter_value": 10, "weighting": false}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.raft_scenario.RAFTScenario", "args": {"subset": "overruling"}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "input", "instance_id": "id24"}, "overlap_metric": {"metric_score": 0.11764705882352941, "metric_protocol_spec": {"partial_overlap_spec": 1, "frequency_spec": {"filter_value": 10, "weighting": true}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.raft_scenario.RAFTScenario", "args": {"subset": "overruling"}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "input", "instance_id": "id24"}, "overlap_metric": {"metric_score": 0.4827586206896552, "metric_protocol_spec": {"partial_overlap_spec": 2, "frequency_spec": {"filter_value": 10, "weighting": false}}}}
{"entry_data_overlap_key": {"stats_key": {"light_scenario_key": {"scenario_spec": {"class_name": "helm.benchmark.scenarios.raft_scenario.RAFTScenario", "args": {"subset": "overruling"}}, "split": "test"}, "overlap_protocol_spec": {"n": 13}}, "part": "input", "instance_id": "id24"}, "overlap_metric": {"metric_score": 0.4827586206896552, "metric_protocol_spec": {"partial_overlap_spec": 2, "frequency_spec": {"filter_value": 10, "weighting": true}}}}