-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathflair-log-parser-pos.py
89 lines (58 loc) · 2.33 KB
/
flair-log-parser-pos.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import re
import sys
import numpy as np
from collections import defaultdict
from pathlib import Path
# Summarise accuracy scores found in ``training.log`` files.
#
# Usage: pass one glob pattern as the first command-line argument.  Every path
# below the current directory that matches the pattern is treated as one run
# directory.  Runs are grouped by the configuration encoded in the directory
# name, the best development score of each run is averaged per configuration,
# and the test scores of the best configuration are reported.

# Directory naming scheme: <bs..>-<ws..>-<e..>-<lr..>-layers-1-crfFalse-<seed>.
# Raw string so that ``\d`` reaches the regex engine instead of being an
# invalid string escape.
RUN_DIR_PATTERN = re.compile(
    r".*(bs.*?)-(ws.*?)-(e.*?)-(lr.*?)-layers-1-crfFalse-(\d+)"
)

# Substrings that mark the log lines carrying a score as their last
# space-separated token.
DEV_MARKER = "accuracy (micro avg)"
TEST_MARKER = "- Accuracy"


def parse_run_dir(log_dir):
    """Return the configuration identifier encoded in *log_dir*.

    The identifier has the form ``"{ws}-{batch_size}-{epochs}-{lr}"``; the
    trailing seed is matched but deliberately left out so that runs differing
    only by seed are grouped together.

    Returns ``None`` when the path does not follow the naming scheme.
    """
    matches = RUN_DIR_PATTERN.match(str(log_dir))
    if matches is None:
        return None
    batch_size, ws, epochs, lr, _seed = matches.groups()
    return f"{ws}-{batch_size}-{epochs}-{lr}"


def parse_training_log(training_log):
    """Extract all development and test scores from one ``training.log``.

    Returns a ``(dev_scores, test_scores)`` tuple of float lists, in file
    order.  Either list may be empty.  A marker line whose last token is not
    a number raises ``ValueError``.
    """
    dev_scores = []
    test_scores = []
    with open(training_log, "rt") as f_p:
        for line in f_p:
            line = line.rstrip()
            # Two independent checks (not elif): a line is tested against
            # both markers, exactly as each marker is defined above.
            if DEV_MARKER in line:
                dev_scores.append(float(line.split(" ")[-1]))
            if TEST_MARKER in line:
                test_scores.append(float(line.split(" ")[-1]))
    return dev_scores, test_scores


def collect_results(log_dirs):
    """Gather per-configuration scores from an iterable of run directories.

    For every usable run, the best (maximum) development score and all test
    scores are appended to the lists of the run's configuration.  Runs that
    cannot be used are reported on stdout and skipped instead of aborting:

    * no ``training.log`` inside the directory,
    * directory name does not follow the naming scheme,
    * log contains no development score.

    Returns ``(dev_results, test_results)``, two ``defaultdict(list)`` keyed
    by configuration identifier.
    """
    dev_results = defaultdict(list)
    test_results = defaultdict(list)

    for log_dir in log_dirs:
        training_log = log_dir / "training.log"

        if not training_log.exists():
            print(f"No training.log found in {log_dir}")
            continue

        result_identifier = parse_run_dir(log_dir)
        if result_identifier is None:
            print(f"Could not parse configuration from {log_dir}")
            continue

        dev_scores, test_scores = parse_training_log(training_log)
        if not dev_scores:
            # Without a development score the run cannot take part in model
            # selection, so its test scores are dropped as well.
            print(f"No development scores found in {training_log}")
            continue

        dev_results[result_identifier].append(max(dev_scores))
        test_results[result_identifier].extend(test_scores)

    return dev_results, test_results


def report(dev_results, test_results):
    """Print the averaged results and return the best configuration.

    The best configuration is the one with the highest mean development
    score.  Scores are printed as percentages rounded to two decimals, with
    the standard deviation over runs after the "±" sign.

    Returns the identifier of the best configuration, or ``None`` when
    *dev_results* is empty.
    """
    print("Debug:", dev_results)

    if not dev_results:
        print("No results to report.")
        return None

    mean_dev_results = {
        identifier: np.mean(scores) for identifier, scores in dev_results.items()
    }

    print("Averaged Development Results:")

    ranked = sorted(mean_dev_results.items(), key=lambda item: item[1], reverse=True)
    for mean_dev_config, score in ranked:
        print(f"{mean_dev_config} : {round(score * 100, 2)}")

    best_dev_configuration = max(mean_dev_results, key=mean_dev_results.get)

    print("")
    print("Best configuration:", best_dev_configuration)
    print("")
    print("Best Development Score:",
          round(mean_dev_results[best_dev_configuration] * 100, 2),
          "±",
          round(np.std(dev_results[best_dev_configuration]) * 100, 2)
          )
    print("")

    final_test_results = test_results.get(best_dev_configuration, [])
    if not final_test_results:
        # np.mean/np.std of an empty list would only yield nan plus warnings.
        print("No test scores found for the best configuration.")
        return best_dev_configuration

    print("Final Test Score (based on best configuration on development set):",
          round(np.mean(final_test_results) * 100, 2),
          "±",
          round(np.std(final_test_results) * 100, 2)
          )
    return best_dev_configuration


def main(argv=None):
    """Script entry point.

    *argv* defaults to ``sys.argv``; ``argv[1]`` is the glob pattern that is
    searched recursively below the current directory.

    Returns a process exit code: 0 on success, 1 when no usable run was
    found, 2 when the pattern argument is missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        program = argv[0] if argv else "flair-log-parser-pos.py"
        print(f"Usage: {program} <glob-pattern>", file=sys.stderr)
        return 2

    pattern = argv[1]
    log_dirs = Path("./").rglob(pattern)

    dev_results, test_results = collect_results(log_dirs)
    if not dev_results:
        print(f"No usable runs found for pattern {pattern!r}")
        return 1

    report(dev_results, test_results)
    return 0


if __name__ == "__main__":
    sys.exit(main())