pretrain_large_0.75_400e.txt (399 lines, 103 KB), forked from pengzhiliang/MAE-pytorch
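Each line below is one JSON record written per epoch of the pretraining run: learning rate, reconstruction loss, AMP loss scale, weight decay, gradient norm, epoch index, and parameter count. A minimal parsing sketch, assuming a local copy of this file under the same name (the filename and field names are taken from the log itself; nothing else is guaranteed by the repo). Python's json module already accepts the bare Infinity values that appear in train_grad_norm on epochs where the loss scaler overflowed:

    import json

    # Load the per-epoch JSON records from the log file.
    # json.loads handles the "Infinity" tokens in train_grad_norm by default.
    records = []
    with open("pretrain_large_0.75_400e.txt") as f:
        for line in f:
            line = line.strip()
            if line:
                records.append(json.loads(line))

    # Example: report the last logged epoch and a few of its metrics.
    last = records[-1]
    print(f"epoch {last['epoch']}: train_loss={last['train_loss']:.4f}, "
          f"lr={last['train_lr']:.2e}, grad_norm={last['train_grad_norm']}")

This is only a convenience for inspecting or plotting the curves; the records themselves follow unchanged.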
{"train_lr": 2.9906242487378794e-05, "train_min_lr": 2.9906242487378794e-05, "train_loss": 0.992053489266441, "train_loss_scale": 65536.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.045492654669289596, "epoch": 0, "n_parameters": 329209088}
{"train_lr": 8.991105056494909e-05, "train_min_lr": 8.991105056494909e-05, "train_loss": 0.9692792168830354, "train_loss_scale": 65536.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.1093189464464115, "epoch": 1, "n_parameters": 329209088}
{"train_lr": 0.0001499158586425194, "train_min_lr": 0.0001499158586425194, "train_loss": 0.9407013587486477, "train_loss_scale": 65536.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.16605017766451988, "epoch": 2, "n_parameters": 329209088}
{"train_lr": 0.00020992066672008975, "train_min_lr": 0.00020992066672008975, "train_loss": 0.8689059213830683, "train_loss_scale": 65536.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.28281013352366596, "epoch": 3, "n_parameters": 329209088}
{"train_lr": 0.00026992547479766013, "train_min_lr": 0.00026992547479766013, "train_loss": 0.8189738675032575, "train_loss_scale": 65536.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.26077120684278315, "epoch": 4, "n_parameters": 329209088}
{"train_lr": 0.00032993028287523027, "train_min_lr": 0.00032993028287523027, "train_loss": 0.7901057703778721, "train_loss_scale": 65536.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.20059755356170428, "epoch": 5, "n_parameters": 329209088}
{"train_lr": 0.0003899350909528006, "train_min_lr": 0.0003899350909528006, "train_loss": 0.7649512290715789, "train_loss_scale": 104395.48717948717, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.1551318422007637, "epoch": 6, "n_parameters": 329209088}
{"train_lr": 0.00044993989903037104, "train_min_lr": 0.00044993989903037104, "train_loss": 0.7463964715970155, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.12646535741022, "epoch": 7, "n_parameters": 329209088}
{"train_lr": 0.0005099447071079412, "train_min_lr": 0.0005099447071079412, "train_loss": 0.7324579154284527, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.11009813217111887, "epoch": 8, "n_parameters": 329209088}
{"train_lr": 0.0005699495151855116, "train_min_lr": 0.0005699495151855116, "train_loss": 0.722336692060941, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.0991063874740249, "epoch": 9, "n_parameters": 329209088}
{"train_lr": 0.0006299543232630819, "train_min_lr": 0.0006299543232630819, "train_loss": 0.7136334531570379, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.08660566013975021, "epoch": 10, "n_parameters": 329209088}
{"train_lr": 0.0006899591313406521, "train_min_lr": 0.0006899591313406521, "train_loss": 0.7089173261983654, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.0938262621848247, "epoch": 11, "n_parameters": 329209088}
{"train_lr": 0.0007499639394182229, "train_min_lr": 0.0007499639394182229, "train_loss": 0.701731875914937, "train_loss_scale": 155017.84615384616, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.07424946780053851, "epoch": 12, "n_parameters": 329209088}
{"train_lr": 0.0008099687474957929, "train_min_lr": 0.0008099687474957929, "train_loss": 0.6992002461666766, "train_loss_scale": 223914.66666666666, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 13, "n_parameters": 329209088}
{"train_lr": 0.0008699735555733632, "train_min_lr": 0.0008699735555733632, "train_loss": 0.6940862127305127, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.0673549733697795, "epoch": 14, "n_parameters": 329209088}
{"train_lr": 0.0009299783636509335, "train_min_lr": 0.0009299783636509335, "train_loss": 0.6911372066690371, "train_loss_scale": 113427.69230769231, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 15, "n_parameters": 329209088}
{"train_lr": 0.0009899831717285039, "train_min_lr": 0.0009899831717285039, "train_loss": 0.687774730577635, "train_loss_scale": 65536.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.06226646937191104, "epoch": 16, "n_parameters": 329209088}
{"train_lr": 0.001049987979806074, "train_min_lr": 0.001049987979806074, "train_loss": 0.6849430643607122, "train_loss_scale": 65536.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05856762180486933, "epoch": 17, "n_parameters": 329209088}
{"train_lr": 0.0011099927878836444, "train_min_lr": 0.0011099927878836444, "train_loss": 0.6832775899973245, "train_loss_scale": 65536.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.06119824143556448, "epoch": 18, "n_parameters": 329209088}
{"train_lr": 0.0011699975959612145, "train_min_lr": 0.0011699975959612145, "train_loss": 0.6807932547496583, "train_loss_scale": 65536.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.055511064015519924, "epoch": 19, "n_parameters": 329209088}
{"train_lr": 0.0012300024040387849, "train_min_lr": 0.0012300024040387849, "train_loss": 0.6789753519488164, "train_loss_scale": 65536.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05152321100617067, "epoch": 20, "n_parameters": 329209088}
{"train_lr": 0.0012900072121163552, "train_min_lr": 0.0012900072121163552, "train_loss": 0.6773042290005833, "train_loss_scale": 65536.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05118310434194521, "epoch": 21, "n_parameters": 329209088}
{"train_lr": 0.0013500120201939251, "train_min_lr": 0.0013500120201939251, "train_loss": 0.6757971871441278, "train_loss_scale": 121829.7435897436, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.04732887418224262, "epoch": 22, "n_parameters": 329209088}
{"train_lr": 0.0014100168282714964, "train_min_lr": 0.0014100168282714964, "train_loss": 0.674330414374335, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.04591951510295845, "epoch": 23, "n_parameters": 329209088}
{"train_lr": 0.001470021636349066, "train_min_lr": 0.001470021636349066, "train_loss": 0.6730096518444136, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.04374958989091027, "epoch": 24, "n_parameters": 329209088}
{"train_lr": 0.0015300264444266366, "train_min_lr": 0.0015300264444266366, "train_loss": 0.6722819894516411, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.048778077181524195, "epoch": 25, "n_parameters": 329209088}
{"train_lr": 0.0015900312525042061, "train_min_lr": 0.0015900312525042061, "train_loss": 0.6709313489950429, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.04038288148764808, "epoch": 26, "n_parameters": 329209088}
{"train_lr": 0.0016500360605817771, "train_min_lr": 0.0016500360605817771, "train_loss": 0.6697047311478318, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.0391668945682259, "epoch": 27, "n_parameters": 329209088}
{"train_lr": 0.0017100408686593481, "train_min_lr": 0.0017100408686593481, "train_loss": 0.6692729077648181, "train_loss_scale": 189886.35897435897, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03930207984283184, "epoch": 28, "n_parameters": 329209088}
{"train_lr": 0.0017700456767369176, "train_min_lr": 0.0017700456767369176, "train_loss": 0.6682791179881837, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.037641401313102014, "epoch": 29, "n_parameters": 329209088}
{"train_lr": 0.0018300504848144882, "train_min_lr": 0.0018300504848144882, "train_loss": 0.6675869743100916, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.035335088757654794, "epoch": 30, "n_parameters": 329209088}
{"train_lr": 0.001890055292892058, "train_min_lr": 0.001890055292892058, "train_loss": 0.6667553564617171, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03407868015197798, "epoch": 31, "n_parameters": 329209088}
{"train_lr": 0.0019500601009696296, "train_min_lr": 0.0019500601009696296, "train_loss": 0.6660889951416697, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.032953488831527725, "epoch": 32, "n_parameters": 329209088}
{"train_lr": 0.0020100649090471997, "train_min_lr": 0.0020100649090471997, "train_loss": 0.6653759362868582, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03271053924273031, "epoch": 33, "n_parameters": 329209088}
{"train_lr": 0.002070069717124769, "train_min_lr": 0.002070069717124769, "train_loss": 0.6649193805284225, "train_loss_scale": 272226.46153846156, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.031562969082584366, "epoch": 34, "n_parameters": 329209088}
{"train_lr": 0.0021300745252023395, "train_min_lr": 0.0021300745252023395, "train_loss": 0.664195560474092, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.030752416294163618, "epoch": 35, "n_parameters": 329209088}
{"train_lr": 0.0021900793332799103, "train_min_lr": 0.0021900793332799103, "train_loss": 0.6637147109394368, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.030690982066190396, "epoch": 36, "n_parameters": 329209088}
{"train_lr": 0.00225008414135748, "train_min_lr": 0.00225008414135748, "train_loss": 0.6631887465333327, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02963579850844466, "epoch": 37, "n_parameters": 329209088}
{"train_lr": 0.002310088949435051, "train_min_lr": 0.002310088949435051, "train_loss": 0.6626990107664218, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02957984510785303, "epoch": 38, "n_parameters": 329209088}
{"train_lr": 0.0023700937575126205, "train_min_lr": 0.0023700937575126205, "train_loss": 0.6624330086335062, "train_loss_scale": 454130.8717948718, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 39, "n_parameters": 329209088}
{"train_lr": 0.002399984905490592, "train_min_lr": 0.002399984905490592, "train_loss": 0.6618415443047595, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.027928925704402037, "epoch": 40, "n_parameters": 329209088}
{"train_lr": 0.002399894048603015, "train_min_lr": 0.002399894048603015, "train_loss": 0.6611817150125996, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.027318577664211776, "epoch": 41, "n_parameters": 329209088}
{"train_lr": 0.002399712195907412, "train_min_lr": 0.002399712195907412, "train_loss": 0.6604067583222133, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.0273476915481763, "epoch": 42, "n_parameters": 329209088}
{"train_lr": 0.0023994393612525775, "train_min_lr": 0.0023994393612525775, "train_loss": 0.6599090463386323, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02735026071492869, "epoch": 43, "n_parameters": 329209088}
{"train_lr": 0.0023990755654159225, "train_min_lr": 0.0023990755654159225, "train_loss": 0.6592303584926785, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02645150800116169, "epoch": 44, "n_parameters": 329209088}
{"train_lr": 0.00239862083610191, "train_min_lr": 0.00239862083610191, "train_loss": 0.6587562142596699, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.026439578869403936, "epoch": 45, "n_parameters": 329209088}
{"train_lr": 0.002398075207939935, "train_min_lr": 0.002398075207939935, "train_loss": 0.6579734718200202, "train_loss_scale": 231476.5128205128, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.025895795038638588, "epoch": 46, "n_parameters": 329209088}
{"train_lr": 0.002397438722481704, "train_min_lr": 0.002397438722481704, "train_loss": 0.6576415425500809, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.025771908939648897, "epoch": 47, "n_parameters": 329209088}
{"train_lr": 0.002396711428198033, "train_min_lr": 0.002396711428198033, "train_loss": 0.6571225086883761, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.025198384188115597, "epoch": 48, "n_parameters": 329209088}
{"train_lr": 0.00239589338047521, "train_min_lr": 0.00239589338047521, "train_loss": 0.6565848011105584, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.025053061526985124, "epoch": 49, "n_parameters": 329209088}
{"train_lr": 0.0023949846416107326, "train_min_lr": 0.0023949846416107326, "train_loss": 0.6561868981016465, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02536077789651851, "epoch": 50, "n_parameters": 329209088}
{"train_lr": 0.002393985280808584, "train_min_lr": 0.002393985280808584, "train_loss": 0.6558876298248577, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.025169681184566937, "epoch": 51, "n_parameters": 329209088}
{"train_lr": 0.002392895374173956, "train_min_lr": 0.002392895374173956, "train_loss": 0.6553867289808412, "train_loss_scale": 355406.76923076925, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.025806471914387286, "epoch": 52, "n_parameters": 329209088}
{"train_lr": 0.002391715004707465, "train_min_lr": 0.002391715004707465, "train_loss": 0.6549916125547427, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02560123818544432, "epoch": 53, "n_parameters": 329209088}
{"train_lr": 0.0023904442622988075, "train_min_lr": 0.0023904442622988075, "train_loss": 0.6546838582159044, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.025092326988203403, "epoch": 54, "n_parameters": 329209088}
{"train_lr": 0.002389083243719943, "train_min_lr": 0.002389083243719943, "train_loss": 0.6543559112872642, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.025713752799977858, "epoch": 55, "n_parameters": 329209088}
{"train_lr": 0.002387632052617705, "train_min_lr": 0.002387632052617705, "train_loss": 0.6539474247345844, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.025064760765347343, "epoch": 56, "n_parameters": 329209088}
{"train_lr": 0.002386090799505915, "train_min_lr": 0.002386090799505915, "train_loss": 0.6536016984818838, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.024740835968166206, "epoch": 57, "n_parameters": 329209088}
{"train_lr": 0.002384459601756962, "train_min_lr": 0.002384459601756962, "train_loss": 0.6533582593815831, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.024702626334216733, "epoch": 58, "n_parameters": 329209088}
{"train_lr": 0.0023827385835928716, "train_min_lr": 0.0023827385835928716, "train_loss": 0.6529816572124568, "train_loss_scale": 1020009.0256410256, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02484755742793473, "epoch": 59, "n_parameters": 329209088}
{"train_lr": 0.0023809278760758418, "train_min_lr": 0.0023809278760758418, "train_loss": 0.6530672328916785, "train_loss_scale": 1003204.9230769231, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 60, "n_parameters": 329209088}
{"train_lr": 0.0023790276170982585, "train_min_lr": 0.0023790276170982585, "train_loss": 0.6524543827459311, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02469511776087949, "epoch": 61, "n_parameters": 329209088}
{"train_lr": 0.002377037951372201, "train_min_lr": 0.002377037951372201, "train_loss": 0.6522959073205502, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.024549558048303693, "epoch": 62, "n_parameters": 329209088}
{"train_lr": 0.0023749590304184146, "train_min_lr": 0.0023749590304184146, "train_loss": 0.652119710151918, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.024972357673761554, "epoch": 63, "n_parameters": 329209088}
{"train_lr": 0.002372791012554783, "train_min_lr": 0.002372791012554783, "train_loss": 0.6516253335926777, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02455379637794044, "epoch": 64, "n_parameters": 329209088}
{"train_lr": 0.0023705340628842582, "train_min_lr": 0.0023705340628842582, "train_loss": 0.6515252530061377, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.025259264303036988, "epoch": 65, "n_parameters": 329209088}
{"train_lr": 0.002368188353282295, "train_min_lr": 0.002368188353282295, "train_loss": 0.6514551702600259, "train_loss_scale": 147035.89743589744, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 66, "n_parameters": 329209088}
{"train_lr": 0.002365754062383764, "train_min_lr": 0.002365754062383764, "train_loss": 0.6509612936060876, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02523881210706746, "epoch": 67, "n_parameters": 329209088}
{"train_lr": 0.0023632313755693403, "train_min_lr": 0.0023632313755693403, "train_loss": 0.6509679737978448, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.026225544983903185, "epoch": 68, "n_parameters": 329209088}
{"train_lr": 0.0023606204849513923, "train_min_lr": 0.0023606204849513923, "train_loss": 0.6506234402589214, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.024925658968874276, "epoch": 69, "n_parameters": 329209088}
{"train_lr": 0.002357921589359349, "train_min_lr": 0.002357921589359349, "train_loss": 0.6505284958328001, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.024706828360183116, "epoch": 70, "n_parameters": 329209088}
{"train_lr": 0.002355134894324556, "train_min_lr": 0.002355134894324556, "train_loss": 0.6501374820318933, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.025143304326308843, "epoch": 71, "n_parameters": 329209088}
{"train_lr": 0.002352260612064637, "train_min_lr": 0.002352260612064637, "train_loss": 0.6499877903335847, "train_loss_scale": 192406.97435897434, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.025103452866180584, "epoch": 72, "n_parameters": 329209088}
{"train_lr": 0.002349298961467303, "train_min_lr": 0.002349298961467303, "train_loss": 0.6498041582198288, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.024874905625788066, "epoch": 73, "n_parameters": 329209088}
{"train_lr": 0.0023462501680737214, "train_min_lr": 0.0023462501680737214, "train_loss": 0.6496891575949028, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.0250762593275748, "epoch": 74, "n_parameters": 329209088}
{"train_lr": 0.002343114464061315, "train_min_lr": 0.002343114464061315, "train_loss": 0.6497200881369795, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.026134809634337824, "epoch": 75, "n_parameters": 329209088}
{"train_lr": 0.0023398920882260785, "train_min_lr": 0.0023398920882260785, "train_loss": 0.649291190211303, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.025238819754658602, "epoch": 76, "n_parameters": 329209088}
{"train_lr": 0.002336583285964409, "train_min_lr": 0.002336583285964409, "train_loss": 0.6491293019662874, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.024897558375810966, "epoch": 77, "n_parameters": 329209088}
{"train_lr": 0.0023331883092544124, "train_min_lr": 0.0023331883092544124, "train_loss": 0.6490245730592272, "train_loss_scale": 277267.6923076923, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.025258563841000583, "epoch": 78, "n_parameters": 329209088}
{"train_lr": 0.0023297074166367046, "train_min_lr": 0.0023297074166367046, "train_loss": 0.6488851320523864, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02498620083460059, "epoch": 79, "n_parameters": 329209088}
{"train_lr": 0.0023261408731947413, "train_min_lr": 0.0023261408731947413, "train_loss": 0.6487186426858012, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.026786060567993004, "epoch": 80, "n_parameters": 329209088}
{"train_lr": 0.002322488950534608, "train_min_lr": 0.002322488950534608, "train_loss": 0.6485617144379574, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.025100841831702452, "epoch": 81, "n_parameters": 329209088}
{"train_lr": 0.0023187519267643627, "train_min_lr": 0.0023187519267643627, "train_loss": 0.6483423545932732, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02578068504898021, "epoch": 82, "n_parameters": 329209088}
{"train_lr": 0.0023149300864728226, "train_min_lr": 0.0023149300864728226, "train_loss": 0.6483991448170482, "train_loss_scale": 321798.5641025641, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 83, "n_parameters": 329209088}
{"train_lr": 0.00231102372070793, "train_min_lr": 0.00231102372070793, "train_loss": 0.6483515744897513, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.026304703420744494, "epoch": 84, "n_parameters": 329209088}
{"train_lr": 0.002307033126954561, "train_min_lr": 0.002307033126954561, "train_loss": 0.6480086933200558, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02522221313885007, "epoch": 85, "n_parameters": 329209088}
{"train_lr": 0.002302958609111882, "train_min_lr": 0.002302958609111882, "train_loss": 0.6478776008320543, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.026340058545032755, "epoch": 86, "n_parameters": 329209088}
{"train_lr": 0.002298800477470194, "train_min_lr": 0.002298800477470194, "train_loss": 0.6476758741475164, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.025405832703631274, "epoch": 87, "n_parameters": 329209088}
{"train_lr": 0.00229455904868733, "train_min_lr": 0.00229455904868733, "train_loss": 0.6476719009522827, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.025368646569311235, "epoch": 88, "n_parameters": 329209088}
{"train_lr": 0.0022902346457645086, "train_min_lr": 0.0022902346457645086, "train_loss": 0.6475359046270546, "train_loss_scale": 357087.1794871795, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.025890456590180595, "epoch": 89, "n_parameters": 329209088}
{"train_lr": 0.0022858275980217526, "train_min_lr": 0.0022858275980217526, "train_loss": 0.6474509262957443, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.027318660677291263, "epoch": 90, "n_parameters": 329209088}
{"train_lr": 0.0022813382410728175, "train_min_lr": 0.0022813382410728175, "train_loss": 0.6472241824361472, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.025515205352208935, "epoch": 91, "n_parameters": 329209088}
{"train_lr": 0.0022767669167996097, "train_min_lr": 0.0022767669167996097, "train_loss": 0.6470353854461931, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.025716939881348457, "epoch": 92, "n_parameters": 329209088}
{"train_lr": 0.0022721139733261745, "train_min_lr": 0.0022721139733261745, "train_loss": 0.6468503166837857, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.025778588217993576, "epoch": 93, "n_parameters": 329209088}
{"train_lr": 0.00226737976499217, "train_min_lr": 0.00226737976499217, "train_loss": 0.6468086960152365, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02574768583051478, "epoch": 94, "n_parameters": 329209088}
{"train_lr": 0.0022625646523258902, "train_min_lr": 0.0022625646523258902, "train_loss": 0.6466721153925531, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02605553028675226, "epoch": 95, "n_parameters": 329209088}
{"train_lr": 0.002257669002016808, "train_min_lr": 0.002257669002016808, "train_loss": 0.6466963238751468, "train_loss_scale": 720896.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 96, "n_parameters": 329209088}
{"train_lr": 0.002252693186887647, "train_min_lr": 0.002252693186887647, "train_loss": 0.6463773876476364, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.0254454029401621, "epoch": 97, "n_parameters": 329209088}
{"train_lr": 0.0022476375858659953, "train_min_lr": 0.0022476375858659953, "train_loss": 0.6464182320468796, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02652996536105489, "epoch": 98, "n_parameters": 329209088}
{"train_lr": 0.0022425025839554473, "train_min_lr": 0.0022425025839554473, "train_loss": 0.6462093366918942, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02581791605394429, "epoch": 99, "n_parameters": 329209088}
{"train_lr": 0.002237288572206275, "train_min_lr": 0.002237288572206275, "train_loss": 0.6461507878385675, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02692990412768454, "epoch": 100, "n_parameters": 329209088}
{"train_lr": 0.00223199594768566, "train_min_lr": 0.00223199594768566, "train_loss": 0.6463096185694807, "train_loss_scale": 492360.2051282051, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 101, "n_parameters": 329209088}
{"train_lr": 0.002226625113447457, "train_min_lr": 0.002226625113447457, "train_loss": 0.6458756862900769, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02566970758832609, "epoch": 102, "n_parameters": 329209088}
{"train_lr": 0.0022211764785014763, "train_min_lr": 0.0022211764785014763, "train_loss": 0.6458334285371865, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02635402318973763, "epoch": 103, "n_parameters": 329209088}
{"train_lr": 0.0022156504577823745, "train_min_lr": 0.0022156504577823745, "train_loss": 0.6456220043787303, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.026304775789284553, "epoch": 104, "n_parameters": 329209088}
{"train_lr": 0.0022100474721180197, "train_min_lr": 0.0022100474721180197, "train_loss": 0.6457335386747638, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.026793836389119044, "epoch": 105, "n_parameters": 329209088}
{"train_lr": 0.002204367948197461, "train_min_lr": 0.002204367948197461, "train_loss": 0.6454476715149119, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.025944610627797935, "epoch": 106, "n_parameters": 329209088}
{"train_lr": 0.0021986123185384417, "train_min_lr": 0.0021986123185384417, "train_loss": 0.6453419021235254, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02678964802852044, "epoch": 107, "n_parameters": 329209088}
{"train_lr": 0.0021927810214544354, "train_min_lr": 0.0021927810214544354, "train_loss": 0.6456970633843389, "train_loss_scale": 165940.5128205128, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 108, "n_parameters": 329209088}
{"train_lr": 0.0021868745010212983, "train_min_lr": 0.0021868745010212983, "train_loss": 0.6452146430428212, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.026297068336787514, "epoch": 109, "n_parameters": 329209088}
{"train_lr": 0.0021808932070434225, "train_min_lr": 0.0021808932070434225, "train_loss": 0.6451982517535679, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.028141063226100344, "epoch": 110, "n_parameters": 329209088}
{"train_lr": 0.0021748375950195096, "train_min_lr": 0.0021748375950195096, "train_loss": 0.6447996499900444, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02630796065577903, "epoch": 111, "n_parameters": 329209088}
{"train_lr": 0.0021687081261078573, "train_min_lr": 0.0021687081261078573, "train_loss": 0.6448169751212192, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.027201349781348538, "epoch": 112, "n_parameters": 329209088}
{"train_lr": 0.0021625052670912527, "train_min_lr": 0.0021625052670912527, "train_loss": 0.6447285132076687, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02671114440696935, "epoch": 113, "n_parameters": 329209088}
{"train_lr": 0.0021562294903414267, "train_min_lr": 0.0021562294903414267, "train_loss": 0.644528079407815, "train_loss_scale": 173502.35897435897, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02700735426818331, "epoch": 114, "n_parameters": 329209088}
{"train_lr": 0.002149881273783077, "train_min_lr": 0.002149881273783077, "train_loss": 0.644735168856688, "train_loss_scale": 209631.1794871795, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 115, "n_parameters": 329209088}
{"train_lr": 0.0021434611008574723, "train_min_lr": 0.0021434611008574723, "train_loss": 0.6443473706141305, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02649587496685294, "epoch": 116, "n_parameters": 329209088}
{"train_lr": 0.002136969460485639, "train_min_lr": 0.002136969460485639, "train_loss": 0.6443055792448994, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.027255071219629966, "epoch": 117, "n_parameters": 329209088}
{"train_lr": 0.002130406847031118, "train_min_lr": 0.002130406847031118, "train_loss": 0.6440789520394248, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.027710106474562332, "epoch": 118, "n_parameters": 329209088}
{"train_lr": 0.002123773760262341, "train_min_lr": 0.002123773760262341, "train_loss": 0.6441220993008943, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.026872598136273716, "epoch": 119, "n_parameters": 329209088}
{"train_lr": 0.0021170707053145433, "train_min_lr": 0.0021170707053145433, "train_loss": 0.6440309921745211, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.027187776214514788, "epoch": 120, "n_parameters": 329209088}
{"train_lr": 0.0021102981926513073, "train_min_lr": 0.0021102981926513073, "train_loss": 0.6437878189190553, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.026802275407438476, "epoch": 121, "n_parameters": 329209088}
{"train_lr": 0.0021034567380257023, "train_min_lr": 0.0021034567380257023, "train_loss": 0.6438411706640648, "train_loss_scale": 260883.6923076923, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.027098244187445976, "epoch": 122, "n_parameters": 329209088}
{"train_lr": 0.0020965468624409753, "train_min_lr": 0.0020965468624409753, "train_loss": 0.643669240248318, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02758789159811269, "epoch": 123, "n_parameters": 329209088}
{"train_lr": 0.002089569092110911, "train_min_lr": 0.002089569092110911, "train_loss": 0.6440405128284906, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.031389404189749025, "epoch": 124, "n_parameters": 329209088}
{"train_lr": 0.0020825239584197322, "train_min_lr": 0.0020825239584197322, "train_loss": 0.6435283252623123, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.028030019372892685, "epoch": 125, "n_parameters": 329209088}
{"train_lr": 0.0020754119978816502, "train_min_lr": 0.0020754119978816502, "train_loss": 0.6432989513119444, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.027008474129849136, "epoch": 126, "n_parameters": 329209088}
{"train_lr": 0.0020682337520999913, "train_min_lr": 0.0020682337520999913, "train_loss": 0.6432904241761813, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.028780500828407895, "epoch": 127, "n_parameters": 329209088}
{"train_lr": 0.0020609897677259627, "train_min_lr": 0.0020609897677259627, "train_loss": 0.6433132893393915, "train_loss_scale": 414221.1282051282, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.027734479001269508, "epoch": 128, "n_parameters": 329209088}
{"train_lr": 0.0020536805964170256, "train_min_lr": 0.0020536805964170256, "train_loss": 0.6431535486645328, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02747440369775853, "epoch": 129, "n_parameters": 329209088}
{"train_lr": 0.00204630679479487, "train_min_lr": 0.00204630679479487, "train_loss": 0.6430675516752765, "train_loss_scale": 314236.71794871794, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 130, "n_parameters": 329209088}
{"train_lr": 0.002038868924403038, "train_min_lr": 0.002038868924403038, "train_loss": 0.6429448448336468, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02757032032315739, "epoch": 131, "n_parameters": 329209088}
{"train_lr": 0.0020313675516641576, "train_min_lr": 0.0020313675516641576, "train_loss": 0.6428246178532927, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02760386790554875, "epoch": 132, "n_parameters": 329209088}
{"train_lr": 0.002023803247836806, "train_min_lr": 0.002023803247836806, "train_loss": 0.6428641294403814, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.028993207758340318, "epoch": 133, "n_parameters": 329209088}
{"train_lr": 0.002016176588972008, "train_min_lr": 0.002016176588972008, "train_loss": 0.6425923044817188, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.027386506995520532, "epoch": 134, "n_parameters": 329209088}
{"train_lr": 0.002008488155869361, "train_min_lr": 0.002008488155869361, "train_loss": 0.6427169597516649, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.030019870750271738, "epoch": 135, "n_parameters": 329209088}
{"train_lr": 0.002000738534032814, "train_min_lr": 0.002000738534032814, "train_loss": 0.6424605526650945, "train_loss_scale": 364649.0256410256, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.027799924089310642, "epoch": 136, "n_parameters": 329209088}
{"train_lr": 0.001992928313626073, "train_min_lr": 0.001992928313626073, "train_loss": 0.6424023582397077, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.028908150801912714, "epoch": 137, "n_parameters": 329209088}
{"train_lr": 0.0019850580894276585, "train_min_lr": 0.0019850580894276585, "train_loss": 0.6429825143482631, "train_loss_scale": 420102.5641025641, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 138, "n_parameters": 329209088}
{"train_lr": 0.001977128460785622, "train_min_lr": 0.001977128460785622, "train_loss": 0.6422227818745737, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.027390267067135144, "epoch": 139, "n_parameters": 329209088}
{"train_lr": 0.0019691400315718726, "train_min_lr": 0.0019691400315718726, "train_loss": 0.6421666500063088, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02835275857255627, "epoch": 140, "n_parameters": 329209088}
{"train_lr": 0.0019610934101362376, "train_min_lr": 0.0019610934101362376, "train_loss": 0.6419671408306712, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.028144767698951256, "epoch": 141, "n_parameters": 329209088}
{"train_lr": 0.0019529892092600813, "train_min_lr": 0.0019529892092600813, "train_loss": 0.6419400670267164, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02806395894059768, "epoch": 142, "n_parameters": 329209088}
{"train_lr": 0.001944828046109684, "train_min_lr": 0.001944828046109684, "train_loss": 0.6418093704940895, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.028449503251184255, "epoch": 143, "n_parameters": 329209088}
{"train_lr": 0.001936610542189214, "train_min_lr": 0.001936610542189214, "train_loss": 0.6417257706037699, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02862915399078375, "epoch": 144, "n_parameters": 329209088}
{"train_lr": 0.0019283373232934099, "train_min_lr": 0.0019283373232934099, "train_loss": 0.6417567568102803, "train_loss_scale": 339442.8717948718, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 145, "n_parameters": 329209088}
{"train_lr": 0.0019200090194599233, "train_min_lr": 0.0019200090194599233, "train_loss": 0.6414070221213385, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.028315486177467764, "epoch": 146, "n_parameters": 329209088}
{"train_lr": 0.0019116262649213377, "train_min_lr": 0.0019116262649213377, "train_loss": 0.6413639460224658, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.028422319861606527, "epoch": 147, "n_parameters": 329209088}
{"train_lr": 0.0019031896980568602, "train_min_lr": 0.0019031896980568602, "train_loss": 0.6412632769176688, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.029910444234235164, "epoch": 148, "n_parameters": 329209088}
{"train_lr": 0.001894699961343726, "train_min_lr": 0.001894699961343726, "train_loss": 0.6412548693553664, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02833477193967272, "epoch": 149, "n_parameters": 329209088}
{"train_lr": 0.0018861577013082516, "train_min_lr": 0.0018861577013082516, "train_loss": 0.6411763264152867, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.028758050061953373, "epoch": 150, "n_parameters": 329209088}
{"train_lr": 0.0018775635684766133, "train_min_lr": 0.0018775635684766133, "train_loss": 0.6409683011125964, "train_loss_scale": 336082.0512820513, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02854277726668769, "epoch": 151, "n_parameters": 329209088}
{"train_lr": 0.0018689182173253027, "train_min_lr": 0.0018689182173253027, "train_loss": 0.640888780910665, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02893657242664351, "epoch": 152, "n_parameters": 329209088}
{"train_lr": 0.0018602223062312783, "train_min_lr": 0.0018602223062312783, "train_loss": 0.6408331939138663, "train_loss_scale": 280628.5128205128, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 153, "n_parameters": 329209088}
{"train_lr": 0.0018514764974218371, "train_min_lr": 0.0018514764974218371, "train_loss": 0.640822907223199, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02887085449690811, "epoch": 154, "n_parameters": 329209088}
{"train_lr": 0.0018426814569241794, "train_min_lr": 0.0018426814569241794, "train_loss": 0.6406777461942954, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.028397786454894602, "epoch": 155, "n_parameters": 329209088}
{"train_lr": 0.0018338378545146976, "train_min_lr": 0.0018338378545146976, "train_loss": 0.6405608079288728, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02968214620024157, "epoch": 156, "n_parameters": 329209088}
{"train_lr": 0.0018249463636679463, "train_min_lr": 0.0018249463636679463, "train_loss": 0.6404920960836208, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02901425060386268, "epoch": 157, "n_parameters": 329209088}
{"train_lr": 0.0018160076615053812, "train_min_lr": 0.0018160076615053812, "train_loss": 0.6403778348810588, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.029200542533101562, "epoch": 158, "n_parameters": 329209088}
{"train_lr": 0.0018070224287437813, "train_min_lr": 0.0018070224287437813, "train_loss": 0.6403555395678641, "train_loss_scale": 398257.23076923075, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02986457129009068, "epoch": 159, "n_parameters": 329209088}
{"train_lr": 0.0017979913496434085, "train_min_lr": 0.0017979913496434085, "train_loss": 0.6401424498404734, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.028987549125957184, "epoch": 160, "n_parameters": 329209088}
{"train_lr": 0.001788915111955901, "train_min_lr": 0.001788915111955901, "train_loss": 0.6400157217688572, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02981394166365648, "epoch": 161, "n_parameters": 329209088}
{"train_lr": 0.0017797944068718974, "train_min_lr": 0.0017797944068718974, "train_loss": 0.6401064885027993, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.029524828719261747, "epoch": 162, "n_parameters": 329209088}
{"train_lr": 0.0017706299289684047, "train_min_lr": 0.0017706299289684047, "train_loss": 0.6400016286374571, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.031317353391876586, "epoch": 163, "n_parameters": 329209088}
{"train_lr": 0.0017614223761558967, "train_min_lr": 0.0017614223761558967, "train_loss": 0.639819734926837, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.0297823006955859, "epoch": 164, "n_parameters": 329209088}
{"train_lr": 0.001752172449625165, "train_min_lr": 0.001752172449625165, "train_loss": 0.6397659945695733, "train_loss_scale": 581421.9487179487, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.029121420626791242, "epoch": 165, "n_parameters": 329209088}
{"train_lr": 0.0017428808537939323, "train_min_lr": 0.0017428808537939323, "train_loss": 0.6395678837329913, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02940493517029935, "epoch": 166, "n_parameters": 329209088}
{"train_lr": 0.0017335482962531922, "train_min_lr": 0.0017335482962531922, "train_loss": 0.6394719716806251, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.030324584625374813, "epoch": 167, "n_parameters": 329209088}
{"train_lr": 0.0017241754877133318, "train_min_lr": 0.0017241754877133318, "train_loss": 0.6393246623174025, "train_loss_scale": 578061.1282051282, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 168, "n_parameters": 329209088}
{"train_lr": 0.0017147631419500143, "train_min_lr": 0.0017147631419500143, "train_loss": 0.6394077892271945, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03154762937591817, "epoch": 169, "n_parameters": 329209088}
{"train_lr": 0.0017053119757498118, "train_min_lr": 0.0017053119757498118, "train_loss": 0.6392433139650772, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.02979543413489293, "epoch": 170, "n_parameters": 329209088}
{"train_lr": 0.001695822708855617, "train_min_lr": 0.001695822708855617, "train_loss": 0.6391462445593415, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.031601412108359046, "epoch": 171, "n_parameters": 329209088}
{"train_lr": 0.001686296063911845, "train_min_lr": 0.001686296063911845, "train_loss": 0.6389658685940771, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03043923623716602, "epoch": 172, "n_parameters": 329209088}
{"train_lr": 0.0016767327664093945, "train_min_lr": 0.0016767327664093945, "train_loss": 0.638979444357877, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03034274318876366, "epoch": 173, "n_parameters": 329209088}
{"train_lr": 0.0016671335446303921, "train_min_lr": 0.0016671335446303921, "train_loss": 0.6388872074476706, "train_loss_scale": 779710.358974359, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03126039603151954, "epoch": 174, "n_parameters": 329209088}
{"train_lr": 0.0016574991295927436, "train_min_lr": 0.0016574991295927436, "train_loss": 0.6387193798720359, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.030189521562976714, "epoch": 175, "n_parameters": 329209088}
{"train_lr": 0.001647830254994458, "train_min_lr": 0.001647830254994458, "train_loss": 0.6386389592363952, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.030193925399381, "epoch": 176, "n_parameters": 329209088}
{"train_lr": 0.0016381276571577643, "train_min_lr": 0.0016381276571577643, "train_loss": 0.6385088559263983, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.030736114835748687, "epoch": 177, "n_parameters": 329209088}
{"train_lr": 0.0016283920749730564, "train_min_lr": 0.0016283920749730564, "train_loss": 0.6391069796593047, "train_loss_scale": 138213.7435897436, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 178, "n_parameters": 329209088}
{"train_lr": 0.0016186242498426112, "train_min_lr": 0.0016186242498426112, "train_loss": 0.6384479778651626, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.030623054800507348, "epoch": 179, "n_parameters": 329209088}
{"train_lr": 0.0016088249256241284, "train_min_lr": 0.0016088249256241284, "train_loss": 0.6382542708172247, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03130309429401771, "epoch": 180, "n_parameters": 329209088}
{"train_lr": 0.0015989948485740878, "train_min_lr": 0.0015989948485740878, "train_loss": 0.6381903741604242, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.030516292607316222, "epoch": 181, "n_parameters": 329209088}
{"train_lr": 0.0015891347672909151, "train_min_lr": 0.0015891347672909151, "train_loss": 0.6380550607752341, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.030672436036790412, "epoch": 182, "n_parameters": 329209088}
{"train_lr": 0.0015792454326579762, "train_min_lr": 0.0015792454326579762, "train_loss": 0.6379414519104056, "train_loss_scale": 131072.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.030873728098156743, "epoch": 183, "n_parameters": 329209088}
{"train_lr": 0.00156932759778639, "train_min_lr": 0.00156932759778639, "train_loss": 0.637759269018156, "train_loss_scale": 206270.35897435897, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03118793157120355, "epoch": 184, "n_parameters": 329209088}
{"train_lr": 0.00155938201795768, "train_min_lr": 0.00155938201795768, "train_loss": 0.6378241749540067, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.031131858113580026, "epoch": 185, "n_parameters": 329209088}
{"train_lr": 0.0015494094505662558, "train_min_lr": 0.0015494094505662558, "train_loss": 0.6378064515033307, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03346967529983093, "epoch": 186, "n_parameters": 329209088}
{"train_lr": 0.001539410655061736, "train_min_lr": 0.001539410655061736, "train_loss": 0.6375938148166125, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.031042226530515995, "epoch": 187, "n_parameters": 329209088}
{"train_lr": 0.0015293863928911096, "train_min_lr": 0.0015293863928911096, "train_loss": 0.637503370248641, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03165703466854607, "epoch": 188, "n_parameters": 329209088}
{"train_lr": 0.0015193374274407522, "train_min_lr": 0.0015193374274407522, "train_loss": 0.6373424760651034, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.032703791452476226, "epoch": 189, "n_parameters": 329209088}
{"train_lr": 0.00150926452397829, "train_min_lr": 0.00150926452397829, "train_loss": 0.6372924632189844, "train_loss_scale": 304994.46153846156, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03171496963104568, "epoch": 190, "n_parameters": 329209088}
{"train_lr": 0.0014991684495943168, "train_min_lr": 0.0014991684495943168, "train_loss": 0.6371971159839095, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03190691110033255, "epoch": 191, "n_parameters": 329209088}
{"train_lr": 0.0014890499731439859, "train_min_lr": 0.0014890499731439859, "train_loss": 0.6370357151728314, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03221662750897499, "epoch": 192, "n_parameters": 329209088}
{"train_lr": 0.0014789098651884587, "train_min_lr": 0.0014789098651884587, "train_loss": 0.6369895840732333, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03217489603691949, "epoch": 193, "n_parameters": 329209088}
{"train_lr": 0.0014687488979362115, "train_min_lr": 0.0014687488979362115, "train_loss": 0.6369786758023577, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.033310710631597504, "epoch": 194, "n_parameters": 329209088}
{"train_lr": 0.0014585678451842408, "train_min_lr": 0.0014585678451842408, "train_loss": 0.6367120626143729, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03195363310022423, "epoch": 195, "n_parameters": 329209088}
{"train_lr": 0.001448367482259133, "train_min_lr": 0.001448367482259133, "train_loss": 0.6366518784362154, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03254236969858026, "epoch": 196, "n_parameters": 329209088}
{"train_lr": 0.001438148585958014, "train_min_lr": 0.001438148585958014, "train_loss": 0.6365046131078345, "train_loss_scale": 919184.4102564103, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.032628384771016546, "epoch": 197, "n_parameters": 329209088}
{"train_lr": 0.0014279119344894028, "train_min_lr": 0.0014279119344894028, "train_loss": 0.6365064023396908, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.032482835076128445, "epoch": 198, "n_parameters": 329209088}
{"train_lr": 0.001417658307413943, "train_min_lr": 0.001417658307413943, "train_loss": 0.6363673461839939, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03294227854348719, "epoch": 199, "n_parameters": 329209088}
{"train_lr": 0.0014073884855850317, "train_min_lr": 0.0014073884855850317, "train_loss": 0.6362196238448795, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.032725461066151276, "epoch": 200, "n_parameters": 329209088}
{"train_lr": 0.0013971032510893652, "train_min_lr": 0.0013971032510893652, "train_loss": 0.6361585415016191, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.0329327087264317, "epoch": 201, "n_parameters": 329209088}
{"train_lr": 0.0013868033871873699, "train_min_lr": 0.0013868033871873699, "train_loss": 0.6360381598441074, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.032554248007587515, "epoch": 202, "n_parameters": 329209088}
{"train_lr": 0.0013764896782535608, "train_min_lr": 0.0013764896782535608, "train_loss": 0.6358811189360821, "train_loss_scale": 1408183.7948717948, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03306842551757701, "epoch": 203, "n_parameters": 329209088}
{"train_lr": 0.0013661629097168016, "train_min_lr": 0.0013661629097168016, "train_loss": 0.6357421993683928, "train_loss_scale": 836844.3076923077, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 204, "n_parameters": 329209088}
{"train_lr": 0.0013558238680005013, "train_min_lr": 0.0013558238680005013, "train_loss": 0.6357626439705013, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03339464004294804, "epoch": 205, "n_parameters": 329209088}
{"train_lr": 0.001345473340462714, "train_min_lr": 0.001345473340462714, "train_loss": 0.6355263965192418, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03334607285423539, "epoch": 206, "n_parameters": 329209088}
{"train_lr": 0.0013351121153361868, "train_min_lr": 0.0013351121153361868, "train_loss": 0.6355979875183831, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.033490136988126695, "epoch": 207, "n_parameters": 329209088}
{"train_lr": 0.0013247409816683248, "train_min_lr": 0.0013247409816683248, "train_loss": 0.6353928542946681, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.033524584132604875, "epoch": 208, "n_parameters": 329209088}
{"train_lr": 0.001314360729261115, "train_min_lr": 0.001314360729261115, "train_loss": 0.6353181417231472, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03348290323255918, "epoch": 209, "n_parameters": 329209088}
{"train_lr": 0.0013039721486109638, "train_min_lr": 0.0013039721486109638, "train_loss": 0.6351780112510403, "train_loss_scale": 725937.2307692308, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03518244367427169, "epoch": 210, "n_parameters": 329209088}
{"train_lr": 0.0012935760308485087, "train_min_lr": 0.0012935760308485087, "train_loss": 0.6349511720622197, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03371304626433322, "epoch": 211, "n_parameters": 329209088}
{"train_lr": 0.001283173167678369, "train_min_lr": 0.001283173167678369, "train_loss": 0.6349017564087915, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03395078774804297, "epoch": 212, "n_parameters": 329209088}
{"train_lr": 0.0012727643513188527, "train_min_lr": 0.0012727643513188527, "train_loss": 0.6347861846443266, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03394038671961962, "epoch": 213, "n_parameters": 329209088}
{"train_lr": 0.0012623503744416211, "train_min_lr": 0.0012623503744416211, "train_loss": 0.6347843402017577, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03449265233193261, "epoch": 214, "n_parameters": 329209088}
{"train_lr": 0.0012519320301113358, "train_min_lr": 0.0012519320301113358, "train_loss": 0.6345033211132082, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03418060139012642, "epoch": 215, "n_parameters": 329209088}
{"train_lr": 0.001241510111725253, "train_min_lr": 0.001241510111725253, "train_loss": 0.6344598844850388, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03476034481532107, "epoch": 216, "n_parameters": 329209088}
{"train_lr": 0.0012310854129528052, "train_min_lr": 0.0012310854129528052, "train_loss": 0.634398295944079, "train_loss_scale": 1377936.4102564103, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 217, "n_parameters": 329209088}
{"train_lr": 0.0012206587276751707, "train_min_lr": 0.0012206587276751707, "train_loss": 0.6343038002303683, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03473580542665262, "epoch": 218, "n_parameters": 329209088}
{"train_lr": 0.0012102308499247973, "train_min_lr": 0.0012102308499247973, "train_loss": 0.6341536501124023, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03448977081988675, "epoch": 219, "n_parameters": 329209088}
{"train_lr": 0.0011998025738249494, "train_min_lr": 0.0011998025738249494, "train_loss": 0.633975476725027, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03488687019293698, "epoch": 220, "n_parameters": 329209088}
{"train_lr": 0.0011893746935292267, "train_min_lr": 0.0011893746935292267, "train_loss": 0.6341097245577914, "train_loss_scale": 572179.6923076923, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 221, "n_parameters": 329209088}
{"train_lr": 0.0011789480031610881, "train_min_lr": 0.0011789480031610881, "train_loss": 0.6338705676189886, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03692805854818569, "epoch": 222, "n_parameters": 329209088}
{"train_lr": 0.0011685232967533748, "train_min_lr": 0.0011685232967533748, "train_loss": 0.6337018027567328, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03534728240293379, "epoch": 223, "n_parameters": 329209088}
{"train_lr": 0.0011581013681878376, "train_min_lr": 0.0011581013681878376, "train_loss": 0.6336842688421408, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03566962817253975, "epoch": 224, "n_parameters": 329209088}
{"train_lr": 0.0011476830111346887, "train_min_lr": 0.0011476830111346887, "train_loss": 0.6334512306329532, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03549365215719892, "epoch": 225, "n_parameters": 329209088}
{"train_lr": 0.0011372690189921531, "train_min_lr": 0.0011372690189921531, "train_loss": 0.6333092775446578, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.035773792053357914, "epoch": 226, "n_parameters": 329209088}
{"train_lr": 0.0011268601848260535, "train_min_lr": 0.0011268601848260535, "train_loss": 0.633405420368012, "train_loss_scale": 311716.10256410256, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.037723404104606464, "epoch": 227, "n_parameters": 329209088}
{"train_lr": 0.0011164573013094075, "train_min_lr": 0.0011164573013094075, "train_loss": 0.6331540410144206, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03605122995586731, "epoch": 228, "n_parameters": 329209088}
{"train_lr": 0.001106061160662077, "train_min_lr": 0.001106061160662077, "train_loss": 0.6329986882419922, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.035730649723122135, "epoch": 229, "n_parameters": 329209088}
{"train_lr": 0.0010956725545904168, "train_min_lr": 0.0010956725545904168, "train_loss": 0.6328313132592788, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03610342946978143, "epoch": 230, "n_parameters": 329209088}
{"train_lr": 0.0010852922742270053, "train_min_lr": 0.0010852922742270053, "train_loss": 0.6328896881761745, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03644952149345325, "epoch": 231, "n_parameters": 329209088}
{"train_lr": 0.0010749211100703794, "train_min_lr": 0.0010749211100703794, "train_loss": 0.6327227253872806, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.036498260899231985, "epoch": 232, "n_parameters": 329209088}
{"train_lr": 0.0010645598519248383, "train_min_lr": 0.0010645598519248383, "train_loss": 0.6326258556248668, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03659982502293319, "epoch": 233, "n_parameters": 329209088}
{"train_lr": 0.0010542092888403117, "train_min_lr": 0.0010542092888403117, "train_loss": 0.6326706966934487, "train_loss_scale": 676365.1282051282, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 234, "n_parameters": 329209088}
{"train_lr": 0.0010438702090522496, "train_min_lr": 0.0010438702090522496, "train_loss": 0.6324654672927676, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03658811449526976, "epoch": 235, "n_parameters": 329209088}
{"train_lr": 0.001033543399921608, "train_min_lr": 0.001033543399921608, "train_loss": 0.6322615499763439, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03713475795200047, "epoch": 236, "n_parameters": 329209088}
{"train_lr": 0.001023229647874884, "train_min_lr": 0.001023229647874884, "train_loss": 0.6321005097960528, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03762158864321044, "epoch": 237, "n_parameters": 329209088}
{"train_lr": 0.0010129297383442272, "train_min_lr": 0.0010129297383442272, "train_loss": 0.6319690402358388, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03752801210309068, "epoch": 238, "n_parameters": 329209088}
{"train_lr": 0.0010026444557076238, "train_min_lr": 0.0010026444557076238, "train_loss": 0.6319760522888734, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03825479864071195, "epoch": 239, "n_parameters": 329209088}
{"train_lr": 0.000992374583229171, "train_min_lr": 0.000992374583229171, "train_loss": 0.6317316137779599, "train_loss_scale": 262144.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.037378734592587136, "epoch": 240, "n_parameters": 329209088}
{"train_lr": 0.0009821209029994167, "train_min_lr": 0.0009821209029994167, "train_loss": 0.6315415556339595, "train_loss_scale": 501602.46153846156, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03766573099897076, "epoch": 241, "n_parameters": 329209088}
{"train_lr": 0.0009718841958758109, "train_min_lr": 0.0009718841958758109, "train_loss": 0.6314926285368319, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.037886448514958225, "epoch": 242, "n_parameters": 329209088}
{"train_lr": 0.0009616652414232358, "train_min_lr": 0.0009616652414232358, "train_loss": 0.6313681731251284, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.04038268268012848, "epoch": 243, "n_parameters": 329209088}
{"train_lr": 0.0009514648178546331, "train_min_lr": 0.0009514648178546331, "train_loss": 0.63123246973667, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.038252633124685444, "epoch": 244, "n_parameters": 329209088}
{"train_lr": 0.0009412837019717529, "train_min_lr": 0.0009412837019717529, "train_loss": 0.6311447082194858, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03832736581516189, "epoch": 245, "n_parameters": 329209088}
{"train_lr": 0.0009311226691059865, "train_min_lr": 0.0009311226691059865, "train_loss": 0.6310885321790687, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03854645136743784, "epoch": 246, "n_parameters": 329209088}
{"train_lr": 0.0009209824930593261, "train_min_lr": 0.0009209824930593261, "train_loss": 0.6309239633548527, "train_loss_scale": 788112.4102564103, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.038729565659872234, "epoch": 247, "n_parameters": 329209088}
{"train_lr": 0.0009108639460454382, "train_min_lr": 0.0009108639460454382, "train_loss": 0.6307369192035344, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.038624203662411906, "epoch": 248, "n_parameters": 329209088}
{"train_lr": 0.0009007677986308538, "train_min_lr": 0.0009007677986308538, "train_loss": 0.6306912183570557, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03909732751213969, "epoch": 249, "n_parameters": 329209088}
{"train_lr": 0.0008906948196762859, "train_min_lr": 0.0008906948196762859, "train_loss": 0.6305867205493343, "train_loss_scale": 767947.4871794871, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 250, "n_parameters": 329209088}
{"train_lr": 0.000880645776278082, "train_min_lr": 0.000880645776278082, "train_loss": 0.6305206664419996, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.03939954353830753, "epoch": 251, "n_parameters": 329209088}
{"train_lr": 0.000870621433709802, "train_min_lr": 0.000870621433709802, "train_loss": 0.6303684700769969, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.039426080082567073, "epoch": 252, "n_parameters": 329209088}
{"train_lr": 0.0008606225553639454, "train_min_lr": 0.0008606225553639454, "train_loss": 0.6300959098212516, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.039489052150971614, "epoch": 253, "n_parameters": 329209088}
{"train_lr": 0.0008506499026938082, "train_min_lr": 0.0008506499026938082, "train_loss": 0.6300412956446123, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.040096134365273595, "epoch": 254, "n_parameters": 329209088}
{"train_lr": 0.0008407042351555041, "train_min_lr": 0.0008407042351555041, "train_loss": 0.6299278211947053, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.040457814442328155, "epoch": 255, "n_parameters": 329209088}
{"train_lr": 0.0008307863101501201, "train_min_lr": 0.0008307863101501201, "train_loss": 0.629832218048903, "train_loss_scale": 589824.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.04043925850867079, "epoch": 256, "n_parameters": 329209088}
{"train_lr": 0.0008208968829660467, "train_min_lr": 0.0008208968829660467, "train_loss": 0.629643039473404, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.04047447609571883, "epoch": 257, "n_parameters": 329209088}
{"train_lr": 0.0008110367067214505, "train_min_lr": 0.0008110367067214505, "train_loss": 0.6294997143559158, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.04085779420506114, "epoch": 258, "n_parameters": 329209088}
{"train_lr": 0.0008012065323069283, "train_min_lr": 0.0008012065323069283, "train_loss": 0.6294634166555718, "train_loss_scale": 793153.641025641, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 259, "n_parameters": 329209088}
{"train_lr": 0.0007914071083283216, "train_min_lr": 0.0007914071083283216, "train_loss": 0.6292331363372983, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.041266080809948154, "epoch": 260, "n_parameters": 329209088}
{"train_lr": 0.0007816391810497043, "train_min_lr": 0.0007816391810497043, "train_loss": 0.6290712053827846, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.040999528557921834, "epoch": 261, "n_parameters": 329209088}
{"train_lr": 0.0007719034943365599, "train_min_lr": 0.0007719034943365599, "train_loss": 0.6290712010963128, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.04137831624859992, "epoch": 262, "n_parameters": 329209088}
{"train_lr": 0.0007622007895991216, "train_min_lr": 0.0007622007895991216, "train_loss": 0.6289421918933303, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.04181592774768479, "epoch": 263, "n_parameters": 329209088}
{"train_lr": 0.0007525318057359234, "train_min_lr": 0.0007525318057359234, "train_loss": 0.6286877748460915, "train_loss_scale": 524288.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.042315053031182825, "epoch": 264, "n_parameters": 329209088}
{"train_lr": 0.0007428972790775184, "train_min_lr": 0.0007428972790775184, "train_loss": 0.6285432770382613, "train_loss_scale": 564617.8461538461, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.041562357905487984, "epoch": 265, "n_parameters": 329209088}
{"train_lr": 0.0007332979433304175, "train_min_lr": 0.0007332979433304175, "train_loss": 0.6284733923175969, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.042345505118226774, "epoch": 266, "n_parameters": 329209088}
{"train_lr": 0.0007237345295211991, "train_min_lr": 0.0007237345295211991, "train_loss": 0.6282531876260271, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.04248199032810636, "epoch": 267, "n_parameters": 329209088}
{"train_lr": 0.0007142077659408526, "train_min_lr": 0.0007142077659408526, "train_loss": 0.6282466627979795, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.04271088121458888, "epoch": 268, "n_parameters": 329209088}
{"train_lr": 0.0007047183780893101, "train_min_lr": 0.0007047183780893101, "train_loss": 0.6280665331078359, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.04285730222741572, "epoch": 269, "n_parameters": 329209088}
{"train_lr": 0.0006952670886201941, "train_min_lr": 0.0006952670886201941, "train_loss": 0.6278973401118165, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.04347971167701941, "epoch": 270, "n_parameters": 329209088}
{"train_lr": 0.0006858546172857918, "train_min_lr": 0.0006858546172857918, "train_loss": 0.6276867646926966, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.043224812276327074, "epoch": 271, "n_parameters": 329209088}
{"train_lr": 0.0006764816808822354, "train_min_lr": 0.0006764816808822354, "train_loss": 0.6276818952308252, "train_loss_scale": 1747626.6666666667, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.04360416929165904, "epoch": 272, "n_parameters": 329209088}
{"train_lr": 0.0006671489931949225, "train_min_lr": 0.0006671489931949225, "train_loss": 0.6275735991254735, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.043848731280423894, "epoch": 273, "n_parameters": 329209088}
{"train_lr": 0.000657857264944153, "train_min_lr": 0.000657857264944153, "train_loss": 0.6273718408834285, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.0437518492138061, "epoch": 274, "n_parameters": 329209088}
{"train_lr": 0.0006486072037310056, "train_min_lr": 0.0006486072037310056, "train_loss": 0.6271319916018118, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.04413281639631933, "epoch": 275, "n_parameters": 329209088}
{"train_lr": 0.0006393995139834574, "train_min_lr": 0.0006393995139834574, "train_loss": 0.6272063511364067, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.04504153312733158, "epoch": 276, "n_parameters": 329209088}
{"train_lr": 0.0006302348969027305, "train_min_lr": 0.0006302348969027305, "train_loss": 0.6269641043857123, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.04458890359800977, "epoch": 277, "n_parameters": 329209088}
{"train_lr": 0.0006211140504098989, "train_min_lr": 0.0006211140504098989, "train_loss": 0.6268549130166857, "train_loss_scale": 2634883.282051282, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.04499488609293715, "epoch": 278, "n_parameters": 329209088}
{"train_lr": 0.0006120376690927338, "train_min_lr": 0.0006120376690927338, "train_loss": 0.6266899533056391, "train_loss_scale": 4194304.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.04436291022918736, "epoch": 279, "n_parameters": 329209088}
{"train_lr": 0.0006030064441528149, "train_min_lr": 0.0006030064441528149, "train_loss": 0.6265607187166237, "train_loss_scale": 3475088.4102564105, "train_weight_decay": 0.05000000000000026, "train_grad_norm": NaN, "epoch": 280, "n_parameters": 329209088}
{"train_lr": 0.0005940210633528858, "train_min_lr": 0.0005940210633528858, "train_loss": 0.6264487812176156, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.045277594964807995, "epoch": 281, "n_parameters": 329209088}
{"train_lr": 0.0005850822109644842, "train_min_lr": 0.0005850822109644842, "train_loss": 0.6262761797839537, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.04600498614570078, "epoch": 282, "n_parameters": 329209088}
{"train_lr": 0.0005761905677158267, "train_min_lr": 0.0005761905677158267, "train_loss": 0.6261774396034292, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.045497196445910215, "epoch": 283, "n_parameters": 329209088}
{"train_lr": 0.0005673468107399736, "train_min_lr": 0.0005673468107399736, "train_loss": 0.625907331937924, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.0460289020736057, "epoch": 284, "n_parameters": 329209088}
{"train_lr": 0.0005585516135232554, "train_min_lr": 0.0005585516135232554, "train_loss": 0.625940507862908, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.04634552927783284, "epoch": 285, "n_parameters": 329209088}
{"train_lr": 0.0005498056458539953, "train_min_lr": 0.0005498056458539953, "train_loss": 0.625758196412323, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.0464239325792266, "epoch": 286, "n_parameters": 329209088}
{"train_lr": 0.000541109573771491, "train_min_lr": 0.000541109573771491, "train_loss": 0.6255990909830405, "train_loss_scale": 3065068.3076923075, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 287, "n_parameters": 329209088}
{"train_lr": 0.0005324640595153003, "train_min_lr": 0.0005324640595153003, "train_loss": 0.6255322515976448, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.04726804811985065, "epoch": 288, "n_parameters": 329209088}
{"train_lr": 0.0005238697614748063, "train_min_lr": 0.0005238697614748063, "train_loss": 0.6253679469406891, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.04715808993205428, "epoch": 289, "n_parameters": 329209088}
{"train_lr": 0.0005153273341390795, "train_min_lr": 0.0005153273341390795, "train_loss": 0.6250885888361014, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.047379129578192264, "epoch": 290, "n_parameters": 329209088}
{"train_lr": 0.0005068374280470331, "train_min_lr": 0.0005068374280470331, "train_loss": 0.6250022890117879, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.04771568857205029, "epoch": 291, "n_parameters": 329209088}
{"train_lr": 0.0004984006897378886, "train_min_lr": 0.0004984006897378886, "train_loss": 0.6249012614313799, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.0476855332007966, "epoch": 292, "n_parameters": 329209088}
{"train_lr": 0.0004900177617019308, "train_min_lr": 0.0004900177617019308, "train_loss": 0.6247432591059269, "train_loss_scale": 2224863.1794871795, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.04815057661527625, "epoch": 293, "n_parameters": 329209088}
{"train_lr": 0.00048168928233158535, "train_min_lr": 0.00048168928233158535, "train_loss": 0.6245633363962556, "train_loss_scale": 3347377.230769231, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 294, "n_parameters": 329209088}
{"train_lr": 0.0004734158858728016, "train_min_lr": 0.0004734158858728016, "train_loss": 0.624455152085433, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.048516754956486136, "epoch": 295, "n_parameters": 329209088}
{"train_lr": 0.00046519820237675105, "train_min_lr": 0.00046519820237675105, "train_loss": 0.6243504222112302, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.04871210630218952, "epoch": 296, "n_parameters": 329209088}
{"train_lr": 0.0004570368576518498, "train_min_lr": 0.0004570368576518498, "train_loss": 0.6241685947797333, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.049167035069937505, "epoch": 297, "n_parameters": 329209088}
{"train_lr": 0.00044893247321609476, "train_min_lr": 0.00044893247321609476, "train_loss": 0.62405608387855, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.04924473509144706, "epoch": 298, "n_parameters": 329209088}
{"train_lr": 0.0004408856662497389, "train_min_lr": 0.0004408856662497389, "train_loss": 0.6238690452674069, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.04995741456364974, "epoch": 299, "n_parameters": 329209088}
{"train_lr": 0.00043289704954828676, "train_min_lr": 0.00043289704954828676, "train_loss": 0.623728606205147, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.04916721060633277, "epoch": 300, "n_parameters": 329209088}
{"train_lr": 0.0004249672314758303, "train_min_lr": 0.0004249672314758303, "train_loss": 0.6235619415529072, "train_loss_scale": 4180860.717948718, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.049929130559739396, "epoch": 301, "n_parameters": 329209088}
{"train_lr": 0.0004170968159187159, "train_min_lr": 0.0004170968159187159, "train_loss": 0.6234131246590271, "train_loss_scale": 2386182.564102564, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 302, "n_parameters": 329209088}
{"train_lr": 0.0004092864022395612, "train_min_lr": 0.0004092864022395612, "train_loss": 0.6233146814689136, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05043820763388888, "epoch": 303, "n_parameters": 329209088}
{"train_lr": 0.00040153658523160577, "train_min_lr": 0.00040153658523160577, "train_loss": 0.6232048119179522, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.050879253110346884, "epoch": 304, "n_parameters": 329209088}
{"train_lr": 0.00039384795507342054, "train_min_lr": 0.00039384795507342054, "train_loss": 0.6230469430462481, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.050744131279106326, "epoch": 305, "n_parameters": 329209088}
{"train_lr": 0.0003862210972839592, "train_min_lr": 0.0003862210972839592, "train_loss": 0.6228423380675033, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05175772600640089, "epoch": 306, "n_parameters": 329209088}
{"train_lr": 0.00037865659267797083, "train_min_lr": 0.00037865659267797083, "train_loss": 0.622784810128789, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05081591845896, "epoch": 307, "n_parameters": 329209088}
{"train_lr": 0.00037115501732176904, "train_min_lr": 0.00037115501732176904, "train_loss": 0.6225659077366194, "train_loss_scale": 3044903.3846153845, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.051942331620898, "epoch": 308, "n_parameters": 329209088}
{"train_lr": 0.00036371694248936003, "train_min_lr": 0.00036371694248936003, "train_loss": 0.6224814775961045, "train_loss_scale": 2762594.4615384615, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 309, "n_parameters": 329209088}
{"train_lr": 0.00035634293461894045, "train_min_lr": 0.00035634293461894045, "train_loss": 0.6223886375351307, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05241642351477192, "epoch": 310, "n_parameters": 329209088}
{"train_lr": 0.00034903355526975867, "train_min_lr": 0.00034903355526975867, "train_loss": 0.6221648496396553, "train_loss_scale": 1636719.5897435897, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 311, "n_parameters": 329209088}
{"train_lr": 0.00034178936107935213, "train_min_lr": 0.00034178936107935213, "train_loss": 0.6220305213310684, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05290654545220045, "epoch": 312, "n_parameters": 329209088}
{"train_lr": 0.00033461090372115536, "train_min_lr": 0.00033461090372115536, "train_loss": 0.6218693003368875, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05271053246150796, "epoch": 313, "n_parameters": 329209088}
{"train_lr": 0.0003274987298624888, "train_min_lr": 0.0003274987298624888, "train_loss": 0.6217108014493417, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.0534327949086825, "epoch": 314, "n_parameters": 329209088}
{"train_lr": 0.0003204533811229274, "train_min_lr": 0.0003204533811229274, "train_loss": 0.6215678360253477, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05305291585719738, "epoch": 315, "n_parameters": 329209088}
{"train_lr": 0.0003134753940330548, "train_min_lr": 0.0003134753940330548, "train_loss": 0.6214922519209676, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.053655578062320367, "epoch": 316, "n_parameters": 329209088}
{"train_lr": 0.00030656529999360446, "train_min_lr": 0.00030656529999360446, "train_loss": 0.6213508629335616, "train_loss_scale": 1078823.3846153845, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.054097659778423034, "epoch": 317, "n_parameters": 329209088}
{"train_lr": 0.0002997236252349912, "train_min_lr": 0.0002997236252349912, "train_loss": 0.6212262413285387, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05381574500829746, "epoch": 318, "n_parameters": 329209088}
{"train_lr": 0.00029295089077723625, "train_min_lr": 0.00029295089077723625, "train_loss": 0.6209992122872231, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05416175249056556, "epoch": 319, "n_parameters": 329209088}
{"train_lr": 0.00028624761239028984, "train_min_lr": 0.00028624761239028984, "train_loss": 0.620830089540388, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.055238924717578367, "epoch": 320, "n_parameters": 329209088}
{"train_lr": 0.0002796143005547551, "train_min_lr": 0.0002796143005547551, "train_loss": 0.6207279697096405, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05459649208933115, "epoch": 321, "n_parameters": 329209088}
{"train_lr": 0.00027305146042300914, "train_min_lr": 0.00027305146042300914, "train_loss": 0.6206980775683545, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05529561992257069, "epoch": 322, "n_parameters": 329209088}
{"train_lr": 0.0002665595917807374, "train_min_lr": 0.0002665595917807374, "train_loss": 0.6204912179113867, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05496582534546272, "epoch": 323, "n_parameters": 329209088}
{"train_lr": 0.00026013918900887165, "train_min_lr": 0.00026013918900887165, "train_loss": 0.6202278277084518, "train_loss_scale": 3394428.717948718, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.054788462817668915, "epoch": 324, "n_parameters": 329209088}
{"train_lr": 0.00025379074104594005, "train_min_lr": 0.00025379074104594005, "train_loss": 0.6201493390000019, "train_loss_scale": 4194304.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05601183489824717, "epoch": 325, "n_parameters": 329209088}
{"train_lr": 0.00024751473135083417, "train_min_lr": 0.00024751473135083417, "train_loss": 0.620142216125551, "train_loss_scale": 4194304.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05611464600914564, "epoch": 326, "n_parameters": 329209088}
{"train_lr": 0.00024131163786599073, "train_min_lr": 0.00024131163786599073, "train_loss": 0.6198461944213471, "train_loss_scale": 3044903.3846153845, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 327, "n_parameters": 329209088}
{"train_lr": 0.0002351819329809949, "train_min_lr": 0.0002351819329809949, "train_loss": 0.6197900402311904, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05608035231200166, "epoch": 328, "n_parameters": 329209088}
{"train_lr": 0.00022912608349660654, "train_min_lr": 0.00022912608349660654, "train_loss": 0.6196882854908322, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05585487320637091, "epoch": 329, "n_parameters": 329209088}
{"train_lr": 0.0002231445505892088, "train_min_lr": 0.0002231445505892088, "train_loss": 0.6195478991378481, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05689439606160308, "epoch": 330, "n_parameters": 329209088}
{"train_lr": 0.00021723778977569177, "train_min_lr": 0.00021723778977569177, "train_loss": 0.6194377177729248, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.057099204223889574, "epoch": 331, "n_parameters": 329209088}
{"train_lr": 0.00021140625087876029, "train_min_lr": 0.00021140625087876029, "train_loss": 0.6191904116780139, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.057785087277014285, "epoch": 332, "n_parameters": 329209088}
{"train_lr": 0.0002056503779926791, "train_min_lr": 0.0002056503779926791, "train_loss": 0.6190972226158453, "train_loss_scale": 2386182.564102564, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.056569273225389995, "epoch": 333, "n_parameters": 329209088}
{"train_lr": 0.00019997060944945298, "train_min_lr": 0.00019997060944945298, "train_loss": 0.6189060840600481, "train_loss_scale": 4194304.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05774792314817508, "epoch": 334, "n_parameters": 329209088}
{"train_lr": 0.00019436737778544695, "train_min_lr": 0.00019436737778544695, "train_loss": 0.6188764330895188, "train_loss_scale": 4194304.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05788267756071992, "epoch": 335, "n_parameters": 329209088}
{"train_lr": 0.00018884110970844586, "train_min_lr": 0.00018884110970844586, "train_loss": 0.6188582589384168, "train_loss_scale": 4194304.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05812949151135026, "epoch": 336, "n_parameters": 329209088}
{"train_lr": 0.00018339222606515945, "train_min_lr": 0.00018339222606515945, "train_loss": 0.6185744639175633, "train_loss_scale": 2312244.512820513, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 337, "n_parameters": 329209088}
{"train_lr": 0.0001780211418091735, "train_min_lr": 0.0001780211418091735, "train_loss": 0.61838981260856, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05827353232038709, "epoch": 338, "n_parameters": 329209088}
{"train_lr": 0.0001727282659693489, "train_min_lr": 0.0001727282659693489, "train_loss": 0.6183419898331451, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05899448341761644, "epoch": 339, "n_parameters": 329209088}
{"train_lr": 0.00016751400161867363, "train_min_lr": 0.00016751400161867363, "train_loss": 0.6181415190848594, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.058398466473684095, "epoch": 340, "n_parameters": 329209088}
{"train_lr": 0.00016237874584356534, "train_min_lr": 0.00016237874584356534, "train_loss": 0.6180627919996206, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.059225902104606994, "epoch": 341, "n_parameters": 329209088}
{"train_lr": 0.00015732288971363336, "train_min_lr": 0.00015732288971363336, "train_loss": 0.6179356266780255, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05831028468524798, "epoch": 342, "n_parameters": 329209088}
{"train_lr": 0.00015234681825189645, "train_min_lr": 0.00015234681825189645, "train_loss": 0.6178475774358958, "train_loss_scale": 3118841.435897436, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.059250366169577226, "epoch": 343, "n_parameters": 329209088}
{"train_lr": 0.0001474509104054623, "train_min_lr": 0.0001474509104054623, "train_loss": 0.6176945664561712, "train_loss_scale": 4194304.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.0593567103601228, "epoch": 344, "n_parameters": 329209088}
{"train_lr": 0.00014263553901666844, "train_min_lr": 0.00014263553901666844, "train_loss": 0.6176360222116972, "train_loss_scale": 2298801.230769231, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 345, "n_parameters": 329209088}
{"train_lr": 0.00013790107079468978, "train_min_lr": 0.00013790107079468978, "train_loss": 0.6175331700987254, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.059549868453103, "epoch": 346, "n_parameters": 329209088}
{"train_lr": 0.00013324786628761168, "train_min_lr": 0.00013324786628761168, "train_loss": 0.6173387026915756, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.0594609524959173, "epoch": 347, "n_parameters": 329209088}
{"train_lr": 0.00012867627985497265, "train_min_lr": 0.00012867627985497265, "train_loss": 0.6172239957198214, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.06062490045307921, "epoch": 348, "n_parameters": 329209088}
{"train_lr": 0.00012418665964077967, "train_min_lr": 0.00012418665964077967, "train_loss": 0.6170887761892607, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.06008780175724473, "epoch": 349, "n_parameters": 329209088}
{"train_lr": 0.00011977934754699389, "train_min_lr": 0.00011977934754699389, "train_loss": 0.616958317370751, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05987997680233839, "epoch": 350, "n_parameters": 329209088}
{"train_lr": 0.00011545467920749488, "train_min_lr": 0.00011545467920749488, "train_loss": 0.6167866381076283, "train_loss_scale": 2614718.358974359, "train_weight_decay": 0.05000000000000026, "train_grad_norm": NaN, "epoch": 351, "n_parameters": 329209088}
{"train_lr": 0.00011121298396252067, "train_min_lr": 0.00011121298396252067, "train_loss": 0.6167615295913166, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.060026866574891105, "epoch": 352, "n_parameters": 329209088}
{"train_lr": 0.00010705458483358618, "train_min_lr": 0.00010705458483358618, "train_loss": 0.6167897622948751, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.06035228763730862, "epoch": 353, "n_parameters": 329209088}
{"train_lr": 0.00010297979849888524, "train_min_lr": 0.00010297979849888524, "train_loss": 0.616521514313391, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.059711449719869934, "epoch": 354, "n_parameters": 329209088}
{"train_lr": 9.898893526917322e-05, "train_min_lr": 9.898893526917322e-05, "train_loss": 0.6164810129930862, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05972302886538017, "epoch": 355, "n_parameters": 329209088}
{"train_lr": 9.508229906413639e-05, "train_min_lr": 9.508229906413639e-05, "train_loss": 0.6164099339586802, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.06167773603915404, "epoch": 356, "n_parameters": 329209088}
{"train_lr": 9.126018738924707e-05, "train_min_lr": 9.126018738924707e-05, "train_loss": 0.616182995452665, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.06037763428563873, "epoch": 357, "n_parameters": 329209088}
{"train_lr": 8.752289131310685e-05, "train_min_lr": 8.752289131310685e-05, "train_loss": 0.616092385783887, "train_loss_scale": 3851500.3076923075, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.06158848246559501, "epoch": 358, "n_parameters": 329209088}
{"train_lr": 8.387069544528184e-05, "train_min_lr": 8.387069544528184e-05, "train_loss": 0.6160591879608825, "train_loss_scale": 2668491.487179487, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 359, "n_parameters": 329209088}
{"train_lr": 8.030387791462727e-05, "train_min_lr": 8.030387791462727e-05, "train_loss": 0.6159726758308423, "train_loss_scale": 2029935.5897435897, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 360, "n_parameters": 329209088}
{"train_lr": 7.682271034810752e-05, "train_min_lr": 7.682271034810752e-05, "train_loss": 0.6157802377517024, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.061162601523578934, "epoch": 361, "n_parameters": 329209088}
{"train_lr": 7.342745785011076e-05, "train_min_lr": 7.342745785011076e-05, "train_loss": 0.6157825800817077, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.06114464859740856, "epoch": 362, "n_parameters": 329209088}
{"train_lr": 7.01183789822599e-05, "train_min_lr": 7.01183789822599e-05, "train_loss": 0.6156796769967351, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.061237632093998864, "epoch": 363, "n_parameters": 329209088}
{"train_lr": 6.689572574372245e-05, "train_min_lr": 6.689572574372245e-05, "train_loss": 0.615686372680685, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.06131900971134504, "epoch": 364, "n_parameters": 329209088}
{"train_lr": 6.375974355201949e-05, "train_min_lr": 6.375974355201949e-05, "train_loss": 0.6154456634420711, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.06144990777978913, "epoch": 365, "n_parameters": 329209088}
{"train_lr": 6.07106712243363e-05, "train_min_lr": 6.07106712243363e-05, "train_loss": 0.6154503608540369, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.06206150356536874, "epoch": 366, "n_parameters": 329209088}
{"train_lr": 5.774874095933571e-05, "train_min_lr": 5.774874095933571e-05, "train_loss": 0.6154025624541996, "train_loss_scale": 1734183.3846153845, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.060618695408965535, "epoch": 367, "n_parameters": 329209088}
{"train_lr": 5.487417831947492e-05, "train_min_lr": 5.487417831947492e-05, "train_loss": 0.6153451637364924, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.06061985910846255, "epoch": 368, "n_parameters": 329209088}
{"train_lr": 5.208720221382823e-05, "train_min_lr": 5.208720221382823e-05, "train_loss": 0.6151150952941046, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.060271603688120075, "epoch": 369, "n_parameters": 329209088}
{"train_lr": 4.938802488141633e-05, "train_min_lr": 4.938802488141633e-05, "train_loss": 0.6151448833959129, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.06074908639614781, "epoch": 370, "n_parameters": 329209088}
{"train_lr": 4.677685187504342e-05, "train_min_lr": 4.677685187504342e-05, "train_loss": 0.6151111972554085, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.06114926136648043, "epoch": 371, "n_parameters": 329209088}
{"train_lr": 4.425388204564351e-05, "train_min_lr": 4.425388204564351e-05, "train_loss": 0.6150053266483622, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.06076718278181476, "epoch": 372, "n_parameters": 329209088}
{"train_lr": 4.181930752713709e-05, "train_min_lr": 4.181930752713709e-05, "train_loss": 0.6149013768702459, "train_loss_scale": 2117316.923076923, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 373, "n_parameters": 329209088}
{"train_lr": 3.947331372179967e-05, "train_min_lr": 3.947331372179967e-05, "train_loss": 0.614739190775137, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.060880790082499005, "epoch": 374, "n_parameters": 329209088}
{"train_lr": 3.7216079286142414e-05, "train_min_lr": 3.7216079286142414e-05, "train_loss": 0.6148230330779766, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.060367274730920024, "epoch": 375, "n_parameters": 329209088}
{"train_lr": 3.504777611730673e-05, "train_min_lr": 3.504777611730673e-05, "train_loss": 0.6148029503603585, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.06092081310896155, "epoch": 376, "n_parameters": 329209088}
{"train_lr": 3.296856933997393e-05, "train_min_lr": 3.296856933997393e-05, "train_loss": 0.6146716189105064, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.06024718504303541, "epoch": 377, "n_parameters": 329209088}
{"train_lr": 3.097861729379017e-05, "train_min_lr": 3.097861729379017e-05, "train_loss": 0.6146338271896522, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05957711665914991, "epoch": 378, "n_parameters": 329209088}
{"train_lr": 2.9078071521308036e-05, "train_min_lr": 2.9078071521308036e-05, "train_loss": 0.6145035460627136, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.059493583209143996, "epoch": 379, "n_parameters": 329209088}
{"train_lr": 2.726707675644639e-05, "train_min_lr": 2.726707675644639e-05, "train_loss": 0.6144424588419497, "train_loss_scale": 3105398.153846154, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 380, "n_parameters": 329209088}
{"train_lr": 2.554577091346818e-05, "train_min_lr": 2.554577091346818e-05, "train_loss": 0.6144928194057101, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.059755704747751735, "epoch": 381, "n_parameters": 329209088}
{"train_lr": 2.39142850764776e-05, "train_min_lr": 2.39142850764776e-05, "train_loss": 0.6143560699629, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05846842838069185, "epoch": 382, "n_parameters": 329209088}
{"train_lr": 2.2372743489437732e-05, "train_min_lr": 2.2372743489437732e-05, "train_loss": 0.6143771248439757, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05851758097131283, "epoch": 383, "n_parameters": 329209088}
{"train_lr": 2.092126354670879e-05, "train_min_lr": 2.092126354670879e-05, "train_loss": 0.6143331871165011, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05992752422268192, "epoch": 384, "n_parameters": 329209088}
{"train_lr": 1.9559955784107943e-05, "train_min_lr": 1.9559955784107943e-05, "train_loss": 0.6143057309867194, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.058685124350281864, "epoch": 385, "n_parameters": 329209088}
{"train_lr": 1.8288923870491904e-05, "train_min_lr": 1.8288923870491904e-05, "train_loss": 0.6142979655116318, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05840848087786864, "epoch": 386, "n_parameters": 329209088}
{"train_lr": 1.7108264599861833e-05, "train_min_lr": 1.7108264599861833e-05, "train_loss": 0.6142287193439327, "train_loss_scale": 2026574.7692307692, "train_weight_decay": 0.05000000000000026, "train_grad_norm": Infinity, "epoch": 387, "n_parameters": 329209088}
{"train_lr": 1.6018067883992388e-05, "train_min_lr": 1.6018067883992388e-05, "train_loss": 0.6141766291529609, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.057539312300296165, "epoch": 388, "n_parameters": 329209088}
{"train_lr": 1.5018416745584281e-05, "train_min_lr": 1.5018416745584281e-05, "train_loss": 0.6141323318812423, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05734270910183207, "epoch": 389, "n_parameters": 329209088}
{"train_lr": 1.410938731194203e-05, "train_min_lr": 1.410938731194203e-05, "train_loss": 0.6140697077453996, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.057281388996694334, "epoch": 390, "n_parameters": 329209088}
{"train_lr": 1.3291048809176455e-05, "train_min_lr": 1.3291048809176455e-05, "train_loss": 0.6141464590118864, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05875818443317444, "epoch": 391, "n_parameters": 329209088}
{"train_lr": 1.2563463556932867e-05, "train_min_lr": 1.2563463556932867e-05, "train_loss": 0.614029985315238, "train_loss_scale": 1048576.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05735492182131379, "epoch": 392, "n_parameters": 329209088}
{"train_lr": 1.1926686963645178e-05, "train_min_lr": 1.1926686963645178e-05, "train_loss": 0.6140755547676235, "train_loss_scale": 1152761.435897436, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05674764691875913, "epoch": 393, "n_parameters": 329209088}
{"train_lr": 1.138076752231636e-05, "train_min_lr": 1.138076752231636e-05, "train_loss": 0.6140404714343066, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05714714400565777, "epoch": 394, "n_parameters": 329209088}
{"train_lr": 1.0925746806825481e-05, "train_min_lr": 1.0925746806825481e-05, "train_loss": 0.6140535513106256, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05643063993790211, "epoch": 395, "n_parameters": 329209088}
{"train_lr": 1.0561659468761706e-05, "train_min_lr": 1.0561659468761706e-05, "train_loss": 0.6139595152822156, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.0575700929054083, "epoch": 396, "n_parameters": 329209088}
{"train_lr": 1.0288533234785454e-05, "train_min_lr": 1.0288533234785454e-05, "train_loss": 0.6139694767502638, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05775066392305188, "epoch": 397, "n_parameters": 329209088}
{"train_lr": 1.0106388904516885e-05, "train_min_lr": 1.0106388904516885e-05, "train_loss": 0.6139701316610743, "train_loss_scale": 2097152.0, "train_weight_decay": 0.05000000000000026, "train_grad_norm": 0.05698600644245744, "epoch": 398, "n_parameters": 329209088}