diff --git "a/trainer_state.json" "b/trainer_state.json"
new file mode 100644--- /dev/null
+++ "b/trainer_state.json"
@@ -0,0 +1,4299 @@
+{
+  "best_metric": 0.540250366102693,
+  "best_model_checkpoint": "/data0/checkpoints/Qwen2.5-Math-7B-ScalePRM-v3.0/checkpoint-600",
+  "epoch": 0.757934628138323,
+  "eval_steps": 100,
+  "global_step": 600,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0012632243802305385,
+      "grad_norm": 2.429877281188965,
+      "learning_rate": 7e-06,
+      "loss": 1.1465,
+      "step": 1
+    },
+    {
+      "epoch": 0.002526448760461077,
+      "grad_norm": 2.2121567726135254,
+      "learning_rate": 7e-06,
+      "loss": 1.0451,
+      "step": 2
+    },
+    {
+      "epoch": 0.0037896731406916154,
+      "grad_norm": 1.3855836391448975,
+      "learning_rate": 7e-06,
+      "loss": 0.735,
+      "step": 3
+    },
+    {
+      "epoch": 0.005052897520922154,
+      "grad_norm": 0.3500981330871582,
+      "learning_rate": 7e-06,
+      "loss": 0.5603,
+      "step": 4
+    },
+    {
+      "epoch": 0.0063161219011526925,
+      "grad_norm": 0.40845438838005066,
+      "learning_rate": 7e-06,
+      "loss": 1.0627,
+      "step": 5
+    },
+    {
+      "epoch": 0.007579346281383231,
+      "grad_norm": 0.9035907983779907,
+      "learning_rate": 7e-06,
+      "loss": 5.0569,
+      "step": 6
+    },
+    {
+      "epoch": 0.00884257066161377,
+      "grad_norm": 0.6223624348640442,
+      "learning_rate": 7e-06,
+      "loss": 9.4706,
+      "step": 7
+    },
+    {
+      "epoch": 0.010105795041844308,
+      "grad_norm": 0.5229220390319824,
+      "learning_rate": 7e-06,
+      "loss": 5.6685,
+      "step": 8
+    },
+    {
+      "epoch": 0.011369019422074847,
+      "grad_norm": 0.5141741633415222,
+      "learning_rate": 7e-06,
+      "loss": 5.8831,
+      "step": 9
+    },
+    {
+      "epoch": 0.012632243802305385,
+      "grad_norm": 0.6420879364013672,
+      "learning_rate": 7e-06,
+      "loss": 4.6915,
+      "step": 10
+    },
+    {
+      "epoch": 0.013895468182535923,
+      "grad_norm": 0.47964179515838623,
+      "learning_rate": 7e-06,
+      "loss": 3.9531,
+      "step": 11
+    },
+    {
+      "epoch": 0.015158692562766462,
+      "grad_norm": 0.40398040413856506,
+      "learning_rate": 7e-06,
+      "loss": 1.9501,
+      "step": 12
+    },
+    {
+      "epoch": 0.016421916942996998,
+      "grad_norm": 0.5141711235046387,
+      "learning_rate": 7e-06,
+      "loss": 1.9378,
+      "step": 13
+    },
+    {
+      "epoch": 0.01768514132322754,
+      "grad_norm": 0.24602794647216797,
+      "learning_rate": 7e-06,
+      "loss": 1.535,
+      "step": 14
+    },
+    {
+      "epoch": 0.018948365703458078,
+      "grad_norm": 0.21208855509757996,
+      "learning_rate": 7e-06,
+      "loss": 1.5177,
+      "step": 15
+    },
+    {
+      "epoch": 0.020211590083688617,
+      "grad_norm": 0.6067216992378235,
+      "learning_rate": 7e-06,
+      "loss": 0.9104,
+      "step": 16
+    },
+    {
+      "epoch": 0.021474814463919155,
+      "grad_norm": 0.422442227602005,
+      "learning_rate": 7e-06,
+      "loss": 0.9266,
+      "step": 17
+    },
+    {
+      "epoch": 0.022738038844149693,
+      "grad_norm": 0.659572958946228,
+      "learning_rate": 7e-06,
+      "loss": 0.7499,
+      "step": 18
+    },
+    {
+      "epoch": 0.02400126322438023,
+      "grad_norm": 0.4817348122596741,
+      "learning_rate": 7e-06,
+      "loss": 0.7436,
+      "step": 19
+    },
+    {
+      "epoch": 0.02526448760461077,
+      "grad_norm": 0.20682591199874878,
+      "learning_rate": 7e-06,
+      "loss": 0.4786,
+      "step": 20
+    },
+    {
+      "epoch": 0.026527711984841308,
+      "grad_norm": 0.2613360583782196,
+      "learning_rate": 7e-06,
+      "loss": 0.8464,
+      "step": 21
+    },
+    {
+      "epoch": 0.027790936365071846,
+      "grad_norm": 0.2720305621623993,
+      "learning_rate": 7e-06,
+      "loss": 0.7255,
+      "step": 22
+    },
+    {
+      "epoch": 0.029054160745302385,
+      "grad_norm": 0.25043392181396484,
+      "learning_rate": 7e-06,
+      "loss": 0.9661,
+      "step": 23
+    },
+    {
+      "epoch": 0.030317385125532923,
+      "grad_norm": 0.2801963686943054,
+      "learning_rate": 7e-06,
+      "loss": 0.9602,
+      "step": 24
+    },
+    {
+      "epoch": 0.03158060950576346,
+      "grad_norm": 0.2137051522731781,
+      "learning_rate": 7e-06,
+      "loss": 0.8132,
+      "step": 25
+    },
+    {
+      "epoch": 0.032843833885993996,
+      "grad_norm": 0.13553065061569214,
+      "learning_rate": 7e-06,
+      "loss": 0.4873,
+      "step": 26
+    },
+    {
+      "epoch": 0.03410705826622454,
+      "grad_norm": 0.1350618302822113,
+      "learning_rate": 7e-06,
+      "loss": 0.5186,
+      "step": 27
+    },
+    {
+      "epoch": 0.03537028264645508,
+      "grad_norm": 0.1236298605799675,
+      "learning_rate": 7e-06,
+      "loss": 0.3401,
+      "step": 28
+    },
+    {
+      "epoch": 0.036633507026685615,
+      "grad_norm": 0.15515856444835663,
+      "learning_rate": 7e-06,
+      "loss": 1.2493,
+      "step": 29
+    },
+    {
+      "epoch": 0.037896731406916156,
+      "grad_norm": 0.09012973308563232,
+      "learning_rate": 7e-06,
+      "loss": 0.7651,
+      "step": 30
+    },
+    {
+      "epoch": 0.03915995578714669,
+      "grad_norm": 0.14378102123737335,
+      "learning_rate": 7e-06,
+      "loss": 0.4005,
+      "step": 31
+    },
+    {
+      "epoch": 0.04042318016737723,
+      "grad_norm": 0.2546883523464203,
+      "learning_rate": 7e-06,
+      "loss": 0.5304,
+      "step": 32
+    },
+    {
+      "epoch": 0.04168640454760777,
+      "grad_norm": 0.1023496687412262,
+      "learning_rate": 7e-06,
+      "loss": 0.493,
+      "step": 33
+    },
+    {
+      "epoch": 0.04294962892783831,
+      "grad_norm": 0.1719491183757782,
+      "learning_rate": 7e-06,
+      "loss": 0.3707,
+      "step": 34
+    },
+    {
+      "epoch": 0.044212853308068845,
+      "grad_norm": 0.08337250351905823,
+      "learning_rate": 7e-06,
+      "loss": 0.3143,
+      "step": 35
+    },
+    {
+      "epoch": 0.045476077688299386,
+      "grad_norm": 0.09040359407663345,
+      "learning_rate": 7e-06,
+      "loss": 0.3942,
+      "step": 36
+    },
+    {
+      "epoch": 0.04673930206852992,
+      "grad_norm": 0.10850965231657028,
+      "learning_rate": 7e-06,
+      "loss": 0.4453,
+      "step": 37
+    },
+    {
+      "epoch": 0.04800252644876046,
+      "grad_norm": 0.08887636661529541,
+      "learning_rate": 7e-06,
+      "loss": 1.2015,
+      "step": 38
+    },
+    {
+      "epoch": 0.049265750828991,
+      "grad_norm": 0.1864442229270935,
+      "learning_rate": 7e-06,
+      "loss": 0.3895,
+      "step": 39
+    },
+    {
+      "epoch": 0.05052897520922154,
+      "grad_norm": 0.175123393535614,
+      "learning_rate": 7e-06,
+      "loss": 0.3626,
+      "step": 40
+    },
+    {
+      "epoch": 0.051792199589452075,
+      "grad_norm": 0.10572918504476547,
+      "learning_rate": 7e-06,
+      "loss": 0.3335,
+      "step": 41
+    },
+    {
+      "epoch": 0.053055423969682616,
+      "grad_norm": 0.09624486416578293,
+      "learning_rate": 7e-06,
+      "loss": 0.3065,
+      "step": 42
+    },
+    {
+      "epoch": 0.05431864834991315,
+      "grad_norm": 0.13604743778705597,
+      "learning_rate": 7e-06,
+      "loss": 0.317,
+      "step": 43
+    },
+    {
+      "epoch": 0.05558187273014369,
+      "grad_norm": 0.15408551692962646,
+      "learning_rate": 7e-06,
+      "loss": 0.7709,
+      "step": 44
+    },
+    {
+      "epoch": 0.05684509711037423,
+      "grad_norm": 0.09676961600780487,
+      "learning_rate": 7e-06,
+      "loss": 0.5114,
+      "step": 45
+    },
+    {
+      "epoch": 0.05810832149060477,
+      "grad_norm": 0.11936207115650177,
+      "learning_rate": 7e-06,
+      "loss": 0.2785,
+      "step": 46
+    },
+    {
+      "epoch": 0.059371545870835304,
+      "grad_norm": 0.1744876503944397,
+      "learning_rate": 7e-06,
+      "loss": 0.2689,
+      "step": 47
+    },
+    {
+      "epoch": 0.060634770251065846,
+      "grad_norm": 0.17397810518741608,
+      "learning_rate": 7e-06,
+      "loss": 0.3316,
+      "step": 48
+    },
+    {
+      "epoch": 0.06189799463129638,
+      "grad_norm": 0.1329212635755539,
+      "learning_rate": 7e-06,
+      "loss": 0.2853,
+      "step": 49
+    },
+    {
+      "epoch": 0.06316121901152692,
+      "grad_norm": 0.09555013477802277,
+      "learning_rate": 7e-06,
+      "loss": 0.3182,
+      "step": 50
+    },
+    {
+      "epoch": 0.06442444339175746,
+      "grad_norm": 0.15529152750968933,
+      "learning_rate": 7e-06,
+      "loss": 0.5497,
+      "step": 51
+    },
+    {
+      "epoch": 0.06568766777198799,
+      "grad_norm": 0.09599810838699341,
+      "learning_rate": 7e-06,
+      "loss": 0.3102,
+      "step": 52
+    },
+    {
+      "epoch": 0.06695089215221854,
+      "grad_norm": 0.12325876951217651,
+      "learning_rate": 7e-06,
+      "loss": 0.2788,
+      "step": 53
+    },
+    {
+      "epoch": 0.06821411653244908,
+      "grad_norm": 0.2820286154747009,
+      "learning_rate": 7e-06,
+      "loss": 0.3934,
+      "step": 54
+    },
+    {
+      "epoch": 0.06947734091267961,
+      "grad_norm": 0.17912541329860687,
+      "learning_rate": 7e-06,
+      "loss": 0.3709,
+      "step": 55
+    },
+    {
+      "epoch": 0.07074056529291016,
+      "grad_norm": 0.14083553850650787,
+      "learning_rate": 7e-06,
+      "loss": 0.4105,
+      "step": 56
+    },
+    {
+      "epoch": 0.0720037896731407,
+      "grad_norm": 0.09743569046258926,
+      "learning_rate": 7e-06,
+      "loss": 0.2376,
+      "step": 57
+    },
+    {
+      "epoch": 0.07326701405337123,
+      "grad_norm": 0.10704771429300308,
+      "learning_rate": 7e-06,
+      "loss": 0.2714,
+      "step": 58
+    },
+    {
+      "epoch": 0.07453023843360176,
+      "grad_norm": 0.11463718861341476,
+      "learning_rate": 7e-06,
+      "loss": 0.456,
+      "step": 59
+    },
+    {
+      "epoch": 0.07579346281383231,
+      "grad_norm": 0.12085901200771332,
+      "learning_rate": 7e-06,
+      "loss": 0.3099,
+      "step": 60
+    },
+    {
+      "epoch": 0.07705668719406285,
+      "grad_norm": 0.10744248330593109,
+      "learning_rate": 7e-06,
+      "loss": 0.2312,
+      "step": 61
+    },
+    {
+      "epoch": 0.07831991157429338,
+      "grad_norm": 0.08374691009521484,
+      "learning_rate": 7e-06,
+      "loss": 0.2685,
+      "step": 62
+    },
+    {
+      "epoch": 0.07958313595452392,
+      "grad_norm": 0.10826320946216583,
+      "learning_rate": 7e-06,
+      "loss": 0.3069,
+      "step": 63
+    },
+    {
+      "epoch": 0.08084636033475447,
+      "grad_norm": 0.17864489555358887,
+      "learning_rate": 7e-06,
+      "loss": 0.5024,
+      "step": 64
+    },
+    {
+      "epoch": 0.082109584714985,
+      "grad_norm": 0.11988472938537598,
+      "learning_rate": 7e-06,
+      "loss": 0.3164,
+      "step": 65
+    },
+    {
+      "epoch": 0.08337280909521554,
+      "grad_norm": 0.1612488180398941,
+      "learning_rate": 7e-06,
+      "loss": 0.3295,
+      "step": 66
+    },
+    {
+      "epoch": 0.08463603347544607,
+      "grad_norm": 0.13754408061504364,
+      "learning_rate": 7e-06,
+      "loss": 0.2406,
+      "step": 67
+    },
+    {
+      "epoch": 0.08589925785567662,
+      "grad_norm": 0.11351214349269867,
+      "learning_rate": 7e-06,
+      "loss": 0.3149,
+      "step": 68
+    },
+    {
+      "epoch": 0.08716248223590715,
+      "grad_norm": 0.07585523277521133,
+      "learning_rate": 7e-06,
+      "loss": 0.2706,
+      "step": 69
+    },
+    {
+      "epoch": 0.08842570661613769,
+      "grad_norm": 0.0744984969496727,
+      "learning_rate": 7e-06,
+      "loss": 0.214,
+      "step": 70
+    },
+    {
+      "epoch": 0.08968893099636822,
+      "grad_norm": 0.2244742512702942,
+      "learning_rate": 7e-06,
+      "loss": 0.4242,
+      "step": 71
+    },
+    {
+      "epoch": 0.09095215537659877,
+      "grad_norm": 0.08662209659814835,
+      "learning_rate": 7e-06,
+      "loss": 0.2691,
+      "step": 72
+    },
+    {
+      "epoch": 0.09221537975682931,
+      "grad_norm": 0.10564761608839035,
+      "learning_rate": 7e-06,
+      "loss": 0.3228,
+      "step": 73
+    },
+    {
+      "epoch": 0.09347860413705984,
+      "grad_norm": 0.18067984282970428,
+      "learning_rate": 7e-06,
+      "loss": 0.3384,
+      "step": 74
+    },
+    {
+      "epoch": 0.09474182851729038,
+      "grad_norm": 0.07762212306261063,
+      "learning_rate": 7e-06,
+      "loss": 0.2377,
+      "step": 75
+    },
+    {
+      "epoch": 0.09600505289752093,
+      "grad_norm": 0.07793518900871277,
+      "learning_rate": 7e-06,
+      "loss": 0.248,
+      "step": 76
+    },
+    {
+      "epoch": 0.09726827727775146,
+      "grad_norm": 0.1307854801416397,
+      "learning_rate": 7e-06,
+      "loss": 0.2667,
+      "step": 77
+    },
+    {
+      "epoch": 0.098531501657982,
+      "grad_norm": 0.09771443158388138,
+      "learning_rate": 7e-06,
+      "loss": 0.318,
+      "step": 78
+    },
+    {
+      "epoch": 0.09979472603821253,
+      "grad_norm": 0.10437527298927307,
+      "learning_rate": 7e-06,
+      "loss": 0.3303,
+      "step": 79
+    },
+    {
+      "epoch": 0.10105795041844308,
+      "grad_norm": 0.11160580813884735,
+      "learning_rate": 7e-06,
+      "loss": 0.2845,
+      "step": 80
+    },
+    {
+      "epoch": 0.10232117479867361,
+      "grad_norm": 0.0809980109333992,
+      "learning_rate": 7e-06,
+      "loss": 0.2736,
+      "step": 81
+    },
+    {
+      "epoch": 0.10358439917890415,
+      "grad_norm": 0.10574865341186523,
+      "learning_rate": 7e-06,
+      "loss": 0.3012,
+      "step": 82
+    },
+    {
+      "epoch": 0.1048476235591347,
+      "grad_norm": 0.07807318866252899,
+      "learning_rate": 7e-06,
+      "loss": 0.2283,
+      "step": 83
+    },
+    {
+      "epoch": 0.10611084793936523,
+      "grad_norm": 0.10281991213560104,
+      "learning_rate": 7e-06,
+      "loss": 0.2544,
+      "step": 84
+    },
+    {
+      "epoch": 0.10737407231959577,
+      "grad_norm": 0.12749870121479034,
+      "learning_rate": 7e-06,
+      "loss": 0.2973,
+      "step": 85
+    },
+    {
+      "epoch": 0.1086372966998263,
+      "grad_norm": 0.13138003647327423,
+      "learning_rate": 7e-06,
+      "loss": 0.3399,
+      "step": 86
+    },
+    {
+      "epoch": 0.10990052108005685,
+      "grad_norm": 0.10815514624118805,
+      "learning_rate": 7e-06,
+      "loss": 0.3221,
+      "step": 87
+    },
+    {
+      "epoch": 0.11116374546028739,
+      "grad_norm": 0.13537508249282837,
+      "learning_rate": 7e-06,
+      "loss": 0.308,
+      "step": 88
+    },
+    {
+      "epoch": 0.11242696984051792,
+      "grad_norm": 0.09689060598611832,
+      "learning_rate": 7e-06,
+      "loss": 0.2511,
+      "step": 89
+    },
+    {
+      "epoch": 0.11369019422074846,
+      "grad_norm": 0.08782925456762314,
+      "learning_rate": 7e-06,
+      "loss": 0.2936,
+      "step": 90
+    },
+    {
+      "epoch": 0.114953418600979,
+      "grad_norm": 0.12655287981033325,
+      "learning_rate": 7e-06,
+      "loss": 0.4158,
+      "step": 91
+    },
+    {
+      "epoch": 0.11621664298120954,
+      "grad_norm": 0.11866717785596848,
+      "learning_rate": 7e-06,
+      "loss": 0.4059,
+      "step": 92
+    },
+    {
+      "epoch": 0.11747986736144007,
+      "grad_norm": 0.12691305577754974,
+      "learning_rate": 7e-06,
+      "loss": 0.2453,
+      "step": 93
+    },
+    {
+      "epoch": 0.11874309174167061,
+      "grad_norm": 0.11844722181558609,
+      "learning_rate": 7e-06,
+      "loss": 0.2249,
+      "step": 94
+    },
+    {
+      "epoch": 0.12000631612190116,
+      "grad_norm": 0.07606595754623413,
+      "learning_rate": 7e-06,
+      "loss": 0.2789,
+      "step": 95
+    },
+    {
+      "epoch": 0.12126954050213169,
+      "grad_norm": 0.11529266834259033,
+      "learning_rate": 7e-06,
+      "loss": 0.2654,
+      "step": 96
+    },
+    {
+      "epoch": 0.12253276488236223,
+      "grad_norm": 0.12648285925388336,
+      "learning_rate": 7e-06,
+      "loss": 0.2279,
+      "step": 97
+    },
+    {
+      "epoch": 0.12379598926259276,
+      "grad_norm": 0.1504458636045456,
+      "learning_rate": 7e-06,
+      "loss": 0.4048,
+      "step": 98
+    },
+    {
+      "epoch": 0.1250592136428233,
+      "grad_norm": 0.09578829258680344,
+      "learning_rate": 7e-06,
+      "loss": 0.2912,
+      "step": 99
+    },
+    {
+      "epoch": 0.12632243802305385,
+      "grad_norm": 0.10936733335256577,
+      "learning_rate": 7e-06,
+      "loss": 0.2644,
+      "step": 100
+    },
+    {
+      "epoch": 0.12632243802305385,
+      "eval_correct_accuracy": 0.5708227311280747,
+      "eval_error_accuracy": 0.45610085547050877,
+      "eval_f1": 0.5070537660000148,
+      "eval_loss": 0.43133699893951416,
+      "eval_runtime": 35.3366,
+      "eval_samples_per_second": 96.217,
+      "eval_steps_per_second": 6.028,
+      "step": 100
+    },
+    {
+      "epoch": 0.1275856624032844,
+      "grad_norm": 0.1165054589509964,
+      "learning_rate": 7e-06,
+      "loss": 0.2693,
+      "step": 101
+    },
+    {
+      "epoch": 0.12884888678351492,
+      "grad_norm": 0.08343573659658432,
+      "learning_rate": 7e-06,
+      "loss": 0.2388,
+      "step": 102
+    },
+    {
+      "epoch": 0.13011211116374546,
+      "grad_norm": 0.10629656910896301,
+      "learning_rate": 7e-06,
+      "loss": 0.2603,
+      "step": 103
+    },
+    {
+      "epoch": 0.13137533554397599,
+      "grad_norm": 0.07509850710630417,
+      "learning_rate": 7e-06,
+      "loss": 0.253,
+      "step": 104
+    },
+    {
+      "epoch": 0.13263855992420653,
+      "grad_norm": 0.08039335906505585,
+      "learning_rate": 7e-06,
+      "loss": 0.224,
+      "step": 105
+    },
+    {
+      "epoch": 0.13390178430443708,
+      "grad_norm": 0.10666981339454651,
+      "learning_rate": 7e-06,
+      "loss": 0.3945,
+      "step": 106
+    },
+    {
+      "epoch": 0.1351650086846676,
+      "grad_norm": 0.16490086913108826,
+      "learning_rate": 7e-06,
+      "loss": 0.3087,
+      "step": 107
+    },
+    {
+      "epoch": 0.13642823306489815,
+      "grad_norm": 0.09013114124536514,
+      "learning_rate": 7e-06,
+      "loss": 0.3355,
+      "step": 108
+    },
+    {
+      "epoch": 0.1376914574451287,
+      "grad_norm": 0.1580226719379425,
+      "learning_rate": 7e-06,
+      "loss": 0.2433,
+      "step": 109
+    },
+    {
+      "epoch": 0.13895468182535922,
+      "grad_norm": 0.09130299836397171,
+      "learning_rate": 7e-06,
+      "loss": 0.1928,
+      "step": 110
+    },
+    {
+      "epoch": 0.14021790620558977,
+      "grad_norm": 0.07702811807394028,
+      "learning_rate": 7e-06,
+      "loss": 0.2319,
+      "step": 111
+    },
+    {
+      "epoch": 0.14148113058582032,
+      "grad_norm": 0.14257381856441498,
+      "learning_rate": 7e-06,
+      "loss": 0.2496,
+      "step": 112
+    },
+    {
+      "epoch": 0.14274435496605084,
+      "grad_norm": 0.11546823382377625,
+      "learning_rate": 7e-06,
+      "loss": 0.2592,
+      "step": 113
+    },
+    {
+      "epoch": 0.1440075793462814,
+      "grad_norm": 0.12595829367637634,
+      "learning_rate": 7e-06,
+      "loss": 0.2539,
+      "step": 114
+    },
+    {
+      "epoch": 0.1452708037265119,
+      "grad_norm": 0.10172153264284134,
+      "learning_rate": 7e-06,
+      "loss": 0.2728,
+      "step": 115
+    },
+    {
+      "epoch": 0.14653402810674246,
+      "grad_norm": 0.10145121812820435,
+      "learning_rate": 7e-06,
+      "loss": 0.163,
+      "step": 116
+    },
+    {
+      "epoch": 0.147797252486973,
+      "grad_norm": 0.15631917119026184,
+      "learning_rate": 7e-06,
+      "loss": 0.2526,
+      "step": 117
+    },
+    {
+      "epoch": 0.14906047686720353,
+      "grad_norm": 0.13442394137382507,
+      "learning_rate": 7e-06,
+      "loss": 0.2591,
+      "step": 118
+    },
+    {
+      "epoch": 0.15032370124743408,
+      "grad_norm": 0.08642445504665375,
+      "learning_rate": 7e-06,
+      "loss": 0.2505,
+      "step": 119
+    },
+    {
+      "epoch": 0.15158692562766463,
+      "grad_norm": 0.13054709136486053,
+      "learning_rate": 7e-06,
+      "loss": 0.2704,
+      "step": 120
+    },
+    {
+      "epoch": 0.15285015000789515,
+      "grad_norm": 0.19653519988059998,
+      "learning_rate": 7e-06,
+      "loss": 0.2384,
+      "step": 121
+    },
+    {
+      "epoch": 0.1541133743881257,
+      "grad_norm": 0.20973946154117584,
+      "learning_rate": 7e-06,
+      "loss": 0.2385,
+      "step": 122
+    },
+    {
+      "epoch": 0.15537659876835622,
+      "grad_norm": 0.096860371530056,
+      "learning_rate": 7e-06,
+      "loss": 0.241,
+      "step": 123
+    },
+    {
+      "epoch": 0.15663982314858677,
+      "grad_norm": 0.10356521606445312,
+      "learning_rate": 7e-06,
+      "loss": 0.4737,
+      "step": 124
+    },
+    {
+      "epoch": 0.15790304752881731,
+      "grad_norm": 0.17340725660324097,
+      "learning_rate": 7e-06,
+      "loss": 0.2673,
+      "step": 125
+    },
+    {
+      "epoch": 0.15916627190904784,
+      "grad_norm": 0.3000679612159729,
+      "learning_rate": 7e-06,
+      "loss": 0.431,
+      "step": 126
+    },
+    {
+      "epoch": 0.16042949628927838,
+      "grad_norm": 0.11215244233608246,
+      "learning_rate": 7e-06,
+      "loss": 0.206,
+      "step": 127
+    },
+    {
+      "epoch": 0.16169272066950893,
+      "grad_norm": 0.07078877836465836,
+      "learning_rate": 7e-06,
+      "loss": 0.201,
+      "step": 128
+    },
+    {
+      "epoch": 0.16295594504973945,
+      "grad_norm": 0.16037459671497345,
+      "learning_rate": 7e-06,
+      "loss": 0.268,
+      "step": 129
+    },
+    {
+      "epoch": 0.16421916942997,
+      "grad_norm": 0.20243118703365326,
+      "learning_rate": 7e-06,
+      "loss": 0.2503,
+      "step": 130
+    },
+    {
+      "epoch": 0.16548239381020052,
+      "grad_norm": 0.1389663964509964,
+      "learning_rate": 7e-06,
+      "loss": 0.2517,
+      "step": 131
+    },
+    {
+      "epoch": 0.16674561819043107,
+      "grad_norm": 0.12263572961091995,
+      "learning_rate": 7e-06,
+      "loss": 0.2359,
+      "step": 132
+    },
+    {
+      "epoch": 0.16800884257066162,
+      "grad_norm": 0.14491412043571472,
+      "learning_rate": 7e-06,
+      "loss": 0.3347,
+      "step": 133
+    },
+    {
+      "epoch": 0.16927206695089214,
+      "grad_norm": 0.1378932148218155,
+      "learning_rate": 7e-06,
+      "loss": 0.2638,
+      "step": 134
+    },
+    {
+      "epoch": 0.1705352913311227,
+      "grad_norm": 0.07053989171981812,
+      "learning_rate": 7e-06,
+      "loss": 0.2299,
+      "step": 135
+    },
+    {
+      "epoch": 0.17179851571135324,
+      "grad_norm": 0.19610151648521423,
+      "learning_rate": 7e-06,
+      "loss": 0.2789,
+      "step": 136
+    },
+    {
+      "epoch": 0.17306174009158376,
+      "grad_norm": 0.1290581375360489,
+      "learning_rate": 7e-06,
+      "loss": 0.209,
+      "step": 137
+    },
+    {
+      "epoch": 0.1743249644718143,
+      "grad_norm": 0.1481819599866867,
+      "learning_rate": 7e-06,
+      "loss": 0.2723,
+      "step": 138
+    },
+    {
+      "epoch": 0.17558818885204486,
+      "grad_norm": 0.1427401453256607,
+      "learning_rate": 7e-06,
+      "loss": 0.2778,
+      "step": 139
+    },
+    {
+      "epoch": 0.17685141323227538,
+      "grad_norm": 0.0666273981332779,
+      "learning_rate": 7e-06,
+      "loss": 0.2008,
+      "step": 140
+    },
+    {
+      "epoch": 0.17811463761250593,
+      "grad_norm": 0.13182522356510162,
+      "learning_rate": 7e-06,
+      "loss": 0.2441,
+      "step": 141
+    },
+    {
+      "epoch": 0.17937786199273645,
+      "grad_norm": 0.08374546468257904,
+      "learning_rate": 7e-06,
+      "loss": 0.2603,
+      "step": 142
+    },
+    {
+      "epoch": 0.180641086372967,
+      "grad_norm": 0.10638394951820374,
+      "learning_rate": 7e-06,
+      "loss": 0.2354,
+      "step": 143
+    },
+    {
+      "epoch": 0.18190431075319755,
+      "grad_norm": 0.10801179707050323,
+      "learning_rate": 7e-06,
+      "loss": 0.2875,
+      "step": 144
+    },
+    {
+      "epoch": 0.18316753513342807,
+      "grad_norm": 0.13121351599693298,
+      "learning_rate": 7e-06,
+      "loss": 0.2304,
+      "step": 145
+    },
+    {
+      "epoch": 0.18443075951365862,
+      "grad_norm": 0.10176476836204529,
+      "learning_rate": 7e-06,
+      "loss": 0.2311,
+      "step": 146
+    },
+    {
+      "epoch": 0.18569398389388916,
+      "grad_norm": 0.10199464112520218,
+      "learning_rate": 7e-06,
+      "loss": 0.2522,
+      "step": 147
+    },
+    {
+      "epoch": 0.18695720827411969,
+      "grad_norm": 0.09650130569934845,
+      "learning_rate": 7e-06,
+      "loss": 0.2351,
+      "step": 148
+    },
+    {
+      "epoch": 0.18822043265435023,
+      "grad_norm": 0.12842021882534027,
+      "learning_rate": 7e-06,
+      "loss": 0.2244,
+      "step": 149
+    },
+    {
+      "epoch": 0.18948365703458075,
+      "grad_norm": 0.1237226277589798,
+      "learning_rate": 7e-06,
+      "loss": 0.2706,
+      "step": 150
+    },
+    {
+      "epoch": 0.1907468814148113,
+      "grad_norm": 0.12939125299453735,
+      "learning_rate": 7e-06,
+      "loss": 0.2445,
+      "step": 151
+    },
+    {
+      "epoch": 0.19201010579504185,
+      "grad_norm": 0.11460690945386887,
+      "learning_rate": 7e-06,
+      "loss": 0.2601,
+      "step": 152
+    },
+    {
+      "epoch": 0.19327333017527237,
+      "grad_norm": 0.18108275532722473,
+      "learning_rate": 7e-06,
+      "loss": 0.3465,
+      "step": 153
+    },
+    {
+      "epoch": 0.19453655455550292,
+      "grad_norm": 0.0727877989411354,
+      "learning_rate": 7e-06,
+      "loss": 0.1878,
+      "step": 154
+    },
+    {
+      "epoch": 0.19579977893573347,
+      "grad_norm": 0.12313497066497803,
+      "learning_rate": 7e-06,
+      "loss": 0.2311,
+      "step": 155
+    },
+    {
+      "epoch": 0.197063003315964,
+      "grad_norm": 0.1377153992652893,
+      "learning_rate": 7e-06,
+      "loss": 0.2573,
+      "step": 156
+    },
+    {
+      "epoch": 0.19832622769619454,
+      "grad_norm": 0.08758647739887238,
+      "learning_rate": 7e-06,
+      "loss": 0.2156,
+      "step": 157
+    },
+    {
+      "epoch": 0.19958945207642506,
+      "grad_norm": 0.11441980302333832,
+      "learning_rate": 7e-06,
+      "loss": 0.2801,
+      "step": 158
+    },
+    {
+      "epoch": 0.2008526764566556,
+      "grad_norm": 0.12151770293712616,
+      "learning_rate": 7e-06,
+      "loss": 0.242,
+      "step": 159
+    },
+    {
+      "epoch": 0.20211590083688616,
+      "grad_norm": 0.159256711602211,
+      "learning_rate": 7e-06,
+      "loss": 0.2612,
+      "step": 160
+    },
+    {
+      "epoch": 0.20337912521711668,
+      "grad_norm": 0.08577941358089447,
+      "learning_rate": 7e-06,
+      "loss": 0.2115,
+      "step": 161
+    },
+    {
+      "epoch": 0.20464234959734723,
+      "grad_norm": 0.1190810427069664,
+      "learning_rate": 7e-06,
+      "loss": 0.2434,
+      "step": 162
+    },
+    {
+      "epoch": 0.20590557397757778,
+      "grad_norm": 0.09624910354614258,
+      "learning_rate": 7e-06,
+      "loss": 0.2438,
+      "step": 163
+    },
+    {
+      "epoch": 0.2071687983578083,
+      "grad_norm": 0.16024184226989746,
+      "learning_rate": 7e-06,
+      "loss": 0.2088,
+      "step": 164
+    },
+    {
+      "epoch": 0.20843202273803885,
+      "grad_norm": 0.1891951858997345,
+      "learning_rate": 7e-06,
+      "loss": 0.2751,
+      "step": 165
+    },
+    {
+      "epoch": 0.2096952471182694,
+      "grad_norm": 0.08837898820638657,
+      "learning_rate": 7e-06,
+      "loss": 0.212,
+      "step": 166
+    },
+    {
+      "epoch": 0.21095847149849992,
+      "grad_norm": 0.0905027762055397,
+      "learning_rate": 7e-06,
+      "loss": 0.2189,
+      "step": 167
+    },
+    {
+      "epoch": 0.21222169587873047,
+      "grad_norm": 0.07917249947786331,
+      "learning_rate": 7e-06,
+      "loss": 0.2324,
+      "step": 168
+    },
+    {
+      "epoch": 0.213484920258961,
+      "grad_norm": 0.13524577021598816,
+      "learning_rate": 7e-06,
+      "loss": 0.2143,
+      "step": 169
+    },
+    {
+      "epoch": 0.21474814463919153,
+      "grad_norm": 0.13222923874855042,
+      "learning_rate": 7e-06,
+      "loss": 0.2983,
+      "step": 170
+    },
+    {
+      "epoch": 0.21601136901942208,
+      "grad_norm": 0.1525893360376358,
+      "learning_rate": 7e-06,
+      "loss": 0.2408,
+      "step": 171
+    },
+    {
+      "epoch": 0.2172745933996526,
+      "grad_norm": 0.08309401571750641,
+      "learning_rate": 7e-06,
+      "loss": 0.1722,
+      "step": 172
+    },
+    {
+      "epoch": 0.21853781777988315,
+      "grad_norm": 0.08370368182659149,
+      "learning_rate": 7e-06,
+      "loss": 0.1981,
+      "step": 173
+    },
+    {
+      "epoch": 0.2198010421601137,
+      "grad_norm": 0.11228370666503906,
+      "learning_rate": 7e-06,
+      "loss": 0.2336,
+      "step": 174
+    },
+    {
+      "epoch": 0.22106426654034422,
+      "grad_norm": 0.19010692834854126,
+      "learning_rate": 7e-06,
+      "loss": 0.3069,
+      "step": 175
+    },
+    {
+      "epoch": 0.22232749092057477,
+      "grad_norm": 0.08182361721992493,
+      "learning_rate": 7e-06,
+      "loss": 0.2549,
+      "step": 176
+    },
+    {
+      "epoch": 0.2235907153008053,
+      "grad_norm": 0.1046992763876915,
+      "learning_rate": 7e-06,
+      "loss": 0.2458,
+      "step": 177
+    },
+    {
+      "epoch": 0.22485393968103584,
+      "grad_norm": 0.11583778262138367,
+      "learning_rate": 7e-06,
+      "loss": 0.269,
+      "step": 178
+    },
+    {
+      "epoch": 0.2261171640612664,
+      "grad_norm": 0.07805290818214417,
+      "learning_rate": 7e-06,
+      "loss": 0.1784,
+      "step": 179
+    },
+    {
+      "epoch": 0.2273803884414969,
+      "grad_norm": 0.11022092401981354,
+      "learning_rate": 7e-06,
+      "loss": 0.232,
+      "step": 180
+    },
+    {
+      "epoch": 0.22864361282172746,
+      "grad_norm": 0.1311209499835968,
+      "learning_rate": 7e-06,
+      "loss": 0.2603,
+      "step": 181
+    },
+    {
+      "epoch": 0.229906837201958,
+      "grad_norm": 0.08558022975921631,
+      "learning_rate": 7e-06,
+      "loss": 0.2524,
+      "step": 182
+    },
+    {
+      "epoch": 0.23117006158218853,
+      "grad_norm": 0.0957944467663765,
+      "learning_rate": 7e-06,
+      "loss": 0.281,
+      "step": 183
+    },
+    {
+      "epoch": 0.23243328596241908,
+      "grad_norm": 0.086683489382267,
+      "learning_rate": 7e-06,
+      "loss": 0.2112,
+      "step": 184
+    },
+    {
+      "epoch": 0.2336965103426496,
+      "grad_norm": 0.09485982358455658,
+      "learning_rate": 7e-06,
+      "loss": 0.2146,
+      "step": 185
+    },
+    {
+      "epoch": 0.23495973472288015,
+      "grad_norm": 0.14843790233135223,
+      "learning_rate": 7e-06,
+      "loss": 0.2036,
+      "step": 186
+    },
+    {
+      "epoch": 0.2362229591031107,
+      "grad_norm": 0.09375383704900742,
+      "learning_rate": 7e-06,
+      "loss": 0.2386,
+      "step": 187
+    },
+    {
+      "epoch": 0.23748618348334122,
+      "grad_norm": 0.10639740526676178,
+      "learning_rate": 7e-06,
+      "loss": 0.2202,
+      "step": 188
+    },
+    {
+      "epoch": 0.23874940786357177,
+      "grad_norm": 0.10205169022083282,
+      "learning_rate": 7e-06,
+      "loss": 0.2297,
+      "step": 189
+    },
+    {
+      "epoch": 0.24001263224380232,
+      "grad_norm": 0.1138874888420105,
+      "learning_rate": 7e-06,
+      "loss": 0.2511,
+      "step": 190
+    },
+    {
+      "epoch": 0.24127585662403284,
+      "grad_norm": 0.12742598354816437,
+      "learning_rate": 7e-06,
+      "loss": 0.2247,
+      "step": 191
+    },
+    {
+      "epoch": 0.24253908100426338,
+      "grad_norm": 0.14605408906936646,
+      "learning_rate": 7e-06,
+      "loss": 0.2366,
+      "step": 192
+    },
+    {
+      "epoch": 0.24380230538449393,
+      "grad_norm": 0.10053393989801407,
+      "learning_rate": 7e-06,
+      "loss": 0.4711,
+      "step": 193
+    },
+    {
+      "epoch": 0.24506552976472445,
+      "grad_norm": 0.08829181641340256,
+      "learning_rate": 7e-06,
+      "loss": 0.1501,
+      "step": 194
+    },
+    {
+      "epoch": 0.246328754144955,
+      "grad_norm": 0.1484231799840927,
+      "learning_rate": 7e-06,
+      "loss": 0.2063,
+      "step": 195
+    },
+    {
+      "epoch": 0.24759197852518552,
+      "grad_norm": 0.17242765426635742,
+      "learning_rate": 7e-06,
+      "loss": 0.2317,
+      "step": 196
+    },
+    {
+      "epoch": 0.24885520290541607,
+      "grad_norm": 0.12016981095075607,
+      "learning_rate": 7e-06,
+      "loss": 0.2272,
+      "step": 197
+    },
+    {
+      "epoch": 0.2501184272856466,
+      "grad_norm": 0.1021333634853363,
+      "learning_rate": 7e-06,
+      "loss": 0.2402,
+      "step": 198
+    },
+    {
+      "epoch": 0.25138165166587717,
+      "grad_norm": 0.11179149895906448,
+      "learning_rate": 7e-06,
+      "loss": 0.246,
+      "step": 199
+    },
+    {
+      "epoch": 0.2526448760461077,
+      "grad_norm": 0.10811345279216766,
+      "learning_rate": 7e-06,
+      "loss": 0.2125,
+      "step": 200
+    },
+    {
+      "epoch": 0.2526448760461077,
+      "eval_correct_accuracy": 0.5988125530110263,
+      "eval_error_accuracy": 0.4524988743809095,
+      "eval_f1": 0.5154742907624302,
+      "eval_loss": 0.4177984297275543,
+      "eval_runtime": 35.0506,
+      "eval_samples_per_second": 97.003,
+      "eval_steps_per_second": 6.077,
+      "step": 200
+    },
+    {
+      "epoch": 0.2539081004263382,
+      "grad_norm": 0.12190552800893784,
+      "learning_rate": 7e-06,
+      "loss": 0.2372,
+      "step": 201
+    },
+    {
+      "epoch": 0.2551713248065688,
+      "grad_norm": 0.07629604637622833,
+      "learning_rate": 7e-06,
+      "loss": 0.1976,
+      "step": 202
+    },
+    {
+      "epoch": 0.2564345491867993,
+      "grad_norm": 0.10825781524181366,
+      "learning_rate": 7e-06,
+      "loss": 0.2169,
+      "step": 203
+    },
+    {
+      "epoch": 0.25769777356702983,
+      "grad_norm": 0.09181591868400574,
+      "learning_rate": 7e-06,
+      "loss": 0.2225,
+      "step": 204
+    },
+    {
+      "epoch": 0.2589609979472604,
+      "grad_norm": 0.1266108900308609,
+      "learning_rate": 7e-06,
+      "loss": 0.1858,
+      "step": 205
+    },
+    {
+      "epoch": 0.26022422232749093,
+      "grad_norm": 0.11106186360120773,
+      "learning_rate": 7e-06,
+      "loss": 0.2443,
+      "step": 206
+    },
+    {
+      "epoch": 0.26148744670772145,
+      "grad_norm": 0.11874532699584961,
+      "learning_rate": 7e-06,
+      "loss": 0.2224,
+      "step": 207
+    },
+    {
+      "epoch": 0.26275067108795197,
+      "grad_norm": 0.06901393085718155,
+      "learning_rate": 7e-06,
+      "loss": 0.1683,
+      "step": 208
+    },
+    {
+      "epoch": 0.26401389546818255,
+      "grad_norm": 0.1774539351463318,
+      "learning_rate": 7e-06,
+      "loss": 0.2588,
+      "step": 209
+    },
+    {
+      "epoch": 0.26527711984841307,
+      "grad_norm": 0.06564710289239883,
+      "learning_rate": 7e-06,
+      "loss": 0.1966,
+      "step": 210
+    },
+    {
+      "epoch": 0.2665403442286436,
+      "grad_norm": 0.1348266899585724,
+      "learning_rate": 7e-06,
+      "loss": 0.2094,
+      "step": 211
+    },
+    {
+      "epoch": 0.26780356860887417,
+      "grad_norm": 0.10280844569206238,
+      "learning_rate": 7e-06,
+      "loss": 0.2208,
+      "step": 212
+    },
+    {
+      "epoch": 0.2690667929891047,
+      "grad_norm": 0.09777519851922989,
+      "learning_rate": 7e-06,
+      "loss": 0.2259,
+      "step": 213
+    },
+    {
+      "epoch": 0.2703300173693352,
+      "grad_norm": 0.11480893194675446,
+      "learning_rate": 7e-06,
+      "loss": 0.2402,
+      "step": 214
+    },
+    {
+      "epoch": 0.2715932417495658,
+      "grad_norm": 0.17719541490077972,
+      "learning_rate": 7e-06,
+      "loss": 0.2692,
+      "step": 215
+    },
+    {
+      "epoch": 0.2728564661297963,
+      "grad_norm": 0.07069459557533264,
+      "learning_rate": 7e-06,
+      "loss": 0.1781,
+      "step": 216
+    },
+    {
+      "epoch": 0.2741196905100268,
+      "grad_norm": 0.06251855194568634,
+      "learning_rate": 7e-06,
+      "loss": 0.1819,
+      "step": 217
+    },
+    {
+      "epoch": 0.2753829148902574,
+      "grad_norm": 0.1753867119550705,
+      "learning_rate": 7e-06,
+      "loss": 0.2362,
+      "step": 218
+    },
+    {
+      "epoch": 0.2766461392704879,
+      "grad_norm": 0.1843274086713791,
+      "learning_rate": 7e-06,
+      "loss": 0.2638,
+      "step": 219
+    },
+    {
+      "epoch": 0.27790936365071844,
+      "grad_norm": 0.18026292324066162,
+      "learning_rate": 7e-06,
+      "loss": 0.2274,
+      "step": 220
+    },
+    {
+      "epoch": 0.279172588030949,
+      "grad_norm": 0.0640600174665451,
+      "learning_rate": 7e-06,
+      "loss": 0.3739,
+      "step": 221
+    },
+    {
+      "epoch": 0.28043581241117954,
+      "grad_norm": 0.091743104159832,
+      "learning_rate": 7e-06,
+      "loss": 0.2274,
+      "step": 222
+    },
+    {
+      "epoch": 0.28169903679141006,
+      "grad_norm": 0.10185891389846802,
+      "learning_rate": 7e-06,
+      "loss": 0.471,
+      "step": 223
+    },
+    {
+      "epoch": 0.28296226117164064,
+      "grad_norm": 0.08672218769788742,
+      "learning_rate": 7e-06,
+      "loss": 0.2171,
+      "step": 224
+    },
+    {
+      "epoch": 0.28422548555187116,
+      "grad_norm": 0.11758771538734436,
+      "learning_rate": 7e-06,
+      "loss": 0.2211,
+      "step": 225
+    },
+    {
+      "epoch": 0.2854887099321017,
+      "grad_norm": 0.07176447659730911,
+      "learning_rate": 7e-06,
+      "loss": 0.1967,
+      "step": 226
+    },
+    {
+      "epoch": 0.2867519343123322,
+      "grad_norm": 0.1037454828619957,
+      "learning_rate": 7e-06,
+      "loss": 0.2457,
+      "step": 227
+    },
+    {
+      "epoch": 0.2880151586925628,
+      "grad_norm": 0.07262658327817917,
+      "learning_rate": 7e-06,
+      "loss": 0.2026,
+      "step": 228
+    },
+    {
+      "epoch": 0.2892783830727933,
+      "grad_norm": 0.13171784579753876,
+      "learning_rate": 7e-06,
+      "loss": 0.209,
+      "step": 229
+    },
+    {
+      "epoch": 0.2905416074530238,
+      "grad_norm": 0.08208411931991577,
+      "learning_rate": 7e-06,
+      "loss": 0.1964,
+      "step": 230
+    },
+    {
+      "epoch": 0.2918048318332544,
+      "grad_norm": 0.10370495170354843,
+      "learning_rate": 7e-06,
+      "loss": 0.2202,
+      "step": 231
+    },
+    {
+      "epoch": 0.2930680562134849,
+      "grad_norm": 0.26831239461898804,
+      "learning_rate": 7e-06,
+      "loss": 0.2651,
+      "step": 232
+    },
+    {
+      "epoch": 0.29433128059371544,
+      "grad_norm": 0.12230344116687775,
+      "learning_rate": 7e-06,
+      "loss": 0.2265,
+      "step": 233
+    },
+    {
+      "epoch": 0.295594504973946,
+      "grad_norm": 0.08064734190702438,
+      "learning_rate": 7e-06,
+      "loss": 0.1711,
+      "step": 234
+    },
+    {
+      "epoch": 0.29685772935417654,
+      "grad_norm": 0.10691053420305252,
+      "learning_rate": 7e-06,
+      "loss": 0.1753,
+      "step": 235
+    },
+    {
+      "epoch": 0.29812095373440706,
+      "grad_norm": 0.08961788564920425,
+      "learning_rate": 7e-06,
+      "loss": 0.2682,
+      "step": 236
+    },
+    {
+      "epoch": 0.29938417811463763,
+      "grad_norm": 0.2417578548192978,
+      "learning_rate": 7e-06,
+      "loss": 0.2622,
+      "step": 237
+    },
+    {
+      "epoch": 0.30064740249486815,
+      "grad_norm": 0.09739197045564651,
+      "learning_rate": 7e-06,
+      "loss": 0.1747,
+      "step": 238
+    },
+    {
+      "epoch": 0.3019106268750987,
+      "grad_norm": 0.15415729582309723,
+      "learning_rate": 7e-06,
+      "loss": 0.2289,
+      "step": 239
+    },
+    {
+      "epoch": 0.30317385125532925,
+      "grad_norm": 0.08798956125974655,
+      "learning_rate": 7e-06,
+      "loss": 0.2076,
+      "step": 240
+    },
+    {
+      "epoch": 0.3044370756355598,
+      "grad_norm": 0.09532306343317032,
+      "learning_rate": 7e-06,
+      "loss": 0.3761,
+      "step": 241
+    },
+    {
+      "epoch": 0.3057003000157903,
+      "grad_norm": 0.06419141590595245,
+      "learning_rate": 7e-06,
+      "loss": 0.2308,
+      "step": 242
+    },
+    {
+      "epoch": 0.30696352439602087,
+      "grad_norm": 0.13766047358512878,
+      "learning_rate": 7e-06,
+      "loss": 0.2203,
+      "step": 243
+    },
+    {
+      "epoch": 0.3082267487762514,
+      "grad_norm": 0.09225375950336456,
+      "learning_rate": 7e-06,
+      "loss": 0.2023,
+      "step": 244
+    },
+    {
+      "epoch": 0.3094899731564819,
+      "grad_norm": 0.1266135275363922,
+      "learning_rate": 7e-06,
+      "loss": 0.2823,
+      "step": 245
+    },
+    {
+      "epoch": 0.31075319753671243,
+      "grad_norm": 0.17997467517852783,
+      "learning_rate": 7e-06,
+      "loss": 0.225,
+      "step": 246
+    },
+    {
+      "epoch": 0.312016421916943,
+      "grad_norm": 0.12776713073253632,
+      "learning_rate": 7e-06,
+      "loss": 0.1906,
+      "step": 247
+    },
+    {
+      "epoch": 0.31327964629717353,
+      "grad_norm": 0.14866380393505096,
+      "learning_rate": 7e-06,
+      "loss": 0.2119,
+      "step": 248
+    },
+    {
+      "epoch": 0.31454287067740405,
+      "grad_norm": 0.11824511736631393,
+      "learning_rate": 7e-06,
+      "loss": 0.2219,
+      "step": 249
+    },
+    {
+      "epoch": 0.31580609505763463,
+      "grad_norm": 0.14409460127353668,
+      "learning_rate": 7e-06,
+      "loss": 0.2116,
+      "step": 250
+    },
+    {
+      "epoch": 0.31706931943786515,
+      "grad_norm": 0.10304541140794754,
+      "learning_rate": 7e-06,
+      "loss": 0.2073,
+      "step": 251
+    },
+    {
+      "epoch": 0.31833254381809567,
+      "grad_norm": 0.09163326770067215,
+      "learning_rate": 7e-06,
+      "loss": 0.1882,
+      "step": 252
+    },
+    {
+      "epoch": 0.31959576819832625,
+      "grad_norm": 0.12692378461360931,
+      "learning_rate": 7e-06,
+      "loss": 0.2386,
+      "step": 253
+    },
+    {
+      "epoch": 0.32085899257855677,
+      "grad_norm": 0.1747879534959793,
+      "learning_rate": 7e-06,
+      "loss": 0.2054,
+      "step": 254
+    },
+    {
+      "epoch": 0.3221222169587873,
+      "grad_norm": 0.12346009910106659,
+      "learning_rate": 7e-06,
+      "loss": 0.2397,
+      "step": 255
+    },
+    {
+      "epoch": 0.32338544133901787,
+      "grad_norm": 0.1731298863887787,
+      "learning_rate": 7e-06,
+      "loss": 0.2575,
+      "step": 256
+    },
+    {
+      "epoch": 0.3246486657192484,
+      "grad_norm": 0.08011125028133392,
+      "learning_rate": 7e-06,
+      "loss": 0.215,
+      "step": 257
+    },
+    {
+      "epoch": 0.3259118900994789,
+      "grad_norm": 0.13160613179206848,
+      "learning_rate": 7e-06,
+      "loss": 0.222,
+      "step": 258
+    },
+    {
+      "epoch": 0.3271751144797095,
+      "grad_norm": 0.18522977828979492,
+      "learning_rate": 7e-06,
+      "loss": 0.2548,
+      "step": 259
+    },
+    {
+      "epoch": 0.32843833885994,
+      "grad_norm": 0.14212659001350403,
+      "learning_rate": 7e-06,
+      "loss": 0.3002,
+      "step": 260
+    },
+    {
+      "epoch": 0.3297015632401705,
+      "grad_norm": 0.13445697724819183,
+      "learning_rate": 7e-06,
+      "loss": 0.2351,
+      "step": 261
+    },
+    {
+      "epoch": 0.33096478762040105,
+      "grad_norm": 0.11636935919523239,
+      "learning_rate": 7e-06,
+      "loss": 0.2106,
+      "step": 262
+    },
+    {
+      "epoch": 0.3322280120006316,
+      "grad_norm": 0.14159604907035828,
+      "learning_rate": 7e-06,
+      "loss": 0.2531,
+      "step": 263
+    },
+    {
+      "epoch": 0.33349123638086214,
+      "grad_norm": 0.10319356620311737,
+      "learning_rate": 7e-06,
+      "loss": 0.2346,
+      "step": 264
+    },
+    {
+      "epoch": 0.33475446076109266,
+      "grad_norm": 0.09796885401010513,
+      "learning_rate": 7e-06,
+      "loss": 0.2059,
+      "step": 265
+    },
+    {
+      "epoch": 0.33601768514132324,
+      "grad_norm": 0.1082499697804451,
+      "learning_rate": 7e-06,
+      "loss": 0.1863,
+      "step": 266
+    },
+    {
+      "epoch": 0.33728090952155376,
+      "grad_norm": 0.12492396682500839,
+      "learning_rate": 7e-06,
+      "loss": 0.215,
+      "step": 267
+    },
+    {
+      "epoch": 0.3385441339017843,
+      "grad_norm": 0.06617411971092224,
+      "learning_rate": 7e-06,
+      "loss": 0.1782,
+      "step": 268
+    },
+    {
+      "epoch": 0.33980735828201486,
+      "grad_norm": 0.15060101449489594,
+      "learning_rate": 7e-06,
+      "loss": 0.3509,
+      "step": 269
+    },
+    {
+      "epoch": 0.3410705826622454,
+      "grad_norm": 0.11944282054901123,
+      "learning_rate": 7e-06,
+      "loss": 0.1862,
+      "step": 270
+    },
+    {
+      "epoch": 0.3423338070424759,
+      "grad_norm": 0.12389136850833893,
+      "learning_rate": 7e-06,
+      "loss": 0.2409,
+      "step": 271
+    },
+    {
+      "epoch": 0.3435970314227065,
+      "grad_norm": 0.11000983417034149,
+      "learning_rate": 7e-06,
+      "loss": 0.223,
+      "step": 272
+    },
+    {
+      "epoch": 0.344860255802937,
+      "grad_norm": 0.09012436866760254,
+      "learning_rate": 7e-06,
+      "loss": 0.2068,
+      "step": 273
+    },
+    {
+      "epoch": 0.3461234801831675,
+      "grad_norm": 0.27014490962028503,
+      "learning_rate": 7e-06,
+      "loss": 0.2087,
+      "step": 274
+    },
+    {
+      "epoch": 0.3473867045633981,
+      "grad_norm": 0.08035814762115479,
+      "learning_rate": 7e-06,
+      "loss": 0.262,
+      "step": 275
+    },
+    {
+      "epoch": 0.3486499289436286,
+      "grad_norm": 0.09129905700683594,
+      "learning_rate": 7e-06,
+      "loss": 0.2082,
+      "step": 276
+    },
+    {
+      "epoch": 0.34991315332385914,
+      "grad_norm": 0.11665099114179611,
+      "learning_rate": 7e-06,
+      "loss": 0.1841,
+      "step": 277
+    },
+    {
+      "epoch": 0.3511763777040897,
+      "grad_norm": 0.08812276273965836,
+      "learning_rate": 7e-06,
+      "loss": 0.2164,
+      "step": 278
+    },
+    {
+      "epoch": 0.35243960208432024,
+      "grad_norm": 0.1272403746843338,
+      "learning_rate": 7e-06,
+      "loss": 0.1927,
+      "step": 279
+    },
+    {
+      "epoch": 0.35370282646455076,
+      "grad_norm": 0.11256379634141922,
+      "learning_rate": 7e-06,
+      "loss": 0.2991,
+      "step": 280
+    },
+    {
+      "epoch": 0.3549660508447813,
+      "grad_norm": 0.15795424580574036,
+      "learning_rate": 7e-06,
+      "loss": 0.2363,
+      "step": 281
+    },
+    {
+      "epoch": 0.35622927522501185,
+      "grad_norm": 0.22632326185703278,
+      "learning_rate": 7e-06,
+      "loss": 0.2088,
+      "step": 282
+    },
+    {
+      "epoch": 0.3574924996052424,
+      "grad_norm": 0.18535131216049194,
+      "learning_rate": 7e-06,
+      "loss": 0.2746,
+      "step": 283
+    },
+    {
+      "epoch": 0.3587557239854729,
+      "grad_norm": 0.08579732477664948,
+      "learning_rate": 7e-06,
+      "loss": 0.1899,
+      "step": 284
+    },
+    {
+      "epoch": 0.3600189483657035,
+      "grad_norm": 0.10859379917383194,
+      "learning_rate": 7e-06,
+      "loss": 0.2067,
+      "step": 285
+    },
+    {
+      "epoch": 0.361282172745934,
+      "grad_norm": 0.07765299826860428,
+      "learning_rate": 7e-06,
+      "loss": 0.1761,
+      "step": 286
+    },
+    {
+      "epoch": 0.3625453971261645,
+      "grad_norm": 0.17053595185279846,
+      "learning_rate": 7e-06,
+      "loss": 0.2373,
+      "step": 287
+    },
+    {
+      "epoch": 0.3638086215063951,
+      "grad_norm": 0.09873699396848679,
+      "learning_rate": 7e-06,
+      "loss": 0.2176,
+      "step": 288
+    },
+    {
+      "epoch": 0.3650718458866256,
+      "grad_norm": 0.07418286055326462,
+      "learning_rate": 7e-06,
+      "loss": 0.1797,
+      "step": 289
+    },
+    {
+      "epoch": 0.36633507026685613,
+      "grad_norm": 0.11981359124183655,
+      "learning_rate": 7e-06,
+      "loss": 0.1988,
+      "step": 290
+    },
+    {
+      "epoch": 0.3675982946470867,
+      "grad_norm": 0.06424502283334732,
+      "learning_rate": 7e-06,
+      "loss": 0.2121,
+      "step": 291
+    },
+    {
+      "epoch": 0.36886151902731723,
+      "grad_norm": 0.09006607532501221,
+      "learning_rate": 7e-06,
+      "loss": 0.1945,
+      "step": 292
+    },
+    {
+      "epoch": 0.37012474340754775,
+      "grad_norm": 0.10973497480154037,
+      "learning_rate": 7e-06,
+      "loss": 0.2046,
+      "step": 293
+    },
+    {
+      "epoch": 0.37138796778777833,
+      "grad_norm": 0.09228470921516418,
+      "learning_rate": 7e-06,
+      "loss": 0.207,
+      "step": 294
+    },
+    {
+      "epoch": 0.37265119216800885,
+      "grad_norm": 0.10961271822452545,
+      "learning_rate": 7e-06,
+      "loss": 0.2128,
+      "step": 295
+    },
+    {
+      "epoch": 0.37391441654823937,
+      "grad_norm": 0.09072300046682358,
+      "learning_rate": 7e-06,
+      "loss": 0.4585,
+      "step": 296
+    },
+    {
+      "epoch": 0.37517764092846995,
+      "grad_norm": 0.08374742418527603,
+      "learning_rate": 7e-06,
+      "loss": 0.2178,
+      "step": 297
+    },
+    {
+      "epoch": 0.37644086530870047,
+      "grad_norm": 0.05344458296895027,
+      "learning_rate": 7e-06,
+      "loss": 0.1595,
+      "step": 298
+    },
+    {
+      "epoch": 0.377704089688931,
+      "grad_norm": 0.07841549813747406,
+      "learning_rate": 7e-06,
+      "loss": 0.2306,
+      "step": 299
+    },
+    {
+      "epoch": 0.3789673140691615,
+      "grad_norm": 0.09865035116672516,
+      "learning_rate": 7e-06,
+      "loss": 0.2274,
+      "step": 300
+    },
+    {
+      "epoch": 0.3789673140691615,
+      "eval_correct_accuracy": 0.5818490245971162,
+      "eval_error_accuracy": 0.494822152183701,
+      "eval_f1": 0.5348184158843582,
+      "eval_loss": 0.41273096203804016,
+      "eval_runtime": 35.0595,
+      "eval_samples_per_second": 96.978,
+      "eval_steps_per_second": 6.075,
+      "step": 300
+    },
+    {
+      "epoch": 0.3802305384493921,
+      "grad_norm": 0.11520479619503021,
+      "learning_rate": 7e-06,
+      "loss": 0.194,
+      "step": 301
+    },
+    {
+      "epoch": 0.3814937628296226,
+      "grad_norm": 0.17391149699687958,
+      "learning_rate": 7e-06,
+      "loss": 0.2053,
+      "step": 302
+    },
+    {
+      "epoch": 0.38275698720985313,
+      "grad_norm": 0.08927040547132492,
+      "learning_rate": 7e-06,
+      "loss": 0.1885,
+      "step": 303
+    },
+    {
+      "epoch": 0.3840202115900837,
+      "grad_norm": 0.10747874528169632,
+      "learning_rate": 7e-06,
+      "loss": 0.2357,
+      "step": 304
+    },
+    {
+      "epoch": 0.3852834359703142,
+      "grad_norm": 0.0821816474199295,
+      "learning_rate": 7e-06,
+      "loss": 0.2017,
+      "step": 305
+    },
+    {
+      "epoch": 0.38654666035054475,
+      "grad_norm": 0.08718965202569962,
+      "learning_rate": 7e-06,
+      "loss": 0.2333,
+      "step": 306
+    },
+    {
+      "epoch": 0.3878098847307753,
+      "grad_norm": 0.14753767848014832,
+      "learning_rate": 7e-06,
+      "loss": 0.2501,
+      "step": 307
+    },
+    {
+      "epoch": 0.38907310911100584,
+      "grad_norm": 0.12474358081817627,
+      "learning_rate": 7e-06,
+      "loss": 0.1895,
+      "step": 308
+    },
+    {
+      "epoch": 0.39033633349123636,
+      "grad_norm": 0.14409278333187103,
+      "learning_rate": 7e-06,
+      "loss": 0.208,
+      "step": 309
+    },
+    {
+      "epoch": 0.39159955787146694,
+      "grad_norm": 0.06918184459209442,
+      "learning_rate": 7e-06,
+      "loss": 0.1817,
+      "step": 310
+    },
+    {
+      "epoch": 0.39286278225169746,
+      "grad_norm": 0.08502199500799179,
+      "learning_rate": 7e-06,
+      "loss": 0.1832,
+      "step": 311
+    },
+    {
+      "epoch": 0.394126006631928,
+      "grad_norm": 0.06989938765764236,
+      "learning_rate": 7e-06,
+      "loss": 0.1741,
+      "step": 312
+    },
+    {
+      "epoch": 0.39538923101215856,
+      "grad_norm": 0.08131398260593414,
+      "learning_rate": 7e-06,
+      "loss": 0.226,
+      "step": 313
+    },
+    {
+      "epoch": 0.3966524553923891,
+      "grad_norm": 0.16150841116905212,
+      "learning_rate": 7e-06,
+      "loss": 0.2081,
+      "step": 314
+    },
+    {
+      "epoch": 0.3979156797726196,
+      "grad_norm": 0.10033854097127914,
+      "learning_rate": 7e-06,
+      "loss": 0.1757,
+      "step": 315
+    },
+    {
+      "epoch": 0.3991789041528501,
+      "grad_norm": 0.2944275438785553,
+      "learning_rate": 7e-06,
+      "loss": 0.2039,
+      "step": 316
+    },
+    {
+      "epoch": 0.4004421285330807,
+      "grad_norm": 0.09300543367862701,
+      "learning_rate": 7e-06,
+      "loss": 0.2403,
+      "step": 317
+    },
+    {
+      "epoch": 0.4017053529133112,
+      "grad_norm": 0.089630626142025,
+      "learning_rate": 7e-06,
+      "loss": 0.2457,
+      "step": 318
+    },
+    {
+      "epoch": 0.40296857729354174,
+      "grad_norm": 0.06648046523332596,
+      "learning_rate": 7e-06,
+      "loss": 0.2155,
+      "step": 319
+    },
+    {
+      "epoch": 0.4042318016737723,
+      "grad_norm": 0.18262338638305664,
+      "learning_rate": 7e-06,
+      "loss": 0.2087,
+      "step": 320
+    },
+    {
+      "epoch": 0.40549502605400284,
+      "grad_norm": 0.0919061154127121,
+      "learning_rate": 7e-06,
+      "loss": 0.2062,
+      "step": 321
+    },
+    {
+      "epoch": 0.40675825043423336,
+      "grad_norm": 0.113703154027462,
+      "learning_rate": 7e-06,
+      "loss": 0.1859,
+      "step": 322
+    },
+    {
+      "epoch": 0.40802147481446394,
+      "grad_norm": 0.20705194771289825,
+      "learning_rate": 7e-06,
+      "loss": 0.1769,
+      "step": 323
+    },
+    {
+      "epoch": 0.40928469919469446,
+      "grad_norm": 0.11209185421466827,
+      "learning_rate": 7e-06,
+      "loss": 0.1819,
+      "step": 324
+    },
+    {
+      "epoch": 0.410547923574925,
+      "grad_norm": 0.05803574621677399,
+      "learning_rate": 7e-06,
+      "loss": 0.1852,
+      "step": 325
+    },
+    {
+      "epoch": 0.41181114795515555,
+      "grad_norm": 0.16077323257923126,
+      "learning_rate": 7e-06,
+      "loss": 0.2117,
+      "step": 326
+    },
+    {
+      "epoch": 0.4130743723353861,
+      "grad_norm": 0.10078177601099014,
+      "learning_rate": 7e-06,
+      "loss": 0.193,
+      "step": 327
+    },
+    {
+      "epoch": 0.4143375967156166,
+      "grad_norm": 0.09989168494939804,
+      "learning_rate": 7e-06,
+      "loss": 0.2053,
+      "step": 328
+    },
+    {
+      "epoch": 0.4156008210958472,
+      "grad_norm": 0.13987579941749573,
+      "learning_rate": 7e-06,
+      "loss": 0.2678,
+      "step": 329
+    },
+    {
+      "epoch": 0.4168640454760777,
+      "grad_norm": 0.13039669394493103,
+      "learning_rate": 7e-06,
+      "loss": 0.1998,
+      "step": 330
+    },
+    {
+      "epoch": 0.4181272698563082,
+      "grad_norm": 0.1029522716999054,
+      "learning_rate": 7e-06,
+      "loss": 0.2337,
+      "step": 331
+    },
+    {
+      "epoch": 0.4193904942365388,
+      "grad_norm": 0.08752740174531937,
+      "learning_rate": 7e-06,
+      "loss": 0.1854,
+      "step": 332
+    },
+    {
+      "epoch": 0.4206537186167693,
+      "grad_norm": 0.07876112312078476,
+      "learning_rate": 7e-06,
+      "loss": 0.1909,
+      "step": 333
+    },
+    {
+      "epoch": 0.42191694299699983,
+      "grad_norm": 0.2126246988773346,
+      "learning_rate": 7e-06,
+      "loss": 0.2535,
+      "step": 334
+    },
+    {
+      "epoch": 0.42318016737723035,
+      "grad_norm": 0.11913909763097763,
+      "learning_rate": 7e-06,
+      "loss": 0.2184,
+      "step": 335
+    },
+    {
+      "epoch": 0.42444339175746093,
+      "grad_norm": 0.1513642817735672,
+      "learning_rate": 7e-06,
+      "loss": 0.1994,
+      "step": 336
+    },
+    {
+      "epoch": 0.42570661613769145,
+      "grad_norm": 0.1306588500738144,
+      "learning_rate": 7e-06,
+      "loss": 0.2126,
+      "step": 337
+    },
+    {
+      "epoch": 0.426969840517922,
+      "grad_norm": 0.1171175092458725,
+      "learning_rate": 7e-06,
+      "loss": 0.1994,
+      "step": 338
+    },
+    {
+      "epoch": 0.42823306489815255,
+      "grad_norm": 0.05895727127790451,
+      "learning_rate": 7e-06,
+      "loss": 0.155,
+      "step": 339
+    },
+    {
+      "epoch": 0.42949628927838307,
+      "grad_norm": 0.08570288121700287,
+      "learning_rate": 7e-06,
+      "loss": 0.1986,
+      "step": 340
+    },
+    {
+      "epoch": 0.4307595136586136,
+      "grad_norm": 0.0765470489859581,
+      "learning_rate": 7e-06,
+      "loss": 0.1696,
+      "step": 341
+    },
+    {
+      "epoch": 0.43202273803884417,
+      "grad_norm": 0.08286664634943008,
+      "learning_rate": 7e-06,
+      "loss": 0.1626,
+      "step": 342
+    },
+    {
+      "epoch": 0.4332859624190747,
+      "grad_norm": 0.2282284051179886,
+      "learning_rate": 7e-06,
+      "loss": 0.1877,
+      "step": 343
+    },
+    {
+      "epoch": 0.4345491867993052,
+      "grad_norm": 0.11943413317203522,
+      "learning_rate": 7e-06,
+      "loss": 0.1897,
+      "step": 344
+    },
+    {
+      "epoch": 0.4358124111795358,
+      "grad_norm": 0.10935524851083755,
+      "learning_rate": 7e-06,
+      "loss": 0.1828,
+      "step": 345
+    },
+    {
+      "epoch": 0.4370756355597663,
+      "grad_norm": 0.07996437698602676,
+      "learning_rate": 7e-06,
+      "loss": 0.1991,
+      "step": 346
+    },
+    {
+      "epoch": 0.4383388599399968,
+      "grad_norm": 0.13088780641555786,
+      "learning_rate": 7e-06,
+      "loss": 0.3873,
+      "step": 347
+    },
+    {
+      "epoch": 0.4396020843202274,
+      "grad_norm": 0.15082432329654694,
+      "learning_rate": 7e-06,
+      "loss": 0.2091,
+      "step": 348
+    },
+    {
+      "epoch": 0.4408653087004579,
+      "grad_norm": 0.132376566529274,
+      "learning_rate": 7e-06,
+      "loss": 0.2471,
+      "step": 349
+    },
+    {
+      "epoch": 0.44212853308068845,
+      "grad_norm": 0.07796452194452286,
+      "learning_rate": 7e-06,
+      "loss": 0.1751,
+      "step": 350
+    },
+    {
+      "epoch": 0.443391757460919,
+      "grad_norm": 0.12849055230617523,
+      "learning_rate": 7e-06,
+      "loss": 0.3155,
+      "step": 351
+    },
+    {
+      "epoch": 0.44465498184114954,
+      "grad_norm": 0.06422396749258041,
+      "learning_rate": 7e-06,
+      "loss": 0.1486,
+      "step": 352
+    },
+    {
+      "epoch": 0.44591820622138006,
+      "grad_norm": 0.1800646334886551,
+      "learning_rate": 7e-06,
+      "loss": 0.1993,
+      "step": 353
+    },
+    {
+      "epoch": 0.4471814306016106,
+      "grad_norm": 0.15747664868831635,
+      "learning_rate": 7e-06,
+      "loss": 0.209,
+      "step": 354
+    },
+    {
+      "epoch": 0.44844465498184116,
+      "grad_norm": 0.11023043096065521,
+      "learning_rate": 7e-06,
+      "loss": 0.21,
+      "step": 355
+    },
+    {
+      "epoch": 0.4497078793620717,
+      "grad_norm": 0.0927424430847168,
+      "learning_rate": 7e-06,
+      "loss": 0.1745,
+      "step": 356
+    },
+    {
+      "epoch": 0.4509711037423022,
+      "grad_norm": 0.08278126269578934,
+      "learning_rate": 7e-06,
+      "loss": 0.3105,
+      "step": 357
+    },
+    {
+      "epoch": 0.4522343281225328,
+      "grad_norm": 0.08794251829385757,
+      "learning_rate": 7e-06,
+      "loss": 0.1979,
+      "step": 358
+    },
+    {
+      "epoch": 0.4534975525027633,
+      "grad_norm": 0.11653570830821991,
+      "learning_rate": 7e-06,
+      "loss": 0.1828,
+      "step": 359
+    },
+    {
+      "epoch": 0.4547607768829938,
+      "grad_norm": 0.11114069074392319,
+      "learning_rate": 7e-06,
+      "loss": 0.1826,
+      "step": 360
+    },
+    {
+      "epoch": 0.4560240012632244,
+      "grad_norm": 0.2608173191547394,
+      "learning_rate": 7e-06,
+      "loss": 0.2304,
+      "step": 361
+    },
+    {
+      "epoch": 0.4572872256434549,
+      "grad_norm": 0.08441725373268127,
+      "learning_rate": 7e-06,
+      "loss": 0.1757,
+      "step": 362
+    },
+    {
+      "epoch": 0.45855045002368544,
+      "grad_norm": 0.10891429334878922,
+      "learning_rate": 7e-06,
+      "loss": 0.2122,
+      "step": 363
+    },
+    {
+      "epoch": 0.459813674403916,
+      "grad_norm": 0.07106776535511017,
+      "learning_rate": 7e-06,
+      "loss": 0.1721,
+      "step": 364
+    },
+    {
+      "epoch": 0.46107689878414654,
+      "grad_norm": 0.08842181414365768,
+      "learning_rate": 7e-06,
+      "loss": 0.2226,
+      "step": 365
+    },
+    {
+      "epoch": 0.46234012316437706,
+      "grad_norm": 0.0870131179690361,
+      "learning_rate": 7e-06,
+      "loss": 0.2474,
+      "step": 366
+    },
+    {
+      "epoch": 0.46360334754460764,
+      "grad_norm": 0.14521507918834686,
+      "learning_rate": 7e-06,
+      "loss": 0.2615,
+      "step": 367
+    },
+    {
+      "epoch": 0.46486657192483816,
+      "grad_norm": 0.09553767740726471,
+      "learning_rate": 7e-06,
+      "loss": 0.1791,
+      "step": 368
+    },
+    {
+      "epoch": 0.4661297963050687,
+      "grad_norm": 0.11010967195034027,
+      "learning_rate": 7e-06,
+      "loss": 0.1874,
+      "step": 369
+    },
+    {
+      "epoch": 0.4673930206852992,
+      "grad_norm": 0.09533923864364624,
+      "learning_rate": 7e-06,
+      "loss": 0.228,
+      "step": 370
+    },
+    {
+      "epoch": 0.4686562450655298,
+      "grad_norm": 0.0890774354338646,
+      "learning_rate": 7e-06,
+      "loss": 0.2345,
+      "step": 371
+    },
+    {
+      "epoch": 0.4699194694457603,
+      "grad_norm": 0.12173017859458923,
+      "learning_rate": 7e-06,
+      "loss": 0.2082,
+      "step": 372
+    },
+    {
+      "epoch": 0.4711826938259908,
+      "grad_norm": 0.0602993369102478,
+      "learning_rate": 7e-06,
+      "loss": 0.1893,
+      "step": 373
+    },
+    {
+      "epoch": 0.4724459182062214,
+      "grad_norm": 0.13122287392616272,
+      "learning_rate": 7e-06,
+      "loss": 0.2178,
+      "step": 374
+    },
+    {
+      "epoch": 0.4737091425864519,
+      "grad_norm": 0.07299527525901794,
+      "learning_rate": 7e-06,
+      "loss": 0.1888,
+      "step": 375
+    },
+    {
+      "epoch": 0.47497236696668244,
+      "grad_norm": 0.08244926482439041,
+      "learning_rate": 7e-06,
+      "loss": 0.174,
+      "step": 376
+    },
+    {
+      "epoch": 0.476235591346913,
+      "grad_norm": 0.08397851884365082,
+      "learning_rate": 7e-06,
+      "loss": 0.2108,
+      "step": 377
+    },
+    {
+      "epoch": 0.47749881572714353,
+      "grad_norm": 0.07320383936166763,
+      "learning_rate": 7e-06,
+      "loss": 0.1804,
+      "step": 378
+    },
+    {
+      "epoch": 0.47876204010737405,
+      "grad_norm": 0.0849589854478836,
+      "learning_rate": 7e-06,
+      "loss": 0.1829,
+      "step": 379
+    },
+    {
+      "epoch": 0.48002526448760463,
+      "grad_norm": 0.10207744687795639,
+      "learning_rate": 7e-06,
+      "loss": 0.2174,
+      "step": 380
+    },
+    {
+      "epoch": 0.48128848886783515,
+      "grad_norm": 0.07175120711326599,
+      "learning_rate": 7e-06,
+      "loss": 0.1847,
+      "step": 381
+    },
+    {
+      "epoch": 0.4825517132480657,
+      "grad_norm": 0.10446271300315857,
+      "learning_rate": 7e-06,
+      "loss": 0.216,
+      "step": 382
+    },
+    {
+      "epoch": 0.48381493762829625,
+      "grad_norm": 0.23799718916416168,
+      "learning_rate": 7e-06,
+      "loss": 0.2136,
+      "step": 383
+    },
+    {
+      "epoch": 0.48507816200852677,
+      "grad_norm": 0.11531874537467957,
+      "learning_rate": 7e-06,
+      "loss": 0.2357,
+      "step": 384
+    },
+    {
+      "epoch": 0.4863413863887573,
+      "grad_norm": 0.10034700483083725,
+      "learning_rate": 7e-06,
+      "loss": 0.2258,
+      "step": 385
+    },
+    {
+      "epoch": 0.48760461076898787,
+      "grad_norm": 0.0934348776936531,
+      "learning_rate": 7e-06,
+      "loss": 0.284,
+      "step": 386
+    },
+    {
+      "epoch": 0.4888678351492184,
+      "grad_norm": 0.165315181016922,
+      "learning_rate": 7e-06,
+      "loss": 0.2264,
+      "step": 387
+    },
+    {
+      "epoch": 0.4901310595294489,
+      "grad_norm": 0.1086471751332283,
+      "learning_rate": 7e-06,
+      "loss": 0.2028,
+      "step": 388
+    },
+    {
+      "epoch": 0.49139428390967943,
+      "grad_norm": 0.14764176309108734,
+      "learning_rate": 7e-06,
+      "loss": 0.1775,
+      "step": 389
+    },
+    {
+      "epoch": 0.49265750828991,
+      "grad_norm": 0.21734580397605896,
+      "learning_rate": 7e-06,
+      "loss": 0.1924,
+      "step": 390
+    },
+    {
+      "epoch": 0.4939207326701405,
+      "grad_norm": 0.0923137441277504,
+      "learning_rate": 7e-06,
+      "loss": 0.2031,
+      "step": 391
+    },
+    {
+      "epoch": 0.49518395705037105,
+      "grad_norm": 0.06933951377868652,
+      "learning_rate": 7e-06,
+      "loss": 0.1739,
+      "step": 392
+    },
+    {
+      "epoch": 0.4964471814306016,
+      "grad_norm": 0.0930216833949089,
+      "learning_rate": 7e-06,
+      "loss": 0.2083,
+      "step": 393
+    },
+    {
+      "epoch": 0.49771040581083215,
+      "grad_norm": 0.08797884732484818,
+      "learning_rate": 7e-06,
+      "loss": 0.2636,
+      "step": 394
+    },
+    {
+      "epoch": 0.49897363019106267,
+      "grad_norm": 0.0919070690870285,
+      "learning_rate": 7e-06,
+      "loss": 0.2154,
+      "step": 395
+    },
+    {
+      "epoch": 0.5002368545712932,
+      "grad_norm": 0.07787168025970459,
+      "learning_rate": 7e-06,
+      "loss": 0.2207,
+      "step": 396
+    },
+    {
+      "epoch": 0.5015000789515237,
+      "grad_norm": 0.11572758853435516,
+      "learning_rate": 7e-06,
+      "loss": 0.2002,
+      "step": 397
+    },
+    {
+      "epoch": 0.5027633033317543,
+      "grad_norm": 0.08295108377933502,
+      "learning_rate": 7e-06,
+      "loss": 0.2614,
+      "step": 398
+    },
+    {
+      "epoch": 0.5040265277119849,
+      "grad_norm": 0.0625801831483841,
+      "learning_rate": 7e-06,
+      "loss": 0.1644,
+      "step": 399
+    },
+    {
+      "epoch": 0.5052897520922154,
+      "grad_norm": 0.07405094802379608,
+      "learning_rate": 7e-06,
+      "loss": 0.2234,
+      "step": 400
+    },
+    {
+      "epoch": 0.5052897520922154,
+      "eval_correct_accuracy": 0.5852417302798982,
+      "eval_error_accuracy": 0.4709590274651058,
+      "eval_f1": 0.5219175883059916,
+      "eval_loss": 0.4229665994644165,
+      "eval_runtime": 35.2003,
+      "eval_samples_per_second": 96.59,
+      "eval_steps_per_second": 6.051,
+      "step": 400
+    },
+    {
+      "epoch": 0.5065529764724459,
+      "grad_norm": 0.09175197780132294,
+      "learning_rate": 7e-06,
+      "loss": 0.2027,
+      "step": 401
+    },
+    {
+      "epoch": 0.5078162008526764,
+      "grad_norm": 0.1550239622592926,
+      "learning_rate": 7e-06,
+      "loss": 0.2559,
+      "step": 402
+    },
+    {
+      "epoch": 0.509079425232907,
+      "grad_norm": 0.139438658952713,
+      "learning_rate": 7e-06,
+      "loss": 0.1762,
+      "step": 403
+    },
+    {
+      "epoch": 0.5103426496131376,
+      "grad_norm": 0.11481575667858124,
+      "learning_rate": 7e-06,
+      "loss": 0.1623,
+      "step": 404
+    },
+    {
+      "epoch": 0.5116058739933681,
+      "grad_norm": 0.05404340475797653,
+      "learning_rate": 7e-06,
+      "loss": 0.1961,
+      "step": 405
+    },
+    {
+      "epoch": 0.5128690983735986,
+      "grad_norm": 0.14743672311306,
+      "learning_rate": 7e-06,
+      "loss": 0.2279,
+      "step": 406
+    },
+    {
+      "epoch": 0.5141323227538291,
+      "grad_norm": 0.11647465080022812,
+      "learning_rate": 7e-06,
+      "loss": 0.2001,
+      "step": 407
+    },
+    {
+      "epoch": 0.5153955471340597,
+      "grad_norm": 0.08203577995300293,
+      "learning_rate": 7e-06,
+      "loss": 0.1752,
+      "step": 408
+    },
+    {
+      "epoch": 0.5166587715142902,
+      "grad_norm": 0.11073414981365204,
+      "learning_rate": 7e-06,
+      "loss": 0.3686,
+      "step": 409
+    },
+    {
+      "epoch": 0.5179219958945208,
+      "grad_norm": 0.11331301182508469,
+      "learning_rate": 7e-06,
+      "loss": 0.3378,
+      "step": 410
+    },
+    {
+      "epoch": 0.5191852202747513,
+      "grad_norm": 0.09435959905385971,
+      "learning_rate": 7e-06,
+      "loss": 0.1562,
+      "step": 411
+    },
+    {
+      "epoch": 0.5204484446549819,
+      "grad_norm": 0.08365237712860107,
+      "learning_rate": 7e-06,
+      "loss": 0.1926,
+      "step": 412
+    },
+    {
+      "epoch": 0.5217116690352124,
+      "grad_norm": 0.08092326670885086,
+      "learning_rate": 7e-06,
+      "loss": 0.1931,
+      "step": 413
+    },
+    {
+      "epoch": 0.5229748934154429,
+      "grad_norm": 0.07763849943876266,
+      "learning_rate": 7e-06,
+      "loss": 0.2038,
+      "step": 414
+    },
+    {
+      "epoch": 0.5242381177956734,
+      "grad_norm": 0.1350603550672531,
+      "learning_rate": 7e-06,
+      "loss": 0.2392,
+      "step": 415
+    },
+    {
+      "epoch": 0.5255013421759039,
+      "grad_norm": 0.10287491232156754,
+      "learning_rate": 7e-06,
+      "loss": 0.196,
+      "step": 416
+    },
+    {
+      "epoch": 0.5267645665561346,
+      "grad_norm": 0.0719987079501152,
+      "learning_rate": 7e-06,
+      "loss": 0.2016,
+      "step": 417
+    },
+    {
+      "epoch": 0.5280277909363651,
+      "grad_norm": 0.22227227687835693,
+      "learning_rate": 7e-06,
+      "loss": 0.2131,
+      "step": 418
+    },
+    {
+      "epoch": 0.5292910153165956,
+      "grad_norm": 0.06136275455355644,
+      "learning_rate": 7e-06,
+      "loss": 0.2044,
+      "step": 419
+    },
+    {
+      "epoch": 0.5305542396968261,
+      "grad_norm": 0.0627446323633194,
+      "learning_rate": 7e-06,
+      "loss": 0.1793,
+      "step": 420
+    },
+    {
+      "epoch": 0.5318174640770567,
+      "grad_norm": 0.20960237085819244,
+      "learning_rate": 7e-06,
+      "loss": 0.1982,
+      "step": 421
+    },
+    {
+      "epoch": 0.5330806884572872,
+      "grad_norm": 0.11971580237150192,
+      "learning_rate": 7e-06,
+      "loss": 0.2587,
+      "step": 422
+    },
+    {
+      "epoch": 0.5343439128375178,
+      "grad_norm": 0.0932474359869957,
+      "learning_rate": 7e-06,
+      "loss": 0.1947,
+      "step": 423
+    },
+    {
+      "epoch": 0.5356071372177483,
+      "grad_norm": 0.09686949849128723,
+      "learning_rate": 7e-06,
+      "loss": 0.1907,
+      "step": 424
+    },
+    {
+      "epoch": 0.5368703615979789,
+      "grad_norm": 0.07940957695245743,
+      "learning_rate": 7e-06,
+      "loss": 0.2535,
+      "step": 425
+    },
+    {
+      "epoch": 0.5381335859782094,
+      "grad_norm": 0.09676375240087509,
+      "learning_rate": 7e-06,
+      "loss": 0.2038,
+      "step": 426
+    },
+    {
+      "epoch": 0.5393968103584399,
+      "grad_norm": 0.09989267587661743,
+      "learning_rate": 7e-06,
+      "loss": 0.1976,
+      "step": 427
+    },
+    {
+      "epoch": 0.5406600347386704,
+      "grad_norm": 0.0823327898979187,
+      "learning_rate": 7e-06,
+      "loss": 0.1708,
+      "step": 428
+    },
+    {
+      "epoch": 0.541923259118901,
+      "grad_norm": 0.10633084177970886,
+      "learning_rate": 7e-06,
+      "loss": 0.1619,
+      "step": 429
+    },
+    {
+      "epoch": 0.5431864834991316,
+      "grad_norm": 0.08448205143213272,
+      "learning_rate": 7e-06,
+      "loss": 0.1854,
+      "step": 430
+    },
+    {
+      "epoch": 0.5444497078793621,
+      "grad_norm": 0.07697522640228271,
+      "learning_rate": 7e-06,
+      "loss": 0.188,
+      "step": 431
+    },
+    {
+      "epoch": 0.5457129322595926,
+      "grad_norm": 0.1970750391483307,
+      "learning_rate": 7e-06,
+      "loss": 0.2479,
+      "step": 432
+    },
+    {
+      "epoch": 0.5469761566398231,
+      "grad_norm": 0.08660274744033813,
+      "learning_rate": 7e-06,
+      "loss": 0.1536,
+      "step": 433
+    },
+    {
+      "epoch": 0.5482393810200537,
+      "grad_norm": 0.0931171253323555,
+      "learning_rate": 7e-06,
+      "loss": 0.2169,
+      "step": 434
+    },
+    {
+      "epoch": 0.5495026054002842,
+      "grad_norm": 0.07317376136779785,
+      "learning_rate": 7e-06,
+      "loss": 0.1941,
+      "step": 435
+    },
+    {
+      "epoch": 0.5507658297805148,
+      "grad_norm": 0.07506151497364044,
+      "learning_rate": 7e-06,
+      "loss": 0.1761,
+      "step": 436
+    },
+    {
+      "epoch": 0.5520290541607453,
+      "grad_norm": 0.059854380786418915,
+      "learning_rate": 7e-06,
+      "loss": 0.3068,
+      "step": 437
+    },
+    {
+      "epoch": 0.5532922785409758,
+      "grad_norm": 0.1609865128993988,
+      "learning_rate": 7e-06,
+      "loss": 0.1857,
+      "step": 438
+    },
+    {
+      "epoch": 0.5545555029212064,
+      "grad_norm": 0.08996118605136871,
+      "learning_rate": 7e-06,
+      "loss": 0.1805,
+      "step": 439
+    },
+    {
+      "epoch": 0.5558187273014369,
+      "grad_norm": 0.12089324742555618,
+      "learning_rate": 7e-06,
+      "loss": 0.1826,
+      "step": 440
+    },
+    {
+      "epoch": 0.5570819516816674,
+      "grad_norm": 0.08772964775562286,
+      "learning_rate": 7e-06,
+      "loss": 0.1791,
+      "step": 441
+    },
+    {
+      "epoch": 0.558345176061898,
+      "grad_norm": 0.10977458208799362,
+      "learning_rate": 7e-06,
+      "loss": 0.2084,
+      "step": 442
+    },
+    {
+      "epoch": 0.5596084004421286,
+      "grad_norm": 0.09188458323478699,
+      "learning_rate": 7e-06,
+      "loss": 0.3045,
+      "step": 443
+    },
+    {
+      "epoch": 0.5608716248223591,
+      "grad_norm": 0.07033522427082062,
+      "learning_rate": 7e-06,
+      "loss": 0.1926,
+      "step": 444
+    },
+    {
+      "epoch": 0.5621348492025896,
+      "grad_norm": 0.0652671530842781,
+      "learning_rate": 7e-06,
+      "loss": 0.1998,
+      "step": 445
+    },
+    {
+      "epoch": 0.5633980735828201,
+      "grad_norm": 0.07860173285007477,
+      "learning_rate": 7e-06,
+      "loss": 0.2172,
+      "step": 446
+    },
+    {
+      "epoch": 0.5646612979630506,
+      "grad_norm": 0.0679745227098465,
+      "learning_rate": 7e-06,
+      "loss": 0.179,
+      "step": 447
+    },
+    {
+      "epoch": 0.5659245223432813,
+      "grad_norm": 0.10545714199542999,
+      "learning_rate": 7e-06,
+      "loss": 0.1547,
+      "step": 448
+    },
+    {
+      "epoch": 0.5671877467235118,
+      "grad_norm": 0.07516340911388397,
+      "learning_rate": 7e-06,
+      "loss": 0.1643,
+      "step": 449
+    },
+    {
+      "epoch": 0.5684509711037423,
+      "grad_norm": 0.06046690791845322,
+      "learning_rate": 7e-06,
+      "loss": 0.1624,
+      "step": 450
+    },
+    {
+      "epoch": 0.5697141954839728,
+      "grad_norm": 0.07889428734779358,
+      "learning_rate": 7e-06,
+      "loss": 0.1808,
+      "step": 451
+    },
+    {
+      "epoch": 0.5709774198642034,
+      "grad_norm": 0.08698045462369919,
+      "learning_rate": 7e-06,
+      "loss": 0.2257,
+      "step": 452
+    },
+    {
+      "epoch": 0.5722406442444339,
+      "grad_norm": 0.08498376607894897,
+      "learning_rate": 7e-06,
+      "loss": 0.1773,
+      "step": 453
+    },
+    {
+      "epoch": 0.5735038686246644,
+      "grad_norm": 0.12781842052936554,
+      "learning_rate": 7e-06,
+      "loss": 0.258,
+      "step": 454
+    },
+    {
+      "epoch": 0.574767093004895,
+      "grad_norm": 0.11314232647418976,
+      "learning_rate": 7e-06,
+      "loss": 0.1608,
+      "step": 455
+    },
+    {
+      "epoch": 0.5760303173851256,
+      "grad_norm": 0.3507859408855438,
+      "learning_rate": 7e-06,
+      "loss": 0.1656,
+      "step": 456
+    },
+    {
+      "epoch": 0.5772935417653561,
+      "grad_norm": 0.08430968970060349,
+      "learning_rate": 7e-06,
+      "loss": 0.201,
+      "step": 457
+    },
+    {
+      "epoch": 0.5785567661455866,
+      "grad_norm": 0.09361864626407623,
+      "learning_rate": 7e-06,
+      "loss": 0.2034,
+      "step": 458
+    },
+    {
+      "epoch": 0.5798199905258171,
+      "grad_norm": 0.09698746353387833,
+      "learning_rate": 7e-06,
+      "loss": 0.2444,
+      "step": 459
+    },
+    {
+      "epoch": 0.5810832149060476,
+      "grad_norm": 0.16023226082324982,
+      "learning_rate": 7e-06,
+      "loss": 0.19,
+      "step": 460
+    },
+    {
+      "epoch": 0.5823464392862783,
+      "grad_norm": 0.08157742023468018,
+      "learning_rate": 7e-06,
+      "loss": 0.2149,
+      "step": 461
+    },
+    {
+      "epoch": 0.5836096636665088,
+      "grad_norm": 0.09342104941606522,
+      "learning_rate": 7e-06,
+      "loss": 0.2022,
+      "step": 462
+    },
+    {
+      "epoch": 0.5848728880467393,
+      "grad_norm": 0.07538167387247086,
+      "learning_rate": 7e-06,
+      "loss": 0.2516,
+      "step": 463
+    },
+    {
+      "epoch": 0.5861361124269698,
+      "grad_norm": 0.12720584869384766,
+      "learning_rate": 7e-06,
+      "loss": 0.2208,
+      "step": 464
+    },
+    {
+      "epoch": 0.5873993368072004,
+      "grad_norm": 0.08613109588623047,
+      "learning_rate": 7e-06,
+      "loss": 0.2193,
+      "step": 465
+    },
+    {
+      "epoch": 0.5886625611874309,
+      "grad_norm": 0.08249358087778091,
+      "learning_rate": 7e-06,
+      "loss": 0.1676,
+      "step": 466
+    },
+    {
+      "epoch": 0.5899257855676615,
+      "grad_norm": 0.1288759857416153,
+      "learning_rate": 7e-06,
+      "loss": 0.3238,
+      "step": 467
+    },
+    {
+      "epoch": 0.591189009947892,
+      "grad_norm": 0.05164247751235962,
+      "learning_rate": 7e-06,
+      "loss": 0.1418,
+      "step": 468
+    },
+    {
+      "epoch": 0.5924522343281226,
+      "grad_norm": 0.0994580090045929,
+      "learning_rate": 7e-06,
+      "loss": 0.2238,
+      "step": 469
+    },
+    {
+      "epoch": 0.5937154587083531,
+      "grad_norm": 0.09253129363059998,
+      "learning_rate": 7e-06,
+      "loss": 0.2006,
+      "step": 470
+    },
+    {
+      "epoch": 0.5949786830885836,
+      "grad_norm": 0.12927457690238953,
+      "learning_rate": 7e-06,
+      "loss": 0.2027,
+      "step": 471
+    },
+    {
+      "epoch": 0.5962419074688141,
+      "grad_norm": 0.11283280700445175,
+      "learning_rate": 7e-06,
+      "loss": 0.2169,
+      "step": 472
+    },
+    {
+      "epoch": 0.5975051318490446,
+      "grad_norm": 0.174880713224411,
+      "learning_rate": 7e-06,
+      "loss": 0.1746,
+      "step": 473
+    },
+    {
+      "epoch": 0.5987683562292753,
+      "grad_norm": 0.11614017933607101,
+      "learning_rate": 7e-06,
+      "loss": 0.2968,
+      "step": 474
+    },
+    {
+      "epoch": 0.6000315806095058,
+      "grad_norm": 0.07818127423524857,
+      "learning_rate": 7e-06,
+      "loss": 0.1643,
+      "step": 475
+    },
+    {
+      "epoch": 0.6012948049897363,
+      "grad_norm": 0.08300397545099258,
+      "learning_rate": 7e-06,
+      "loss": 0.1712,
+      "step": 476
+    },
+    {
+      "epoch": 0.6025580293699668,
+      "grad_norm": 0.1014489009976387,
+      "learning_rate": 7e-06,
+      "loss": 0.2089,
+      "step": 477
+    },
+    {
+      "epoch": 0.6038212537501974,
+      "grad_norm": 0.11591055244207382,
+      "learning_rate": 7e-06,
+      "loss": 0.2231,
+      "step": 478
+    },
+    {
+      "epoch": 0.6050844781304279,
+      "grad_norm": 0.13137224316596985,
+      "learning_rate": 7e-06,
+      "loss": 0.1844,
+      "step": 479
+    },
+    {
+      "epoch": 0.6063477025106585,
+      "grad_norm": 0.09693000465631485,
+      "learning_rate": 7e-06,
+      "loss": 0.2033,
+      "step": 480
+    },
+    {
+      "epoch": 0.607610926890889,
+      "grad_norm": 0.1250012218952179,
+      "learning_rate": 7e-06,
+      "loss": 0.2219,
+      "step": 481
+    },
+    {
+      "epoch": 0.6088741512711195,
+      "grad_norm": 0.18828216195106506,
+      "learning_rate": 7e-06,
+      "loss": 0.2427,
+      "step": 482
+    },
+    {
+      "epoch": 0.6101373756513501,
+      "grad_norm": 0.11057613044977188,
+      "learning_rate": 7e-06,
+      "loss": 0.2193,
+      "step": 483
+    },
+    {
+      "epoch": 0.6114006000315806,
+      "grad_norm": 0.15523040294647217,
+      "learning_rate": 7e-06,
+      "loss": 0.1883,
+      "step": 484
+    },
+    {
+      "epoch": 0.6126638244118111,
+      "grad_norm": 0.16174635291099548,
+      "learning_rate": 7e-06,
+      "loss": 0.2067,
+      "step": 485
+    },
+    {
+      "epoch": 0.6139270487920417,
+      "grad_norm": 0.2738276422023773,
+      "learning_rate": 7e-06,
+      "loss": 0.2292,
+      "step": 486
+    },
+    {
+      "epoch": 0.6151902731722723,
+      "grad_norm": 0.05995164066553116,
+      "learning_rate": 7e-06,
+      "loss": 0.1956,
+      "step": 487
+    },
+    {
+      "epoch": 0.6164534975525028,
+      "grad_norm": 0.05519471690058708,
+      "learning_rate": 7e-06,
+      "loss": 0.1501,
+      "step": 488
+    },
+    {
+      "epoch": 0.6177167219327333,
+      "grad_norm": 0.08133929967880249,
+      "learning_rate": 7e-06,
+      "loss": 0.2224,
+      "step": 489
+    },
+    {
+      "epoch": 0.6189799463129638,
+      "grad_norm": 0.12239203602075577,
+      "learning_rate": 7e-06,
+      "loss": 0.2503,
+      "step": 490
+    },
+    {
+      "epoch": 0.6202431706931943,
+      "grad_norm": 0.11004896461963654,
+      "learning_rate": 7e-06,
+      "loss": 0.2796,
+      "step": 491
+    },
+    {
+      "epoch": 0.6215063950734249,
+      "grad_norm": 0.1722228229045868,
+      "learning_rate": 7e-06,
+      "loss": 0.1807,
+      "step": 492
+    },
+    {
+      "epoch": 0.6227696194536555,
+      "grad_norm": 0.10695190727710724,
+      "learning_rate": 7e-06,
+      "loss": 0.1782,
+      "step": 493
+    },
+    {
+      "epoch": 0.624032843833886,
+      "grad_norm": 0.08578750491142273,
+      "learning_rate": 7e-06,
+      "loss": 0.2389,
+      "step": 494
+    },
+    {
+      "epoch": 0.6252960682141165,
+      "grad_norm": 0.10670057684183121,
+      "learning_rate": 7e-06,
+      "loss": 0.1801,
+      "step": 495
+    },
+    {
+      "epoch": 0.6265592925943471,
+      "grad_norm": 0.04314388707280159,
+      "learning_rate": 7e-06,
+      "loss": 0.1366,
+      "step": 496
+    },
+    {
+      "epoch": 0.6278225169745776,
+      "grad_norm": 0.11937737464904785,
+      "learning_rate": 7e-06,
+      "loss": 0.2017,
+      "step": 497
+    },
+    {
+      "epoch": 0.6290857413548081,
+      "grad_norm": 0.08274619281291962,
+      "learning_rate": 7e-06,
+      "loss": 0.1721,
+      "step": 498
+    },
+    {
+      "epoch": 0.6303489657350387,
+      "grad_norm": 0.07380262762308121,
+      "learning_rate": 7e-06,
+      "loss": 0.1962,
+      "step": 499
+    },
+    {
+      "epoch": 0.6316121901152693,
+      "grad_norm": 0.04727354645729065,
+      "learning_rate": 7e-06,
+      "loss": 0.1509,
+      "step": 500
+    },
+    {
+      "epoch": 0.6316121901152693,
+      "eval_correct_accuracy": 0.7056827820186599,
+      "eval_error_accuracy": 0.4376407023863125,
+      "eval_f1": 0.5402416946684214,
+      "eval_loss": 0.362473726272583,
+      "eval_runtime": 35.262,
+      "eval_samples_per_second": 96.421,
+      "eval_steps_per_second": 6.04,
+      "step": 500
+    },
+    {
+      "epoch": 0.6328754144954998,
+      "grad_norm": 0.08614058047533035,
+      "learning_rate": 7e-06,
+      "loss": 0.1706,
+      "step": 501
+    },
+    {
+      "epoch": 0.6341386388757303,
+      "grad_norm": 0.06968270987272263,
+      "learning_rate": 7e-06,
+      "loss": 0.2028,
+      "step": 502
+    },
+    {
+      "epoch": 0.6354018632559608,
+      "grad_norm": 0.13758571445941925,
+      "learning_rate": 7e-06,
+      "loss": 0.2296,
+      "step": 503
+    },
+    {
+      "epoch": 0.6366650876361913,
+      "grad_norm": 0.07083171606063843,
+      "learning_rate": 7e-06,
+      "loss": 0.1807,
+      "step": 504
+    },
+    {
+      "epoch": 0.637928312016422,
+      "grad_norm": 0.06689167022705078,
+      "learning_rate": 7e-06,
+      "loss": 0.1997,
+      "step": 505
+    },
+    {
+      "epoch": 0.6391915363966525,
+      "grad_norm": 0.07969733327627182,
+      "learning_rate": 7e-06,
+      "loss": 0.1803,
+      "step": 506
+    },
+    {
+      "epoch": 0.640454760776883,
+      "grad_norm": 0.095677949488163,
+      "learning_rate": 7e-06,
+      "loss": 0.1874,
+      "step": 507
+    },
+    {
+      "epoch": 0.6417179851571135,
+      "grad_norm": 0.10759231448173523,
+      "learning_rate": 7e-06,
+      "loss": 0.1656,
+      "step": 508
+    },
+    {
+      "epoch": 0.6429812095373441,
+      "grad_norm": 0.13282425701618195,
+      "learning_rate": 7e-06,
+      "loss": 0.2538,
+      "step": 509
+    },
+    {
+      "epoch": 0.6442444339175746,
+      "grad_norm": 0.09664168953895569,
+      "learning_rate": 7e-06,
+      "loss": 0.1768,
+      "step": 510
+    },
+    {
+      "epoch": 0.6455076582978051,
+      "grad_norm": 0.11897934973239899,
+      "learning_rate": 7e-06,
+      "loss": 0.2023,
+      "step": 511
+    },
+    {
+      "epoch": 0.6467708826780357,
+      "grad_norm": 0.05450622737407684,
+      "learning_rate": 7e-06,
+      "loss": 0.1277,
+      "step": 512
+    },
+    {
+      "epoch": 0.6480341070582663,
+      "grad_norm": 0.07977665215730667,
+      "learning_rate": 7e-06,
+      "loss": 0.2231,
+      "step": 513
+    },
+    {
+      "epoch": 0.6492973314384968,
+      "grad_norm": 0.19492259621620178,
+      "learning_rate": 7e-06,
+      "loss": 0.2253,
+      "step": 514
+    },
+    {
+      "epoch": 0.6505605558187273,
+      "grad_norm": 0.09466379135847092,
+      "learning_rate": 7e-06,
+      "loss": 0.3611,
+      "step": 515
+    },
+    {
+      "epoch": 0.6518237801989578,
+      "grad_norm": 0.17244236171245575,
+      "learning_rate": 7e-06,
+      "loss": 0.2149,
+      "step": 516
+    },
+    {
+      "epoch": 0.6530870045791883,
+      "grad_norm": 0.08291974663734436,
+      "learning_rate": 7e-06,
+      "loss": 0.1848,
+      "step": 517
+    },
+    {
+      "epoch": 0.654350228959419,
+      "grad_norm": 0.06109621748328209,
+      "learning_rate": 7e-06,
+      "loss": 0.1435,
+      "step": 518
+    },
+    {
+      "epoch": 0.6556134533396495,
+      "grad_norm": 0.06171726807951927,
+      "learning_rate": 7e-06,
+      "loss": 0.1749,
+      "step": 519
+    },
+    {
+      "epoch": 0.65687667771988,
+      "grad_norm": 0.09645943343639374,
+      "learning_rate": 7e-06,
+      "loss": 0.197,
+      "step": 520
+    },
+    {
+      "epoch": 0.6581399021001105,
+      "grad_norm": 0.09050124883651733,
+      "learning_rate": 7e-06,
+      "loss": 0.1609,
+      "step": 521
+    },
+    {
+      "epoch": 0.659403126480341,
+      "grad_norm": 0.09600576758384705,
+      "learning_rate": 7e-06,
+      "loss": 0.1826,
+      "step": 522
+    },
+    {
+      "epoch": 0.6606663508605716,
+      "grad_norm": 0.1261880248785019,
+      "learning_rate": 7e-06,
+      "loss": 0.1875,
+      "step": 523
+    },
+    {
+      "epoch": 0.6619295752408021,
+      "grad_norm": 0.13587896525859833,
+      "learning_rate": 7e-06,
+      "loss": 0.1905,
+      "step": 524
+    },
+    {
+      "epoch": 0.6631927996210327,
+      "grad_norm": 0.12359704077243805,
+      "learning_rate": 7e-06,
+      "loss": 0.2087,
+      "step": 525
+    },
+    {
+      "epoch": 0.6644560240012632,
+      "grad_norm": 0.10092345625162125,
+      "learning_rate": 7e-06,
+      "loss": 0.2041,
+      "step": 526
+    },
+    {
+      "epoch": 0.6657192483814938,
+      "grad_norm": 0.12595926225185394,
+      "learning_rate": 7e-06,
+      "loss": 0.1928,
+      "step": 527
+    },
+    {
+      "epoch": 0.6669824727617243,
+      "grad_norm": 0.08753985911607742,
+      "learning_rate": 7e-06,
+      "loss": 0.1444,
+      "step": 528
+    },
+    {
+      "epoch": 0.6682456971419548,
+      "grad_norm": 0.08193645626306534,
+      "learning_rate": 7e-06,
+      "loss": 0.1545,
+      "step": 529
+    },
+    {
+      "epoch": 0.6695089215221853,
+      "grad_norm": 0.07170840352773666,
+      "learning_rate": 7e-06,
+      "loss": 0.1652,
+      "step": 530
+    },
+    {
+      "epoch": 0.670772145902416,
+      "grad_norm": 0.18759992718696594,
+      "learning_rate": 7e-06,
+      "loss": 0.232,
+      "step": 531
+    },
+    {
+      "epoch": 0.6720353702826465,
+      "grad_norm": 0.13691110908985138,
+      "learning_rate": 7e-06,
+      "loss": 0.1905,
+      "step": 532
+    },
+    {
+      "epoch": 0.673298594662877,
+      "grad_norm": 0.06453829258680344,
+      "learning_rate": 7e-06,
+      "loss": 0.2283,
+      "step": 533
+    },
+    {
+      "epoch": 0.6745618190431075,
+      "grad_norm": 0.12694236636161804,
+      "learning_rate": 7e-06,
+      "loss": 0.2013,
+      "step": 534
+    },
+    {
+      "epoch": 0.675825043423338,
+      "grad_norm": 0.06403839588165283,
+      "learning_rate": 7e-06,
+      "loss": 0.1585,
+      "step": 535
+    },
+    {
+      "epoch": 0.6770882678035686,
+      "grad_norm": 0.13636727631092072,
+      "learning_rate": 7e-06,
+      "loss": 0.2156,
+      "step": 536
+    },
+    {
+      "epoch": 0.6783514921837992,
+      "grad_norm": 0.12285730242729187,
+      "learning_rate": 7e-06,
+      "loss": 0.1967,
+      "step": 537
+    },
+    {
+      "epoch": 0.6796147165640297,
+      "grad_norm": 0.0780211091041565,
+      "learning_rate": 7e-06,
+      "loss": 0.1751,
+      "step": 538
+    },
+    {
+      "epoch": 0.6808779409442602,
+      "grad_norm": 0.09688100218772888,
+      "learning_rate": 7e-06,
+      "loss": 0.2141,
+      "step": 539
+    },
+    {
+      "epoch": 0.6821411653244908,
+      "grad_norm": 0.07864505052566528,
+      "learning_rate": 7e-06,
+      "loss": 0.2138,
+      "step": 540
+    },
+    {
+      "epoch": 0.6834043897047213,
+      "grad_norm": 0.060981281101703644,
+      "learning_rate": 7e-06,
+      "loss": 0.187,
+      "step": 541
+    },
+    {
+      "epoch": 0.6846676140849518,
+      "grad_norm": 0.06510937958955765,
+      "learning_rate": 7e-06,
+      "loss": 0.1771,
+      "step": 542
+    },
+    {
+      "epoch": 0.6859308384651823,
+      "grad_norm": 0.07638704031705856,
+      "learning_rate": 7e-06,
+      "loss": 0.2129,
+      "step": 543
+    },
+    {
+      "epoch": 0.687194062845413,
+      "grad_norm": 0.11518476903438568,
+      "learning_rate": 7e-06,
+      "loss": 0.1854,
+      "step": 544
+    },
+    {
+      "epoch": 0.6884572872256435,
+      "grad_norm": 0.06868738681077957,
+      "learning_rate": 7e-06,
+      "loss": 0.1581,
+      "step": 545
+    },
+    {
+      "epoch": 0.689720511605874,
+      "grad_norm": 0.09059899300336838,
+      "learning_rate": 7e-06,
+      "loss": 0.2245,
+      "step": 546
+    },
+    {
+      "epoch": 0.6909837359861045,
+      "grad_norm": 0.06422233581542969,
+      "learning_rate": 7e-06,
+      "loss": 0.1559,
+      "step": 547
+    },
+    {
+      "epoch": 0.692246960366335,
+      "grad_norm": 0.10189103335142136,
+      "learning_rate": 7e-06,
+      "loss": 0.193,
+      "step": 548
+    },
+    {
+      "epoch": 0.6935101847465656,
+      "grad_norm": 0.08199501782655716,
+      "learning_rate": 7e-06,
+      "loss": 0.1908,
+      "step": 549
+    },
+    {
+      "epoch": 0.6947734091267962,
+      "grad_norm": 0.20546898245811462,
+      "learning_rate": 7e-06,
+      "loss": 0.2011,
+      "step": 550
+    },
+    {
+      "epoch": 0.6960366335070267,
+      "grad_norm": 0.14664340019226074,
+      "learning_rate": 7e-06,
+      "loss": 0.1892,
+      "step": 551
+    },
+    {
+      "epoch": 0.6972998578872572,
+      "grad_norm": 0.08695843815803528,
+      "learning_rate": 7e-06,
+      "loss": 0.1871,
+      "step": 552
+    },
+    {
+      "epoch": 0.6985630822674878,
+      "grad_norm": 0.08112246543169022,
+      "learning_rate": 7e-06,
+      "loss": 0.1616,
+      "step": 553
+    },
+    {
+      "epoch": 0.6998263066477183,
+      "grad_norm": 0.08381661772727966,
+      "learning_rate": 7e-06,
+      "loss": 0.2231,
+      "step": 554
+    },
+    {
+      "epoch": 0.7010895310279488,
+      "grad_norm": 0.09177428483963013,
+      "learning_rate": 7e-06,
+      "loss": 0.1956,
+      "step": 555
+    },
+    {
+      "epoch": 0.7023527554081794,
+      "grad_norm": 0.08766631782054901,
+      "learning_rate": 7e-06,
+      "loss": 0.1871,
+      "step": 556
+    },
+    {
+      "epoch": 0.70361597978841,
+      "grad_norm": 0.07755694538354874,
+      "learning_rate": 7e-06,
+      "loss": 0.2718,
+      "step": 557
+    },
+    {
+      "epoch": 0.7048792041686405,
+      "grad_norm": 0.08710070699453354,
+      "learning_rate": 7e-06,
+      "loss": 0.1909,
+      "step": 558
+    },
+    {
+      "epoch": 0.706142428548871,
+      "grad_norm": 0.07648595422506332,
+      "learning_rate": 7e-06,
+      "loss": 0.1948,
+      "step": 559
+    },
+    {
+      "epoch": 0.7074056529291015,
+      "grad_norm": 0.10871299356222153,
+      "learning_rate": 7e-06,
+      "loss": 0.2093,
+      "step": 560
+    },
+    {
+      "epoch": 0.708668877309332,
+      "grad_norm": 0.07032714784145355,
+      "learning_rate": 7e-06,
+      "loss": 0.1699,
+      "step": 561
+    },
+    {
+      "epoch": 0.7099321016895626,
+      "grad_norm": 0.0873897522687912,
+      "learning_rate": 7e-06,
+      "loss": 0.1372,
+      "step": 562
+    },
+    {
+      "epoch": 0.7111953260697932,
+      "grad_norm": 0.07188841700553894,
+      "learning_rate": 7e-06,
+      "loss": 0.1794,
+      "step": 563
+    },
+    {
+      "epoch": 0.7124585504500237,
+      "grad_norm": 0.07733464986085892,
+      "learning_rate": 7e-06,
+      "loss": 0.2043,
+      "step": 564
+    },
+    {
+      "epoch": 0.7137217748302542,
+      "grad_norm": 0.07270821928977966,
+      "learning_rate": 7e-06,
+      "loss": 0.177,
+      "step": 565
+    },
+    {
+      "epoch": 0.7149849992104848,
+      "grad_norm": 0.1570441722869873,
+      "learning_rate": 7e-06,
+      "loss": 0.2411,
+      "step": 566
+    },
+    {
+      "epoch": 0.7162482235907153,
+      "grad_norm": 0.2707260549068451,
+      "learning_rate": 7e-06,
+      "loss": 0.2307,
+      "step": 567
+    },
+    {
+      "epoch": 0.7175114479709458,
+      "grad_norm": 0.07656281441450119,
+      "learning_rate": 7e-06,
+      "loss": 0.1759,
+      "step": 568
+    },
+    {
+      "epoch": 0.7187746723511764,
+      "grad_norm": 0.09973770380020142,
+      "learning_rate": 7e-06,
+      "loss": 0.1965,
+      "step": 569
+    },
+    {
+      "epoch": 0.720037896731407,
+      "grad_norm": 0.06791306287050247,
+      "learning_rate": 7e-06,
+      "loss": 0.1749,
+      "step": 570
+    },
+    {
+      "epoch": 0.7213011211116375,
+      "grad_norm": 0.17801041901111603,
+      "learning_rate": 7e-06,
+      "loss": 0.1941,
+      "step": 571
+    },
+    {
+      "epoch": 0.722564345491868,
+      "grad_norm": 0.18452543020248413,
+      "learning_rate": 7e-06,
+      "loss": 0.2446,
+      "step": 572
+    },
+    {
+      "epoch": 0.7238275698720985,
+      "grad_norm": 0.12178942561149597,
+      "learning_rate": 7e-06,
+      "loss": 0.1583,
+      "step": 573
+    },
+    {
+      "epoch": 0.725090794252329,
+      "grad_norm": 0.13167473673820496,
+      "learning_rate": 7e-06,
+      "loss": 0.2324,
+      "step": 574
+    },
+    {
+      "epoch": 0.7263540186325597,
+      "grad_norm": 0.05255408585071564,
+      "learning_rate": 7e-06,
+      "loss": 0.195,
+      "step": 575
+    },
+    {
+      "epoch": 0.7276172430127902,
+      "grad_norm": 0.09154222905635834,
+      "learning_rate": 7e-06,
+      "loss": 0.1878,
+      "step": 576
+    },
+    {
+      "epoch": 0.7288804673930207,
+      "grad_norm": 0.0887879729270935,
+      "learning_rate": 7e-06,
+      "loss": 0.2052,
+      "step": 577
+    },
+    {
+      "epoch": 0.7301436917732512,
+      "grad_norm": 0.1336040198802948,
+      "learning_rate": 7e-06,
+      "loss": 0.1952,
+      "step": 578
+    },
+    {
+      "epoch": 0.7314069161534817,
+      "grad_norm": 0.08207479119300842,
+      "learning_rate": 7e-06,
+      "loss": 0.185,
+      "step": 579
+    },
+    {
+      "epoch": 0.7326701405337123,
+      "grad_norm": 0.05941140279173851,
+      "learning_rate": 7e-06,
+      "loss": 0.204,
+      "step": 580
+    },
+    {
+      "epoch": 0.7339333649139428,
+      "grad_norm": 0.06899949908256531,
+      "learning_rate": 7e-06,
+      "loss": 0.1408,
+      "step": 581
+    },
+    {
+      "epoch": 0.7351965892941734,
+      "grad_norm": 0.09259360283613205,
+      "learning_rate": 7e-06,
+      "loss": 0.1823,
+      "step": 582
+    },
+    {
+      "epoch": 0.7364598136744039,
+      "grad_norm": 0.1346062421798706,
+      "learning_rate": 7e-06,
+      "loss": 0.2174,
+      "step": 583
+    },
+    {
+      "epoch": 0.7377230380546345,
+      "grad_norm": 0.1547420769929886,
+      "learning_rate": 7e-06,
+      "loss": 0.1864,
+      "step": 584
+    },
+    {
+      "epoch": 0.738986262434865,
+      "grad_norm": 0.10551164299249649,
+      "learning_rate": 7e-06,
+      "loss": 0.1554,
+      "step": 585
+    },
+    {
+      "epoch": 0.7402494868150955,
+      "grad_norm": 0.08826129138469696,
+      "learning_rate": 7e-06,
+      "loss": 0.2044,
+      "step": 586
+    },
+    {
+      "epoch": 0.741512711195326,
+      "grad_norm": 0.07170785963535309,
+      "learning_rate": 7e-06,
+      "loss": 0.1777,
+      "step": 587
+    },
+    {
+      "epoch": 0.7427759355755567,
+      "grad_norm": 0.1085812896490097,
+      "learning_rate": 7e-06,
+      "loss": 0.2822,
+      "step": 588
+    },
+    {
+      "epoch": 0.7440391599557872,
+      "grad_norm": 0.08545360714197159,
+      "learning_rate": 7e-06,
+      "loss": 0.19,
+      "step": 589
+    },
+    {
+      "epoch": 0.7453023843360177,
+      "grad_norm": 0.05576294660568237,
+      "learning_rate": 7e-06,
+      "loss": 0.1826,
+      "step": 590
+    },
+    {
+      "epoch": 0.7465656087162482,
+      "grad_norm": 0.056626636534929276,
+      "learning_rate": 7e-06,
+      "loss": 0.2191,
+      "step": 591
+    },
+    {
+      "epoch": 0.7478288330964787,
+      "grad_norm": 0.06961087882518768,
+      "learning_rate": 7e-06,
+      "loss": 0.2004,
+      "step": 592
+    },
+    {
+      "epoch": 0.7490920574767093,
+      "grad_norm": 0.09317582845687866,
+      "learning_rate": 7e-06,
+      "loss": 0.1465,
+      "step": 593
+    },
+    {
+      "epoch": 0.7503552818569399,
+      "grad_norm": 0.13993658125400543,
+      "learning_rate": 7e-06,
+      "loss": 0.1876,
+      "step": 594
+    },
+    {
+      "epoch": 0.7516185062371704,
+      "grad_norm": 0.06080286204814911,
+      "learning_rate": 7e-06,
+      "loss": 0.2012,
+      "step": 595
+    },
+    {
+      "epoch": 0.7528817306174009,
+      "grad_norm": 0.060514189302921295,
+      "learning_rate": 7e-06,
+      "loss": 0.1658,
+      "step": 596
+    },
+    {
+      "epoch": 0.7541449549976315,
+      "grad_norm": 0.09004813432693481,
+      "learning_rate": 7e-06,
+      "loss": 0.3195,
+      "step": 597
+    },
+    {
+      "epoch": 0.755408179377862,
+      "grad_norm": 0.07283802330493927,
+      "learning_rate": 7e-06,
+      "loss": 0.2021,
+      "step": 598
+    },
+    {
+      "epoch": 0.7566714037580925,
+      "grad_norm": 0.08824078738689423,
+      "learning_rate": 7e-06,
+      "loss": 0.1941,
+      "step": 599
+    },
+    {
+      "epoch": 0.757934628138323,
+      "grad_norm": 0.09339006245136261,
+      "learning_rate": 7e-06,
+      "loss": 0.4697,
+      "step": 600
+    },
+    {
+      "epoch": 0.757934628138323,
+      "eval_correct_accuracy": 0.6098388464800678,
+      "eval_error_accuracy": 0.48491670418730304,
+      "eval_f1": 0.540250366102693,
+      "eval_loss": 0.42047378420829773,
+      "eval_runtime": 35.1541,
+      "eval_samples_per_second": 96.717,
+      "eval_steps_per_second": 6.059,
+      "step": 600
+    }
+  ],
+  "logging_steps": 1.0,
+  "max_steps": 791,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 100,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.7774891537268736e+18,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}