| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.4287245444801715, |
| "eval_steps": 100, |
| "global_step": 2000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.007145409074669525, |
| "grad_norm": 4.4086809158325195, |
| "learning_rate": 2.3809523809523808e-06, |
| "loss": 1.0969, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.01429081814933905, |
| "grad_norm": 5.687011241912842, |
| "learning_rate": 4.7619047619047615e-06, |
| "loss": 1.0795, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.021436227224008574, |
| "grad_norm": 1.976590633392334, |
| "learning_rate": 7.142857142857143e-06, |
| "loss": 0.7536, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0285816362986781, |
| "grad_norm": 3.1355409622192383, |
| "learning_rate": 9.523809523809523e-06, |
| "loss": 0.5564, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.03572704537334762, |
| "grad_norm": 2.6710309982299805, |
| "learning_rate": 1.1904761904761905e-05, |
| "loss": 0.623, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.04287245444801715, |
| "grad_norm": 2.8567938804626465, |
| "learning_rate": 1.4285714285714285e-05, |
| "loss": 0.5322, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.050017863522686674, |
| "grad_norm": 3.4388861656188965, |
| "learning_rate": 1.6666666666666667e-05, |
| "loss": 0.5102, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.0571632725973562, |
| "grad_norm": 3.093275308609009, |
| "learning_rate": 1.9047619047619046e-05, |
| "loss": 0.568, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.06430868167202572, |
| "grad_norm": 2.3798677921295166, |
| "learning_rate": 2.1428571428571428e-05, |
| "loss": 0.4883, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.07145409074669525, |
| "grad_norm": 2.846259117126465, |
| "learning_rate": 2.380952380952381e-05, |
| "loss": 0.417, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.07145409074669525, |
| "eval_news_finetune_val_loss": 0.48679304122924805, |
| "eval_news_finetune_val_runtime": 1001.9158, |
| "eval_news_finetune_val_samples_per_second": 1.397, |
| "eval_news_finetune_val_steps_per_second": 1.397, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.07859949982136477, |
| "grad_norm": 1.9387887716293335, |
| "learning_rate": 2.6190476190476192e-05, |
| "loss": 0.4595, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.0857449088960343, |
| "grad_norm": 2.3232853412628174, |
| "learning_rate": 2.857142857142857e-05, |
| "loss": 0.4658, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.09289031797070382, |
| "grad_norm": 2.813093423843384, |
| "learning_rate": 3.095238095238095e-05, |
| "loss": 0.4122, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.10003572704537335, |
| "grad_norm": 1.9588465690612793, |
| "learning_rate": 3.3333333333333335e-05, |
| "loss": 0.4878, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.10718113612004287, |
| "grad_norm": 1.4838117361068726, |
| "learning_rate": 3.571428571428572e-05, |
| "loss": 0.4168, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.1143265451947124, |
| "grad_norm": 3.020738124847412, |
| "learning_rate": 3.809523809523809e-05, |
| "loss": 0.4298, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.12147195426938193, |
| "grad_norm": 2.097656011581421, |
| "learning_rate": 4.047619047619048e-05, |
| "loss": 0.4413, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.12861736334405144, |
| "grad_norm": 1.6332950592041016, |
| "learning_rate": 4.2857142857142856e-05, |
| "loss": 0.3734, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.13576277241872098, |
| "grad_norm": 2.1570417881011963, |
| "learning_rate": 4.523809523809524e-05, |
| "loss": 0.4015, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.1429081814933905, |
| "grad_norm": 1.6941479444503784, |
| "learning_rate": 4.761904761904762e-05, |
| "loss": 0.4411, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.1429081814933905, |
| "eval_news_finetune_val_loss": 0.4338369369506836, |
| "eval_news_finetune_val_runtime": 1002.1695, |
| "eval_news_finetune_val_samples_per_second": 1.397, |
| "eval_news_finetune_val_steps_per_second": 1.397, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.15005359056806003, |
| "grad_norm": 2.3582301139831543, |
| "learning_rate": 5e-05, |
| "loss": 0.3697, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.15719899964272954, |
| "grad_norm": 2.0517632961273193, |
| "learning_rate": 5.2380952380952384e-05, |
| "loss": 0.4076, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.16434440871739908, |
| "grad_norm": 1.3338748216629028, |
| "learning_rate": 5.4761904761904766e-05, |
| "loss": 0.3307, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.1714898177920686, |
| "grad_norm": 3.0515363216400146, |
| "learning_rate": 5.714285714285714e-05, |
| "loss": 0.4227, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.17863522686673813, |
| "grad_norm": 2.4899113178253174, |
| "learning_rate": 5.9523809523809524e-05, |
| "loss": 0.4689, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.18578063594140765, |
| "grad_norm": 1.6197255849838257, |
| "learning_rate": 6.19047619047619e-05, |
| "loss": 0.3618, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.19292604501607716, |
| "grad_norm": 1.654628872871399, |
| "learning_rate": 6.428571428571429e-05, |
| "loss": 0.4668, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.2000714540907467, |
| "grad_norm": 1.6470831632614136, |
| "learning_rate": 6.666666666666667e-05, |
| "loss": 0.3525, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.2072168631654162, |
| "grad_norm": 2.640536308288574, |
| "learning_rate": 6.904761904761905e-05, |
| "loss": 0.3707, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.21436227224008575, |
| "grad_norm": 2.3426971435546875, |
| "learning_rate": 7.142857142857143e-05, |
| "loss": 0.4461, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.21436227224008575, |
| "eval_news_finetune_val_loss": 0.40391305088996887, |
| "eval_news_finetune_val_runtime": 1002.5797, |
| "eval_news_finetune_val_samples_per_second": 1.396, |
| "eval_news_finetune_val_steps_per_second": 1.396, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.22150768131475526, |
| "grad_norm": 1.0351321697235107, |
| "learning_rate": 7.380952380952382e-05, |
| "loss": 0.3439, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.2286530903894248, |
| "grad_norm": 3.062483549118042, |
| "learning_rate": 7.619047619047618e-05, |
| "loss": 0.4492, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.2357984994640943, |
| "grad_norm": 2.095825672149658, |
| "learning_rate": 7.857142857142858e-05, |
| "loss": 0.3399, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.24294390853876385, |
| "grad_norm": 1.700642704963684, |
| "learning_rate": 8.095238095238096e-05, |
| "loss": 0.4336, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.2500893176134334, |
| "grad_norm": 1.6802127361297607, |
| "learning_rate": 8.333333333333334e-05, |
| "loss": 0.3628, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.2572347266881029, |
| "grad_norm": 1.1725817918777466, |
| "learning_rate": 8.571428571428571e-05, |
| "loss": 0.4113, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.2643801357627724, |
| "grad_norm": 1.0182325839996338, |
| "learning_rate": 8.80952380952381e-05, |
| "loss": 0.4009, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.27152554483744196, |
| "grad_norm": 2.5762252807617188, |
| "learning_rate": 9.047619047619048e-05, |
| "loss": 0.3399, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.27867095391211144, |
| "grad_norm": 1.5393809080123901, |
| "learning_rate": 9.285714285714286e-05, |
| "loss": 0.326, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.285816362986781, |
| "grad_norm": 2.3259921073913574, |
| "learning_rate": 9.523809523809524e-05, |
| "loss": 0.4228, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.285816362986781, |
| "eval_news_finetune_val_loss": 0.39322975277900696, |
| "eval_news_finetune_val_runtime": 1002.8865, |
| "eval_news_finetune_val_samples_per_second": 1.396, |
| "eval_news_finetune_val_steps_per_second": 1.396, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.2929617720614505, |
| "grad_norm": 0.9278184771537781, |
| "learning_rate": 9.761904761904762e-05, |
| "loss": 0.3184, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.30010718113612006, |
| "grad_norm": 1.4571782350540161, |
| "learning_rate": 0.0001, |
| "loss": 0.473, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.30725259021078954, |
| "grad_norm": 1.6199829578399658, |
| "learning_rate": 9.99982704095424e-05, |
| "loss": 0.392, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.3143979992854591, |
| "grad_norm": 1.302309513092041, |
| "learning_rate": 9.999308175782893e-05, |
| "loss": 0.3824, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.3215434083601286, |
| "grad_norm": 1.438289761543274, |
| "learning_rate": 9.998443440382927e-05, |
| "loss": 0.4001, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.32868881743479816, |
| "grad_norm": 1.7557189464569092, |
| "learning_rate": 9.997232894579868e-05, |
| "loss": 0.4144, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.33583422650946765, |
| "grad_norm": 0.9362027645111084, |
| "learning_rate": 9.995676622123655e-05, |
| "loss": 0.3094, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.3429796355841372, |
| "grad_norm": 1.7850221395492554, |
| "learning_rate": 9.993774730682845e-05, |
| "loss": 0.2966, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.35012504465880673, |
| "grad_norm": 1.705842137336731, |
| "learning_rate": 9.991527351837174e-05, |
| "loss": 0.3274, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.35727045373347627, |
| "grad_norm": 1.0722746849060059, |
| "learning_rate": 9.988934641068436e-05, |
| "loss": 0.4301, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.35727045373347627, |
| "eval_news_finetune_val_loss": 0.3787713646888733, |
| "eval_news_finetune_val_runtime": 1002.8588, |
| "eval_news_finetune_val_samples_per_second": 1.396, |
| "eval_news_finetune_val_steps_per_second": 1.396, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.36441586280814575, |
| "grad_norm": 1.282714605331421, |
| "learning_rate": 9.985996777749747e-05, |
| "loss": 0.3636, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.3715612718828153, |
| "grad_norm": 2.0360989570617676, |
| "learning_rate": 9.982713965133122e-05, |
| "loss": 0.4467, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.37870668095748483, |
| "grad_norm": 1.7432626485824585, |
| "learning_rate": 9.979086430335417e-05, |
| "loss": 0.3875, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.3858520900321543, |
| "grad_norm": 1.6053438186645508, |
| "learning_rate": 9.975114424322609e-05, |
| "loss": 0.3646, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.39299749910682386, |
| "grad_norm": 1.2323070764541626, |
| "learning_rate": 9.970798221892452e-05, |
| "loss": 0.353, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.4001429081814934, |
| "grad_norm": 1.16932213306427, |
| "learning_rate": 9.966138121655445e-05, |
| "loss": 0.331, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.40728831725616294, |
| "grad_norm": 1.8134998083114624, |
| "learning_rate": 9.961134446014184e-05, |
| "loss": 0.3132, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.4144337263308324, |
| "grad_norm": 1.4292124509811401, |
| "learning_rate": 9.955787541141055e-05, |
| "loss": 0.3017, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.42157913540550196, |
| "grad_norm": 1.4605034589767456, |
| "learning_rate": 9.950097776954284e-05, |
| "loss": 0.3596, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.4287245444801715, |
| "grad_norm": 1.2365972995758057, |
| "learning_rate": 9.944065547092345e-05, |
| "loss": 0.3399, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.4287245444801715, |
| "eval_news_finetune_val_loss": 0.36549311876296997, |
| "eval_news_finetune_val_runtime": 1002.8044, |
| "eval_news_finetune_val_samples_per_second": 1.396, |
| "eval_news_finetune_val_steps_per_second": 1.396, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.43586995355484104, |
| "grad_norm": 1.0590678453445435, |
| "learning_rate": 9.937691268886725e-05, |
| "loss": 0.3747, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.4430153626295105, |
| "grad_norm": 0.9111473560333252, |
| "learning_rate": 9.930975383333056e-05, |
| "loss": 0.2868, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.45016077170418006, |
| "grad_norm": 2.0456018447875977, |
| "learning_rate": 9.923918355060599e-05, |
| "loss": 0.3289, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.4573061807788496, |
| "grad_norm": 1.5998501777648926, |
| "learning_rate": 9.916520672300107e-05, |
| "loss": 0.3664, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.4644515898535191, |
| "grad_norm": 1.0773181915283203, |
| "learning_rate": 9.908782846850037e-05, |
| "loss": 0.3432, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.4715969989281886, |
| "grad_norm": 1.244042158126831, |
| "learning_rate": 9.900705414041154e-05, |
| "loss": 0.3242, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.47874240800285817, |
| "grad_norm": 1.8120310306549072, |
| "learning_rate": 9.892288932699484e-05, |
| "loss": 0.317, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.4858878170775277, |
| "grad_norm": 0.7863224148750305, |
| "learning_rate": 9.883533985107663e-05, |
| "loss": 0.322, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.4930332261521972, |
| "grad_norm": 1.223832130432129, |
| "learning_rate": 9.874441176964642e-05, |
| "loss": 0.343, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.5001786352268668, |
| "grad_norm": 0.9870743155479431, |
| "learning_rate": 9.865011137343787e-05, |
| "loss": 0.3278, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.5001786352268668, |
| "eval_news_finetune_val_loss": 0.35386842489242554, |
| "eval_news_finetune_val_runtime": 1003.4109, |
| "eval_news_finetune_val_samples_per_second": 1.395, |
| "eval_news_finetune_val_steps_per_second": 1.395, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.5073240443015362, |
| "grad_norm": 1.3699963092803955, |
| "learning_rate": 9.85524451864936e-05, |
| "loss": 0.3902, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.5144694533762058, |
| "grad_norm": 1.7188071012496948, |
| "learning_rate": 9.845141996571384e-05, |
| "loss": 0.369, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.5216148624508753, |
| "grad_norm": 0.4889034628868103, |
| "learning_rate": 9.834704270038888e-05, |
| "loss": 0.3174, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.5287602715255448, |
| "grad_norm": 0.8782143592834473, |
| "learning_rate": 9.823932061171561e-05, |
| "loss": 0.3501, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.5359056806002144, |
| "grad_norm": 2.4089126586914062, |
| "learning_rate": 9.812826115229789e-05, |
| "loss": 0.3292, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.5430510896748839, |
| "grad_norm": 1.6382787227630615, |
| "learning_rate": 9.801387200563096e-05, |
| "loss": 0.459, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.5501964987495535, |
| "grad_norm": 1.443916916847229, |
| "learning_rate": 9.789616108556992e-05, |
| "loss": 0.3409, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.5573419078242229, |
| "grad_norm": 1.632278323173523, |
| "learning_rate": 9.77751365357821e-05, |
| "loss": 0.281, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.5644873168988924, |
| "grad_norm": 2.1452109813690186, |
| "learning_rate": 9.765080672918374e-05, |
| "loss": 0.3511, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.571632725973562, |
| "grad_norm": 1.2721842527389526, |
| "learning_rate": 9.752318026736078e-05, |
| "loss": 0.2298, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.571632725973562, |
| "eval_news_finetune_val_loss": 0.34554028511047363, |
| "eval_news_finetune_val_runtime": 1003.3342, |
| "eval_news_finetune_val_samples_per_second": 1.395, |
| "eval_news_finetune_val_steps_per_second": 1.395, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.5787781350482315, |
| "grad_norm": 2.5264174938201904, |
| "learning_rate": 9.739226597997359e-05, |
| "loss": 0.3214, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.585923544122901, |
| "grad_norm": 1.4553183317184448, |
| "learning_rate": 9.725807292414629e-05, |
| "loss": 0.2697, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.5930689531975706, |
| "grad_norm": 2.2111873626708984, |
| "learning_rate": 9.712061038384002e-05, |
| "loss": 0.3315, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.6002143622722401, |
| "grad_norm": 1.4308302402496338, |
| "learning_rate": 9.697988786921071e-05, |
| "loss": 0.4036, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.6073597713469097, |
| "grad_norm": 1.8136054277420044, |
| "learning_rate": 9.683591511595107e-05, |
| "loss": 0.2946, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.6145051804215791, |
| "grad_norm": 1.8586084842681885, |
| "learning_rate": 9.668870208461713e-05, |
| "loss": 0.2259, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.6216505894962486, |
| "grad_norm": 1.1640444993972778, |
| "learning_rate": 9.653825895993908e-05, |
| "loss": 0.4, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.6287959985709182, |
| "grad_norm": 1.386013388633728, |
| "learning_rate": 9.63845961501166e-05, |
| "loss": 0.2804, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.6359414076455877, |
| "grad_norm": 2.1413650512695312, |
| "learning_rate": 9.622772428609887e-05, |
| "loss": 0.3593, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.6430868167202572, |
| "grad_norm": 1.5462217330932617, |
| "learning_rate": 9.606765422084908e-05, |
| "loss": 0.3058, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.6430868167202572, |
| "eval_news_finetune_val_loss": 0.3292103707790375, |
| "eval_news_finetune_val_runtime": 1003.4558, |
| "eval_news_finetune_val_samples_per_second": 1.395, |
| "eval_news_finetune_val_steps_per_second": 1.395, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.6502322257949268, |
| "grad_norm": 1.0373942852020264, |
| "learning_rate": 9.590439702859351e-05, |
| "loss": 0.3318, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.6573776348695963, |
| "grad_norm": 1.2724213600158691, |
| "learning_rate": 9.573796400405544e-05, |
| "loss": 0.3328, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.6645230439442658, |
| "grad_norm": 0.8528966903686523, |
| "learning_rate": 9.55683666616737e-05, |
| "loss": 0.2673, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.6716684530189353, |
| "grad_norm": 1.65499746799469, |
| "learning_rate": 9.539561673480612e-05, |
| "loss": 0.3538, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.6788138620936048, |
| "grad_norm": 2.341379404067993, |
| "learning_rate": 9.521972617491767e-05, |
| "loss": 0.3228, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.6859592711682744, |
| "grad_norm": 1.4938244819641113, |
| "learning_rate": 9.504070715075372e-05, |
| "loss": 0.3974, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.6931046802429439, |
| "grad_norm": 1.0390361547470093, |
| "learning_rate": 9.485857204749811e-05, |
| "loss": 0.3236, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.7002500893176135, |
| "grad_norm": 3.8845393657684326, |
| "learning_rate": 9.467333346591632e-05, |
| "loss": 0.3027, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.707395498392283, |
| "grad_norm": 1.3295674324035645, |
| "learning_rate": 9.448500422148364e-05, |
| "loss": 0.3005, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.7145409074669525, |
| "grad_norm": 1.0146369934082031, |
| "learning_rate": 9.429359734349863e-05, |
| "loss": 0.294, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.7145409074669525, |
| "eval_news_finetune_val_loss": 0.3208242654800415, |
| "eval_news_finetune_val_runtime": 1003.2491, |
| "eval_news_finetune_val_samples_per_second": 1.395, |
| "eval_news_finetune_val_steps_per_second": 1.395, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.721686316541622, |
| "grad_norm": 1.5076738595962524, |
| "learning_rate": 9.409912607418172e-05, |
| "loss": 0.268, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.7288317256162915, |
| "grad_norm": 3.3230276107788086, |
| "learning_rate": 9.390160386775895e-05, |
| "loss": 0.3038, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.735977134690961, |
| "grad_norm": 1.699854850769043, |
| "learning_rate": 9.370104438953125e-05, |
| "loss": 0.2869, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.7431225437656306, |
| "grad_norm": 0.904507577419281, |
| "learning_rate": 9.349746151492902e-05, |
| "loss": 0.289, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.7502679528403001, |
| "grad_norm": 0.9463105201721191, |
| "learning_rate": 9.329086932855215e-05, |
| "loss": 0.3729, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.7574133619149697, |
| "grad_norm": 1.4746607542037964, |
| "learning_rate": 9.30812821231956e-05, |
| "loss": 0.2282, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.7645587709896392, |
| "grad_norm": 1.0270076990127563, |
| "learning_rate": 9.286871439886058e-05, |
| "loss": 0.3029, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.7717041800643086, |
| "grad_norm": 2.0656538009643555, |
| "learning_rate": 9.265318086175143e-05, |
| "loss": 0.3268, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.7788495891389782, |
| "grad_norm": 0.9798826575279236, |
| "learning_rate": 9.243469642325805e-05, |
| "loss": 0.2942, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.7859949982136477, |
| "grad_norm": 1.1419672966003418, |
| "learning_rate": 9.221327619892452e-05, |
| "loss": 0.3266, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.7859949982136477, |
| "eval_news_finetune_val_loss": 0.307956337928772, |
| "eval_news_finetune_val_runtime": 1003.1873, |
| "eval_news_finetune_val_samples_per_second": 1.396, |
| "eval_news_finetune_val_steps_per_second": 1.396, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.7931404072883173, |
| "grad_norm": 0.6810228228569031, |
| "learning_rate": 9.198893550740306e-05, |
| "loss": 0.3596, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.8002858163629868, |
| "grad_norm": 1.6553049087524414, |
| "learning_rate": 9.176168986939446e-05, |
| "loss": 0.3106, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.8074312254376563, |
| "grad_norm": 0.7749443650245667, |
| "learning_rate": 9.153155500657422e-05, |
| "loss": 0.3298, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.8145766345123259, |
| "grad_norm": 0.8693751096725464, |
| "learning_rate": 9.129854684050481e-05, |
| "loss": 0.279, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.8217220435869954, |
| "grad_norm": 1.1013332605361938, |
| "learning_rate": 9.10626814915343e-05, |
| "loss": 0.3195, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.8288674526616648, |
| "grad_norm": 1.2278695106506348, |
| "learning_rate": 9.082397527768092e-05, |
| "loss": 0.3027, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.8360128617363344, |
| "grad_norm": 2.173530101776123, |
| "learning_rate": 9.058244471350428e-05, |
| "loss": 0.2238, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.8431582708110039, |
| "grad_norm": 1.125986933708191, |
| "learning_rate": 9.033810650896274e-05, |
| "loss": 0.2399, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.8503036798856735, |
| "grad_norm": 0.6611151099205017, |
| "learning_rate": 9.009097756825737e-05, |
| "loss": 0.2736, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.857449088960343, |
| "grad_norm": 1.9068485498428345, |
| "learning_rate": 8.98410749886625e-05, |
| "loss": 0.2949, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.857449088960343, |
| "eval_news_finetune_val_loss": 0.31006094813346863, |
| "eval_news_finetune_val_runtime": 1002.7866, |
| "eval_news_finetune_val_samples_per_second": 1.396, |
| "eval_news_finetune_val_steps_per_second": 1.396, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.8645944980350125, |
| "grad_norm": 1.192031979560852, |
| "learning_rate": 8.958841605934278e-05, |
| "loss": 0.3657, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.8717399071096821, |
| "grad_norm": 1.2596725225448608, |
| "learning_rate": 8.933301826015715e-05, |
| "loss": 0.3068, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.8788853161843515, |
| "grad_norm": 1.4713683128356934, |
| "learning_rate": 8.907489926044945e-05, |
| "loss": 0.3122, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.886030725259021, |
| "grad_norm": 1.3583886623382568, |
| "learning_rate": 8.881407691782608e-05, |
| "loss": 0.2989, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.8931761343336906, |
| "grad_norm": 0.9863426089286804, |
| "learning_rate": 8.855056927692037e-05, |
| "loss": 0.2549, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.9003215434083601, |
| "grad_norm": 1.0579396486282349, |
| "learning_rate": 8.828439456814442e-05, |
| "loss": 0.2809, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.9074669524830297, |
| "grad_norm": 2.847482681274414, |
| "learning_rate": 8.801557120642766e-05, |
| "loss": 0.2933, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.9146123615576992, |
| "grad_norm": 0.8942415118217468, |
| "learning_rate": 8.774411778994295e-05, |
| "loss": 0.2866, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.9217577706323687, |
| "grad_norm": 1.297845721244812, |
| "learning_rate": 8.747005309881984e-05, |
| "loss": 0.2939, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.9289031797070382, |
| "grad_norm": 1.2745181322097778, |
| "learning_rate": 8.719339609384531e-05, |
| "loss": 0.3018, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.9289031797070382, |
| "eval_news_finetune_val_loss": 0.29822030663490295, |
| "eval_news_finetune_val_runtime": 1002.5672, |
| "eval_news_finetune_val_samples_per_second": 1.396, |
| "eval_news_finetune_val_steps_per_second": 1.396, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.9360485887817077, |
| "grad_norm": 1.3898978233337402, |
| "learning_rate": 8.691416591515198e-05, |
| "loss": 0.295, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.9431939978563773, |
| "grad_norm": 1.1516591310501099, |
| "learning_rate": 8.663238188089398e-05, |
| "loss": 0.209, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.9503394069310468, |
| "grad_norm": 0.9356768131256104, |
| "learning_rate": 8.634806348591036e-05, |
| "loss": 0.2904, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.9574848160057163, |
| "grad_norm": 1.884950876235962, |
| "learning_rate": 8.606123040037643e-05, |
| "loss": 0.2607, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.9646302250803859, |
| "grad_norm": 1.2719082832336426, |
| "learning_rate": 8.577190246844291e-05, |
| "loss": 0.3279, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.9717756341550554, |
| "grad_norm": 0.935297429561615, |
| "learning_rate": 8.548009970686302e-05, |
| "loss": 0.3011, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.978921043229725, |
| "grad_norm": 1.6732884645462036, |
| "learning_rate": 8.51858423036076e-05, |
| "loss": 0.2379, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.9860664523043944, |
| "grad_norm": 0.6651692390441895, |
| "learning_rate": 8.488915061646856e-05, |
| "loss": 0.2599, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.9932118613790639, |
| "grad_norm": 1.121752381324768, |
| "learning_rate": 8.459004517165032e-05, |
| "loss": 0.2265, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.5099928379058838, |
| "learning_rate": 8.428854666234978e-05, |
| "loss": 0.3301, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_news_finetune_val_loss": 0.28762951493263245, |
| "eval_news_finetune_val_runtime": 1002.7793, |
| "eval_news_finetune_val_samples_per_second": 1.396, |
| "eval_news_finetune_val_steps_per_second": 1.396, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.0071454090746694, |
| "grad_norm": 0.9986103177070618, |
| "learning_rate": 8.398467594732478e-05, |
| "loss": 0.2021, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.014290818149339, |
| "grad_norm": 1.2675282955169678, |
| "learning_rate": 8.367845404945084e-05, |
| "loss": 0.2228, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.0214362272240085, |
| "grad_norm": 0.8156709671020508, |
| "learning_rate": 8.336990215426688e-05, |
| "loss": 0.1947, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.0285816362986782, |
| "grad_norm": 0.5374387502670288, |
| "learning_rate": 8.305904160850941e-05, |
| "loss": 0.2344, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.0357270453733476, |
| "grad_norm": 0.6672261357307434, |
| "learning_rate": 8.274589391863583e-05, |
| "loss": 0.1919, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.0428724544480172, |
| "grad_norm": 0.9803467988967896, |
| "learning_rate": 8.243048074933634e-05, |
| "loss": 0.2218, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.0500178635226867, |
| "grad_norm": 1.482840657234192, |
| "learning_rate": 8.21128239220353e-05, |
| "loss": 0.2556, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.057163272597356, |
| "grad_norm": 1.0589625835418701, |
| "learning_rate": 8.179294541338135e-05, |
| "loss": 0.2052, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.0643086816720257, |
| "grad_norm": 0.8332052230834961, |
| "learning_rate": 8.147086735372716e-05, |
| "loss": 0.2386, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.0714540907466952, |
| "grad_norm": 0.6018723845481873, |
| "learning_rate": 8.114661202559828e-05, |
| "loss": 0.1426, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.0714540907466952, |
| "eval_news_finetune_val_loss": 0.30121028423309326, |
| "eval_news_finetune_val_runtime": 1002.7457, |
| "eval_news_finetune_val_samples_per_second": 1.396, |
| "eval_news_finetune_val_steps_per_second": 1.396, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.0785994998213648, |
| "grad_norm": 1.7663507461547852, |
| "learning_rate": 8.082020186215156e-05, |
| "loss": 0.2407, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.0857449088960343, |
| "grad_norm": 1.2081632614135742, |
| "learning_rate": 8.049165944562316e-05, |
| "loss": 0.2483, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.092890317970704, |
| "grad_norm": 0.5045826435089111, |
| "learning_rate": 8.016100750576621e-05, |
| "loss": 0.2013, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.1000357270453733, |
| "grad_norm": 1.4456278085708618, |
| "learning_rate": 7.98282689182783e-05, |
| "loss": 0.2034, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.107181136120043, |
| "grad_norm": 1.1558668613433838, |
| "learning_rate": 7.949346670321891e-05, |
| "loss": 0.2386, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.1143265451947124, |
| "grad_norm": 1.4196126461029053, |
| "learning_rate": 7.915662402341664e-05, |
| "loss": 0.2299, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.1214719542693818, |
| "grad_norm": 0.9341222047805786, |
| "learning_rate": 7.88177641828669e-05, |
| "loss": 0.2105, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.1286173633440515, |
| "grad_norm": 1.066001296043396, |
| "learning_rate": 7.847691062511957e-05, |
| "loss": 0.1925, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.135762772418721, |
| "grad_norm": 0.7840182781219482, |
| "learning_rate": 7.813408693165704e-05, |
| "loss": 0.2425, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.1429081814933906, |
| "grad_norm": 0.983668327331543, |
| "learning_rate": 7.778931682026293e-05, |
| "loss": 0.2014, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.1429081814933906, |
| "eval_news_finetune_val_loss": 0.29564452171325684, |
| "eval_news_finetune_val_runtime": 1003.001, |
| "eval_news_finetune_val_samples_per_second": 1.396, |
| "eval_news_finetune_val_steps_per_second": 1.396, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.15005359056806, |
| "grad_norm": 1.63984215259552, |
| "learning_rate": 7.744262414338099e-05, |
| "loss": 0.2863, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.1571989996427297, |
| "grad_norm": 0.9211621284484863, |
| "learning_rate": 7.709403288646507e-05, |
| "loss": 0.2175, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.164344408717399, |
| "grad_norm": 1.3369996547698975, |
| "learning_rate": 7.67435671663196e-05, |
| "loss": 0.1893, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.1714898177920685, |
| "grad_norm": 0.7532891631126404, |
| "learning_rate": 7.63912512294312e-05, |
| "loss": 0.2483, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.1786352268667382, |
| "grad_norm": 1.0959442853927612, |
| "learning_rate": 7.603710945029119e-05, |
| "loss": 0.1888, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.1857806359414076, |
| "grad_norm": 0.9019472599029541, |
| "learning_rate": 7.568116632970922e-05, |
| "loss": 0.2144, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.1929260450160772, |
| "grad_norm": 1.1219818592071533, |
| "learning_rate": 7.532344649311829e-05, |
| "loss": 0.191, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.2000714540907467, |
| "grad_norm": 1.0829100608825684, |
| "learning_rate": 7.496397468887106e-05, |
| "loss": 0.2762, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.2072168631654163, |
| "grad_norm": 0.7855832576751709, |
| "learning_rate": 7.460277578652759e-05, |
| "loss": 0.157, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.2143622722400857, |
| "grad_norm": 2.407999038696289, |
| "learning_rate": 7.423987477513488e-05, |
| "loss": 0.2627, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.2143622722400857, |
| "eval_news_finetune_val_loss": 0.28248873353004456, |
| "eval_news_finetune_val_runtime": 1003.1081, |
| "eval_news_finetune_val_samples_per_second": 1.396, |
| "eval_news_finetune_val_steps_per_second": 1.396, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.2215076813147552, |
| "grad_norm": 1.5500895977020264, |
| "learning_rate": 7.387529676149799e-05, |
| "loss": 0.1477, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.2286530903894248, |
| "grad_norm": 1.5599130392074585, |
| "learning_rate": 7.350906696844307e-05, |
| "loss": 0.1942, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.2357984994640943, |
| "grad_norm": 1.6327091455459595, |
| "learning_rate": 7.314121073307229e-05, |
| "loss": 0.2, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.242943908538764, |
| "grad_norm": 0.6044666767120361, |
| "learning_rate": 7.277175350501111e-05, |
| "loss": 0.185, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.2500893176134333, |
| "grad_norm": 1.317089319229126, |
| "learning_rate": 7.240072084464729e-05, |
| "loss": 0.196, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.257234726688103, |
| "grad_norm": 1.089105486869812, |
| "learning_rate": 7.202813842136283e-05, |
| "loss": 0.1322, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.2643801357627724, |
| "grad_norm": 1.4972888231277466, |
| "learning_rate": 7.165403201175787e-05, |
| "loss": 0.2176, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.2715255448374418, |
| "grad_norm": 1.4998830556869507, |
| "learning_rate": 7.127842749786747e-05, |
| "loss": 0.218, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.2786709539121115, |
| "grad_norm": 0.9759517908096313, |
| "learning_rate": 7.090135086537095e-05, |
| "loss": 0.1653, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.285816362986781, |
| "grad_norm": 0.9713583588600159, |
| "learning_rate": 7.052282820179412e-05, |
| "loss": 0.175, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.285816362986781, |
| "eval_news_finetune_val_loss": 0.2936909794807434, |
| "eval_news_finetune_val_runtime": 1003.12, |
| "eval_news_finetune_val_samples_per_second": 1.396, |
| "eval_news_finetune_val_steps_per_second": 1.396, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.2929617720614506, |
| "grad_norm": 0.6328814625740051, |
| "learning_rate": 7.014288569470446e-05, |
| "loss": 0.1727, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.30010718113612, |
| "grad_norm": 1.622104525566101, |
| "learning_rate": 6.976154962989934e-05, |
| "loss": 0.2363, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.3072525902107897, |
| "grad_norm": 1.8254674673080444, |
| "learning_rate": 6.937884638958757e-05, |
| "loss": 0.1897, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.314397999285459, |
| "grad_norm": 0.8813793063163757, |
| "learning_rate": 6.899480245056396e-05, |
| "loss": 0.2029, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.3215434083601285, |
| "grad_norm": 0.7675999999046326, |
| "learning_rate": 6.860944438237788e-05, |
| "loss": 0.2025, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.3286888174347982, |
| "grad_norm": 1.1973013877868652, |
| "learning_rate": 6.82227988454948e-05, |
| "loss": 0.2317, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.3358342265094676, |
| "grad_norm": 0.7864009737968445, |
| "learning_rate": 6.783489258945195e-05, |
| "loss": 0.2318, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.3429796355841372, |
| "grad_norm": 1.0866330862045288, |
| "learning_rate": 6.74457524510077e-05, |
| "loss": 0.1871, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.3501250446588067, |
| "grad_norm": 0.8745126724243164, |
| "learning_rate": 6.705540535228485e-05, |
| "loss": 0.211, |
| "step": 1890 |
| }, |
| { |
| "epoch": 1.3572704537334763, |
| "grad_norm": 1.3401581048965454, |
| "learning_rate": 6.66638782989081e-05, |
| "loss": 0.2307, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.3572704537334763, |
| "eval_news_finetune_val_loss": 0.2787444591522217, |
| "eval_news_finetune_val_runtime": 1002.9344, |
| "eval_news_finetune_val_samples_per_second": 1.396, |
| "eval_news_finetune_val_steps_per_second": 1.396, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.3644158628081458, |
| "grad_norm": 0.6149284839630127, |
| "learning_rate": 6.627119837813564e-05, |
| "loss": 0.2128, |
| "step": 1910 |
| }, |
| { |
| "epoch": 1.3715612718828152, |
| "grad_norm": 1.7847625017166138, |
| "learning_rate": 6.587739275698525e-05, |
| "loss": 0.1551, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.3787066809574848, |
| "grad_norm": 1.1973716020584106, |
| "learning_rate": 6.54824886803547e-05, |
| "loss": 0.2335, |
| "step": 1930 |
| }, |
| { |
| "epoch": 1.3858520900321543, |
| "grad_norm": 1.5757859945297241, |
| "learning_rate": 6.508651346913687e-05, |
| "loss": 0.1504, |
| "step": 1940 |
| }, |
| { |
| "epoch": 1.392997499106824, |
| "grad_norm": 1.7269341945648193, |
| "learning_rate": 6.468949451832968e-05, |
| "loss": 0.2679, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.4001429081814933, |
| "grad_norm": 1.6860129833221436, |
| "learning_rate": 6.429145929514063e-05, |
| "loss": 0.1942, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.407288317256163, |
| "grad_norm": 1.1732631921768188, |
| "learning_rate": 6.389243533708671e-05, |
| "loss": 0.2025, |
| "step": 1970 |
| }, |
| { |
| "epoch": 1.4144337263308324, |
| "grad_norm": 0.9073033332824707, |
| "learning_rate": 6.349245025008912e-05, |
| "loss": 0.1836, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.4215791354055018, |
| "grad_norm": 1.133843183517456, |
| "learning_rate": 6.309153170656342e-05, |
| "loss": 0.1526, |
| "step": 1990 |
| }, |
| { |
| "epoch": 1.4287245444801715, |
| "grad_norm": 2.656296968460083, |
| "learning_rate": 6.268970744350515e-05, |
| "loss": 0.1939, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.4287245444801715, |
| "eval_news_finetune_val_loss": 0.27414408326148987, |
| "eval_news_finetune_val_runtime": 1003.0949, |
| "eval_news_finetune_val_samples_per_second": 1.396, |
| "eval_news_finetune_val_steps_per_second": 1.396, |
| "step": 2000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 4197, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.538125336973312e+16, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|