{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.35727045373347627, "eval_steps": 100, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.007145409074669525, "grad_norm": 4.4086809158325195, "learning_rate": 2.3809523809523808e-06, "loss": 1.0969, "step": 10 }, { "epoch": 0.01429081814933905, "grad_norm": 5.687011241912842, "learning_rate": 4.7619047619047615e-06, "loss": 1.0795, "step": 20 }, { "epoch": 0.021436227224008574, "grad_norm": 1.976590633392334, "learning_rate": 7.142857142857143e-06, "loss": 0.7536, "step": 30 }, { "epoch": 0.0285816362986781, "grad_norm": 3.1355409622192383, "learning_rate": 9.523809523809523e-06, "loss": 0.5564, "step": 40 }, { "epoch": 0.03572704537334762, "grad_norm": 2.6710309982299805, "learning_rate": 1.1904761904761905e-05, "loss": 0.623, "step": 50 }, { "epoch": 0.04287245444801715, "grad_norm": 2.8567938804626465, "learning_rate": 1.4285714285714285e-05, "loss": 0.5322, "step": 60 }, { "epoch": 0.050017863522686674, "grad_norm": 3.4388861656188965, "learning_rate": 1.6666666666666667e-05, "loss": 0.5102, "step": 70 }, { "epoch": 0.0571632725973562, "grad_norm": 3.093275308609009, "learning_rate": 1.9047619047619046e-05, "loss": 0.568, "step": 80 }, { "epoch": 0.06430868167202572, "grad_norm": 2.3798677921295166, "learning_rate": 2.1428571428571428e-05, "loss": 0.4883, "step": 90 }, { "epoch": 0.07145409074669525, "grad_norm": 2.846259117126465, "learning_rate": 2.380952380952381e-05, "loss": 0.417, "step": 100 }, { "epoch": 0.07145409074669525, "eval_news_finetune_val_loss": 0.48679304122924805, "eval_news_finetune_val_runtime": 1001.9158, "eval_news_finetune_val_samples_per_second": 1.397, "eval_news_finetune_val_steps_per_second": 1.397, "step": 100 }, { "epoch": 0.07859949982136477, "grad_norm": 1.9387887716293335, "learning_rate": 2.6190476190476192e-05, "loss": 0.4595, "step": 110 }, { "epoch": 0.0857449088960343, "grad_norm": 2.3232853412628174, "learning_rate": 2.857142857142857e-05, "loss": 0.4658, "step": 120 }, { "epoch": 0.09289031797070382, "grad_norm": 2.813093423843384, "learning_rate": 3.095238095238095e-05, "loss": 0.4122, "step": 130 }, { "epoch": 0.10003572704537335, "grad_norm": 1.9588465690612793, "learning_rate": 3.3333333333333335e-05, "loss": 0.4878, "step": 140 }, { "epoch": 0.10718113612004287, "grad_norm": 1.4838117361068726, "learning_rate": 3.571428571428572e-05, "loss": 0.4168, "step": 150 }, { "epoch": 0.1143265451947124, "grad_norm": 3.020738124847412, "learning_rate": 3.809523809523809e-05, "loss": 0.4298, "step": 160 }, { "epoch": 0.12147195426938193, "grad_norm": 2.097656011581421, "learning_rate": 4.047619047619048e-05, "loss": 0.4413, "step": 170 }, { "epoch": 0.12861736334405144, "grad_norm": 1.6332950592041016, "learning_rate": 4.2857142857142856e-05, "loss": 0.3734, "step": 180 }, { "epoch": 0.13576277241872098, "grad_norm": 2.1570417881011963, "learning_rate": 4.523809523809524e-05, "loss": 0.4015, "step": 190 }, { "epoch": 0.1429081814933905, "grad_norm": 1.6941479444503784, "learning_rate": 4.761904761904762e-05, "loss": 0.4411, "step": 200 }, { "epoch": 0.1429081814933905, "eval_news_finetune_val_loss": 0.4338369369506836, "eval_news_finetune_val_runtime": 1002.1695, "eval_news_finetune_val_samples_per_second": 1.397, "eval_news_finetune_val_steps_per_second": 1.397, "step": 200 }, { "epoch": 0.15005359056806003, "grad_norm": 2.3582301139831543, "learning_rate": 5e-05, "loss": 0.3697, "step": 210 }, { "epoch": 0.15719899964272954, "grad_norm": 2.0517632961273193, "learning_rate": 5.2380952380952384e-05, "loss": 0.4076, "step": 220 }, { "epoch": 0.16434440871739908, "grad_norm": 1.3338748216629028, "learning_rate": 5.4761904761904766e-05, "loss": 0.3307, "step": 230 }, { "epoch": 0.1714898177920686, "grad_norm": 3.0515363216400146, "learning_rate": 5.714285714285714e-05, "loss": 0.4227, "step": 240 }, { "epoch": 0.17863522686673813, "grad_norm": 2.4899113178253174, "learning_rate": 5.9523809523809524e-05, "loss": 0.4689, "step": 250 }, { "epoch": 0.18578063594140765, "grad_norm": 1.6197255849838257, "learning_rate": 6.19047619047619e-05, "loss": 0.3618, "step": 260 }, { "epoch": 0.19292604501607716, "grad_norm": 1.654628872871399, "learning_rate": 6.428571428571429e-05, "loss": 0.4668, "step": 270 }, { "epoch": 0.2000714540907467, "grad_norm": 1.6470831632614136, "learning_rate": 6.666666666666667e-05, "loss": 0.3525, "step": 280 }, { "epoch": 0.2072168631654162, "grad_norm": 2.640536308288574, "learning_rate": 6.904761904761905e-05, "loss": 0.3707, "step": 290 }, { "epoch": 0.21436227224008575, "grad_norm": 2.3426971435546875, "learning_rate": 7.142857142857143e-05, "loss": 0.4461, "step": 300 }, { "epoch": 0.21436227224008575, "eval_news_finetune_val_loss": 0.40391305088996887, "eval_news_finetune_val_runtime": 1002.5797, "eval_news_finetune_val_samples_per_second": 1.396, "eval_news_finetune_val_steps_per_second": 1.396, "step": 300 }, { "epoch": 0.22150768131475526, "grad_norm": 1.0351321697235107, "learning_rate": 7.380952380952382e-05, "loss": 0.3439, "step": 310 }, { "epoch": 0.2286530903894248, "grad_norm": 3.062483549118042, "learning_rate": 7.619047619047618e-05, "loss": 0.4492, "step": 320 }, { "epoch": 0.2357984994640943, "grad_norm": 2.095825672149658, "learning_rate": 7.857142857142858e-05, "loss": 0.3399, "step": 330 }, { "epoch": 0.24294390853876385, "grad_norm": 1.700642704963684, "learning_rate": 8.095238095238096e-05, "loss": 0.4336, "step": 340 }, { "epoch": 0.2500893176134334, "grad_norm": 1.6802127361297607, "learning_rate": 8.333333333333334e-05, "loss": 0.3628, "step": 350 }, { "epoch": 0.2572347266881029, "grad_norm": 1.1725817918777466, "learning_rate": 8.571428571428571e-05, "loss": 0.4113, "step": 360 }, { "epoch": 0.2643801357627724, "grad_norm": 1.0182325839996338, "learning_rate": 8.80952380952381e-05, "loss": 0.4009, "step": 370 }, { "epoch": 0.27152554483744196, "grad_norm": 2.5762252807617188, "learning_rate": 9.047619047619048e-05, "loss": 0.3399, "step": 380 }, { "epoch": 0.27867095391211144, "grad_norm": 1.5393809080123901, "learning_rate": 9.285714285714286e-05, "loss": 0.326, "step": 390 }, { "epoch": 0.285816362986781, "grad_norm": 2.3259921073913574, "learning_rate": 9.523809523809524e-05, "loss": 0.4228, "step": 400 }, { "epoch": 0.285816362986781, "eval_news_finetune_val_loss": 0.39322975277900696, "eval_news_finetune_val_runtime": 1002.8865, "eval_news_finetune_val_samples_per_second": 1.396, "eval_news_finetune_val_steps_per_second": 1.396, "step": 400 }, { "epoch": 0.2929617720614505, "grad_norm": 0.9278184771537781, "learning_rate": 9.761904761904762e-05, "loss": 0.3184, "step": 410 }, { "epoch": 0.30010718113612006, "grad_norm": 1.4571782350540161, "learning_rate": 0.0001, "loss": 0.473, "step": 420 }, { "epoch": 0.30725259021078954, "grad_norm": 1.6199829578399658, "learning_rate": 9.99982704095424e-05, "loss": 0.392, "step": 430 }, { "epoch": 0.3143979992854591, "grad_norm": 1.302309513092041, "learning_rate": 9.999308175782893e-05, "loss": 0.3824, "step": 440 }, { "epoch": 0.3215434083601286, "grad_norm": 1.438289761543274, "learning_rate": 9.998443440382927e-05, "loss": 0.4001, "step": 450 }, { "epoch": 0.32868881743479816, "grad_norm": 1.7557189464569092, "learning_rate": 9.997232894579868e-05, "loss": 0.4144, "step": 460 }, { "epoch": 0.33583422650946765, "grad_norm": 0.9362027645111084, "learning_rate": 9.995676622123655e-05, "loss": 0.3094, "step": 470 }, { "epoch": 0.3429796355841372, "grad_norm": 1.7850221395492554, "learning_rate": 9.993774730682845e-05, "loss": 0.2966, "step": 480 }, { "epoch": 0.35012504465880673, "grad_norm": 1.705842137336731, "learning_rate": 9.991527351837174e-05, "loss": 0.3274, "step": 490 }, { "epoch": 0.35727045373347627, "grad_norm": 1.0722746849060059, "learning_rate": 9.988934641068436e-05, "loss": 0.4301, "step": 500 }, { "epoch": 0.35727045373347627, "eval_news_finetune_val_loss": 0.3787713646888733, "eval_news_finetune_val_runtime": 1002.8588, "eval_news_finetune_val_samples_per_second": 1.396, "eval_news_finetune_val_steps_per_second": 1.396, "step": 500 } ], "logging_steps": 10, "max_steps": 4197, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.392907957026816e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }