| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.35727045373347627, |
| "eval_steps": 100, |
| "global_step": 500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.007145409074669525, |
| "grad_norm": 4.4086809158325195, |
| "learning_rate": 2.3809523809523808e-06, |
| "loss": 1.0969, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.01429081814933905, |
| "grad_norm": 5.687011241912842, |
| "learning_rate": 4.7619047619047615e-06, |
| "loss": 1.0795, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.021436227224008574, |
| "grad_norm": 1.976590633392334, |
| "learning_rate": 7.142857142857143e-06, |
| "loss": 0.7536, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0285816362986781, |
| "grad_norm": 3.1355409622192383, |
| "learning_rate": 9.523809523809523e-06, |
| "loss": 0.5564, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.03572704537334762, |
| "grad_norm": 2.6710309982299805, |
| "learning_rate": 1.1904761904761905e-05, |
| "loss": 0.623, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.04287245444801715, |
| "grad_norm": 2.8567938804626465, |
| "learning_rate": 1.4285714285714285e-05, |
| "loss": 0.5322, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.050017863522686674, |
| "grad_norm": 3.4388861656188965, |
| "learning_rate": 1.6666666666666667e-05, |
| "loss": 0.5102, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.0571632725973562, |
| "grad_norm": 3.093275308609009, |
| "learning_rate": 1.9047619047619046e-05, |
| "loss": 0.568, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.06430868167202572, |
| "grad_norm": 2.3798677921295166, |
| "learning_rate": 2.1428571428571428e-05, |
| "loss": 0.4883, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.07145409074669525, |
| "grad_norm": 2.846259117126465, |
| "learning_rate": 2.380952380952381e-05, |
| "loss": 0.417, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.07145409074669525, |
| "eval_news_finetune_val_loss": 0.48679304122924805, |
| "eval_news_finetune_val_runtime": 1001.9158, |
| "eval_news_finetune_val_samples_per_second": 1.397, |
| "eval_news_finetune_val_steps_per_second": 1.397, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.07859949982136477, |
| "grad_norm": 1.9387887716293335, |
| "learning_rate": 2.6190476190476192e-05, |
| "loss": 0.4595, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.0857449088960343, |
| "grad_norm": 2.3232853412628174, |
| "learning_rate": 2.857142857142857e-05, |
| "loss": 0.4658, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.09289031797070382, |
| "grad_norm": 2.813093423843384, |
| "learning_rate": 3.095238095238095e-05, |
| "loss": 0.4122, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.10003572704537335, |
| "grad_norm": 1.9588465690612793, |
| "learning_rate": 3.3333333333333335e-05, |
| "loss": 0.4878, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.10718113612004287, |
| "grad_norm": 1.4838117361068726, |
| "learning_rate": 3.571428571428572e-05, |
| "loss": 0.4168, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.1143265451947124, |
| "grad_norm": 3.020738124847412, |
| "learning_rate": 3.809523809523809e-05, |
| "loss": 0.4298, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.12147195426938193, |
| "grad_norm": 2.097656011581421, |
| "learning_rate": 4.047619047619048e-05, |
| "loss": 0.4413, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.12861736334405144, |
| "grad_norm": 1.6332950592041016, |
| "learning_rate": 4.2857142857142856e-05, |
| "loss": 0.3734, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.13576277241872098, |
| "grad_norm": 2.1570417881011963, |
| "learning_rate": 4.523809523809524e-05, |
| "loss": 0.4015, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.1429081814933905, |
| "grad_norm": 1.6941479444503784, |
| "learning_rate": 4.761904761904762e-05, |
| "loss": 0.4411, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.1429081814933905, |
| "eval_news_finetune_val_loss": 0.4338369369506836, |
| "eval_news_finetune_val_runtime": 1002.1695, |
| "eval_news_finetune_val_samples_per_second": 1.397, |
| "eval_news_finetune_val_steps_per_second": 1.397, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.15005359056806003, |
| "grad_norm": 2.3582301139831543, |
| "learning_rate": 5e-05, |
| "loss": 0.3697, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.15719899964272954, |
| "grad_norm": 2.0517632961273193, |
| "learning_rate": 5.2380952380952384e-05, |
| "loss": 0.4076, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.16434440871739908, |
| "grad_norm": 1.3338748216629028, |
| "learning_rate": 5.4761904761904766e-05, |
| "loss": 0.3307, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.1714898177920686, |
| "grad_norm": 3.0515363216400146, |
| "learning_rate": 5.714285714285714e-05, |
| "loss": 0.4227, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.17863522686673813, |
| "grad_norm": 2.4899113178253174, |
| "learning_rate": 5.9523809523809524e-05, |
| "loss": 0.4689, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.18578063594140765, |
| "grad_norm": 1.6197255849838257, |
| "learning_rate": 6.19047619047619e-05, |
| "loss": 0.3618, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.19292604501607716, |
| "grad_norm": 1.654628872871399, |
| "learning_rate": 6.428571428571429e-05, |
| "loss": 0.4668, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.2000714540907467, |
| "grad_norm": 1.6470831632614136, |
| "learning_rate": 6.666666666666667e-05, |
| "loss": 0.3525, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.2072168631654162, |
| "grad_norm": 2.640536308288574, |
| "learning_rate": 6.904761904761905e-05, |
| "loss": 0.3707, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.21436227224008575, |
| "grad_norm": 2.3426971435546875, |
| "learning_rate": 7.142857142857143e-05, |
| "loss": 0.4461, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.21436227224008575, |
| "eval_news_finetune_val_loss": 0.40391305088996887, |
| "eval_news_finetune_val_runtime": 1002.5797, |
| "eval_news_finetune_val_samples_per_second": 1.396, |
| "eval_news_finetune_val_steps_per_second": 1.396, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.22150768131475526, |
| "grad_norm": 1.0351321697235107, |
| "learning_rate": 7.380952380952382e-05, |
| "loss": 0.3439, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.2286530903894248, |
| "grad_norm": 3.062483549118042, |
| "learning_rate": 7.619047619047618e-05, |
| "loss": 0.4492, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.2357984994640943, |
| "grad_norm": 2.095825672149658, |
| "learning_rate": 7.857142857142858e-05, |
| "loss": 0.3399, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.24294390853876385, |
| "grad_norm": 1.700642704963684, |
| "learning_rate": 8.095238095238096e-05, |
| "loss": 0.4336, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.2500893176134334, |
| "grad_norm": 1.6802127361297607, |
| "learning_rate": 8.333333333333334e-05, |
| "loss": 0.3628, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.2572347266881029, |
| "grad_norm": 1.1725817918777466, |
| "learning_rate": 8.571428571428571e-05, |
| "loss": 0.4113, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.2643801357627724, |
| "grad_norm": 1.0182325839996338, |
| "learning_rate": 8.80952380952381e-05, |
| "loss": 0.4009, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.27152554483744196, |
| "grad_norm": 2.5762252807617188, |
| "learning_rate": 9.047619047619048e-05, |
| "loss": 0.3399, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.27867095391211144, |
| "grad_norm": 1.5393809080123901, |
| "learning_rate": 9.285714285714286e-05, |
| "loss": 0.326, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.285816362986781, |
| "grad_norm": 2.3259921073913574, |
| "learning_rate": 9.523809523809524e-05, |
| "loss": 0.4228, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.285816362986781, |
| "eval_news_finetune_val_loss": 0.39322975277900696, |
| "eval_news_finetune_val_runtime": 1002.8865, |
| "eval_news_finetune_val_samples_per_second": 1.396, |
| "eval_news_finetune_val_steps_per_second": 1.396, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.2929617720614505, |
| "grad_norm": 0.9278184771537781, |
| "learning_rate": 9.761904761904762e-05, |
| "loss": 0.3184, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.30010718113612006, |
| "grad_norm": 1.4571782350540161, |
| "learning_rate": 0.0001, |
| "loss": 0.473, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.30725259021078954, |
| "grad_norm": 1.6199829578399658, |
| "learning_rate": 9.99982704095424e-05, |
| "loss": 0.392, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.3143979992854591, |
| "grad_norm": 1.302309513092041, |
| "learning_rate": 9.999308175782893e-05, |
| "loss": 0.3824, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.3215434083601286, |
| "grad_norm": 1.438289761543274, |
| "learning_rate": 9.998443440382927e-05, |
| "loss": 0.4001, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.32868881743479816, |
| "grad_norm": 1.7557189464569092, |
| "learning_rate": 9.997232894579868e-05, |
| "loss": 0.4144, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.33583422650946765, |
| "grad_norm": 0.9362027645111084, |
| "learning_rate": 9.995676622123655e-05, |
| "loss": 0.3094, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.3429796355841372, |
| "grad_norm": 1.7850221395492554, |
| "learning_rate": 9.993774730682845e-05, |
| "loss": 0.2966, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.35012504465880673, |
| "grad_norm": 1.705842137336731, |
| "learning_rate": 9.991527351837174e-05, |
| "loss": 0.3274, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.35727045373347627, |
| "grad_norm": 1.0722746849060059, |
| "learning_rate": 9.988934641068436e-05, |
| "loss": 0.4301, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.35727045373347627, |
| "eval_news_finetune_val_loss": 0.3787713646888733, |
| "eval_news_finetune_val_runtime": 1002.8588, |
| "eval_news_finetune_val_samples_per_second": 1.396, |
| "eval_news_finetune_val_steps_per_second": 1.396, |
| "step": 500 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 4197, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.392907957026816e+16, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|