| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.8676036786395974, |
| "eval_steps": 1000.0, |
| "global_step": 5000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00017352073572791948, |
| "grad_norm": 8.0, |
| "learning_rate": 6.920415224913495e-08, |
| "loss": 1.9091681241989136, |
| "step": 1, |
| "token_acc": 0.5288686692981869 |
| }, |
| { |
| "epoch": 0.0008676036786395974, |
| "grad_norm": 9.1875, |
| "learning_rate": 3.460207612456748e-07, |
| "loss": 1.9248077869415283, |
| "step": 5, |
| "token_acc": 0.5311277064784593 |
| }, |
| { |
| "epoch": 0.0017352073572791948, |
| "grad_norm": 8.0625, |
| "learning_rate": 6.920415224913496e-07, |
| "loss": 1.9248884201049805, |
| "step": 10, |
| "token_acc": 0.5290013381031705 |
| }, |
| { |
| "epoch": 0.0026028110359187923, |
| "grad_norm": 8.1875, |
| "learning_rate": 1.0380622837370243e-06, |
| "loss": 1.9355482101440429, |
| "step": 15, |
| "token_acc": 0.5267119264551204 |
| }, |
| { |
| "epoch": 0.0034704147145583897, |
| "grad_norm": 7.6875, |
| "learning_rate": 1.3840830449826992e-06, |
| "loss": 1.9109724044799805, |
| "step": 20, |
| "token_acc": 0.5344387591531053 |
| }, |
| { |
| "epoch": 0.004338018393197987, |
| "grad_norm": 7.5625, |
| "learning_rate": 1.7301038062283736e-06, |
| "loss": 1.9011001586914062, |
| "step": 25, |
| "token_acc": 0.5359598310957124 |
| }, |
| { |
| "epoch": 0.0052056220718375845, |
| "grad_norm": 7.09375, |
| "learning_rate": 2.0761245674740485e-06, |
| "loss": 1.8969675064086915, |
| "step": 30, |
| "token_acc": 0.535208283678928 |
| }, |
| { |
| "epoch": 0.006073225750477182, |
| "grad_norm": 6.375, |
| "learning_rate": 2.4221453287197232e-06, |
| "loss": 1.8716100692749023, |
| "step": 35, |
| "token_acc": 0.5402535722571752 |
| }, |
| { |
| "epoch": 0.006940829429116779, |
| "grad_norm": 5.71875, |
| "learning_rate": 2.7681660899653983e-06, |
| "loss": 1.845738410949707, |
| "step": 40, |
| "token_acc": 0.5421880925293895 |
| }, |
| { |
| "epoch": 0.007808433107756377, |
| "grad_norm": 5.34375, |
| "learning_rate": 3.114186851211073e-06, |
| "loss": 1.8337472915649413, |
| "step": 45, |
| "token_acc": 0.5421731912386869 |
| }, |
| { |
| "epoch": 0.008676036786395974, |
| "grad_norm": 4.875, |
| "learning_rate": 3.4602076124567473e-06, |
| "loss": 1.782250213623047, |
| "step": 50, |
| "token_acc": 0.5486540746507149 |
| }, |
| { |
| "epoch": 0.009543640465035572, |
| "grad_norm": 4.25, |
| "learning_rate": 3.8062283737024224e-06, |
| "loss": 1.7817264556884767, |
| "step": 55, |
| "token_acc": 0.553443922569435 |
| }, |
| { |
| "epoch": 0.010411244143675169, |
| "grad_norm": 4.0, |
| "learning_rate": 4.152249134948097e-06, |
| "loss": 1.7241941452026368, |
| "step": 60, |
| "token_acc": 0.562488997223915 |
| }, |
| { |
| "epoch": 0.011278847822314766, |
| "grad_norm": 3.46875, |
| "learning_rate": 4.498269896193772e-06, |
| "loss": 1.664463996887207, |
| "step": 65, |
| "token_acc": 0.5773684070111784 |
| }, |
| { |
| "epoch": 0.012146451500954364, |
| "grad_norm": 3.125, |
| "learning_rate": 4.8442906574394464e-06, |
| "loss": 1.6484092712402343, |
| "step": 70, |
| "token_acc": 0.5734682375674297 |
| }, |
| { |
| "epoch": 0.013014055179593961, |
| "grad_norm": 2.8125, |
| "learning_rate": 5.190311418685121e-06, |
| "loss": 1.6332122802734375, |
| "step": 75, |
| "token_acc": 0.5776690475867771 |
| }, |
| { |
| "epoch": 0.013881658858233559, |
| "grad_norm": 2.59375, |
| "learning_rate": 5.536332179930797e-06, |
| "loss": 1.6145336151123046, |
| "step": 80, |
| "token_acc": 0.5818942893134579 |
| }, |
| { |
| "epoch": 0.014749262536873156, |
| "grad_norm": 2.515625, |
| "learning_rate": 5.882352941176471e-06, |
| "loss": 1.5732461929321289, |
| "step": 85, |
| "token_acc": 0.5861122807970769 |
| }, |
| { |
| "epoch": 0.015616866215512754, |
| "grad_norm": 2.28125, |
| "learning_rate": 6.228373702422146e-06, |
| "loss": 1.5478083610534668, |
| "step": 90, |
| "token_acc": 0.5937751867175326 |
| }, |
| { |
| "epoch": 0.01648446989415235, |
| "grad_norm": 2.34375, |
| "learning_rate": 6.57439446366782e-06, |
| "loss": 1.5324504852294922, |
| "step": 95, |
| "token_acc": 0.5946228348671784 |
| }, |
| { |
| "epoch": 0.01735207357279195, |
| "grad_norm": 2.3125, |
| "learning_rate": 6.9204152249134946e-06, |
| "loss": 1.5145973205566405, |
| "step": 100, |
| "token_acc": 0.5947611081643478 |
| }, |
| { |
| "epoch": 0.018219677251431546, |
| "grad_norm": 2.25, |
| "learning_rate": 7.2664359861591705e-06, |
| "loss": 1.4743114471435548, |
| "step": 105, |
| "token_acc": 0.6041969950833631 |
| }, |
| { |
| "epoch": 0.019087280930071143, |
| "grad_norm": 2.34375, |
| "learning_rate": 7.612456747404845e-06, |
| "loss": 1.4888650894165039, |
| "step": 110, |
| "token_acc": 0.6016704100708781 |
| }, |
| { |
| "epoch": 0.01995488460871074, |
| "grad_norm": 2.3125, |
| "learning_rate": 7.958477508650519e-06, |
| "loss": 1.450081729888916, |
| "step": 115, |
| "token_acc": 0.6071094352086315 |
| }, |
| { |
| "epoch": 0.020822488287350338, |
| "grad_norm": 2.4375, |
| "learning_rate": 8.304498269896194e-06, |
| "loss": 1.453689956665039, |
| "step": 120, |
| "token_acc": 0.6067426531172755 |
| }, |
| { |
| "epoch": 0.021690091965989935, |
| "grad_norm": 2.25, |
| "learning_rate": 8.65051903114187e-06, |
| "loss": 1.4380900382995605, |
| "step": 125, |
| "token_acc": 0.6129145486928147 |
| }, |
| { |
| "epoch": 0.022557695644629533, |
| "grad_norm": 2.296875, |
| "learning_rate": 8.996539792387544e-06, |
| "loss": 1.4033380508422852, |
| "step": 130, |
| "token_acc": 0.6155833692194423 |
| }, |
| { |
| "epoch": 0.02342529932326913, |
| "grad_norm": 2.125, |
| "learning_rate": 9.34256055363322e-06, |
| "loss": 1.3930879592895509, |
| "step": 135, |
| "token_acc": 0.6180772126035058 |
| }, |
| { |
| "epoch": 0.024292903001908728, |
| "grad_norm": 1.96875, |
| "learning_rate": 9.688581314878893e-06, |
| "loss": 1.3432926177978515, |
| "step": 140, |
| "token_acc": 0.6306223488778284 |
| }, |
| { |
| "epoch": 0.025160506680548325, |
| "grad_norm": 2.234375, |
| "learning_rate": 1.0034602076124568e-05, |
| "loss": 1.39078369140625, |
| "step": 145, |
| "token_acc": 0.6173330823630644 |
| }, |
| { |
| "epoch": 0.026028110359187923, |
| "grad_norm": 2.28125, |
| "learning_rate": 1.0380622837370241e-05, |
| "loss": 1.3656013488769532, |
| "step": 150, |
| "token_acc": 0.6226175175695261 |
| }, |
| { |
| "epoch": 0.02689571403782752, |
| "grad_norm": 2.296875, |
| "learning_rate": 1.0726643598615918e-05, |
| "loss": 1.3753274917602538, |
| "step": 155, |
| "token_acc": 0.6213831896726154 |
| }, |
| { |
| "epoch": 0.027763317716467117, |
| "grad_norm": 2.046875, |
| "learning_rate": 1.1072664359861593e-05, |
| "loss": 1.3534158706665038, |
| "step": 160, |
| "token_acc": 0.6270216023570886 |
| }, |
| { |
| "epoch": 0.028630921395106715, |
| "grad_norm": 2.203125, |
| "learning_rate": 1.1418685121107267e-05, |
| "loss": 1.3482056617736817, |
| "step": 165, |
| "token_acc": 0.6246836055823732 |
| }, |
| { |
| "epoch": 0.029498525073746312, |
| "grad_norm": 2.265625, |
| "learning_rate": 1.1764705882352942e-05, |
| "loss": 1.3297002792358399, |
| "step": 170, |
| "token_acc": 0.6299395312649295 |
| }, |
| { |
| "epoch": 0.03036612875238591, |
| "grad_norm": 2.171875, |
| "learning_rate": 1.2110726643598615e-05, |
| "loss": 1.3250617980957031, |
| "step": 175, |
| "token_acc": 0.6287991301119186 |
| }, |
| { |
| "epoch": 0.031233732431025507, |
| "grad_norm": 2.140625, |
| "learning_rate": 1.2456747404844292e-05, |
| "loss": 1.32503080368042, |
| "step": 180, |
| "token_acc": 0.6283811790503951 |
| }, |
| { |
| "epoch": 0.032101336109665105, |
| "grad_norm": 1.984375, |
| "learning_rate": 1.2802768166089967e-05, |
| "loss": 1.3253366470336914, |
| "step": 185, |
| "token_acc": 0.6305334557323541 |
| }, |
| { |
| "epoch": 0.0329689397883047, |
| "grad_norm": 2.171875, |
| "learning_rate": 1.314878892733564e-05, |
| "loss": 1.3164624214172362, |
| "step": 190, |
| "token_acc": 0.6327720036837259 |
| }, |
| { |
| "epoch": 0.0338365434669443, |
| "grad_norm": 2.203125, |
| "learning_rate": 1.3494809688581316e-05, |
| "loss": 1.2994074821472168, |
| "step": 195, |
| "token_acc": 0.6317387471841726 |
| }, |
| { |
| "epoch": 0.0347041471455839, |
| "grad_norm": 2.203125, |
| "learning_rate": 1.3840830449826989e-05, |
| "loss": 1.3055123329162597, |
| "step": 200, |
| "token_acc": 0.6311878838826895 |
| }, |
| { |
| "epoch": 0.035571750824223494, |
| "grad_norm": 2.25, |
| "learning_rate": 1.4186851211072666e-05, |
| "loss": 1.2885337829589845, |
| "step": 205, |
| "token_acc": 0.6344415604742465 |
| }, |
| { |
| "epoch": 0.03643935450286309, |
| "grad_norm": 2.40625, |
| "learning_rate": 1.4532871972318341e-05, |
| "loss": 1.2472162246704102, |
| "step": 210, |
| "token_acc": 0.6459379308550766 |
| }, |
| { |
| "epoch": 0.03730695818150269, |
| "grad_norm": 2.21875, |
| "learning_rate": 1.4878892733564014e-05, |
| "loss": 1.2773432731628418, |
| "step": 215, |
| "token_acc": 0.6338010832102413 |
| }, |
| { |
| "epoch": 0.038174561860142286, |
| "grad_norm": 2.15625, |
| "learning_rate": 1.522491349480969e-05, |
| "loss": 1.272820281982422, |
| "step": 220, |
| "token_acc": 0.6386346675046153 |
| }, |
| { |
| "epoch": 0.039042165538781884, |
| "grad_norm": 2.171875, |
| "learning_rate": 1.5570934256055366e-05, |
| "loss": 1.2626455307006836, |
| "step": 225, |
| "token_acc": 0.6376424039839007 |
| }, |
| { |
| "epoch": 0.03990976921742148, |
| "grad_norm": 2.140625, |
| "learning_rate": 1.5916955017301038e-05, |
| "loss": 1.2593675613403321, |
| "step": 230, |
| "token_acc": 0.639645674135032 |
| }, |
| { |
| "epoch": 0.04077737289606108, |
| "grad_norm": 2.140625, |
| "learning_rate": 1.6262975778546713e-05, |
| "loss": 1.255127239227295, |
| "step": 235, |
| "token_acc": 0.6431021760799492 |
| }, |
| { |
| "epoch": 0.041644976574700676, |
| "grad_norm": 2.125, |
| "learning_rate": 1.6608996539792388e-05, |
| "loss": 1.2209264755249023, |
| "step": 240, |
| "token_acc": 0.6458485917837405 |
| }, |
| { |
| "epoch": 0.042512580253340274, |
| "grad_norm": 2.078125, |
| "learning_rate": 1.6955017301038063e-05, |
| "loss": 1.2057106018066406, |
| "step": 245, |
| "token_acc": 0.653398644744445 |
| }, |
| { |
| "epoch": 0.04338018393197987, |
| "grad_norm": 2.09375, |
| "learning_rate": 1.730103806228374e-05, |
| "loss": 1.254837989807129, |
| "step": 250, |
| "token_acc": 0.6412084543831126 |
| }, |
| { |
| "epoch": 0.04424778761061947, |
| "grad_norm": 2.09375, |
| "learning_rate": 1.7647058823529414e-05, |
| "loss": 1.2420223236083985, |
| "step": 255, |
| "token_acc": 0.6410300956861068 |
| }, |
| { |
| "epoch": 0.045115391289259066, |
| "grad_norm": 2.140625, |
| "learning_rate": 1.799307958477509e-05, |
| "loss": 1.2112503051757812, |
| "step": 260, |
| "token_acc": 0.6482978496344912 |
| }, |
| { |
| "epoch": 0.04598299496789866, |
| "grad_norm": 2.1875, |
| "learning_rate": 1.833910034602076e-05, |
| "loss": 1.2253754615783692, |
| "step": 265, |
| "token_acc": 0.6429915081156616 |
| }, |
| { |
| "epoch": 0.04685059864653826, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.868512110726644e-05, |
| "loss": 1.2027314186096192, |
| "step": 270, |
| "token_acc": 0.6511740530352609 |
| }, |
| { |
| "epoch": 0.04771820232517786, |
| "grad_norm": 2.203125, |
| "learning_rate": 1.9031141868512114e-05, |
| "loss": 1.219920539855957, |
| "step": 275, |
| "token_acc": 0.6463804044677383 |
| }, |
| { |
| "epoch": 0.048585806003817456, |
| "grad_norm": 2.125, |
| "learning_rate": 1.9377162629757786e-05, |
| "loss": 1.1916674613952636, |
| "step": 280, |
| "token_acc": 0.6508475041894454 |
| }, |
| { |
| "epoch": 0.04945340968245705, |
| "grad_norm": 2.046875, |
| "learning_rate": 1.972318339100346e-05, |
| "loss": 1.2134785652160645, |
| "step": 285, |
| "token_acc": 0.6464441609025586 |
| }, |
| { |
| "epoch": 0.05032101336109665, |
| "grad_norm": 2.171875, |
| "learning_rate": 1.9999998353126843e-05, |
| "loss": 1.1799225807189941, |
| "step": 290, |
| "token_acc": 0.6547402065668172 |
| }, |
| { |
| "epoch": 0.05118861703973625, |
| "grad_norm": 2.140625, |
| "learning_rate": 1.99999407126232e-05, |
| "loss": 1.2018964767456055, |
| "step": 295, |
| "token_acc": 0.6471151511881963 |
| }, |
| { |
| "epoch": 0.052056220718375845, |
| "grad_norm": 2.203125, |
| "learning_rate": 1.9999800729003996e-05, |
| "loss": 1.1920422554016112, |
| "step": 300, |
| "token_acc": 0.6487302769689172 |
| }, |
| { |
| "epoch": 0.05292382439701544, |
| "grad_norm": 2.140625, |
| "learning_rate": 1.9999578403421912e-05, |
| "loss": 1.1693296432495117, |
| "step": 305, |
| "token_acc": 0.6556802430962789 |
| }, |
| { |
| "epoch": 0.05379142807565504, |
| "grad_norm": 2.15625, |
| "learning_rate": 1.9999273737707648e-05, |
| "loss": 1.1848974227905273, |
| "step": 310, |
| "token_acc": 0.6521950494915804 |
| }, |
| { |
| "epoch": 0.05465903175429464, |
| "grad_norm": 2.21875, |
| "learning_rate": 1.9998886734369936e-05, |
| "loss": 1.1778865814208985, |
| "step": 315, |
| "token_acc": 0.6545266496586913 |
| }, |
| { |
| "epoch": 0.055526635432934235, |
| "grad_norm": 2.0625, |
| "learning_rate": 1.9998417396595508e-05, |
| "loss": 1.1698062896728516, |
| "step": 320, |
| "token_acc": 0.6535174771198589 |
| }, |
| { |
| "epoch": 0.05639423911157383, |
| "grad_norm": 2.140625, |
| "learning_rate": 1.9997865728249043e-05, |
| "loss": 1.158426284790039, |
| "step": 325, |
| "token_acc": 0.6557887720539224 |
| }, |
| { |
| "epoch": 0.05726184279021343, |
| "grad_norm": 2.109375, |
| "learning_rate": 1.999723173387319e-05, |
| "loss": 1.1911964416503906, |
| "step": 330, |
| "token_acc": 0.6506533185984529 |
| }, |
| { |
| "epoch": 0.05812944646885303, |
| "grad_norm": 2.140625, |
| "learning_rate": 1.9996515418688493e-05, |
| "loss": 1.1536369323730469, |
| "step": 335, |
| "token_acc": 0.6576180488329568 |
| }, |
| { |
| "epoch": 0.058997050147492625, |
| "grad_norm": 2.078125, |
| "learning_rate": 1.999571678859333e-05, |
| "loss": 1.1710229873657227, |
| "step": 340, |
| "token_acc": 0.6535538729067681 |
| }, |
| { |
| "epoch": 0.05986465382613222, |
| "grad_norm": 2.15625, |
| "learning_rate": 1.9994835850163926e-05, |
| "loss": 1.1672002792358398, |
| "step": 345, |
| "token_acc": 0.6529600985558826 |
| }, |
| { |
| "epoch": 0.06073225750477182, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.9993872610654236e-05, |
| "loss": 1.1647834777832031, |
| "step": 350, |
| "token_acc": 0.655277021628137 |
| }, |
| { |
| "epoch": 0.06159986118341142, |
| "grad_norm": 2.171875, |
| "learning_rate": 1.9992827077995925e-05, |
| "loss": 1.1796775817871095, |
| "step": 355, |
| "token_acc": 0.653560930884772 |
| }, |
| { |
| "epoch": 0.062467464862051014, |
| "grad_norm": 2.0625, |
| "learning_rate": 1.9991699260798284e-05, |
| "loss": 1.1580224990844727, |
| "step": 360, |
| "token_acc": 0.6587823913419223 |
| }, |
| { |
| "epoch": 0.06333506854069061, |
| "grad_norm": 2.1875, |
| "learning_rate": 1.999048916834817e-05, |
| "loss": 1.1684626579284667, |
| "step": 365, |
| "token_acc": 0.6539958690816651 |
| }, |
| { |
| "epoch": 0.06420267221933021, |
| "grad_norm": 2.25, |
| "learning_rate": 1.9989196810609918e-05, |
| "loss": 1.1673255920410157, |
| "step": 370, |
| "token_acc": 0.6526922661481962 |
| }, |
| { |
| "epoch": 0.0650702758979698, |
| "grad_norm": 2.09375, |
| "learning_rate": 1.9987822198225265e-05, |
| "loss": 1.1529643058776855, |
| "step": 375, |
| "token_acc": 0.6575958831216069 |
| }, |
| { |
| "epoch": 0.0659378795766094, |
| "grad_norm": 1.9921875, |
| "learning_rate": 1.9986365342513266e-05, |
| "loss": 1.1544547080993652, |
| "step": 380, |
| "token_acc": 0.6566502399001392 |
| }, |
| { |
| "epoch": 0.066805483255249, |
| "grad_norm": 2.125, |
| "learning_rate": 1.99848262554702e-05, |
| "loss": 1.169089126586914, |
| "step": 385, |
| "token_acc": 0.65285508142651 |
| }, |
| { |
| "epoch": 0.0676730869338886, |
| "grad_norm": 2.09375, |
| "learning_rate": 1.9983204949769454e-05, |
| "loss": 1.1696990966796874, |
| "step": 390, |
| "token_acc": 0.6517770916361757 |
| }, |
| { |
| "epoch": 0.0685406906125282, |
| "grad_norm": 1.9609375, |
| "learning_rate": 1.998150143876146e-05, |
| "loss": 1.1284924507141114, |
| "step": 395, |
| "token_acc": 0.6655096202085395 |
| }, |
| { |
| "epoch": 0.0694082942911678, |
| "grad_norm": 1.9765625, |
| "learning_rate": 1.9979715736473527e-05, |
| "loss": 1.1364903450012207, |
| "step": 400, |
| "token_acc": 0.6588324591008281 |
| }, |
| { |
| "epoch": 0.07027589796980739, |
| "grad_norm": 2.109375, |
| "learning_rate": 1.9977847857609775e-05, |
| "loss": 1.1590328216552734, |
| "step": 405, |
| "token_acc": 0.6557208069503755 |
| }, |
| { |
| "epoch": 0.07114350164844699, |
| "grad_norm": 2.09375, |
| "learning_rate": 1.9975897817550995e-05, |
| "loss": 1.1176044464111328, |
| "step": 410, |
| "token_acc": 0.665058862001308 |
| }, |
| { |
| "epoch": 0.07201110532708659, |
| "grad_norm": 2.140625, |
| "learning_rate": 1.9973865632354516e-05, |
| "loss": 1.1324227333068848, |
| "step": 415, |
| "token_acc": 0.6612732538710037 |
| }, |
| { |
| "epoch": 0.07287870900572618, |
| "grad_norm": 1.9921875, |
| "learning_rate": 1.9971751318754087e-05, |
| "loss": 1.126877784729004, |
| "step": 420, |
| "token_acc": 0.6609341202232488 |
| }, |
| { |
| "epoch": 0.07374631268436578, |
| "grad_norm": 2.171875, |
| "learning_rate": 1.9969554894159723e-05, |
| "loss": 1.14964017868042, |
| "step": 425, |
| "token_acc": 0.6562090443570724 |
| }, |
| { |
| "epoch": 0.07461391636300538, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.996727637665758e-05, |
| "loss": 1.1191633224487305, |
| "step": 430, |
| "token_acc": 0.6615450810570731 |
| }, |
| { |
| "epoch": 0.07548152004164498, |
| "grad_norm": 2.015625, |
| "learning_rate": 1.9964915785009793e-05, |
| "loss": 1.1374661445617675, |
| "step": 435, |
| "token_acc": 0.660144832126399 |
| }, |
| { |
| "epoch": 0.07634912372028457, |
| "grad_norm": 2.078125, |
| "learning_rate": 1.996247313865432e-05, |
| "loss": 1.1567827224731446, |
| "step": 440, |
| "token_acc": 0.6550488426417499 |
| }, |
| { |
| "epoch": 0.07721672739892417, |
| "grad_norm": 2.078125, |
| "learning_rate": 1.9959948457704793e-05, |
| "loss": 1.1355746269226075, |
| "step": 445, |
| "token_acc": 0.6552852877530336 |
| }, |
| { |
| "epoch": 0.07808433107756377, |
| "grad_norm": 2.078125, |
| "learning_rate": 1.9957341762950346e-05, |
| "loss": 1.1385893821716309, |
| "step": 450, |
| "token_acc": 0.6600633591211301 |
| }, |
| { |
| "epoch": 0.07895193475620337, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.9954653075855445e-05, |
| "loss": 1.1308669090270995, |
| "step": 455, |
| "token_acc": 0.6592446678440429 |
| }, |
| { |
| "epoch": 0.07981953843484296, |
| "grad_norm": 1.9140625, |
| "learning_rate": 1.9951882418559703e-05, |
| "loss": 1.1351963043212892, |
| "step": 460, |
| "token_acc": 0.6637463123076334 |
| }, |
| { |
| "epoch": 0.08068714211348256, |
| "grad_norm": 2.015625, |
| "learning_rate": 1.994902981387771e-05, |
| "loss": 1.1261632919311524, |
| "step": 465, |
| "token_acc": 0.6615251252132753 |
| }, |
| { |
| "epoch": 0.08155474579212216, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.994609528529885e-05, |
| "loss": 1.1392766952514648, |
| "step": 470, |
| "token_acc": 0.6591416589510375 |
| }, |
| { |
| "epoch": 0.08242234947076175, |
| "grad_norm": 1.9609375, |
| "learning_rate": 1.994307885698708e-05, |
| "loss": 1.1046557426452637, |
| "step": 475, |
| "token_acc": 0.6653103722274739 |
| }, |
| { |
| "epoch": 0.08328995314940135, |
| "grad_norm": 2.078125, |
| "learning_rate": 1.9939980553780763e-05, |
| "loss": 1.1288423538208008, |
| "step": 480, |
| "token_acc": 0.6587156961405608 |
| }, |
| { |
| "epoch": 0.08415755682804095, |
| "grad_norm": 2.09375, |
| "learning_rate": 1.993680040119244e-05, |
| "loss": 1.1338699340820313, |
| "step": 485, |
| "token_acc": 0.6593820953616607 |
| }, |
| { |
| "epoch": 0.08502516050668055, |
| "grad_norm": 1.9453125, |
| "learning_rate": 1.9933538425408636e-05, |
| "loss": 1.1204511642456054, |
| "step": 490, |
| "token_acc": 0.6627913185082357 |
| }, |
| { |
| "epoch": 0.08589276418532014, |
| "grad_norm": 1.921875, |
| "learning_rate": 1.9930194653289635e-05, |
| "loss": 1.1193718910217285, |
| "step": 495, |
| "token_acc": 0.6627504181868472 |
| }, |
| { |
| "epoch": 0.08676036786395974, |
| "grad_norm": 1.9921875, |
| "learning_rate": 1.9926769112369263e-05, |
| "loss": 1.1357709884643554, |
| "step": 500, |
| "token_acc": 0.6581859131322919 |
| }, |
| { |
| "epoch": 0.08762797154259934, |
| "grad_norm": 2.078125, |
| "learning_rate": 1.9923261830854655e-05, |
| "loss": 1.1109633445739746, |
| "step": 505, |
| "token_acc": 0.6641862471522357 |
| }, |
| { |
| "epoch": 0.08849557522123894, |
| "grad_norm": 1.953125, |
| "learning_rate": 1.991967283762603e-05, |
| "loss": 1.0808055877685547, |
| "step": 510, |
| "token_acc": 0.6749422782577114 |
| }, |
| { |
| "epoch": 0.08936317889987853, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.9916002162236458e-05, |
| "loss": 1.119293212890625, |
| "step": 515, |
| "token_acc": 0.6636471832848453 |
| }, |
| { |
| "epoch": 0.09023078257851813, |
| "grad_norm": 2.109375, |
| "learning_rate": 1.99122498349116e-05, |
| "loss": 1.1222724914550781, |
| "step": 520, |
| "token_acc": 0.6633997145381635 |
| }, |
| { |
| "epoch": 0.09109838625715773, |
| "grad_norm": 1.9609375, |
| "learning_rate": 1.990841588654947e-05, |
| "loss": 1.133096694946289, |
| "step": 525, |
| "token_acc": 0.660575962862001 |
| }, |
| { |
| "epoch": 0.09196598993579733, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.990450034872018e-05, |
| "loss": 1.1255317687988282, |
| "step": 530, |
| "token_acc": 0.659338407094156 |
| }, |
| { |
| "epoch": 0.09283359361443692, |
| "grad_norm": 2.0, |
| "learning_rate": 1.990050325366568e-05, |
| "loss": 1.1009018898010254, |
| "step": 535, |
| "token_acc": 0.6676672499663481 |
| }, |
| { |
| "epoch": 0.09370119729307652, |
| "grad_norm": 2.0, |
| "learning_rate": 1.9896424634299495e-05, |
| "loss": 1.1132999420166017, |
| "step": 540, |
| "token_acc": 0.662574878385441 |
| }, |
| { |
| "epoch": 0.09456880097171612, |
| "grad_norm": 2.078125, |
| "learning_rate": 1.9892264524206442e-05, |
| "loss": 1.0917093276977539, |
| "step": 545, |
| "token_acc": 0.670899655371247 |
| }, |
| { |
| "epoch": 0.09543640465035572, |
| "grad_norm": 1.953125, |
| "learning_rate": 1.9888022957642365e-05, |
| "loss": 1.0798656463623046, |
| "step": 550, |
| "token_acc": 0.6730398457583547 |
| }, |
| { |
| "epoch": 0.09630400832899531, |
| "grad_norm": 1.9609375, |
| "learning_rate": 1.988369996953386e-05, |
| "loss": 1.1211360931396483, |
| "step": 555, |
| "token_acc": 0.660755798237552 |
| }, |
| { |
| "epoch": 0.09717161200763491, |
| "grad_norm": 1.9375, |
| "learning_rate": 1.987929559547796e-05, |
| "loss": 1.0922590255737306, |
| "step": 560, |
| "token_acc": 0.6691658981863865 |
| }, |
| { |
| "epoch": 0.09803921568627451, |
| "grad_norm": 2.015625, |
| "learning_rate": 1.9874809871741877e-05, |
| "loss": 1.109041404724121, |
| "step": 565, |
| "token_acc": 0.6648318872017354 |
| }, |
| { |
| "epoch": 0.0989068193649141, |
| "grad_norm": 2.078125, |
| "learning_rate": 1.9870242835262665e-05, |
| "loss": 1.1087127685546876, |
| "step": 570, |
| "token_acc": 0.6643103084814841 |
| }, |
| { |
| "epoch": 0.0997744230435537, |
| "grad_norm": 2.015625, |
| "learning_rate": 1.986559452364696e-05, |
| "loss": 1.1010761260986328, |
| "step": 575, |
| "token_acc": 0.6648252984798432 |
| }, |
| { |
| "epoch": 0.1006420267221933, |
| "grad_norm": 1.96875, |
| "learning_rate": 1.986086497517063e-05, |
| "loss": 1.107012939453125, |
| "step": 580, |
| "token_acc": 0.6654558712325808 |
| }, |
| { |
| "epoch": 0.1015096304008329, |
| "grad_norm": 2.109375, |
| "learning_rate": 1.985605422877848e-05, |
| "loss": 1.0979772567749024, |
| "step": 585, |
| "token_acc": 0.6680623147820886 |
| }, |
| { |
| "epoch": 0.1023772340794725, |
| "grad_norm": 2.234375, |
| "learning_rate": 1.9851162324083933e-05, |
| "loss": 1.0830554008483886, |
| "step": 590, |
| "token_acc": 0.6709642543415095 |
| }, |
| { |
| "epoch": 0.10324483775811209, |
| "grad_norm": 1.984375, |
| "learning_rate": 1.984618930136869e-05, |
| "loss": 1.0940834999084472, |
| "step": 595, |
| "token_acc": 0.6654719073768496 |
| }, |
| { |
| "epoch": 0.10411244143675169, |
| "grad_norm": 2.015625, |
| "learning_rate": 1.9841135201582418e-05, |
| "loss": 1.087096881866455, |
| "step": 600, |
| "token_acc": 0.6688288825090731 |
| }, |
| { |
| "epoch": 0.10498004511539129, |
| "grad_norm": 2.046875, |
| "learning_rate": 1.9836000066342396e-05, |
| "loss": 1.0840859413146973, |
| "step": 605, |
| "token_acc": 0.6716854817537411 |
| }, |
| { |
| "epoch": 0.10584764879403089, |
| "grad_norm": 2.140625, |
| "learning_rate": 1.9830783937933172e-05, |
| "loss": 1.1092602729797363, |
| "step": 610, |
| "token_acc": 0.6614420479795627 |
| }, |
| { |
| "epoch": 0.10671525247267048, |
| "grad_norm": 2.140625, |
| "learning_rate": 1.982548685930623e-05, |
| "loss": 1.067424201965332, |
| "step": 615, |
| "token_acc": 0.673647896123652 |
| }, |
| { |
| "epoch": 0.10758285615131008, |
| "grad_norm": 2.046875, |
| "learning_rate": 1.9820108874079626e-05, |
| "loss": 1.072523593902588, |
| "step": 620, |
| "token_acc": 0.670838285674334 |
| }, |
| { |
| "epoch": 0.10845045982994968, |
| "grad_norm": 2.265625, |
| "learning_rate": 1.9814650026537632e-05, |
| "loss": 1.1132768630981444, |
| "step": 625, |
| "token_acc": 0.6627720656963546 |
| }, |
| { |
| "epoch": 0.10931806350858927, |
| "grad_norm": 1.875, |
| "learning_rate": 1.9809110361630356e-05, |
| "loss": 1.081822395324707, |
| "step": 630, |
| "token_acc": 0.6701974000962927 |
| }, |
| { |
| "epoch": 0.11018566718722887, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.9803489924973403e-05, |
| "loss": 1.0843083381652832, |
| "step": 635, |
| "token_acc": 0.670640893606908 |
| }, |
| { |
| "epoch": 0.11105327086586847, |
| "grad_norm": 1.875, |
| "learning_rate": 1.9797788762847474e-05, |
| "loss": 1.1068120002746582, |
| "step": 640, |
| "token_acc": 0.6664943545095905 |
| }, |
| { |
| "epoch": 0.11192087454450807, |
| "grad_norm": 2.15625, |
| "learning_rate": 1.9792006922197983e-05, |
| "loss": 1.090738296508789, |
| "step": 645, |
| "token_acc": 0.6678904842496042 |
| }, |
| { |
| "epoch": 0.11278847822314766, |
| "grad_norm": 2.21875, |
| "learning_rate": 1.97861444506347e-05, |
| "loss": 1.0894213676452638, |
| "step": 650, |
| "token_acc": 0.6677103350040985 |
| }, |
| { |
| "epoch": 0.11365608190178726, |
| "grad_norm": 1.9375, |
| "learning_rate": 1.9780201396431328e-05, |
| "loss": 1.1013753890991211, |
| "step": 655, |
| "token_acc": 0.6645008860011813 |
| }, |
| { |
| "epoch": 0.11452368558042686, |
| "grad_norm": 2.046875, |
| "learning_rate": 1.9774177808525113e-05, |
| "loss": 1.0939213752746582, |
| "step": 660, |
| "token_acc": 0.6666116111982823 |
| }, |
| { |
| "epoch": 0.11539128925906646, |
| "grad_norm": 2.046875, |
| "learning_rate": 1.9768073736516446e-05, |
| "loss": 1.0730672836303712, |
| "step": 665, |
| "token_acc": 0.674365815777946 |
| }, |
| { |
| "epoch": 0.11625889293770605, |
| "grad_norm": 1.9609375, |
| "learning_rate": 1.9761889230668462e-05, |
| "loss": 1.0676060676574708, |
| "step": 670, |
| "token_acc": 0.6705334815226451 |
| }, |
| { |
| "epoch": 0.11712649661634565, |
| "grad_norm": 2.078125, |
| "learning_rate": 1.975562434190661e-05, |
| "loss": 1.0712880134582519, |
| "step": 675, |
| "token_acc": 0.6705970273187744 |
| }, |
| { |
| "epoch": 0.11799410029498525, |
| "grad_norm": 2.15625, |
| "learning_rate": 1.9749279121818235e-05, |
| "loss": 1.1015710830688477, |
| "step": 680, |
| "token_acc": 0.6644264612144223 |
| }, |
| { |
| "epoch": 0.11886170397362485, |
| "grad_norm": 2.15625, |
| "learning_rate": 1.9742853622652176e-05, |
| "loss": 1.0666415214538574, |
| "step": 685, |
| "token_acc": 0.6735260146303254 |
| }, |
| { |
| "epoch": 0.11972930765226444, |
| "grad_norm": 2.015625, |
| "learning_rate": 1.9736347897318303e-05, |
| "loss": 1.1168096542358399, |
| "step": 690, |
| "token_acc": 0.6619707286530484 |
| }, |
| { |
| "epoch": 0.12059691133090404, |
| "grad_norm": 1.9140625, |
| "learning_rate": 1.9729761999387102e-05, |
| "loss": 1.061478042602539, |
| "step": 695, |
| "token_acc": 0.674633270806062 |
| }, |
| { |
| "epoch": 0.12146451500954364, |
| "grad_norm": 2.015625, |
| "learning_rate": 1.9723095983089235e-05, |
| "loss": 1.0845521926879882, |
| "step": 700, |
| "token_acc": 0.666814367237328 |
| }, |
| { |
| "epoch": 0.12233211868818324, |
| "grad_norm": 2.0625, |
| "learning_rate": 1.9716349903315075e-05, |
| "loss": 1.0705391883850097, |
| "step": 705, |
| "token_acc": 0.6704897791192207 |
| }, |
| { |
| "epoch": 0.12319972236682283, |
| "grad_norm": 2.0, |
| "learning_rate": 1.970952381561428e-05, |
| "loss": 1.0789600372314454, |
| "step": 710, |
| "token_acc": 0.6659224188949265 |
| }, |
| { |
| "epoch": 0.12406732604546243, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.9702617776195314e-05, |
| "loss": 1.0921841621398927, |
| "step": 715, |
| "token_acc": 0.6679036012597562 |
| }, |
| { |
| "epoch": 0.12493492972410203, |
| "grad_norm": 2.0, |
| "learning_rate": 1.9695631841924993e-05, |
| "loss": 1.084920597076416, |
| "step": 720, |
| "token_acc": 0.6674108653000473 |
| }, |
| { |
| "epoch": 0.12580253340274164, |
| "grad_norm": 2.109375, |
| "learning_rate": 1.9688566070328018e-05, |
| "loss": 1.0615843772888183, |
| "step": 725, |
| "token_acc": 0.6733901515151515 |
| }, |
| { |
| "epoch": 0.12667013708138122, |
| "grad_norm": 2.046875, |
| "learning_rate": 1.9681420519586502e-05, |
| "loss": 1.0624969482421875, |
| "step": 730, |
| "token_acc": 0.6717009575388738 |
| }, |
| { |
| "epoch": 0.12753774076002083, |
| "grad_norm": 1.953125, |
| "learning_rate": 1.9674195248539482e-05, |
| "loss": 1.0610927581787108, |
| "step": 735, |
| "token_acc": 0.6758764832793959 |
| }, |
| { |
| "epoch": 0.12840534443866042, |
| "grad_norm": 1.984375, |
| "learning_rate": 1.9666890316682443e-05, |
| "loss": 1.0778383255004882, |
| "step": 740, |
| "token_acc": 0.6720665616068805 |
| }, |
| { |
| "epoch": 0.12927294811730003, |
| "grad_norm": 1.9453125, |
| "learning_rate": 1.9659505784166827e-05, |
| "loss": 1.078394317626953, |
| "step": 745, |
| "token_acc": 0.6708379109836813 |
| }, |
| { |
| "epoch": 0.1301405517959396, |
| "grad_norm": 2.125, |
| "learning_rate": 1.965204171179954e-05, |
| "loss": 1.088584041595459, |
| "step": 750, |
| "token_acc": 0.667237308961385 |
| }, |
| { |
| "epoch": 0.13100815547457922, |
| "grad_norm": 2.15625, |
| "learning_rate": 1.9644498161042436e-05, |
| "loss": 1.0937715530395509, |
| "step": 755, |
| "token_acc": 0.6666981577704298 |
| }, |
| { |
| "epoch": 0.1318757591532188, |
| "grad_norm": 1.84375, |
| "learning_rate": 1.9636875194011836e-05, |
| "loss": 1.0754453659057617, |
| "step": 760, |
| "token_acc": 0.6709822832582032 |
| }, |
| { |
| "epoch": 0.13274336283185842, |
| "grad_norm": 2.15625, |
| "learning_rate": 1.9629172873477995e-05, |
| "loss": 1.070410919189453, |
| "step": 765, |
| "token_acc": 0.6699807311459616 |
| }, |
| { |
| "epoch": 0.133610966510498, |
| "grad_norm": 2.015625, |
| "learning_rate": 1.9621391262864597e-05, |
| "loss": 1.0697467803955079, |
| "step": 770, |
| "token_acc": 0.672360857509975 |
| }, |
| { |
| "epoch": 0.13447857018913761, |
| "grad_norm": 2.0625, |
| "learning_rate": 1.961353042624823e-05, |
| "loss": 1.090577983856201, |
| "step": 775, |
| "token_acc": 0.6647531413321472 |
| }, |
| { |
| "epoch": 0.1353461738677772, |
| "grad_norm": 1.953125, |
| "learning_rate": 1.9605590428357853e-05, |
| "loss": 1.0771003723144532, |
| "step": 780, |
| "token_acc": 0.6692675159235669 |
| }, |
| { |
| "epoch": 0.1362137775464168, |
| "grad_norm": 1.9765625, |
| "learning_rate": 1.959757133457427e-05, |
| "loss": 1.0793813705444335, |
| "step": 785, |
| "token_acc": 0.6685078374160277 |
| }, |
| { |
| "epoch": 0.1370813812250564, |
| "grad_norm": 2.09375, |
| "learning_rate": 1.958947321092959e-05, |
| "loss": 1.0954531669616698, |
| "step": 790, |
| "token_acc": 0.666005196025954 |
| }, |
| { |
| "epoch": 0.137948984903696, |
| "grad_norm": 2.078125, |
| "learning_rate": 1.9581296124106682e-05, |
| "loss": 1.049675750732422, |
| "step": 795, |
| "token_acc": 0.6793725574174206 |
| }, |
| { |
| "epoch": 0.1388165885823356, |
| "grad_norm": 1.9921875, |
| "learning_rate": 1.9573040141438625e-05, |
| "loss": 1.0865850448608398, |
| "step": 800, |
| "token_acc": 0.6686926806866836 |
| }, |
| { |
| "epoch": 0.1396841922609752, |
| "grad_norm": 1.96875, |
| "learning_rate": 1.9564705330908155e-05, |
| "loss": 1.0714460372924806, |
| "step": 805, |
| "token_acc": 0.6687210017329085 |
| }, |
| { |
| "epoch": 0.14055179593961478, |
| "grad_norm": 2.109375, |
| "learning_rate": 1.9556291761147106e-05, |
| "loss": 1.0626968383789062, |
| "step": 810, |
| "token_acc": 0.6734635695958513 |
| }, |
| { |
| "epoch": 0.1414193996182544, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.9547799501435848e-05, |
| "loss": 1.078728485107422, |
| "step": 815, |
| "token_acc": 0.6699388135142325 |
| }, |
| { |
| "epoch": 0.14228700329689398, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.9539228621702696e-05, |
| "loss": 1.0764430999755858, |
| "step": 820, |
| "token_acc": 0.6692216671049172 |
| }, |
| { |
| "epoch": 0.1431546069755336, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.9530579192523374e-05, |
| "loss": 1.0595266342163085, |
| "step": 825, |
| "token_acc": 0.6733650861607621 |
| }, |
| { |
| "epoch": 0.14402221065417317, |
| "grad_norm": 1.9609375, |
| "learning_rate": 1.9521851285120393e-05, |
| "loss": 1.0454116821289063, |
| "step": 830, |
| "token_acc": 0.6766406455817306 |
| }, |
| { |
| "epoch": 0.14488981433281278, |
| "grad_norm": 2.015625, |
| "learning_rate": 1.9513044971362494e-05, |
| "loss": 1.0634162902832032, |
| "step": 835, |
| "token_acc": 0.6721870895229326 |
| }, |
| { |
| "epoch": 0.14575741801145237, |
| "grad_norm": 2.015625, |
| "learning_rate": 1.9504160323764032e-05, |
| "loss": 1.0595422744750977, |
| "step": 840, |
| "token_acc": 0.6775388978821892 |
| }, |
| { |
| "epoch": 0.14662502169009198, |
| "grad_norm": 2.078125, |
| "learning_rate": 1.9495197415484397e-05, |
| "loss": 1.082723903656006, |
| "step": 845, |
| "token_acc": 0.668543901058705 |
| }, |
| { |
| "epoch": 0.14749262536873156, |
| "grad_norm": 2.0625, |
| "learning_rate": 1.9486156320327406e-05, |
| "loss": 1.0727534294128418, |
| "step": 850, |
| "token_acc": 0.6706723270354948 |
| }, |
| { |
| "epoch": 0.14836022904737117, |
| "grad_norm": 2.09375, |
| "learning_rate": 1.9477037112740703e-05, |
| "loss": 1.0933048248291015, |
| "step": 855, |
| "token_acc": 0.6638589138214922 |
| }, |
| { |
| "epoch": 0.14922783272601076, |
| "grad_norm": 1.8984375, |
| "learning_rate": 1.9467839867815118e-05, |
| "loss": 1.0769481658935547, |
| "step": 860, |
| "token_acc": 0.6706557839960199 |
| }, |
| { |
| "epoch": 0.15009543640465037, |
| "grad_norm": 1.953125, |
| "learning_rate": 1.9458564661284085e-05, |
| "loss": 1.062359619140625, |
| "step": 865, |
| "token_acc": 0.6725027997050067 |
| }, |
| { |
| "epoch": 0.15096304008328995, |
| "grad_norm": 2.015625, |
| "learning_rate": 1.9449211569523002e-05, |
| "loss": 1.0662097930908203, |
| "step": 870, |
| "token_acc": 0.6707562050881049 |
| }, |
| { |
| "epoch": 0.15183064376192956, |
| "grad_norm": 2.0625, |
| "learning_rate": 1.9439780669548586e-05, |
| "loss": 1.0621366500854492, |
| "step": 875, |
| "token_acc": 0.673269502864129 |
| }, |
| { |
| "epoch": 0.15269824744056915, |
| "grad_norm": 1.8984375, |
| "learning_rate": 1.9430272039018277e-05, |
| "loss": 1.0658045768737794, |
| "step": 880, |
| "token_acc": 0.6731692212416783 |
| }, |
| { |
| "epoch": 0.15356585111920876, |
| "grad_norm": 1.9609375, |
| "learning_rate": 1.942068575622956e-05, |
| "loss": 1.0896780967712403, |
| "step": 885, |
| "token_acc": 0.6671697313899149 |
| }, |
| { |
| "epoch": 0.15443345479784834, |
| "grad_norm": 1.953125, |
| "learning_rate": 1.9411021900119343e-05, |
| "loss": 1.0421188354492188, |
| "step": 890, |
| "token_acc": 0.6814261145654187 |
| }, |
| { |
| "epoch": 0.15530105847648795, |
| "grad_norm": 1.8671875, |
| "learning_rate": 1.94012805502633e-05, |
| "loss": 1.0770461082458496, |
| "step": 895, |
| "token_acc": 0.670378502031211 |
| }, |
| { |
| "epoch": 0.15616866215512754, |
| "grad_norm": 1.9765625, |
| "learning_rate": 1.9391461786875216e-05, |
| "loss": 1.0411422729492188, |
| "step": 900, |
| "token_acc": 0.6799084886073606 |
| }, |
| { |
| "epoch": 0.15703626583376715, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.9381565690806328e-05, |
| "loss": 1.0435258865356445, |
| "step": 905, |
| "token_acc": 0.6792285176667363 |
| }, |
| { |
| "epoch": 0.15790386951240673, |
| "grad_norm": 1.90625, |
| "learning_rate": 1.9371592343544655e-05, |
| "loss": 1.0748100280761719, |
| "step": 910, |
| "token_acc": 0.6707071531575654 |
| }, |
| { |
| "epoch": 0.15877147319104634, |
| "grad_norm": 1.8828125, |
| "learning_rate": 1.9361541827214338e-05, |
| "loss": 1.0855265617370606, |
| "step": 915, |
| "token_acc": 0.667949364401157 |
| }, |
| { |
| "epoch": 0.15963907686968593, |
| "grad_norm": 1.9375, |
| "learning_rate": 1.9351414224574944e-05, |
| "loss": 1.0524426460266114, |
| "step": 920, |
| "token_acc": 0.6748159542907373 |
| }, |
| { |
| "epoch": 0.16050668054832554, |
| "grad_norm": 1.9921875, |
| "learning_rate": 1.9341209619020804e-05, |
| "loss": 1.0575942993164062, |
| "step": 925, |
| "token_acc": 0.6725628566510876 |
| }, |
| { |
| "epoch": 0.16137428422696512, |
| "grad_norm": 1.953125, |
| "learning_rate": 1.9330928094580324e-05, |
| "loss": 1.058868408203125, |
| "step": 930, |
| "token_acc": 0.6738103592539609 |
| }, |
| { |
| "epoch": 0.16224188790560473, |
| "grad_norm": 2.0625, |
| "learning_rate": 1.9320569735915273e-05, |
| "loss": 1.0528675079345704, |
| "step": 935, |
| "token_acc": 0.6737685311378745 |
| }, |
| { |
| "epoch": 0.16310949158424431, |
| "grad_norm": 2.171875, |
| "learning_rate": 1.9310134628320116e-05, |
| "loss": 1.0708015441894532, |
| "step": 940, |
| "token_acc": 0.6706150717308855 |
| }, |
| { |
| "epoch": 0.16397709526288393, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.929962285772128e-05, |
| "loss": 1.0595834732055665, |
| "step": 945, |
| "token_acc": 0.6712688842219362 |
| }, |
| { |
| "epoch": 0.1648446989415235, |
| "grad_norm": 1.953125, |
| "learning_rate": 1.9289034510676483e-05, |
| "loss": 1.0492593765258789, |
| "step": 950, |
| "token_acc": 0.6747535596933187 |
| }, |
| { |
| "epoch": 0.16571230262016312, |
| "grad_norm": 1.9140625, |
| "learning_rate": 1.9278369674373985e-05, |
| "loss": 1.0697070121765138, |
| "step": 955, |
| "token_acc": 0.6718501687702754 |
| }, |
| { |
| "epoch": 0.1665799062988027, |
| "grad_norm": 1.9609375, |
| "learning_rate": 1.9267628436631893e-05, |
| "loss": 1.0314347267150878, |
| "step": 960, |
| "token_acc": 0.6785393180717892 |
| }, |
| { |
| "epoch": 0.16744750997744232, |
| "grad_norm": 2.140625, |
| "learning_rate": 1.9256810885897434e-05, |
| "loss": 1.0667208671569823, |
| "step": 965, |
| "token_acc": 0.6720282411646826 |
| }, |
| { |
| "epoch": 0.1683151136560819, |
| "grad_norm": 1.9140625, |
| "learning_rate": 1.9245917111246205e-05, |
| "loss": 1.0356231689453126, |
| "step": 970, |
| "token_acc": 0.6796255346195098 |
| }, |
| { |
| "epoch": 0.1691827173347215, |
| "grad_norm": 1.9921875, |
| "learning_rate": 1.9234947202381487e-05, |
| "loss": 1.0789193153381347, |
| "step": 975, |
| "token_acc": 0.6698977346968708 |
| }, |
| { |
| "epoch": 0.1700503210133611, |
| "grad_norm": 1.9453125, |
| "learning_rate": 1.922390124963345e-05, |
| "loss": 1.0505391120910645, |
| "step": 980, |
| "token_acc": 0.6759316831814153 |
| }, |
| { |
| "epoch": 0.1709179246920007, |
| "grad_norm": 1.953125, |
| "learning_rate": 1.9212779343958466e-05, |
| "loss": 1.0695667266845703, |
| "step": 985, |
| "token_acc": 0.6721886545823162 |
| }, |
| { |
| "epoch": 0.1717855283706403, |
| "grad_norm": 2.0, |
| "learning_rate": 1.92015815769383e-05, |
| "loss": 1.0540275573730469, |
| "step": 990, |
| "token_acc": 0.6755136400344937 |
| }, |
| { |
| "epoch": 0.1726531320492799, |
| "grad_norm": 1.9921875, |
| "learning_rate": 1.919030804077941e-05, |
| "loss": 1.0307014465332032, |
| "step": 995, |
| "token_acc": 0.6810972040253115 |
| }, |
| { |
| "epoch": 0.17352073572791948, |
| "grad_norm": 1.8828125, |
| "learning_rate": 1.9178958828312146e-05, |
| "loss": 1.067826271057129, |
| "step": 1000, |
| "token_acc": 0.6724737299518053 |
| }, |
| { |
| "epoch": 0.1743883394065591, |
| "grad_norm": 1.953125, |
| "learning_rate": 1.9167534032990024e-05, |
| "loss": 1.0573354721069337, |
| "step": 1005, |
| "token_acc": 0.6738269981618387 |
| }, |
| { |
| "epoch": 0.17525594308519868, |
| "grad_norm": 2.046875, |
| "learning_rate": 1.9156033748888918e-05, |
| "loss": 1.0550942420959473, |
| "step": 1010, |
| "token_acc": 0.6726485901683684 |
| }, |
| { |
| "epoch": 0.1761235467638383, |
| "grad_norm": 1.953125, |
| "learning_rate": 1.9144458070706317e-05, |
| "loss": 1.0487598419189452, |
| "step": 1015, |
| "token_acc": 0.6768756795940558 |
| }, |
| { |
| "epoch": 0.17699115044247787, |
| "grad_norm": 1.984375, |
| "learning_rate": 1.9132807093760523e-05, |
| "loss": 1.0621299743652344, |
| "step": 1020, |
| "token_acc": 0.6733473561667395 |
| }, |
| { |
| "epoch": 0.17785875412111748, |
| "grad_norm": 2.140625, |
| "learning_rate": 1.912108091398988e-05, |
| "loss": 1.052401065826416, |
| "step": 1025, |
| "token_acc": 0.6748749154834347 |
| }, |
| { |
| "epoch": 0.17872635779975707, |
| "grad_norm": 1.921875, |
| "learning_rate": 1.9109279627951978e-05, |
| "loss": 1.0468477249145507, |
| "step": 1030, |
| "token_acc": 0.6755857259832536 |
| }, |
| { |
| "epoch": 0.17959396147839668, |
| "grad_norm": 2.046875, |
| "learning_rate": 1.9097403332822863e-05, |
| "loss": 1.0689468383789062, |
| "step": 1035, |
| "token_acc": 0.670591049218667 |
| }, |
| { |
| "epoch": 0.18046156515703626, |
| "grad_norm": 1.921875, |
| "learning_rate": 1.908545212639622e-05, |
| "loss": 1.0497617721557617, |
| "step": 1040, |
| "token_acc": 0.6754224207406193 |
| }, |
| { |
| "epoch": 0.18132916883567587, |
| "grad_norm": 1.953125, |
| "learning_rate": 1.90734261070826e-05, |
| "loss": 1.0642064094543457, |
| "step": 1045, |
| "token_acc": 0.6719027275714755 |
| }, |
| { |
| "epoch": 0.18219677251431546, |
| "grad_norm": 1.828125, |
| "learning_rate": 1.906132537390857e-05, |
| "loss": 1.0482969284057617, |
| "step": 1050, |
| "token_acc": 0.6774891482197671 |
| }, |
| { |
| "epoch": 0.18306437619295507, |
| "grad_norm": 2.0625, |
| "learning_rate": 1.9049150026515937e-05, |
| "loss": 1.0419374465942384, |
| "step": 1055, |
| "token_acc": 0.6783982416374751 |
| }, |
| { |
| "epoch": 0.18393197987159465, |
| "grad_norm": 1.9296875, |
| "learning_rate": 1.9036900165160895e-05, |
| "loss": 1.047512149810791, |
| "step": 1060, |
| "token_acc": 0.673420406340701 |
| }, |
| { |
| "epoch": 0.18479958355023426, |
| "grad_norm": 2.015625, |
| "learning_rate": 1.9024575890713216e-05, |
| "loss": 1.0479446411132813, |
| "step": 1065, |
| "token_acc": 0.6774471529854157 |
| }, |
| { |
| "epoch": 0.18566718722887385, |
| "grad_norm": 2.109375, |
| "learning_rate": 1.9012177304655418e-05, |
| "loss": 1.0644286155700684, |
| "step": 1070, |
| "token_acc": 0.6712419897903769 |
| }, |
| { |
| "epoch": 0.18653479090751346, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.8999704509081927e-05, |
| "loss": 1.0513483047485352, |
| "step": 1075, |
| "token_acc": 0.6753527477190749 |
| }, |
| { |
| "epoch": 0.18740239458615304, |
| "grad_norm": 1.890625, |
| "learning_rate": 1.8987157606698234e-05, |
| "loss": 1.025481605529785, |
| "step": 1080, |
| "token_acc": 0.6835672249886826 |
| }, |
| { |
| "epoch": 0.18826999826479265, |
| "grad_norm": 1.9453125, |
| "learning_rate": 1.8974536700820062e-05, |
| "loss": 1.0314741134643555, |
| "step": 1085, |
| "token_acc": 0.6798985689043553 |
| }, |
| { |
| "epoch": 0.18913760194343224, |
| "grad_norm": 2.046875, |
| "learning_rate": 1.896184189537249e-05, |
| "loss": 1.0473779678344726, |
| "step": 1090, |
| "token_acc": 0.6763754045307443 |
| }, |
| { |
| "epoch": 0.19000520562207185, |
| "grad_norm": 2.0625, |
| "learning_rate": 1.8949073294889127e-05, |
| "loss": 1.0450904846191407, |
| "step": 1095, |
| "token_acc": 0.6737394957983194 |
| }, |
| { |
| "epoch": 0.19087280930071143, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.8936231004511224e-05, |
| "loss": 1.0552305221557616, |
| "step": 1100, |
| "token_acc": 0.6746180059360228 |
| }, |
| { |
| "epoch": 0.19174041297935104, |
| "grad_norm": 2.15625, |
| "learning_rate": 1.8923315129986838e-05, |
| "loss": 1.0332719802856445, |
| "step": 1105, |
| "token_acc": 0.6815246996363837 |
| }, |
| { |
| "epoch": 0.19260801665799063, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.8910325777669923e-05, |
| "loss": 1.0561046600341797, |
| "step": 1110, |
| "token_acc": 0.6739904907684597 |
| }, |
| { |
| "epoch": 0.19347562033663024, |
| "grad_norm": 1.8828125, |
| "learning_rate": 1.8897263054519498e-05, |
| "loss": 1.0276466369628907, |
| "step": 1115, |
| "token_acc": 0.6814717548158276 |
| }, |
| { |
| "epoch": 0.19434322401526982, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.8884127068098726e-05, |
| "loss": 1.0520359992980957, |
| "step": 1120, |
| "token_acc": 0.675560674842469 |
| }, |
| { |
| "epoch": 0.19521082769390943, |
| "grad_norm": 1.9921875, |
| "learning_rate": 1.8870917926574056e-05, |
| "loss": 1.0623506546020507, |
| "step": 1125, |
| "token_acc": 0.6724925733011843 |
| }, |
| { |
| "epoch": 0.19607843137254902, |
| "grad_norm": 1.921875, |
| "learning_rate": 1.8857635738714316e-05, |
| "loss": 1.050804901123047, |
| "step": 1130, |
| "token_acc": 0.6761054927622447 |
| }, |
| { |
| "epoch": 0.19694603505118863, |
| "grad_norm": 2.0, |
| "learning_rate": 1.884428061388983e-05, |
| "loss": 1.0528631210327148, |
| "step": 1135, |
| "token_acc": 0.6742988058872535 |
| }, |
| { |
| "epoch": 0.1978136387298282, |
| "grad_norm": 1.8671875, |
| "learning_rate": 1.8830852662071507e-05, |
| "loss": 1.0435836791992188, |
| "step": 1140, |
| "token_acc": 0.6766125320533894 |
| }, |
| { |
| "epoch": 0.19868124240846782, |
| "grad_norm": 1.90625, |
| "learning_rate": 1.8817351993829947e-05, |
| "loss": 1.058847713470459, |
| "step": 1145, |
| "token_acc": 0.6732232009828266 |
| }, |
| { |
| "epoch": 0.1995488460871074, |
| "grad_norm": 1.765625, |
| "learning_rate": 1.8803778720334512e-05, |
| "loss": 1.0335227966308593, |
| "step": 1150, |
| "token_acc": 0.6779969283000565 |
| }, |
| { |
| "epoch": 0.20041644976574702, |
| "grad_norm": 2.078125, |
| "learning_rate": 1.8790132953352427e-05, |
| "loss": 1.04959077835083, |
| "step": 1155, |
| "token_acc": 0.6770108354485658 |
| }, |
| { |
| "epoch": 0.2012840534443866, |
| "grad_norm": 1.84375, |
| "learning_rate": 1.8776414805247857e-05, |
| "loss": 1.0455670356750488, |
| "step": 1160, |
| "token_acc": 0.6780289627154183 |
| }, |
| { |
| "epoch": 0.2021516571230262, |
| "grad_norm": 1.9609375, |
| "learning_rate": 1.8762624388980976e-05, |
| "loss": 1.033797264099121, |
| "step": 1165, |
| "token_acc": 0.6787956767884714 |
| }, |
| { |
| "epoch": 0.2030192608016658, |
| "grad_norm": 1.859375, |
| "learning_rate": 1.8748761818107046e-05, |
| "loss": 1.0679737091064454, |
| "step": 1170, |
| "token_acc": 0.6713174689300571 |
| }, |
| { |
| "epoch": 0.2038868644803054, |
| "grad_norm": 1.890625, |
| "learning_rate": 1.8734827206775463e-05, |
| "loss": 1.0490418434143067, |
| "step": 1175, |
| "token_acc": 0.6751949483539963 |
| }, |
| { |
| "epoch": 0.204754468158945, |
| "grad_norm": 1.9296875, |
| "learning_rate": 1.8720820669728846e-05, |
| "loss": 1.0127446174621582, |
| "step": 1180, |
| "token_acc": 0.6857832294389704 |
| }, |
| { |
| "epoch": 0.2056220718375846, |
| "grad_norm": 1.96875, |
| "learning_rate": 1.8706742322302064e-05, |
| "loss": 1.0334016799926757, |
| "step": 1185, |
| "token_acc": 0.6802701904224747 |
| }, |
| { |
| "epoch": 0.20648967551622419, |
| "grad_norm": 1.90625, |
| "learning_rate": 1.8692592280421305e-05, |
| "loss": 1.043479824066162, |
| "step": 1190, |
| "token_acc": 0.6771668797706226 |
| }, |
| { |
| "epoch": 0.2073572791948638, |
| "grad_norm": 1.953125, |
| "learning_rate": 1.8678370660603115e-05, |
| "loss": 1.0523313522338866, |
| "step": 1195, |
| "token_acc": 0.6751031599887095 |
| }, |
| { |
| "epoch": 0.20822488287350338, |
| "grad_norm": 1.9765625, |
| "learning_rate": 1.8664077579953434e-05, |
| "loss": 1.05529727935791, |
| "step": 1200, |
| "token_acc": 0.6741786043282646 |
| }, |
| { |
| "epoch": 0.209092486552143, |
| "grad_norm": 2.140625, |
| "learning_rate": 1.864971315616664e-05, |
| "loss": 1.043968391418457, |
| "step": 1205, |
| "token_acc": 0.6766692503598716 |
| }, |
| { |
| "epoch": 0.20996009023078258, |
| "grad_norm": 2.015625, |
| "learning_rate": 1.8635277507524573e-05, |
| "loss": 1.0732519149780273, |
| "step": 1210, |
| "token_acc": 0.6701793283338767 |
| }, |
| { |
| "epoch": 0.2108276939094222, |
| "grad_norm": 1.8984375, |
| "learning_rate": 1.8620770752895567e-05, |
| "loss": 1.0491312980651855, |
| "step": 1215, |
| "token_acc": 0.6746863348120731 |
| }, |
| { |
| "epoch": 0.21169529758806177, |
| "grad_norm": 2.015625, |
| "learning_rate": 1.860619301173347e-05, |
| "loss": 1.0385177612304688, |
| "step": 1220, |
| "token_acc": 0.6771983724985469 |
| }, |
| { |
| "epoch": 0.21256290126670138, |
| "grad_norm": 2.09375, |
| "learning_rate": 1.8591544404076654e-05, |
| "loss": 1.0225757598876952, |
| "step": 1225, |
| "token_acc": 0.6851136908248575 |
| }, |
| { |
| "epoch": 0.21343050494534097, |
| "grad_norm": 1.890625, |
| "learning_rate": 1.8576825050547033e-05, |
| "loss": 1.0491232872009277, |
| "step": 1230, |
| "token_acc": 0.6755004153380315 |
| }, |
| { |
| "epoch": 0.21429810862398058, |
| "grad_norm": 1.90625, |
| "learning_rate": 1.856203507234907e-05, |
| "loss": 1.0523208618164062, |
| "step": 1235, |
| "token_acc": 0.675254080094805 |
| }, |
| { |
| "epoch": 0.21516571230262016, |
| "grad_norm": 2.0, |
| "learning_rate": 1.8547174591268774e-05, |
| "loss": 1.0285789489746093, |
| "step": 1240, |
| "token_acc": 0.6805194115460195 |
| }, |
| { |
| "epoch": 0.21603331598125977, |
| "grad_norm": 1.796875, |
| "learning_rate": 1.8532243729672707e-05, |
| "loss": 1.0230236053466797, |
| "step": 1245, |
| "token_acc": 0.6832664590042764 |
| }, |
| { |
| "epoch": 0.21690091965989935, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.8517242610506953e-05, |
| "loss": 1.0365596771240235, |
| "step": 1250, |
| "token_acc": 0.678233046932105 |
| }, |
| { |
| "epoch": 0.21776852333853897, |
| "grad_norm": 1.9296875, |
| "learning_rate": 1.8502171357296144e-05, |
| "loss": 1.0360082626342773, |
| "step": 1255, |
| "token_acc": 0.6784880946067773 |
| }, |
| { |
| "epoch": 0.21863612701717855, |
| "grad_norm": 1.8984375, |
| "learning_rate": 1.8487030094142403e-05, |
| "loss": 1.044863796234131, |
| "step": 1260, |
| "token_acc": 0.6762245320026152 |
| }, |
| { |
| "epoch": 0.21950373069581816, |
| "grad_norm": 1.859375, |
| "learning_rate": 1.8471818945724355e-05, |
| "loss": 1.0216045379638672, |
| "step": 1265, |
| "token_acc": 0.6839718075188765 |
| }, |
| { |
| "epoch": 0.22037133437445774, |
| "grad_norm": 2.015625, |
| "learning_rate": 1.845653803729607e-05, |
| "loss": 1.0163522720336915, |
| "step": 1270, |
| "token_acc": 0.6835048168294121 |
| }, |
| { |
| "epoch": 0.22123893805309736, |
| "grad_norm": 1.8359375, |
| "learning_rate": 1.8441187494686055e-05, |
| "loss": 1.0463291168212892, |
| "step": 1275, |
| "token_acc": 0.6759099019331642 |
| }, |
| { |
| "epoch": 0.22210654173173694, |
| "grad_norm": 1.8046875, |
| "learning_rate": 1.8425767444296213e-05, |
| "loss": 1.0286881446838378, |
| "step": 1280, |
| "token_acc": 0.6834346103038309 |
| }, |
| { |
| "epoch": 0.22297414541037655, |
| "grad_norm": 1.9375, |
| "learning_rate": 1.8410278013100803e-05, |
| "loss": 1.0348123550415038, |
| "step": 1285, |
| "token_acc": 0.679287010183677 |
| }, |
| { |
| "epoch": 0.22384174908901613, |
| "grad_norm": 2.0625, |
| "learning_rate": 1.839471932864537e-05, |
| "loss": 1.0408474922180175, |
| "step": 1290, |
| "token_acc": 0.6770663593126929 |
| }, |
| { |
| "epoch": 0.22470935276765575, |
| "grad_norm": 2.0625, |
| "learning_rate": 1.8379091519045737e-05, |
| "loss": 1.0488122940063476, |
| "step": 1295, |
| "token_acc": 0.6739063026626222 |
| }, |
| { |
| "epoch": 0.22557695644629533, |
| "grad_norm": 1.921875, |
| "learning_rate": 1.8363394712986915e-05, |
| "loss": 1.0353066444396972, |
| "step": 1300, |
| "token_acc": 0.6792478688704328 |
| }, |
| { |
| "epoch": 0.22644456012493494, |
| "grad_norm": 2.0, |
| "learning_rate": 1.834762903972207e-05, |
| "loss": 1.0343815803527832, |
| "step": 1305, |
| "token_acc": 0.6786524515782157 |
| }, |
| { |
| "epoch": 0.22731216380357452, |
| "grad_norm": 1.9140625, |
| "learning_rate": 1.8331794629071427e-05, |
| "loss": 1.0241337776184083, |
| "step": 1310, |
| "token_acc": 0.6810138309840513 |
| }, |
| { |
| "epoch": 0.22817976748221414, |
| "grad_norm": 1.9921875, |
| "learning_rate": 1.831589161142124e-05, |
| "loss": 1.0487545013427735, |
| "step": 1315, |
| "token_acc": 0.6746494771055173 |
| }, |
| { |
| "epoch": 0.22904737116085372, |
| "grad_norm": 1.8984375, |
| "learning_rate": 1.8299920117722677e-05, |
| "loss": 1.0491311073303222, |
| "step": 1320, |
| "token_acc": 0.6740286726172584 |
| }, |
| { |
| "epoch": 0.22991497483949333, |
| "grad_norm": 1.7734375, |
| "learning_rate": 1.828388027949078e-05, |
| "loss": 1.0435140609741211, |
| "step": 1325, |
| "token_acc": 0.6763682837492424 |
| }, |
| { |
| "epoch": 0.2307825785181329, |
| "grad_norm": 2.015625, |
| "learning_rate": 1.8267772228803357e-05, |
| "loss": 1.023078155517578, |
| "step": 1330, |
| "token_acc": 0.6799355293097844 |
| }, |
| { |
| "epoch": 0.23165018219677252, |
| "grad_norm": 1.9296875, |
| "learning_rate": 1.82515960982999e-05, |
| "loss": 1.015854835510254, |
| "step": 1335, |
| "token_acc": 0.6842098118535009 |
| }, |
| { |
| "epoch": 0.2325177858754121, |
| "grad_norm": 1.9296875, |
| "learning_rate": 1.8235352021180496e-05, |
| "loss": 1.0593996047973633, |
| "step": 1340, |
| "token_acc": 0.6741832751181426 |
| }, |
| { |
| "epoch": 0.23338538955405172, |
| "grad_norm": 2.0, |
| "learning_rate": 1.821904013120473e-05, |
| "loss": 1.0396366119384766, |
| "step": 1345, |
| "token_acc": 0.6776407492466381 |
| }, |
| { |
| "epoch": 0.2342529932326913, |
| "grad_norm": 1.890625, |
| "learning_rate": 1.8202660562690592e-05, |
| "loss": 1.0485494613647461, |
| "step": 1350, |
| "token_acc": 0.6759969479137384 |
| }, |
| { |
| "epoch": 0.23512059691133091, |
| "grad_norm": 1.9765625, |
| "learning_rate": 1.8186213450513336e-05, |
| "loss": 1.026517391204834, |
| "step": 1355, |
| "token_acc": 0.6813391968138068 |
| }, |
| { |
| "epoch": 0.2359882005899705, |
| "grad_norm": 1.890625, |
| "learning_rate": 1.816969893010442e-05, |
| "loss": 1.041010570526123, |
| "step": 1360, |
| "token_acc": 0.6755975379040209 |
| }, |
| { |
| "epoch": 0.2368558042686101, |
| "grad_norm": 1.9765625, |
| "learning_rate": 1.815311713745036e-05, |
| "loss": 1.0168442726135254, |
| "step": 1365, |
| "token_acc": 0.6804629906694595 |
| }, |
| { |
| "epoch": 0.2377234079472497, |
| "grad_norm": 1.953125, |
| "learning_rate": 1.81364682090916e-05, |
| "loss": 1.025059700012207, |
| "step": 1370, |
| "token_acc": 0.680214399694494 |
| }, |
| { |
| "epoch": 0.2385910116258893, |
| "grad_norm": 1.890625, |
| "learning_rate": 1.811975228212143e-05, |
| "loss": 1.02586030960083, |
| "step": 1375, |
| "token_acc": 0.679387984579139 |
| }, |
| { |
| "epoch": 0.2394586153045289, |
| "grad_norm": 1.96875, |
| "learning_rate": 1.810296949418481e-05, |
| "loss": 1.0357915878295898, |
| "step": 1380, |
| "token_acc": 0.6767545616531072 |
| }, |
| { |
| "epoch": 0.2403262189831685, |
| "grad_norm": 1.8828125, |
| "learning_rate": 1.8086119983477265e-05, |
| "loss": 1.031496810913086, |
| "step": 1385, |
| "token_acc": 0.676317743132888 |
| }, |
| { |
| "epoch": 0.24119382266180808, |
| "grad_norm": 1.8671875, |
| "learning_rate": 1.8069203888743734e-05, |
| "loss": 1.0320685386657715, |
| "step": 1390, |
| "token_acc": 0.6808824724396653 |
| }, |
| { |
| "epoch": 0.2420614263404477, |
| "grad_norm": 1.9765625, |
| "learning_rate": 1.8052221349277445e-05, |
| "loss": 1.044478416442871, |
| "step": 1395, |
| "token_acc": 0.6767207412842042 |
| }, |
| { |
| "epoch": 0.24292903001908728, |
| "grad_norm": 1.875, |
| "learning_rate": 1.803517250491874e-05, |
| "loss": 1.037778091430664, |
| "step": 1400, |
| "token_acc": 0.6757977163281176 |
| }, |
| { |
| "epoch": 0.2437966336977269, |
| "grad_norm": 1.9140625, |
| "learning_rate": 1.801805749605395e-05, |
| "loss": 1.0458430290222167, |
| "step": 1405, |
| "token_acc": 0.6760411743080721 |
| }, |
| { |
| "epoch": 0.24466423737636647, |
| "grad_norm": 1.8984375, |
| "learning_rate": 1.800087646361423e-05, |
| "loss": 1.020294761657715, |
| "step": 1410, |
| "token_acc": 0.6817285303383098 |
| }, |
| { |
| "epoch": 0.24553184105500608, |
| "grad_norm": 1.875, |
| "learning_rate": 1.798362954907439e-05, |
| "loss": 1.0418660163879394, |
| "step": 1415, |
| "token_acc": 0.6780114226375908 |
| }, |
| { |
| "epoch": 0.24639944473364567, |
| "grad_norm": 2.015625, |
| "learning_rate": 1.796631689445174e-05, |
| "loss": 1.0439669609069824, |
| "step": 1420, |
| "token_acc": 0.6750978011601241 |
| }, |
| { |
| "epoch": 0.24726704841228528, |
| "grad_norm": 1.90625, |
| "learning_rate": 1.7948938642304915e-05, |
| "loss": 1.0315986633300782, |
| "step": 1425, |
| "token_acc": 0.6803868088271758 |
| }, |
| { |
| "epoch": 0.24813465209092486, |
| "grad_norm": 1.90625, |
| "learning_rate": 1.793149493573271e-05, |
| "loss": 1.0325140953063965, |
| "step": 1430, |
| "token_acc": 0.6792667142140159 |
| }, |
| { |
| "epoch": 0.24900225576956447, |
| "grad_norm": 1.921875, |
| "learning_rate": 1.791398591837289e-05, |
| "loss": 1.0254653930664062, |
| "step": 1435, |
| "token_acc": 0.6815645499333134 |
| }, |
| { |
| "epoch": 0.24986985944820406, |
| "grad_norm": 1.9453125, |
| "learning_rate": 1.7896411734401008e-05, |
| "loss": 1.042679786682129, |
| "step": 1440, |
| "token_acc": 0.6756525459991441 |
| }, |
| { |
| "epoch": 0.25073746312684364, |
| "grad_norm": 1.953125, |
| "learning_rate": 1.7878772528529232e-05, |
| "loss": 1.0409419059753418, |
| "step": 1445, |
| "token_acc": 0.6742666575920506 |
| }, |
| { |
| "epoch": 0.2516050668054833, |
| "grad_norm": 1.8828125, |
| "learning_rate": 1.7861068446005144e-05, |
| "loss": 1.0194078445434571, |
| "step": 1450, |
| "token_acc": 0.68190224912376 |
| }, |
| { |
| "epoch": 0.25247267048412286, |
| "grad_norm": 2.0, |
| "learning_rate": 1.7843299632610537e-05, |
| "loss": 1.031000518798828, |
| "step": 1455, |
| "token_acc": 0.6809780158582832 |
| }, |
| { |
| "epoch": 0.25334027416276245, |
| "grad_norm": 1.9296875, |
| "learning_rate": 1.782546623466022e-05, |
| "loss": 1.0219725608825683, |
| "step": 1460, |
| "token_acc": 0.6826487625065825 |
| }, |
| { |
| "epoch": 0.25420787784140203, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.7807568399000824e-05, |
| "loss": 1.0241089820861817, |
| "step": 1465, |
| "token_acc": 0.6821418475993054 |
| }, |
| { |
| "epoch": 0.25507548152004167, |
| "grad_norm": 1.921875, |
| "learning_rate": 1.7789606273009574e-05, |
| "loss": 1.010830020904541, |
| "step": 1470, |
| "token_acc": 0.6835254004334725 |
| }, |
| { |
| "epoch": 0.25594308519868125, |
| "grad_norm": 1.921875, |
| "learning_rate": 1.7771580004593093e-05, |
| "loss": 1.045233917236328, |
| "step": 1475, |
| "token_acc": 0.6747018970189702 |
| }, |
| { |
| "epoch": 0.25681068887732084, |
| "grad_norm": 1.96875, |
| "learning_rate": 1.7753489742186164e-05, |
| "loss": 1.011804962158203, |
| "step": 1480, |
| "token_acc": 0.6846772177711121 |
| }, |
| { |
| "epoch": 0.2576782925559604, |
| "grad_norm": 1.921875, |
| "learning_rate": 1.773533563475053e-05, |
| "loss": 1.0484785079956054, |
| "step": 1485, |
| "token_acc": 0.6750959795243682 |
| }, |
| { |
| "epoch": 0.25854589623460006, |
| "grad_norm": 2.0, |
| "learning_rate": 1.771711783177366e-05, |
| "loss": 1.0313974380493165, |
| "step": 1490, |
| "token_acc": 0.6784674492495447 |
| }, |
| { |
| "epoch": 0.25941349991323964, |
| "grad_norm": 1.9921875, |
| "learning_rate": 1.76988364832675e-05, |
| "loss": 1.0448792457580567, |
| "step": 1495, |
| "token_acc": 0.6759750041845674 |
| }, |
| { |
| "epoch": 0.2602811035918792, |
| "grad_norm": 1.9375, |
| "learning_rate": 1.768049173976727e-05, |
| "loss": 1.030049991607666, |
| "step": 1500, |
| "token_acc": 0.6813090211643735 |
| }, |
| { |
| "epoch": 0.2611487072705188, |
| "grad_norm": 1.84375, |
| "learning_rate": 1.7662083752330193e-05, |
| "loss": 1.0194572448730468, |
| "step": 1505, |
| "token_acc": 0.6832258674993579 |
| }, |
| { |
| "epoch": 0.26201631094915845, |
| "grad_norm": 1.9375, |
| "learning_rate": 1.7643612672534275e-05, |
| "loss": 1.0071066856384276, |
| "step": 1510, |
| "token_acc": 0.684999272515641 |
| }, |
| { |
| "epoch": 0.26288391462779803, |
| "grad_norm": 1.9296875, |
| "learning_rate": 1.7625078652477036e-05, |
| "loss": 1.0143555641174316, |
| "step": 1515, |
| "token_acc": 0.683634143031619 |
| }, |
| { |
| "epoch": 0.2637515183064376, |
| "grad_norm": 1.9296875, |
| "learning_rate": 1.760648184477429e-05, |
| "loss": 1.0410999298095702, |
| "step": 1520, |
| "token_acc": 0.6760032102728732 |
| }, |
| { |
| "epoch": 0.2646191219850772, |
| "grad_norm": 1.9921875, |
| "learning_rate": 1.7587822402558837e-05, |
| "loss": 1.0309484481811524, |
| "step": 1525, |
| "token_acc": 0.6798862358621602 |
| }, |
| { |
| "epoch": 0.26548672566371684, |
| "grad_norm": 1.8984375, |
| "learning_rate": 1.756910047947926e-05, |
| "loss": 1.045750045776367, |
| "step": 1530, |
| "token_acc": 0.6779168647335341 |
| }, |
| { |
| "epoch": 0.2663543293423564, |
| "grad_norm": 2.078125, |
| "learning_rate": 1.755031622969862e-05, |
| "loss": 1.0056123733520508, |
| "step": 1535, |
| "token_acc": 0.6844262847741953 |
| }, |
| { |
| "epoch": 0.267221933020996, |
| "grad_norm": 1.875, |
| "learning_rate": 1.7531469807893196e-05, |
| "loss": 1.0222766876220704, |
| "step": 1540, |
| "token_acc": 0.6819466963244851 |
| }, |
| { |
| "epoch": 0.2680895366996356, |
| "grad_norm": 1.9375, |
| "learning_rate": 1.751256136925122e-05, |
| "loss": 1.0223438262939453, |
| "step": 1545, |
| "token_acc": 0.6796833846239153 |
| }, |
| { |
| "epoch": 0.26895714037827523, |
| "grad_norm": 1.9765625, |
| "learning_rate": 1.749359106947158e-05, |
| "loss": 1.0395459175109862, |
| "step": 1550, |
| "token_acc": 0.6780158536915294 |
| }, |
| { |
| "epoch": 0.2698247440569148, |
| "grad_norm": 2.09375, |
| "learning_rate": 1.7474559064762575e-05, |
| "loss": 1.0296743392944336, |
| "step": 1555, |
| "token_acc": 0.6756519151698767 |
| }, |
| { |
| "epoch": 0.2706923477355544, |
| "grad_norm": 1.9453125, |
| "learning_rate": 1.745546551184058e-05, |
| "loss": 1.016903781890869, |
| "step": 1560, |
| "token_acc": 0.6835704451583295 |
| }, |
| { |
| "epoch": 0.271559951414194, |
| "grad_norm": 2.046875, |
| "learning_rate": 1.74363105679288e-05, |
| "loss": 1.020066261291504, |
| "step": 1565, |
| "token_acc": 0.679187746898607 |
| }, |
| { |
| "epoch": 0.2724275550928336, |
| "grad_norm": 2.0, |
| "learning_rate": 1.7417094390755936e-05, |
| "loss": 1.0340109825134278, |
| "step": 1570, |
| "token_acc": 0.677778992239589 |
| }, |
| { |
| "epoch": 0.2732951587714732, |
| "grad_norm": 1.8203125, |
| "learning_rate": 1.739781713855492e-05, |
| "loss": 1.0160035133361816, |
| "step": 1575, |
| "token_acc": 0.681804898783274 |
| }, |
| { |
| "epoch": 0.2741627624501128, |
| "grad_norm": 1.9765625, |
| "learning_rate": 1.7378478970061596e-05, |
| "loss": 1.024774169921875, |
| "step": 1580, |
| "token_acc": 0.680820860552937 |
| }, |
| { |
| "epoch": 0.27503036612875237, |
| "grad_norm": 1.8671875, |
| "learning_rate": 1.735908004451341e-05, |
| "loss": 1.0384547233581543, |
| "step": 1585, |
| "token_acc": 0.6769406692778844 |
| }, |
| { |
| "epoch": 0.275897969807392, |
| "grad_norm": 1.8671875, |
| "learning_rate": 1.7339620521648107e-05, |
| "loss": 1.027394962310791, |
| "step": 1590, |
| "token_acc": 0.6820617131309908 |
| }, |
| { |
| "epoch": 0.2767655734860316, |
| "grad_norm": 1.90625, |
| "learning_rate": 1.7320100561702408e-05, |
| "loss": 1.0266061782836915, |
| "step": 1595, |
| "token_acc": 0.6778099499868386 |
| }, |
| { |
| "epoch": 0.2776331771646712, |
| "grad_norm": 2.046875, |
| "learning_rate": 1.73005203254107e-05, |
| "loss": 1.0057987213134765, |
| "step": 1600, |
| "token_acc": 0.6856035977459904 |
| }, |
| { |
| "epoch": 0.27850078084331076, |
| "grad_norm": 1.921875, |
| "learning_rate": 1.728087997400371e-05, |
| "loss": 1.0396166801452638, |
| "step": 1605, |
| "token_acc": 0.6768935264496704 |
| }, |
| { |
| "epoch": 0.2793683845219504, |
| "grad_norm": 1.765625, |
| "learning_rate": 1.726117966920716e-05, |
| "loss": 1.0311265945434571, |
| "step": 1610, |
| "token_acc": 0.6808383077444412 |
| }, |
| { |
| "epoch": 0.28023598820059, |
| "grad_norm": 1.8671875, |
| "learning_rate": 1.7241419573240463e-05, |
| "loss": 1.0097067832946778, |
| "step": 1615, |
| "token_acc": 0.683870040253019 |
| }, |
| { |
| "epoch": 0.28110359187922956, |
| "grad_norm": 1.96875, |
| "learning_rate": 1.7221599848815374e-05, |
| "loss": 1.0008836746215821, |
| "step": 1620, |
| "token_acc": 0.6870527000650618 |
| }, |
| { |
| "epoch": 0.28197119555786915, |
| "grad_norm": 1.8828125, |
| "learning_rate": 1.7201720659134642e-05, |
| "loss": 1.0405941009521484, |
| "step": 1625, |
| "token_acc": 0.6768849218838519 |
| }, |
| { |
| "epoch": 0.2828387992365088, |
| "grad_norm": 1.796875, |
| "learning_rate": 1.7181782167890678e-05, |
| "loss": 1.0066216468811036, |
| "step": 1630, |
| "token_acc": 0.6848891318550914 |
| }, |
| { |
| "epoch": 0.28370640291514837, |
| "grad_norm": 1.9375, |
| "learning_rate": 1.716178453926421e-05, |
| "loss": 1.046470832824707, |
| "step": 1635, |
| "token_acc": 0.6714027873902482 |
| }, |
| { |
| "epoch": 0.28457400659378795, |
| "grad_norm": 1.9140625, |
| "learning_rate": 1.7141727937922912e-05, |
| "loss": 1.0199688911437987, |
| "step": 1640, |
| "token_acc": 0.6823405115629932 |
| }, |
| { |
| "epoch": 0.28544161027242754, |
| "grad_norm": 1.8828125, |
| "learning_rate": 1.712161252902007e-05, |
| "loss": 1.044092559814453, |
| "step": 1645, |
| "token_acc": 0.6758528428093645 |
| }, |
| { |
| "epoch": 0.2863092139510672, |
| "grad_norm": 1.859375, |
| "learning_rate": 1.7101438478193212e-05, |
| "loss": 1.0233346939086914, |
| "step": 1650, |
| "token_acc": 0.6805489760838082 |
| }, |
| { |
| "epoch": 0.28717681762970676, |
| "grad_norm": 1.9375, |
| "learning_rate": 1.708120595156274e-05, |
| "loss": 1.0456744194030763, |
| "step": 1655, |
| "token_acc": 0.6750241212956581 |
| }, |
| { |
| "epoch": 0.28804442130834634, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.706091511573057e-05, |
| "loss": 1.0319430351257324, |
| "step": 1660, |
| "token_acc": 0.6777862117640792 |
| }, |
| { |
| "epoch": 0.2889120249869859, |
| "grad_norm": 1.9296875, |
| "learning_rate": 1.704056613777876e-05, |
| "loss": 1.0204211235046388, |
| "step": 1665, |
| "token_acc": 0.6796524738028916 |
| }, |
| { |
| "epoch": 0.28977962866562557, |
| "grad_norm": 1.9375, |
| "learning_rate": 1.7020159185268123e-05, |
| "loss": 1.0458597183227538, |
| "step": 1670, |
| "token_acc": 0.6737207077953132 |
| }, |
| { |
| "epoch": 0.29064723234426515, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.6999694426236862e-05, |
| "loss": 1.0375800132751465, |
| "step": 1675, |
| "token_acc": 0.6758920495200551 |
| }, |
| { |
| "epoch": 0.29151483602290473, |
| "grad_norm": 1.953125, |
| "learning_rate": 1.697917202919918e-05, |
| "loss": 1.0144439697265626, |
| "step": 1680, |
| "token_acc": 0.679975894834207 |
| }, |
| { |
| "epoch": 0.2923824397015443, |
| "grad_norm": 1.9609375, |
| "learning_rate": 1.6958592163143884e-05, |
| "loss": 1.0309619903564453, |
| "step": 1685, |
| "token_acc": 0.678642271573428 |
| }, |
| { |
| "epoch": 0.29325004338018396, |
| "grad_norm": 1.953125, |
| "learning_rate": 1.6937954997533016e-05, |
| "loss": 1.0367056846618652, |
| "step": 1690, |
| "token_acc": 0.6776543556428868 |
| }, |
| { |
| "epoch": 0.29411764705882354, |
| "grad_norm": 1.8203125, |
| "learning_rate": 1.691726070230043e-05, |
| "loss": 1.0386839866638184, |
| "step": 1695, |
| "token_acc": 0.6778316736701301 |
| }, |
| { |
| "epoch": 0.2949852507374631, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.689650944785041e-05, |
| "loss": 1.0176087379455567, |
| "step": 1700, |
| "token_acc": 0.6798760737924237 |
| }, |
| { |
| "epoch": 0.2958528544161027, |
| "grad_norm": 1.796875, |
| "learning_rate": 1.6875701405056262e-05, |
| "loss": 1.006351852416992, |
| "step": 1705, |
| "token_acc": 0.6863717464315701 |
| }, |
| { |
| "epoch": 0.29672045809474235, |
| "grad_norm": 1.7890625, |
| "learning_rate": 1.685483674525891e-05, |
| "loss": 1.0238887786865234, |
| "step": 1710, |
| "token_acc": 0.6787703215736074 |
| }, |
| { |
| "epoch": 0.29758806177338193, |
| "grad_norm": 2.046875, |
| "learning_rate": 1.6833915640265485e-05, |
| "loss": 1.0253664016723634, |
| "step": 1715, |
| "token_acc": 0.6786322245940176 |
| }, |
| { |
| "epoch": 0.2984556654520215, |
| "grad_norm": 1.890625, |
| "learning_rate": 1.6812938262347907e-05, |
| "loss": 1.0375401496887207, |
| "step": 1720, |
| "token_acc": 0.677038246903498 |
| }, |
| { |
| "epoch": 0.2993232691306611, |
| "grad_norm": 1.8828125, |
| "learning_rate": 1.6791904784241458e-05, |
| "loss": 1.0252004623413087, |
| "step": 1725, |
| "token_acc": 0.6804137056166104 |
| }, |
| { |
| "epoch": 0.30019087280930074, |
| "grad_norm": 1.921875, |
| "learning_rate": 1.6770815379143385e-05, |
| "loss": 1.010302734375, |
| "step": 1730, |
| "token_acc": 0.6837099330986861 |
| }, |
| { |
| "epoch": 0.3010584764879403, |
| "grad_norm": 1.9609375, |
| "learning_rate": 1.674967022071144e-05, |
| "loss": 1.0301790237426758, |
| "step": 1735, |
| "token_acc": 0.6772626037659445 |
| }, |
| { |
| "epoch": 0.3019260801665799, |
| "grad_norm": 1.8515625, |
| "learning_rate": 1.6728469483062486e-05, |
| "loss": 0.9938658714294434, |
| "step": 1740, |
| "token_acc": 0.6898220909033759 |
| }, |
| { |
| "epoch": 0.3027936838452195, |
| "grad_norm": 1.9453125, |
| "learning_rate": 1.6707213340771028e-05, |
| "loss": 1.0314199447631835, |
| "step": 1745, |
| "token_acc": 0.6770383134840673 |
| }, |
| { |
| "epoch": 0.3036612875238591, |
| "grad_norm": 1.8046875, |
| "learning_rate": 1.6685901968867813e-05, |
| "loss": 1.0129788398742676, |
| "step": 1750, |
| "token_acc": 0.6820457843611499 |
| }, |
| { |
| "epoch": 0.3045288912024987, |
| "grad_norm": 1.9296875, |
| "learning_rate": 1.6664535542838352e-05, |
| "loss": 1.002908420562744, |
| "step": 1755, |
| "token_acc": 0.6864051119594943 |
| }, |
| { |
| "epoch": 0.3053964948811383, |
| "grad_norm": 1.8671875, |
| "learning_rate": 1.6643114238621495e-05, |
| "loss": 1.034525489807129, |
| "step": 1760, |
| "token_acc": 0.6801218196814923 |
| }, |
| { |
| "epoch": 0.3062640985597779, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.6621638232607984e-05, |
| "loss": 1.025135612487793, |
| "step": 1765, |
| "token_acc": 0.6795281498360474 |
| }, |
| { |
| "epoch": 0.3071317022384175, |
| "grad_norm": 1.984375, |
| "learning_rate": 1.6600107701638993e-05, |
| "loss": 1.035383129119873, |
| "step": 1770, |
| "token_acc": 0.6749825634422447 |
| }, |
| { |
| "epoch": 0.3079993059170571, |
| "grad_norm": 1.9296875, |
| "learning_rate": 1.6578522823004666e-05, |
| "loss": 0.9947221755981446, |
| "step": 1775, |
| "token_acc": 0.6872965042273526 |
| }, |
| { |
| "epoch": 0.3088669095956967, |
| "grad_norm": 1.8671875, |
| "learning_rate": 1.6556883774442675e-05, |
| "loss": 1.0022805213928223, |
| "step": 1780, |
| "token_acc": 0.6862549392253107 |
| }, |
| { |
| "epoch": 0.30973451327433627, |
| "grad_norm": 1.9609375, |
| "learning_rate": 1.653519073413675e-05, |
| "loss": 1.0279296875, |
| "step": 1785, |
| "token_acc": 0.6769972826086956 |
| }, |
| { |
| "epoch": 0.3106021169529759, |
| "grad_norm": 1.9296875, |
| "learning_rate": 1.65134438807152e-05, |
| "loss": 1.0212496757507323, |
| "step": 1790, |
| "token_acc": 0.679449427274692 |
| }, |
| { |
| "epoch": 0.3114697206316155, |
| "grad_norm": 1.7265625, |
| "learning_rate": 1.649164339324945e-05, |
| "loss": 1.006572437286377, |
| "step": 1795, |
| "token_acc": 0.6861015265579256 |
| }, |
| { |
| "epoch": 0.31233732431025507, |
| "grad_norm": 1.859375, |
| "learning_rate": 1.646978945125257e-05, |
| "loss": 1.0250924110412598, |
| "step": 1800, |
| "token_acc": 0.6786020029623188 |
| }, |
| { |
| "epoch": 0.31320492798889465, |
| "grad_norm": 2.0, |
| "learning_rate": 1.6447882234677796e-05, |
| "loss": 1.0435279846191405, |
| "step": 1805, |
| "token_acc": 0.6758889509765172 |
| }, |
| { |
| "epoch": 0.3140725316675343, |
| "grad_norm": 1.890625, |
| "learning_rate": 1.6425921923917042e-05, |
| "loss": 1.0279791831970215, |
| "step": 1810, |
| "token_acc": 0.6805610242902337 |
| }, |
| { |
| "epoch": 0.3149401353461739, |
| "grad_norm": 1.90625, |
| "learning_rate": 1.6403908699799423e-05, |
| "loss": 1.02548828125, |
| "step": 1815, |
| "token_acc": 0.6779067427037907 |
| }, |
| { |
| "epoch": 0.31580773902481346, |
| "grad_norm": 2.015625, |
| "learning_rate": 1.6381842743589765e-05, |
| "loss": 1.0200424194335938, |
| "step": 1820, |
| "token_acc": 0.6822118412765064 |
| }, |
| { |
| "epoch": 0.31667534270345304, |
| "grad_norm": 1.8203125, |
| "learning_rate": 1.635972423698709e-05, |
| "loss": 1.0166802406311035, |
| "step": 1825, |
| "token_acc": 0.6827727138286145 |
| }, |
| { |
| "epoch": 0.3175429463820927, |
| "grad_norm": 1.921875, |
| "learning_rate": 1.6337553362123165e-05, |
| "loss": 1.0155767440795898, |
| "step": 1830, |
| "token_acc": 0.6837214270455031 |
| }, |
| { |
| "epoch": 0.31841055006073227, |
| "grad_norm": 1.8984375, |
| "learning_rate": 1.6315330301560956e-05, |
| "loss": 1.0089836120605469, |
| "step": 1835, |
| "token_acc": 0.6831295389068122 |
| }, |
| { |
| "epoch": 0.31927815373937185, |
| "grad_norm": 1.9375, |
| "learning_rate": 1.6293055238293155e-05, |
| "loss": 1.0108304977416993, |
| "step": 1840, |
| "token_acc": 0.6825737553161517 |
| }, |
| { |
| "epoch": 0.32014575741801143, |
| "grad_norm": 1.9453125, |
| "learning_rate": 1.6270728355740658e-05, |
| "loss": 1.0052438735961915, |
| "step": 1845, |
| "token_acc": 0.6869656992084433 |
| }, |
| { |
| "epoch": 0.3210133610966511, |
| "grad_norm": 1.984375, |
| "learning_rate": 1.6248349837751064e-05, |
| "loss": 1.0119807243347168, |
| "step": 1850, |
| "token_acc": 0.6814690154990364 |
| }, |
| { |
| "epoch": 0.32188096477529066, |
| "grad_norm": 1.9453125, |
| "learning_rate": 1.6225919868597154e-05, |
| "loss": 1.0213706970214844, |
| "step": 1855, |
| "token_acc": 0.6794819414937081 |
| }, |
| { |
| "epoch": 0.32274856845393024, |
| "grad_norm": 1.96875, |
| "learning_rate": 1.620343863297538e-05, |
| "loss": 0.9990407943725585, |
| "step": 1860, |
| "token_acc": 0.6861129568106312 |
| }, |
| { |
| "epoch": 0.3236161721325698, |
| "grad_norm": 2.0, |
| "learning_rate": 1.6180906316004336e-05, |
| "loss": 1.0262950897216796, |
| "step": 1865, |
| "token_acc": 0.6775860676697801 |
| }, |
| { |
| "epoch": 0.32448377581120946, |
| "grad_norm": 2.078125, |
| "learning_rate": 1.615832310322324e-05, |
| "loss": 1.036133098602295, |
| "step": 1870, |
| "token_acc": 0.6766465480728505 |
| }, |
| { |
| "epoch": 0.32535137948984905, |
| "grad_norm": 1.8203125, |
| "learning_rate": 1.6135689180590404e-05, |
| "loss": 1.020677089691162, |
| "step": 1875, |
| "token_acc": 0.6793250062916407 |
| }, |
| { |
| "epoch": 0.32621898316848863, |
| "grad_norm": 1.8515625, |
| "learning_rate": 1.6113004734481704e-05, |
| "loss": 1.0076414108276368, |
| "step": 1880, |
| "token_acc": 0.6839123609309945 |
| }, |
| { |
| "epoch": 0.3270865868471282, |
| "grad_norm": 1.9375, |
| "learning_rate": 1.609026995168904e-05, |
| "loss": 1.0311081886291504, |
| "step": 1885, |
| "token_acc": 0.6782416456600568 |
| }, |
| { |
| "epoch": 0.32795419052576785, |
| "grad_norm": 1.953125, |
| "learning_rate": 1.6067485019418814e-05, |
| "loss": 1.0099788665771485, |
| "step": 1890, |
| "token_acc": 0.6829244908301488 |
| }, |
| { |
| "epoch": 0.32882179420440744, |
| "grad_norm": 1.890625, |
| "learning_rate": 1.6044650125290365e-05, |
| "loss": 1.0263484001159668, |
| "step": 1895, |
| "token_acc": 0.6801680694975478 |
| }, |
| { |
| "epoch": 0.329689397883047, |
| "grad_norm": 1.8671875, |
| "learning_rate": 1.6021765457334444e-05, |
| "loss": 1.0163857460021972, |
| "step": 1900, |
| "token_acc": 0.6806223824561879 |
| }, |
| { |
| "epoch": 0.3305570015616866, |
| "grad_norm": 1.8125, |
| "learning_rate": 1.5998831203991648e-05, |
| "loss": 1.0088854789733888, |
| "step": 1905, |
| "token_acc": 0.6855646039732352 |
| }, |
| { |
| "epoch": 0.33142460524032624, |
| "grad_norm": 1.8046875, |
| "learning_rate": 1.5975847554110888e-05, |
| "loss": 0.9952527999877929, |
| "step": 1910, |
| "token_acc": 0.6883125788578638 |
| }, |
| { |
| "epoch": 0.3322922089189658, |
| "grad_norm": 1.8359375, |
| "learning_rate": 1.595281469694782e-05, |
| "loss": 1.0266911506652832, |
| "step": 1915, |
| "token_acc": 0.6797566371681416 |
| }, |
| { |
| "epoch": 0.3331598125976054, |
| "grad_norm": 1.90625, |
| "learning_rate": 1.592973282216329e-05, |
| "loss": 1.0018574714660644, |
| "step": 1920, |
| "token_acc": 0.6860606854970837 |
| }, |
| { |
| "epoch": 0.334027416276245, |
| "grad_norm": 1.9453125, |
| "learning_rate": 1.590660211982177e-05, |
| "loss": 1.0108092308044434, |
| "step": 1925, |
| "token_acc": 0.6822820656674948 |
| }, |
| { |
| "epoch": 0.33489501995488463, |
| "grad_norm": 2.015625, |
| "learning_rate": 1.5883422780389806e-05, |
| "loss": 1.0258635520935058, |
| "step": 1930, |
| "token_acc": 0.6778978538515823 |
| }, |
| { |
| "epoch": 0.3357626236335242, |
| "grad_norm": 1.9453125, |
| "learning_rate": 1.5860194994734427e-05, |
| "loss": 1.021854782104492, |
| "step": 1935, |
| "token_acc": 0.6808329178366179 |
| }, |
| { |
| "epoch": 0.3366302273121638, |
| "grad_norm": 1.9453125, |
| "learning_rate": 1.5836918954121588e-05, |
| "loss": 1.0331063270568848, |
| "step": 1940, |
| "token_acc": 0.6784242872199181 |
| }, |
| { |
| "epoch": 0.3374978309908034, |
| "grad_norm": 1.8828125, |
| "learning_rate": 1.58135948502146e-05, |
| "loss": 1.0210276603698731, |
| "step": 1945, |
| "token_acc": 0.6840329583182118 |
| }, |
| { |
| "epoch": 0.338365434669443, |
| "grad_norm": 1.8046875, |
| "learning_rate": 1.579022287507254e-05, |
| "loss": 1.0260606765747071, |
| "step": 1950, |
| "token_acc": 0.6783528979227396 |
| }, |
| { |
| "epoch": 0.3392330383480826, |
| "grad_norm": 1.9765625, |
| "learning_rate": 1.5766803221148676e-05, |
| "loss": 0.9952493667602539, |
| "step": 1955, |
| "token_acc": 0.6859501834760369 |
| }, |
| { |
| "epoch": 0.3401006420267222, |
| "grad_norm": 1.9609375, |
| "learning_rate": 1.574333608128887e-05, |
| "loss": 1.0229947090148925, |
| "step": 1960, |
| "token_acc": 0.6816778645360451 |
| }, |
| { |
| "epoch": 0.34096824570536177, |
| "grad_norm": 1.8984375, |
| "learning_rate": 1.5719821648730014e-05, |
| "loss": 1.0026690483093261, |
| "step": 1965, |
| "token_acc": 0.6833949856144678 |
| }, |
| { |
| "epoch": 0.3418358493840014, |
| "grad_norm": 1.7578125, |
| "learning_rate": 1.5696260117098424e-05, |
| "loss": 0.9994998931884765, |
| "step": 1970, |
| "token_acc": 0.6882951486903434 |
| }, |
| { |
| "epoch": 0.342703453062641, |
| "grad_norm": 1.953125, |
| "learning_rate": 1.5672651680408237e-05, |
| "loss": 1.0034085273742677, |
| "step": 1975, |
| "token_acc": 0.6842928918540483 |
| }, |
| { |
| "epoch": 0.3435710567412806, |
| "grad_norm": 1.8828125, |
| "learning_rate": 1.5648996533059824e-05, |
| "loss": 1.0039892196655273, |
| "step": 1980, |
| "token_acc": 0.6863158175442339 |
| }, |
| { |
| "epoch": 0.34443866041992016, |
| "grad_norm": 1.8125, |
| "learning_rate": 1.5625294869838203e-05, |
| "loss": 1.0203709602355957, |
| "step": 1985, |
| "token_acc": 0.6797555567287894 |
| }, |
| { |
| "epoch": 0.3453062640985598, |
| "grad_norm": 1.9765625, |
| "learning_rate": 1.5601546885911406e-05, |
| "loss": 1.021955966949463, |
| "step": 1990, |
| "token_acc": 0.6785332666062516 |
| }, |
| { |
| "epoch": 0.3461738677771994, |
| "grad_norm": 1.96875, |
| "learning_rate": 1.5577752776828892e-05, |
| "loss": 1.0178564071655274, |
| "step": 1995, |
| "token_acc": 0.6787890301656874 |
| }, |
| { |
| "epoch": 0.34704147145583897, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.555391273851993e-05, |
| "loss": 0.9952051162719726, |
| "step": 2000, |
| "token_acc": 0.6859772527441359 |
| }, |
| { |
| "epoch": 0.34790907513447855, |
| "grad_norm": 2.046875, |
| "learning_rate": 1.553002696729198e-05, |
| "loss": 1.0093853950500489, |
| "step": 2005, |
| "token_acc": 0.6833726738760498 |
| }, |
| { |
| "epoch": 0.3487766788131182, |
| "grad_norm": 2.015625, |
| "learning_rate": 1.55060956598291e-05, |
| "loss": 1.0151101112365724, |
| "step": 2010, |
| "token_acc": 0.6831491047292776 |
| }, |
| { |
| "epoch": 0.3496442824917578, |
| "grad_norm": 1.9609375, |
| "learning_rate": 1.5482119013190296e-05, |
| "loss": 1.0173629760742187, |
| "step": 2015, |
| "token_acc": 0.6829174613265523 |
| }, |
| { |
| "epoch": 0.35051188617039736, |
| "grad_norm": 1.9296875, |
| "learning_rate": 1.5458097224807916e-05, |
| "loss": 1.019275188446045, |
| "step": 2020, |
| "token_acc": 0.6805337208534249 |
| }, |
| { |
| "epoch": 0.35137948984903694, |
| "grad_norm": 1.8671875, |
| "learning_rate": 1.5434030492486023e-05, |
| "loss": 1.0199106216430665, |
| "step": 2025, |
| "token_acc": 0.6799649276633055 |
| }, |
| { |
| "epoch": 0.3522470935276766, |
| "grad_norm": 1.84375, |
| "learning_rate": 1.5409919014398762e-05, |
| "loss": 1.0161332130432128, |
| "step": 2030, |
| "token_acc": 0.682195193046612 |
| }, |
| { |
| "epoch": 0.35311469720631616, |
| "grad_norm": 1.84375, |
| "learning_rate": 1.5385762989088738e-05, |
| "loss": 1.027943992614746, |
| "step": 2035, |
| "token_acc": 0.676602066311027 |
| }, |
| { |
| "epoch": 0.35398230088495575, |
| "grad_norm": 1.7890625, |
| "learning_rate": 1.5361562615465366e-05, |
| "loss": 1.0008016586303712, |
| "step": 2040, |
| "token_acc": 0.6849210596735349 |
| }, |
| { |
| "epoch": 0.35484990456359533, |
| "grad_norm": 1.859375, |
| "learning_rate": 1.5337318092803243e-05, |
| "loss": 1.0304694175720215, |
| "step": 2045, |
| "token_acc": 0.6774736297159127 |
| }, |
| { |
| "epoch": 0.35571750824223497, |
| "grad_norm": 1.8828125, |
| "learning_rate": 1.5313029620740506e-05, |
| "loss": 1.0220866203308105, |
| "step": 2050, |
| "token_acc": 0.6807486487213273 |
| }, |
| { |
| "epoch": 0.35658511192087455, |
| "grad_norm": 1.9453125, |
| "learning_rate": 1.5288697399277182e-05, |
| "loss": 1.019200611114502, |
| "step": 2055, |
| "token_acc": 0.6806197591915717 |
| }, |
| { |
| "epoch": 0.35745271559951414, |
| "grad_norm": 1.796875, |
| "learning_rate": 1.526432162877356e-05, |
| "loss": 1.013671875, |
| "step": 2060, |
| "token_acc": 0.6828907213817285 |
| }, |
| { |
| "epoch": 0.3583203192781537, |
| "grad_norm": 1.921875, |
| "learning_rate": 1.5239902509948514e-05, |
| "loss": 1.0091094017028808, |
| "step": 2065, |
| "token_acc": 0.6834054718392647 |
| }, |
| { |
| "epoch": 0.35918792295679336, |
| "grad_norm": 1.7578125, |
| "learning_rate": 1.521544024387787e-05, |
| "loss": 1.0055926322937012, |
| "step": 2070, |
| "token_acc": 0.6828146538012936 |
| }, |
| { |
| "epoch": 0.36005552663543294, |
| "grad_norm": 1.7578125, |
| "learning_rate": 1.5190935031992742e-05, |
| "loss": 1.0013408660888672, |
| "step": 2075, |
| "token_acc": 0.6865663839408236 |
| }, |
| { |
| "epoch": 0.3609231303140725, |
| "grad_norm": 1.96875, |
| "learning_rate": 1.5166387076077876e-05, |
| "loss": 1.014689826965332, |
| "step": 2080, |
| "token_acc": 0.6808145941313308 |
| }, |
| { |
| "epoch": 0.3617907339927121, |
| "grad_norm": 1.875, |
| "learning_rate": 1.5141796578269986e-05, |
| "loss": 1.0103944778442382, |
| "step": 2085, |
| "token_acc": 0.6806936577861687 |
| }, |
| { |
| "epoch": 0.36265833767135175, |
| "grad_norm": 1.90625, |
| "learning_rate": 1.5117163741056092e-05, |
| "loss": 1.0004392623901368, |
| "step": 2090, |
| "token_acc": 0.6851040904004753 |
| }, |
| { |
| "epoch": 0.36352594134999133, |
| "grad_norm": 1.8671875, |
| "learning_rate": 1.5092488767271858e-05, |
| "loss": 1.004606342315674, |
| "step": 2095, |
| "token_acc": 0.682853725269135 |
| }, |
| { |
| "epoch": 0.3643935450286309, |
| "grad_norm": 1.9375, |
| "learning_rate": 1.5067771860099905e-05, |
| "loss": 0.9848871231079102, |
| "step": 2100, |
| "token_acc": 0.6914043831501331 |
| }, |
| { |
| "epoch": 0.3652611487072705, |
| "grad_norm": 1.9140625, |
| "learning_rate": 1.5043013223068155e-05, |
| "loss": 1.0125656127929688, |
| "step": 2105, |
| "token_acc": 0.6832955602426212 |
| }, |
| { |
| "epoch": 0.36612875238591014, |
| "grad_norm": 1.8828125, |
| "learning_rate": 1.501821306004815e-05, |
| "loss": 1.0106427192687988, |
| "step": 2110, |
| "token_acc": 0.6838881419006099 |
| }, |
| { |
| "epoch": 0.3669963560645497, |
| "grad_norm": 1.8203125, |
| "learning_rate": 1.4993371575253368e-05, |
| "loss": 1.0103277206420898, |
| "step": 2115, |
| "token_acc": 0.6830820506764292 |
| }, |
| { |
| "epoch": 0.3678639597431893, |
| "grad_norm": 1.90625, |
| "learning_rate": 1.496848897323755e-05, |
| "loss": 0.9989145278930665, |
| "step": 2120, |
| "token_acc": 0.6848548395882129 |
| }, |
| { |
| "epoch": 0.3687315634218289, |
| "grad_norm": 1.9453125, |
| "learning_rate": 1.4943565458892999e-05, |
| "loss": 1.0156753540039063, |
| "step": 2125, |
| "token_acc": 0.683586704457614 |
| }, |
| { |
| "epoch": 0.36959916710046853, |
| "grad_norm": 1.828125, |
| "learning_rate": 1.4918601237448925e-05, |
| "loss": 1.0110110282897948, |
| "step": 2130, |
| "token_acc": 0.6824813659671195 |
| }, |
| { |
| "epoch": 0.3704667707791081, |
| "grad_norm": 1.90625, |
| "learning_rate": 1.4893596514469718e-05, |
| "loss": 1.0106982231140136, |
| "step": 2135, |
| "token_acc": 0.6820436574981416 |
| }, |
| { |
| "epoch": 0.3713343744577477, |
| "grad_norm": 1.859375, |
| "learning_rate": 1.4868551495853278e-05, |
| "loss": 1.0084819793701172, |
| "step": 2140, |
| "token_acc": 0.6837009642055211 |
| }, |
| { |
| "epoch": 0.3722019781363873, |
| "grad_norm": 1.9296875, |
| "learning_rate": 1.4843466387829317e-05, |
| "loss": 1.0337956428527832, |
| "step": 2145, |
| "token_acc": 0.6756529177470663 |
| }, |
| { |
| "epoch": 0.3730695818150269, |
| "grad_norm": 1.7890625, |
| "learning_rate": 1.4818341396957651e-05, |
| "loss": 1.010234260559082, |
| "step": 2150, |
| "token_acc": 0.6839618937946557 |
| }, |
| { |
| "epoch": 0.3739371854936665, |
| "grad_norm": 3.484375, |
| "learning_rate": 1.4793176730126512e-05, |
| "loss": 0.9982177734375, |
| "step": 2155, |
| "token_acc": 0.6882951820647545 |
| }, |
| { |
| "epoch": 0.3748047891723061, |
| "grad_norm": 1.8046875, |
| "learning_rate": 1.4767972594550832e-05, |
| "loss": 1.0000919342041015, |
| "step": 2160, |
| "token_acc": 0.685829937736179 |
| }, |
| { |
| "epoch": 0.37567239285094567, |
| "grad_norm": 1.796875, |
| "learning_rate": 1.4742729197770551e-05, |
| "loss": 1.0299704551696778, |
| "step": 2165, |
| "token_acc": 0.6772884904796179 |
| }, |
| { |
| "epoch": 0.3765399965295853, |
| "grad_norm": 1.9765625, |
| "learning_rate": 1.4717446747648894e-05, |
| "loss": 1.016530704498291, |
| "step": 2170, |
| "token_acc": 0.6815982696795492 |
| }, |
| { |
| "epoch": 0.3774076002082249, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.4692125452370664e-05, |
| "loss": 1.0197928428649903, |
| "step": 2175, |
| "token_acc": 0.6793494519840083 |
| }, |
| { |
| "epoch": 0.3782752038868645, |
| "grad_norm": 1.8671875, |
| "learning_rate": 1.4666765520440534e-05, |
| "loss": 1.0177095413208008, |
| "step": 2180, |
| "token_acc": 0.6810032017075773 |
| }, |
| { |
| "epoch": 0.37914280756550406, |
| "grad_norm": 1.9609375, |
| "learning_rate": 1.464136716068132e-05, |
| "loss": 1.0126147270202637, |
| "step": 2185, |
| "token_acc": 0.683709293410274 |
| }, |
| { |
| "epoch": 0.3800104112441437, |
| "grad_norm": 1.84375, |
| "learning_rate": 1.461593058223227e-05, |
| "loss": 1.021070957183838, |
| "step": 2190, |
| "token_acc": 0.679652122955623 |
| }, |
| { |
| "epoch": 0.3808780149227833, |
| "grad_norm": 1.8671875, |
| "learning_rate": 1.4590455994547337e-05, |
| "loss": 1.001976203918457, |
| "step": 2195, |
| "token_acc": 0.6833014477415503 |
| }, |
| { |
| "epoch": 0.38174561860142286, |
| "grad_norm": 1.8671875, |
| "learning_rate": 1.456494360739346e-05, |
| "loss": 0.9893196105957032, |
| "step": 2200, |
| "token_acc": 0.6892311085988446 |
| }, |
| { |
| "epoch": 0.38261322228006245, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.4539393630848829e-05, |
| "loss": 0.9814781188964844, |
| "step": 2205, |
| "token_acc": 0.6899440949405221 |
| }, |
| { |
| "epoch": 0.3834808259587021, |
| "grad_norm": 1.9921875, |
| "learning_rate": 1.451380627530115e-05, |
| "loss": 1.011701488494873, |
| "step": 2210, |
| "token_acc": 0.6809758515295867 |
| }, |
| { |
| "epoch": 0.38434842963734167, |
| "grad_norm": 1.8828125, |
| "learning_rate": 1.4488181751445939e-05, |
| "loss": 1.0211992263793945, |
| "step": 2215, |
| "token_acc": 0.6797978865156532 |
| }, |
| { |
| "epoch": 0.38521603331598125, |
| "grad_norm": 2.0, |
| "learning_rate": 1.4462520270284756e-05, |
| "loss": 0.9845295906066894, |
| "step": 2220, |
| "token_acc": 0.6868465406909026 |
| }, |
| { |
| "epoch": 0.38608363699462084, |
| "grad_norm": 1.859375, |
| "learning_rate": 1.4436822043123485e-05, |
| "loss": 1.0249157905578614, |
| "step": 2225, |
| "token_acc": 0.6786562283760498 |
| }, |
| { |
| "epoch": 0.3869512406732605, |
| "grad_norm": 1.765625, |
| "learning_rate": 1.441108728157059e-05, |
| "loss": 1.0030797004699707, |
| "step": 2230, |
| "token_acc": 0.684765917234319 |
| }, |
| { |
| "epoch": 0.38781884435190006, |
| "grad_norm": 1.96875, |
| "learning_rate": 1.4385316197535373e-05, |
| "loss": 1.0158026695251465, |
| "step": 2235, |
| "token_acc": 0.6832608666746447 |
| }, |
| { |
| "epoch": 0.38868644803053964, |
| "grad_norm": 1.84375, |
| "learning_rate": 1.4359509003226221e-05, |
| "loss": 1.0172318458557128, |
| "step": 2240, |
| "token_acc": 0.6808322441812877 |
| }, |
| { |
| "epoch": 0.3895540517091792, |
| "grad_norm": 1.9453125, |
| "learning_rate": 1.4333665911148881e-05, |
| "loss": 0.9851541519165039, |
| "step": 2245, |
| "token_acc": 0.6889603544215962 |
| }, |
| { |
| "epoch": 0.39042165538781887, |
| "grad_norm": 1.953125, |
| "learning_rate": 1.4307787134104682e-05, |
| "loss": 1.014187717437744, |
| "step": 2250, |
| "token_acc": 0.683114625160409 |
| }, |
| { |
| "epoch": 0.39128925906645845, |
| "grad_norm": 1.8203125, |
| "learning_rate": 1.42818728851888e-05, |
| "loss": 1.0081872940063477, |
| "step": 2255, |
| "token_acc": 0.6823870250820193 |
| }, |
| { |
| "epoch": 0.39215686274509803, |
| "grad_norm": 1.859375, |
| "learning_rate": 1.4255923377788497e-05, |
| "loss": 1.0085988998413087, |
| "step": 2260, |
| "token_acc": 0.6840598070654684 |
| }, |
| { |
| "epoch": 0.3930244664237376, |
| "grad_norm": 1.859375, |
| "learning_rate": 1.4229938825581373e-05, |
| "loss": 1.0013799667358398, |
| "step": 2265, |
| "token_acc": 0.6847899527045825 |
| }, |
| { |
| "epoch": 0.39389207010237726, |
| "grad_norm": 1.890625, |
| "learning_rate": 1.4203919442533597e-05, |
| "loss": 1.018793773651123, |
| "step": 2270, |
| "token_acc": 0.681686886192952 |
| }, |
| { |
| "epoch": 0.39475967378101684, |
| "grad_norm": 1.9921875, |
| "learning_rate": 1.4177865442898137e-05, |
| "loss": 1.0064517974853515, |
| "step": 2275, |
| "token_acc": 0.6819670370966876 |
| }, |
| { |
| "epoch": 0.3956272774596564, |
| "grad_norm": 1.9609375, |
| "learning_rate": 1.4151777041213021e-05, |
| "loss": 0.9828666687011719, |
| "step": 2280, |
| "token_acc": 0.6887780548628429 |
| }, |
| { |
| "epoch": 0.396494881138296, |
| "grad_norm": 1.96875, |
| "learning_rate": 1.4125654452299553e-05, |
| "loss": 1.0092188835144043, |
| "step": 2285, |
| "token_acc": 0.6844631486295059 |
| }, |
| { |
| "epoch": 0.39736248481693565, |
| "grad_norm": 1.8828125, |
| "learning_rate": 1.4099497891260538e-05, |
| "loss": 0.9924700736999512, |
| "step": 2290, |
| "token_acc": 0.6873599312908464 |
| }, |
| { |
| "epoch": 0.39823008849557523, |
| "grad_norm": 2.0, |
| "learning_rate": 1.4073307573478528e-05, |
| "loss": 1.0148592948913575, |
| "step": 2295, |
| "token_acc": 0.6811773236297232 |
| }, |
| { |
| "epoch": 0.3990976921742148, |
| "grad_norm": 1.9765625, |
| "learning_rate": 1.4047083714614038e-05, |
| "loss": 1.0003128051757812, |
| "step": 2300, |
| "token_acc": 0.6852241329539362 |
| }, |
| { |
| "epoch": 0.3999652958528544, |
| "grad_norm": 1.984375, |
| "learning_rate": 1.4020826530603775e-05, |
| "loss": 0.9960025787353516, |
| "step": 2305, |
| "token_acc": 0.6852598031645303 |
| }, |
| { |
| "epoch": 0.40083289953149404, |
| "grad_norm": 2.0, |
| "learning_rate": 1.399453623765885e-05, |
| "loss": 1.0148781776428222, |
| "step": 2310, |
| "token_acc": 0.684109947643979 |
| }, |
| { |
| "epoch": 0.4017005032101336, |
| "grad_norm": 1.90625, |
| "learning_rate": 1.3968213052263014e-05, |
| "loss": 1.012251091003418, |
| "step": 2315, |
| "token_acc": 0.6833315462148831 |
| }, |
| { |
| "epoch": 0.4025681068887732, |
| "grad_norm": 1.921875, |
| "learning_rate": 1.3941857191170857e-05, |
| "loss": 0.9941699028015136, |
| "step": 2320, |
| "token_acc": 0.6864559695983815 |
| }, |
| { |
| "epoch": 0.4034357105674128, |
| "grad_norm": 1.96875, |
| "learning_rate": 1.3915468871406044e-05, |
| "loss": 1.0085437774658204, |
| "step": 2325, |
| "token_acc": 0.6833488248572567 |
| }, |
| { |
| "epoch": 0.4043033142460524, |
| "grad_norm": 1.9296875, |
| "learning_rate": 1.38890483102595e-05, |
| "loss": 1.0144371032714843, |
| "step": 2330, |
| "token_acc": 0.68039780521262 |
| }, |
| { |
| "epoch": 0.405170917924692, |
| "grad_norm": 1.75, |
| "learning_rate": 1.3862595725287653e-05, |
| "loss": 0.9994147300720215, |
| "step": 2335, |
| "token_acc": 0.687611521794545 |
| }, |
| { |
| "epoch": 0.4060385216033316, |
| "grad_norm": 1.9140625, |
| "learning_rate": 1.3836111334310622e-05, |
| "loss": 0.9963122367858886, |
| "step": 2340, |
| "token_acc": 0.685745011351416 |
| }, |
| { |
| "epoch": 0.4069061252819712, |
| "grad_norm": 1.859375, |
| "learning_rate": 1.3809595355410424e-05, |
| "loss": 1.0122366905212403, |
| "step": 2345, |
| "token_acc": 0.683117204922772 |
| }, |
| { |
| "epoch": 0.4077737289606108, |
| "grad_norm": 1.890625, |
| "learning_rate": 1.3783048006929185e-05, |
| "loss": 1.0144343376159668, |
| "step": 2350, |
| "token_acc": 0.6814033279539999 |
| }, |
| { |
| "epoch": 0.4086413326392504, |
| "grad_norm": 1.8984375, |
| "learning_rate": 1.375646950746734e-05, |
| "loss": 1.0156232833862304, |
| "step": 2355, |
| "token_acc": 0.6830444078275435 |
| }, |
| { |
| "epoch": 0.40950893631789, |
| "grad_norm": 1.8984375, |
| "learning_rate": 1.3729860075881827e-05, |
| "loss": 1.034743595123291, |
| "step": 2360, |
| "token_acc": 0.6783182628209359 |
| }, |
| { |
| "epoch": 0.41037653999652957, |
| "grad_norm": 1.9375, |
| "learning_rate": 1.3703219931284304e-05, |
| "loss": 0.9984539031982422, |
| "step": 2365, |
| "token_acc": 0.6839781943890441 |
| }, |
| { |
| "epoch": 0.4112441436751692, |
| "grad_norm": 1.90625, |
| "learning_rate": 1.3676549293039316e-05, |
| "loss": 1.0032421112060548, |
| "step": 2370, |
| "token_acc": 0.6834760671844918 |
| }, |
| { |
| "epoch": 0.4121117473538088, |
| "grad_norm": 1.7890625, |
| "learning_rate": 1.3649848380762513e-05, |
| "loss": 0.9850346565246582, |
| "step": 2375, |
| "token_acc": 0.6905733974775712 |
| }, |
| { |
| "epoch": 0.41297935103244837, |
| "grad_norm": 1.859375, |
| "learning_rate": 1.3623117414318827e-05, |
| "loss": 1.0028590202331542, |
| "step": 2380, |
| "token_acc": 0.6836534850029511 |
| }, |
| { |
| "epoch": 0.41384695471108796, |
| "grad_norm": 1.859375, |
| "learning_rate": 1.3596356613820669e-05, |
| "loss": 1.013303279876709, |
| "step": 2385, |
| "token_acc": 0.6805802728792536 |
| }, |
| { |
| "epoch": 0.4147145583897276, |
| "grad_norm": 1.8828125, |
| "learning_rate": 1.3569566199626114e-05, |
| "loss": 1.0094331741333007, |
| "step": 2390, |
| "token_acc": 0.685405305236406 |
| }, |
| { |
| "epoch": 0.4155821620683672, |
| "grad_norm": 1.9140625, |
| "learning_rate": 1.3542746392337087e-05, |
| "loss": 1.005965805053711, |
| "step": 2395, |
| "token_acc": 0.682434716756596 |
| }, |
| { |
| "epoch": 0.41644976574700676, |
| "grad_norm": 1.75, |
| "learning_rate": 1.3515897412797547e-05, |
| "loss": 0.9940034866333007, |
| "step": 2400, |
| "token_acc": 0.6875033593120129 |
| }, |
| { |
| "epoch": 0.41731736942564635, |
| "grad_norm": 1.90625, |
| "learning_rate": 1.348901948209167e-05, |
| "loss": 0.9850317955017089, |
| "step": 2405, |
| "token_acc": 0.686592845447229 |
| }, |
| { |
| "epoch": 0.418184973104286, |
| "grad_norm": 1.953125, |
| "learning_rate": 1.3462112821542016e-05, |
| "loss": 1.0118427276611328, |
| "step": 2410, |
| "token_acc": 0.6816512666869937 |
| }, |
| { |
| "epoch": 0.41905257678292557, |
| "grad_norm": 1.875, |
| "learning_rate": 1.3435177652707735e-05, |
| "loss": 1.0028743743896484, |
| "step": 2415, |
| "token_acc": 0.6860446549751178 |
| }, |
| { |
| "epoch": 0.41992018046156515, |
| "grad_norm": 1.8828125, |
| "learning_rate": 1.3408214197382705e-05, |
| "loss": 0.9918471336364746, |
| "step": 2420, |
| "token_acc": 0.6874362288279708 |
| }, |
| { |
| "epoch": 0.42078778414020473, |
| "grad_norm": 1.84375, |
| "learning_rate": 1.3381222677593737e-05, |
| "loss": 1.0141358375549316, |
| "step": 2425, |
| "token_acc": 0.6807328527018983 |
| }, |
| { |
| "epoch": 0.4216553878188444, |
| "grad_norm": 1.84375, |
| "learning_rate": 1.3354203315598733e-05, |
| "loss": 1.0219820976257323, |
| "step": 2430, |
| "token_acc": 0.6813468119008437 |
| }, |
| { |
| "epoch": 0.42252299149748396, |
| "grad_norm": 1.96875, |
| "learning_rate": 1.3327156333884856e-05, |
| "loss": 1.0195876121520997, |
| "step": 2435, |
| "token_acc": 0.6805910377684181 |
| }, |
| { |
| "epoch": 0.42339059517612354, |
| "grad_norm": 1.8671875, |
| "learning_rate": 1.33000819551667e-05, |
| "loss": 1.0096649169921874, |
| "step": 2440, |
| "token_acc": 0.6815645521723036 |
| }, |
| { |
| "epoch": 0.4242581988547631, |
| "grad_norm": 1.9453125, |
| "learning_rate": 1.3272980402384459e-05, |
| "loss": 1.0119336128234864, |
| "step": 2445, |
| "token_acc": 0.68414329128903 |
| }, |
| { |
| "epoch": 0.42512580253340276, |
| "grad_norm": 1.90625, |
| "learning_rate": 1.3245851898702083e-05, |
| "loss": 1.004085636138916, |
| "step": 2450, |
| "token_acc": 0.6859707219637835 |
| }, |
| { |
| "epoch": 0.42599340621204235, |
| "grad_norm": 1.90625, |
| "learning_rate": 1.3218696667505444e-05, |
| "loss": 1.006967830657959, |
| "step": 2455, |
| "token_acc": 0.6833605995039316 |
| }, |
| { |
| "epoch": 0.42686100989068193, |
| "grad_norm": 1.984375, |
| "learning_rate": 1.319151493240051e-05, |
| "loss": 1.0127968788146973, |
| "step": 2460, |
| "token_acc": 0.6829119501118417 |
| }, |
| { |
| "epoch": 0.4277286135693215, |
| "grad_norm": 1.9453125, |
| "learning_rate": 1.3164306917211475e-05, |
| "loss": 1.0015942573547363, |
| "step": 2465, |
| "token_acc": 0.683394712251965 |
| }, |
| { |
| "epoch": 0.42859621724796115, |
| "grad_norm": 1.8359375, |
| "learning_rate": 1.313707284597895e-05, |
| "loss": 0.9921387672424317, |
| "step": 2470, |
| "token_acc": 0.6884706008353861 |
| }, |
| { |
| "epoch": 0.42946382092660074, |
| "grad_norm": 1.796875, |
| "learning_rate": 1.3109812942958087e-05, |
| "loss": 0.9937407493591308, |
| "step": 2475, |
| "token_acc": 0.6858287322723828 |
| }, |
| { |
| "epoch": 0.4303314246052403, |
| "grad_norm": 1.8125, |
| "learning_rate": 1.308252743261675e-05, |
| "loss": 1.0000595092773437, |
| "step": 2480, |
| "token_acc": 0.6844413945289899 |
| }, |
| { |
| "epoch": 0.4311990282838799, |
| "grad_norm": 1.890625, |
| "learning_rate": 1.3055216539633668e-05, |
| "loss": 0.9946840286254883, |
| "step": 2485, |
| "token_acc": 0.6857801388537539 |
| }, |
| { |
| "epoch": 0.43206663196251954, |
| "grad_norm": 1.7421875, |
| "learning_rate": 1.302788048889657e-05, |
| "loss": 0.9850924491882325, |
| "step": 2490, |
| "token_acc": 0.6912732362675458 |
| }, |
| { |
| "epoch": 0.4329342356411591, |
| "grad_norm": 1.8828125, |
| "learning_rate": 1.3000519505500354e-05, |
| "loss": 1.013066577911377, |
| "step": 2495, |
| "token_acc": 0.6841163491550963 |
| }, |
| { |
| "epoch": 0.4338018393197987, |
| "grad_norm": 1.90625, |
| "learning_rate": 1.297313381474522e-05, |
| "loss": 1.0059243202209474, |
| "step": 2500, |
| "token_acc": 0.682485376889968 |
| }, |
| { |
| "epoch": 0.4346694429984383, |
| "grad_norm": 1.8984375, |
| "learning_rate": 1.2945723642134808e-05, |
| "loss": 0.9933188438415528, |
| "step": 2505, |
| "token_acc": 0.6839858247063091 |
| }, |
| { |
| "epoch": 0.43553704667707793, |
| "grad_norm": 1.8359375, |
| "learning_rate": 1.2918289213374362e-05, |
| "loss": 1.0141347885131835, |
| "step": 2510, |
| "token_acc": 0.6845435525845792 |
| }, |
| { |
| "epoch": 0.4364046503557175, |
| "grad_norm": 1.8125, |
| "learning_rate": 1.2890830754368855e-05, |
| "loss": 1.0011796951293945, |
| "step": 2515, |
| "token_acc": 0.6849304174950298 |
| }, |
| { |
| "epoch": 0.4372722540343571, |
| "grad_norm": 1.859375, |
| "learning_rate": 1.2863348491221129e-05, |
| "loss": 1.004225254058838, |
| "step": 2520, |
| "token_acc": 0.6839080459770115 |
| }, |
| { |
| "epoch": 0.4381398577129967, |
| "grad_norm": 1.8828125, |
| "learning_rate": 1.2835842650230046e-05, |
| "loss": 1.005355167388916, |
| "step": 2525, |
| "token_acc": 0.6849887538762527 |
| }, |
| { |
| "epoch": 0.4390074613916363, |
| "grad_norm": 1.796875, |
| "learning_rate": 1.2808313457888614e-05, |
| "loss": 1.0048332214355469, |
| "step": 2530, |
| "token_acc": 0.6836279848033905 |
| }, |
| { |
| "epoch": 0.4398750650702759, |
| "grad_norm": 1.8515625, |
| "learning_rate": 1.2780761140882123e-05, |
| "loss": 1.0195894241333008, |
| "step": 2535, |
| "token_acc": 0.6795473179123936 |
| }, |
| { |
| "epoch": 0.4407426687489155, |
| "grad_norm": 1.9140625, |
| "learning_rate": 1.2753185926086282e-05, |
| "loss": 1.0192377090454101, |
| "step": 2540, |
| "token_acc": 0.6803299012123347 |
| }, |
| { |
| "epoch": 0.4416102724275551, |
| "grad_norm": 1.9921875, |
| "learning_rate": 1.2725588040565344e-05, |
| "loss": 1.005928134918213, |
| "step": 2545, |
| "token_acc": 0.6801644427607302 |
| }, |
| { |
| "epoch": 0.4424778761061947, |
| "grad_norm": 1.8984375, |
| "learning_rate": 1.2697967711570243e-05, |
| "loss": 1.003110980987549, |
| "step": 2550, |
| "token_acc": 0.6823795540443708 |
| }, |
| { |
| "epoch": 0.4433454797848343, |
| "grad_norm": 1.8203125, |
| "learning_rate": 1.2670325166536726e-05, |
| "loss": 1.000045108795166, |
| "step": 2555, |
| "token_acc": 0.6848798995377668 |
| }, |
| { |
| "epoch": 0.4442130834634739, |
| "grad_norm": 1.7890625, |
| "learning_rate": 1.2642660633083467e-05, |
| "loss": 0.9951872825622559, |
| "step": 2560, |
| "token_acc": 0.6862447171184515 |
| }, |
| { |
| "epoch": 0.44508068714211346, |
| "grad_norm": 1.859375, |
| "learning_rate": 1.2614974339010208e-05, |
| "loss": 1.0055727005004882, |
| "step": 2565, |
| "token_acc": 0.6850169715125947 |
| }, |
| { |
| "epoch": 0.4459482908207531, |
| "grad_norm": 1.8671875, |
| "learning_rate": 1.2587266512295868e-05, |
| "loss": 1.0195012092590332, |
| "step": 2570, |
| "token_acc": 0.6816625277741472 |
| }, |
| { |
| "epoch": 0.4468158944993927, |
| "grad_norm": 1.921875, |
| "learning_rate": 1.2559537381096681e-05, |
| "loss": 0.9964936256408692, |
| "step": 2575, |
| "token_acc": 0.6885902240435685 |
| }, |
| { |
| "epoch": 0.44768349817803227, |
| "grad_norm": 1.90625, |
| "learning_rate": 1.2531787173744298e-05, |
| "loss": 0.9999607086181641, |
| "step": 2580, |
| "token_acc": 0.6850149960102359 |
| }, |
| { |
| "epoch": 0.44855110185667185, |
| "grad_norm": 1.9609375, |
| "learning_rate": 1.2504016118743936e-05, |
| "loss": 1.0000761032104493, |
| "step": 2585, |
| "token_acc": 0.6829418781621488 |
| }, |
| { |
| "epoch": 0.4494187055353115, |
| "grad_norm": 1.9140625, |
| "learning_rate": 1.2476224444772467e-05, |
| "loss": 1.0015432357788085, |
| "step": 2590, |
| "token_acc": 0.6852074323242031 |
| }, |
| { |
| "epoch": 0.4502863092139511, |
| "grad_norm": 1.8984375, |
| "learning_rate": 1.244841238067655e-05, |
| "loss": 1.0161554336547851, |
| "step": 2595, |
| "token_acc": 0.68090608621095 |
| }, |
| { |
| "epoch": 0.45115391289259066, |
| "grad_norm": 1.953125, |
| "learning_rate": 1.242058015547074e-05, |
| "loss": 1.0064961433410644, |
| "step": 2600, |
| "token_acc": 0.6828215164844034 |
| }, |
| { |
| "epoch": 0.45202151657123024, |
| "grad_norm": 1.8359375, |
| "learning_rate": 1.2392727998335617e-05, |
| "loss": 1.0068798065185547, |
| "step": 2605, |
| "token_acc": 0.6836080829566604 |
| }, |
| { |
| "epoch": 0.4528891202498699, |
| "grad_norm": 1.875, |
| "learning_rate": 1.2364856138615873e-05, |
| "loss": 0.9954544067382812, |
| "step": 2610, |
| "token_acc": 0.6852013951546656 |
| }, |
| { |
| "epoch": 0.45375672392850946, |
| "grad_norm": 1.8671875, |
| "learning_rate": 1.2336964805818445e-05, |
| "loss": 1.012361431121826, |
| "step": 2615, |
| "token_acc": 0.6833901146441704 |
| }, |
| { |
| "epoch": 0.45462432760714905, |
| "grad_norm": 1.9375, |
| "learning_rate": 1.2309054229610625e-05, |
| "loss": 1.0001043319702148, |
| "step": 2620, |
| "token_acc": 0.6823560827524173 |
| }, |
| { |
| "epoch": 0.45549193128578863, |
| "grad_norm": 1.9453125, |
| "learning_rate": 1.2281124639818152e-05, |
| "loss": 1.0002737998962403, |
| "step": 2625, |
| "token_acc": 0.6835434270674609 |
| }, |
| { |
| "epoch": 0.45635953496442827, |
| "grad_norm": 1.8828125, |
| "learning_rate": 1.2253176266423332e-05, |
| "loss": 0.9922337532043457, |
| "step": 2630, |
| "token_acc": 0.6857845693124239 |
| }, |
| { |
| "epoch": 0.45722713864306785, |
| "grad_norm": 1.78125, |
| "learning_rate": 1.2225209339563144e-05, |
| "loss": 0.9958258628845215, |
| "step": 2635, |
| "token_acc": 0.6889344316136575 |
| }, |
| { |
| "epoch": 0.45809474232170744, |
| "grad_norm": 1.8125, |
| "learning_rate": 1.2197224089527347e-05, |
| "loss": 1.000858688354492, |
| "step": 2640, |
| "token_acc": 0.6852724153892232 |
| }, |
| { |
| "epoch": 0.458962346000347, |
| "grad_norm": 1.8984375, |
| "learning_rate": 1.2169220746756567e-05, |
| "loss": 1.01625337600708, |
| "step": 2645, |
| "token_acc": 0.6788307748873977 |
| }, |
| { |
| "epoch": 0.45982994967898666, |
| "grad_norm": 1.8359375, |
| "learning_rate": 1.2141199541840428e-05, |
| "loss": 1.0196890830993652, |
| "step": 2650, |
| "token_acc": 0.6794828350233297 |
| }, |
| { |
| "epoch": 0.46069755335762624, |
| "grad_norm": 1.84375, |
| "learning_rate": 1.2113160705515626e-05, |
| "loss": 1.0036340713500977, |
| "step": 2655, |
| "token_acc": 0.6851736637091539 |
| }, |
| { |
| "epoch": 0.4615651570362658, |
| "grad_norm": 1.9609375, |
| "learning_rate": 1.2085104468664041e-05, |
| "loss": 1.0029501914978027, |
| "step": 2660, |
| "token_acc": 0.6866774142396532 |
| }, |
| { |
| "epoch": 0.4624327607149054, |
| "grad_norm": 1.8203125, |
| "learning_rate": 1.2057031062310845e-05, |
| "loss": 1.0131060600280761, |
| "step": 2665, |
| "token_acc": 0.6806618788309262 |
| }, |
| { |
| "epoch": 0.46330036439354505, |
| "grad_norm": 1.8671875, |
| "learning_rate": 1.2028940717622576e-05, |
| "loss": 1.0073641777038573, |
| "step": 2670, |
| "token_acc": 0.6839914676655741 |
| }, |
| { |
| "epoch": 0.46416796807218463, |
| "grad_norm": 1.7734375, |
| "learning_rate": 1.2000833665905255e-05, |
| "loss": 1.0106398582458496, |
| "step": 2675, |
| "token_acc": 0.6813406569965871 |
| }, |
| { |
| "epoch": 0.4650355717508242, |
| "grad_norm": 1.8671875, |
| "learning_rate": 1.1972710138602482e-05, |
| "loss": 1.0050904273986816, |
| "step": 2680, |
| "token_acc": 0.6856562992838491 |
| }, |
| { |
| "epoch": 0.4659031754294638, |
| "grad_norm": 1.90625, |
| "learning_rate": 1.194457036729351e-05, |
| "loss": 1.015509033203125, |
| "step": 2685, |
| "token_acc": 0.6802187834233648 |
| }, |
| { |
| "epoch": 0.46677077910810344, |
| "grad_norm": 1.90625, |
| "learning_rate": 1.1916414583691361e-05, |
| "loss": 1.0169716835021974, |
| "step": 2690, |
| "token_acc": 0.6822935779816514 |
| }, |
| { |
| "epoch": 0.467638382786743, |
| "grad_norm": 1.890625, |
| "learning_rate": 1.18882430196409e-05, |
| "loss": 1.00482234954834, |
| "step": 2695, |
| "token_acc": 0.6824643916517463 |
| }, |
| { |
| "epoch": 0.4685059864653826, |
| "grad_norm": 1.8046875, |
| "learning_rate": 1.1860055907116937e-05, |
| "loss": 1.013214111328125, |
| "step": 2700, |
| "token_acc": 0.6813338959360743 |
| }, |
| { |
| "epoch": 0.4693735901440222, |
| "grad_norm": 2.015625, |
| "learning_rate": 1.1831853478222318e-05, |
| "loss": 1.0059806823730468, |
| "step": 2705, |
| "token_acc": 0.683576909519807 |
| }, |
| { |
| "epoch": 0.47024119382266183, |
| "grad_norm": 1.9296875, |
| "learning_rate": 1.1803635965186002e-05, |
| "loss": 0.9913622856140136, |
| "step": 2710, |
| "token_acc": 0.6861254522541567 |
| }, |
| { |
| "epoch": 0.4711087975013014, |
| "grad_norm": 1.734375, |
| "learning_rate": 1.1775403600361167e-05, |
| "loss": 1.0054823875427246, |
| "step": 2715, |
| "token_acc": 0.684439954609615 |
| }, |
| { |
| "epoch": 0.471976401179941, |
| "grad_norm": 1.8671875, |
| "learning_rate": 1.1747156616223272e-05, |
| "loss": 1.0157322883605957, |
| "step": 2720, |
| "token_acc": 0.680252464832309 |
| }, |
| { |
| "epoch": 0.4728440048585806, |
| "grad_norm": 1.890625, |
| "learning_rate": 1.1718895245368167e-05, |
| "loss": 1.0170634269714356, |
| "step": 2725, |
| "token_acc": 0.6813107028863409 |
| }, |
| { |
| "epoch": 0.4737116085372202, |
| "grad_norm": 1.984375, |
| "learning_rate": 1.1690619720510165e-05, |
| "loss": 0.9852043151855469, |
| "step": 2730, |
| "token_acc": 0.6872285921724955 |
| }, |
| { |
| "epoch": 0.4745792122158598, |
| "grad_norm": 1.7734375, |
| "learning_rate": 1.1662330274480128e-05, |
| "loss": 1.0059645652770997, |
| "step": 2735, |
| "token_acc": 0.6836207236712307 |
| }, |
| { |
| "epoch": 0.4754468158944994, |
| "grad_norm": 1.859375, |
| "learning_rate": 1.1634027140223544e-05, |
| "loss": 0.9841846466064453, |
| "step": 2740, |
| "token_acc": 0.6890565215615255 |
| }, |
| { |
| "epoch": 0.47631441957313897, |
| "grad_norm": 1.859375, |
| "learning_rate": 1.1605710550798626e-05, |
| "loss": 0.995844554901123, |
| "step": 2745, |
| "token_acc": 0.6851391782871187 |
| }, |
| { |
| "epoch": 0.4771820232517786, |
| "grad_norm": 1.7421875, |
| "learning_rate": 1.1577380739374376e-05, |
| "loss": 0.9913998603820801, |
| "step": 2750, |
| "token_acc": 0.6908336288532514 |
| }, |
| { |
| "epoch": 0.4780496269304182, |
| "grad_norm": 1.8984375, |
| "learning_rate": 1.1549037939228667e-05, |
| "loss": 0.9965376853942871, |
| "step": 2755, |
| "token_acc": 0.6868992547759661 |
| }, |
| { |
| "epoch": 0.4789172306090578, |
| "grad_norm": 1.8828125, |
| "learning_rate": 1.1520682383746334e-05, |
| "loss": 1.0012220382690429, |
| "step": 2760, |
| "token_acc": 0.6884490453429107 |
| }, |
| { |
| "epoch": 0.47978483428769736, |
| "grad_norm": 1.8984375, |
| "learning_rate": 1.1492314306417233e-05, |
| "loss": 0.9879722595214844, |
| "step": 2765, |
| "token_acc": 0.6881789911554461 |
| }, |
| { |
| "epoch": 0.480652437966337, |
| "grad_norm": 1.953125, |
| "learning_rate": 1.1463933940834342e-05, |
| "loss": 1.0053581237792968, |
| "step": 2770, |
| "token_acc": 0.6855757229040982 |
| }, |
| { |
| "epoch": 0.4815200416449766, |
| "grad_norm": 1.96875, |
| "learning_rate": 1.1435541520691815e-05, |
| "loss": 0.9921921730041504, |
| "step": 2775, |
| "token_acc": 0.6862049831504012 |
| }, |
| { |
| "epoch": 0.48238764532361617, |
| "grad_norm": 1.9453125, |
| "learning_rate": 1.1407137279783074e-05, |
| "loss": 1.0183118820190429, |
| "step": 2780, |
| "token_acc": 0.6817061841095448 |
| }, |
| { |
| "epoch": 0.48325524900225575, |
| "grad_norm": 1.9375, |
| "learning_rate": 1.1378721451998874e-05, |
| "loss": 0.9925461769104004, |
| "step": 2785, |
| "token_acc": 0.688545962485034 |
| }, |
| { |
| "epoch": 0.4841228526808954, |
| "grad_norm": 1.9609375, |
| "learning_rate": 1.1350294271325379e-05, |
| "loss": 1.0159781455993653, |
| "step": 2790, |
| "token_acc": 0.681974551332532 |
| }, |
| { |
| "epoch": 0.48499045635953497, |
| "grad_norm": 1.9140625, |
| "learning_rate": 1.1321855971842243e-05, |
| "loss": 1.0086934089660644, |
| "step": 2795, |
| "token_acc": 0.6831038631199038 |
| }, |
| { |
| "epoch": 0.48585806003817456, |
| "grad_norm": 1.875, |
| "learning_rate": 1.129340678772067e-05, |
| "loss": 1.0199012756347656, |
| "step": 2800, |
| "token_acc": 0.6786575483258884 |
| }, |
| { |
| "epoch": 0.48672566371681414, |
| "grad_norm": 1.953125, |
| "learning_rate": 1.1264946953221496e-05, |
| "loss": 1.0137310028076172, |
| "step": 2805, |
| "token_acc": 0.6824437730782141 |
| }, |
| { |
| "epoch": 0.4875932673954538, |
| "grad_norm": 1.8515625, |
| "learning_rate": 1.123647670269325e-05, |
| "loss": 1.0020729064941407, |
| "step": 2810, |
| "token_acc": 0.6836910759886811 |
| }, |
| { |
| "epoch": 0.48846087107409336, |
| "grad_norm": 1.90625, |
| "learning_rate": 1.1207996270570242e-05, |
| "loss": 0.9875768661499024, |
| "step": 2815, |
| "token_acc": 0.6884513431530621 |
| }, |
| { |
| "epoch": 0.48932847475273294, |
| "grad_norm": 1.9140625, |
| "learning_rate": 1.117950589137061e-05, |
| "loss": 1.0017758369445802, |
| "step": 2820, |
| "token_acc": 0.6836398649214367 |
| }, |
| { |
| "epoch": 0.49019607843137253, |
| "grad_norm": 1.875, |
| "learning_rate": 1.1151005799694401e-05, |
| "loss": 1.0143745422363282, |
| "step": 2825, |
| "token_acc": 0.6843956569062094 |
| }, |
| { |
| "epoch": 0.49106368211001217, |
| "grad_norm": 1.8984375, |
| "learning_rate": 1.1122496230221644e-05, |
| "loss": 1.0051603317260742, |
| "step": 2830, |
| "token_acc": 0.6826833612462451 |
| }, |
| { |
| "epoch": 0.49193128578865175, |
| "grad_norm": 1.8984375, |
| "learning_rate": 1.1093977417710408e-05, |
| "loss": 0.9880369186401368, |
| "step": 2835, |
| "token_acc": 0.6881879959200434 |
| }, |
| { |
| "epoch": 0.49279888946729133, |
| "grad_norm": 1.8515625, |
| "learning_rate": 1.1065449596994876e-05, |
| "loss": 0.9956092834472656, |
| "step": 2840, |
| "token_acc": 0.6856416772554003 |
| }, |
| { |
| "epoch": 0.4936664931459309, |
| "grad_norm": 1.8828125, |
| "learning_rate": 1.1036913002983392e-05, |
| "loss": 1.0082509994506836, |
| "step": 2845, |
| "token_acc": 0.6834558638400725 |
| }, |
| { |
| "epoch": 0.49453409682457056, |
| "grad_norm": 1.8125, |
| "learning_rate": 1.1008367870656568e-05, |
| "loss": 0.9957260131835938, |
| "step": 2850, |
| "token_acc": 0.6855102932343144 |
| }, |
| { |
| "epoch": 0.49540170050321014, |
| "grad_norm": 1.8515625, |
| "learning_rate": 1.0979814435065308e-05, |
| "loss": 0.9961285591125488, |
| "step": 2855, |
| "token_acc": 0.6854094100735335 |
| }, |
| { |
| "epoch": 0.4962693041818497, |
| "grad_norm": 1.9296875, |
| "learning_rate": 1.0951252931328887e-05, |
| "loss": 0.9894907951354981, |
| "step": 2860, |
| "token_acc": 0.69068332911443 |
| }, |
| { |
| "epoch": 0.4971369078604893, |
| "grad_norm": 1.9765625, |
| "learning_rate": 1.092268359463302e-05, |
| "loss": 1.0121468544006347, |
| "step": 2865, |
| "token_acc": 0.6819141923071749 |
| }, |
| { |
| "epoch": 0.49800451153912895, |
| "grad_norm": 1.8359375, |
| "learning_rate": 1.0894106660227926e-05, |
| "loss": 1.017982578277588, |
| "step": 2870, |
| "token_acc": 0.6814496708942045 |
| }, |
| { |
| "epoch": 0.49887211521776853, |
| "grad_norm": 1.8515625, |
| "learning_rate": 1.0865522363426376e-05, |
| "loss": 1.0043160438537597, |
| "step": 2875, |
| "token_acc": 0.6829500019793358 |
| }, |
| { |
| "epoch": 0.4997397188964081, |
| "grad_norm": 1.875, |
| "learning_rate": 1.0836930939601768e-05, |
| "loss": 1.008955478668213, |
| "step": 2880, |
| "token_acc": 0.6822375933533712 |
| }, |
| { |
| "epoch": 0.5006073225750477, |
| "grad_norm": 1.9140625, |
| "learning_rate": 1.0808332624186197e-05, |
| "loss": 1.0033825874328612, |
| "step": 2885, |
| "token_acc": 0.684949342881556 |
| }, |
| { |
| "epoch": 0.5014749262536873, |
| "grad_norm": 1.9140625, |
| "learning_rate": 1.0779727652668496e-05, |
| "loss": 1.001988697052002, |
| "step": 2890, |
| "token_acc": 0.6846615607534672 |
| }, |
| { |
| "epoch": 0.5023425299323269, |
| "grad_norm": 1.8515625, |
| "learning_rate": 1.0751116260592312e-05, |
| "loss": 0.9898590087890625, |
| "step": 2895, |
| "token_acc": 0.6871776024781131 |
| }, |
| { |
| "epoch": 0.5032101336109666, |
| "grad_norm": 1.9296875, |
| "learning_rate": 1.072249868355415e-05, |
| "loss": 0.9838379859924317, |
| "step": 2900, |
| "token_acc": 0.6897600586613799 |
| }, |
| { |
| "epoch": 0.5040777372896061, |
| "grad_norm": 1.84375, |
| "learning_rate": 1.0693875157201459e-05, |
| "loss": 0.9647768020629883, |
| "step": 2905, |
| "token_acc": 0.6952837795361677 |
| }, |
| { |
| "epoch": 0.5049453409682457, |
| "grad_norm": 1.859375, |
| "learning_rate": 1.0665245917230666e-05, |
| "loss": 1.0030086517333985, |
| "step": 2910, |
| "token_acc": 0.6839139614674057 |
| }, |
| { |
| "epoch": 0.5058129446468853, |
| "grad_norm": 1.796875, |
| "learning_rate": 1.0636611199385251e-05, |
| "loss": 1.0003300666809083, |
| "step": 2915, |
| "token_acc": 0.6843827426478509 |
| }, |
| { |
| "epoch": 0.5066805483255249, |
| "grad_norm": 1.796875, |
| "learning_rate": 1.0607971239453805e-05, |
| "loss": 0.9978496551513671, |
| "step": 2920, |
| "token_acc": 0.6854442053489087 |
| }, |
| { |
| "epoch": 0.5075481520041645, |
| "grad_norm": 1.96875, |
| "learning_rate": 1.0579326273268074e-05, |
| "loss": 0.9909579277038574, |
| "step": 2925, |
| "token_acc": 0.686803062770415 |
| }, |
| { |
| "epoch": 0.5084157556828041, |
| "grad_norm": 1.8984375, |
| "learning_rate": 1.0550676536701034e-05, |
| "loss": 0.9943648338317871, |
| "step": 2930, |
| "token_acc": 0.6851877207875784 |
| }, |
| { |
| "epoch": 0.5092833593614436, |
| "grad_norm": 1.859375, |
| "learning_rate": 1.052202226566494e-05, |
| "loss": 0.9951316833496093, |
| "step": 2935, |
| "token_acc": 0.6850922617852889 |
| }, |
| { |
| "epoch": 0.5101509630400833, |
| "grad_norm": 1.828125, |
| "learning_rate": 1.0493363696109388e-05, |
| "loss": 0.9918990135192871, |
| "step": 2940, |
| "token_acc": 0.6878556595377437 |
| }, |
| { |
| "epoch": 0.5110185667187229, |
| "grad_norm": 1.875, |
| "learning_rate": 1.0464701064019364e-05, |
| "loss": 1.0089019775390624, |
| "step": 2945, |
| "token_acc": 0.682502467917078 |
| }, |
| { |
| "epoch": 0.5118861703973625, |
| "grad_norm": 1.84375, |
| "learning_rate": 1.0436034605413312e-05, |
| "loss": 0.982180118560791, |
| "step": 2950, |
| "token_acc": 0.6869962643166984 |
| }, |
| { |
| "epoch": 0.5127537740760021, |
| "grad_norm": 1.8984375, |
| "learning_rate": 1.0407364556341183e-05, |
| "loss": 1.0064614295959473, |
| "step": 2955, |
| "token_acc": 0.6841499638737991 |
| }, |
| { |
| "epoch": 0.5136213777546417, |
| "grad_norm": 1.8203125, |
| "learning_rate": 1.0378691152882496e-05, |
| "loss": 1.0329419136047364, |
| "step": 2960, |
| "token_acc": 0.6754290852352366 |
| }, |
| { |
| "epoch": 0.5144889814332813, |
| "grad_norm": 1.8828125, |
| "learning_rate": 1.0350014631144382e-05, |
| "loss": 1.0033533096313476, |
| "step": 2965, |
| "token_acc": 0.6818894869228896 |
| }, |
| { |
| "epoch": 0.5153565851119208, |
| "grad_norm": 1.9140625, |
| "learning_rate": 1.0321335227259661e-05, |
| "loss": 0.9842534065246582, |
| "step": 2970, |
| "token_acc": 0.6909560794180386 |
| }, |
| { |
| "epoch": 0.5162241887905604, |
| "grad_norm": 1.921875, |
| "learning_rate": 1.0292653177384878e-05, |
| "loss": 1.0118374824523926, |
| "step": 2975, |
| "token_acc": 0.682726188540142 |
| }, |
| { |
| "epoch": 0.5170917924692001, |
| "grad_norm": 1.765625, |
| "learning_rate": 1.0263968717698365e-05, |
| "loss": 1.0183884620666503, |
| "step": 2980, |
| "token_acc": 0.6805970149253732 |
| }, |
| { |
| "epoch": 0.5179593961478397, |
| "grad_norm": 1.8203125, |
| "learning_rate": 1.0235282084398301e-05, |
| "loss": 0.9902758598327637, |
| "step": 2985, |
| "token_acc": 0.6868815227383335 |
| }, |
| { |
| "epoch": 0.5188269998264793, |
| "grad_norm": 1.8515625, |
| "learning_rate": 1.0206593513700767e-05, |
| "loss": 1.0007359504699707, |
| "step": 2990, |
| "token_acc": 0.683507329474766 |
| }, |
| { |
| "epoch": 0.5196946035051189, |
| "grad_norm": 1.875, |
| "learning_rate": 1.0177903241837789e-05, |
| "loss": 0.9968069076538086, |
| "step": 2995, |
| "token_acc": 0.6845976760975876 |
| }, |
| { |
| "epoch": 0.5205622071837585, |
| "grad_norm": 1.921875, |
| "learning_rate": 1.0149211505055407e-05, |
| "loss": 0.9842087745666503, |
| "step": 3000, |
| "token_acc": 0.6890838871678698 |
| }, |
| { |
| "epoch": 0.521429810862398, |
| "grad_norm": 1.8359375, |
| "learning_rate": 1.012051853961172e-05, |
| "loss": 0.9979012489318848, |
| "step": 3005, |
| "token_acc": 0.6855041583613994 |
| }, |
| { |
| "epoch": 0.5222974145410376, |
| "grad_norm": 1.8671875, |
| "learning_rate": 1.0091824581774947e-05, |
| "loss": 1.0025611877441407, |
| "step": 3010, |
| "token_acc": 0.6843610112039744 |
| }, |
| { |
| "epoch": 0.5231650182196772, |
| "grad_norm": 1.828125, |
| "learning_rate": 1.0063129867821475e-05, |
| "loss": 1.0025950431823731, |
| "step": 3015, |
| "token_acc": 0.6844177684199013 |
| }, |
| { |
| "epoch": 0.5240326218983169, |
| "grad_norm": 1.859375, |
| "learning_rate": 1.0034434634033919e-05, |
| "loss": 0.9844324111938476, |
| "step": 3020, |
| "token_acc": 0.6901680615091516 |
| }, |
| { |
| "epoch": 0.5249002255769565, |
| "grad_norm": 1.875, |
| "learning_rate": 1.0005739116699178e-05, |
| "loss": 1.0121084213256837, |
| "step": 3025, |
| "token_acc": 0.6821625441696113 |
| }, |
| { |
| "epoch": 0.5257678292555961, |
| "grad_norm": 1.8359375, |
| "learning_rate": 9.977043552106484e-06, |
| "loss": 0.9731731414794922, |
| "step": 3030, |
| "token_acc": 0.69295219319862 |
| }, |
| { |
| "epoch": 0.5266354329342356, |
| "grad_norm": 1.84375, |
| "learning_rate": 9.94834817654545e-06, |
| "loss": 0.9912844657897949, |
| "step": 3035, |
| "token_acc": 0.6854252683732452 |
| }, |
| { |
| "epoch": 0.5275030366128752, |
| "grad_norm": 1.9375, |
| "learning_rate": 9.919653226304148e-06, |
| "loss": 0.989024543762207, |
| "step": 3040, |
| "token_acc": 0.6876149180822745 |
| }, |
| { |
| "epoch": 0.5283706402915148, |
| "grad_norm": 1.7890625, |
| "learning_rate": 9.890958937667135e-06, |
| "loss": 1.012401008605957, |
| "step": 3045, |
| "token_acc": 0.6832023046685692 |
| }, |
| { |
| "epoch": 0.5292382439701544, |
| "grad_norm": 1.8671875, |
| "learning_rate": 9.862265546913526e-06, |
| "loss": 1.0105487823486328, |
| "step": 3050, |
| "token_acc": 0.6831975602049648 |
| }, |
| { |
| "epoch": 0.530105847648794, |
| "grad_norm": 1.9921875, |
| "learning_rate": 9.83357329031504e-06, |
| "loss": 0.9997787475585938, |
| "step": 3055, |
| "token_acc": 0.6836862959420685 |
| }, |
| { |
| "epoch": 0.5309734513274337, |
| "grad_norm": 1.890625, |
| "learning_rate": 9.804882404134057e-06, |
| "loss": 0.9793942451477051, |
| "step": 3060, |
| "token_acc": 0.6905264857446551 |
| }, |
| { |
| "epoch": 0.5318410550060733, |
| "grad_norm": 1.8828125, |
| "learning_rate": 9.776193124621673e-06, |
| "loss": 1.0060349464416505, |
| "step": 3065, |
| "token_acc": 0.6837666900913563 |
| }, |
| { |
| "epoch": 0.5327086586847128, |
| "grad_norm": 1.8046875, |
| "learning_rate": 9.747505688015757e-06, |
| "loss": 0.9506141662597656, |
| "step": 3070, |
| "token_acc": 0.696441489065717 |
| }, |
| { |
| "epoch": 0.5335762623633524, |
| "grad_norm": 1.8203125, |
| "learning_rate": 9.718820330538999e-06, |
| "loss": 1.0000137329101562, |
| "step": 3075, |
| "token_acc": 0.6840303318042609 |
| }, |
| { |
| "epoch": 0.534443866041992, |
| "grad_norm": 1.90625, |
| "learning_rate": 9.690137288396967e-06, |
| "loss": 0.9879467010498046, |
| "step": 3080, |
| "token_acc": 0.6884837459463735 |
| }, |
| { |
| "epoch": 0.5353114697206316, |
| "grad_norm": 1.828125, |
| "learning_rate": 9.66145679777617e-06, |
| "loss": 1.0037842750549317, |
| "step": 3085, |
| "token_acc": 0.683606172775142 |
| }, |
| { |
| "epoch": 0.5361790733992712, |
| "grad_norm": 1.90625, |
| "learning_rate": 9.632779094842104e-06, |
| "loss": 0.9850837707519531, |
| "step": 3090, |
| "token_acc": 0.6906727747296649 |
| }, |
| { |
| "epoch": 0.5370466770779108, |
| "grad_norm": 1.9765625, |
| "learning_rate": 9.604104415737309e-06, |
| "loss": 1.0082507133483887, |
| "step": 3095, |
| "token_acc": 0.6840075020949999 |
| }, |
| { |
| "epoch": 0.5379142807565505, |
| "grad_norm": 1.8828125, |
| "learning_rate": 9.575432996579424e-06, |
| "loss": 0.9955901145935059, |
| "step": 3100, |
| "token_acc": 0.6857669735637754 |
| }, |
| { |
| "epoch": 0.53878188443519, |
| "grad_norm": 1.765625, |
| "learning_rate": 9.546765073459245e-06, |
| "loss": 0.9778296470642089, |
| "step": 3105, |
| "token_acc": 0.6909004764286278 |
| }, |
| { |
| "epoch": 0.5396494881138296, |
| "grad_norm": 1.8515625, |
| "learning_rate": 9.51810088243879e-06, |
| "loss": 1.0080193519592284, |
| "step": 3110, |
| "token_acc": 0.6846056403760251 |
| }, |
| { |
| "epoch": 0.5405170917924692, |
| "grad_norm": 1.984375, |
| "learning_rate": 9.489440659549333e-06, |
| "loss": 1.0060848236083983, |
| "step": 3115, |
| "token_acc": 0.6859093319194062 |
| }, |
| { |
| "epoch": 0.5413846954711088, |
| "grad_norm": 1.8046875, |
| "learning_rate": 9.46078464078948e-06, |
| "loss": 0.9916322708129883, |
| "step": 3120, |
| "token_acc": 0.6852762549715146 |
| }, |
| { |
| "epoch": 0.5422522991497484, |
| "grad_norm": 1.8359375, |
| "learning_rate": 9.432133062123215e-06, |
| "loss": 0.9954086303710937, |
| "step": 3125, |
| "token_acc": 0.6874728114000975 |
| }, |
| { |
| "epoch": 0.543119902828388, |
| "grad_norm": 1.9609375, |
| "learning_rate": 9.40348615947796e-06, |
| "loss": 1.0074991226196288, |
| "step": 3130, |
| "token_acc": 0.681418392340236 |
| }, |
| { |
| "epoch": 0.5439875065070275, |
| "grad_norm": 1.8359375, |
| "learning_rate": 9.374844168742637e-06, |
| "loss": 1.0012994766235352, |
| "step": 3135, |
| "token_acc": 0.6855507942467278 |
| }, |
| { |
| "epoch": 0.5448551101856672, |
| "grad_norm": 1.8515625, |
| "learning_rate": 9.34620732576572e-06, |
| "loss": 1.0055302619934081, |
| "step": 3140, |
| "token_acc": 0.6849006828057107 |
| }, |
| { |
| "epoch": 0.5457227138643068, |
| "grad_norm": 1.9765625, |
| "learning_rate": 9.317575866353293e-06, |
| "loss": 0.9842160224914551, |
| "step": 3145, |
| "token_acc": 0.6883597598729373 |
| }, |
| { |
| "epoch": 0.5465903175429464, |
| "grad_norm": 1.8828125, |
| "learning_rate": 9.28895002626711e-06, |
| "loss": 1.0006650924682616, |
| "step": 3150, |
| "token_acc": 0.6856869530964238 |
| }, |
| { |
| "epoch": 0.547457921221586, |
| "grad_norm": 1.8359375, |
| "learning_rate": 9.260330041222656e-06, |
| "loss": 1.0168807983398438, |
| "step": 3155, |
| "token_acc": 0.6813513261486406 |
| }, |
| { |
| "epoch": 0.5483255249002256, |
| "grad_norm": 1.9453125, |
| "learning_rate": 9.231716146887203e-06, |
| "loss": 0.9734827041625976, |
| "step": 3160, |
| "token_acc": 0.693440864594789 |
| }, |
| { |
| "epoch": 0.5491931285788652, |
| "grad_norm": 1.90625, |
| "learning_rate": 9.203108578877866e-06, |
| "loss": 0.9954551696777344, |
| "step": 3165, |
| "token_acc": 0.685033919424062 |
| }, |
| { |
| "epoch": 0.5500607322575047, |
| "grad_norm": 1.859375, |
| "learning_rate": 9.174507572759672e-06, |
| "loss": 1.005191707611084, |
| "step": 3170, |
| "token_acc": 0.6829043026216833 |
| }, |
| { |
| "epoch": 0.5509283359361443, |
| "grad_norm": 1.859375, |
| "learning_rate": 9.145913364043604e-06, |
| "loss": 0.9932435035705567, |
| "step": 3175, |
| "token_acc": 0.6873352300905745 |
| }, |
| { |
| "epoch": 0.551795939614784, |
| "grad_norm": 1.90625, |
| "learning_rate": 9.117326188184696e-06, |
| "loss": 0.9784406661987305, |
| "step": 3180, |
| "token_acc": 0.691351665477983 |
| }, |
| { |
| "epoch": 0.5526635432934236, |
| "grad_norm": 1.8203125, |
| "learning_rate": 9.088746280580046e-06, |
| "loss": 1.0030339241027832, |
| "step": 3185, |
| "token_acc": 0.6849746393518213 |
| }, |
| { |
| "epoch": 0.5535311469720632, |
| "grad_norm": 1.9765625, |
| "learning_rate": 9.060173876566916e-06, |
| "loss": 1.0087509155273438, |
| "step": 3190, |
| "token_acc": 0.682428123685603 |
| }, |
| { |
| "epoch": 0.5543987506507028, |
| "grad_norm": 1.8125, |
| "learning_rate": 9.031609211420775e-06, |
| "loss": 1.0267830848693849, |
| "step": 3195, |
| "token_acc": 0.679297126313532 |
| }, |
| { |
| "epoch": 0.5552663543293423, |
| "grad_norm": 1.84375, |
| "learning_rate": 9.003052520353372e-06, |
| "loss": 1.0051657676696777, |
| "step": 3200, |
| "token_acc": 0.6841281932693093 |
| }, |
| { |
| "epoch": 0.5561339580079819, |
| "grad_norm": 1.8046875, |
| "learning_rate": 8.974504038510793e-06, |
| "loss": 1.005373477935791, |
| "step": 3205, |
| "token_acc": 0.6836050245944957 |
| }, |
| { |
| "epoch": 0.5570015616866215, |
| "grad_norm": 1.8515625, |
| "learning_rate": 8.945964000971525e-06, |
| "loss": 0.9805338859558106, |
| "step": 3210, |
| "token_acc": 0.6906489566678965 |
| }, |
| { |
| "epoch": 0.5578691653652611, |
| "grad_norm": 1.8359375, |
| "learning_rate": 8.917432642744519e-06, |
| "loss": 1.0035972595214844, |
| "step": 3215, |
| "token_acc": 0.6831262001280136 |
| }, |
| { |
| "epoch": 0.5587367690439008, |
| "grad_norm": 1.8515625, |
| "learning_rate": 8.888910198767265e-06, |
| "loss": 0.9910804748535156, |
| "step": 3220, |
| "token_acc": 0.6850102007945882 |
| }, |
| { |
| "epoch": 0.5596043727225404, |
| "grad_norm": 1.796875, |
| "learning_rate": 8.860396903903844e-06, |
| "loss": 0.9914836883544922, |
| "step": 3225, |
| "token_acc": 0.6887413708576804 |
| }, |
| { |
| "epoch": 0.56047197640118, |
| "grad_norm": 1.8203125, |
| "learning_rate": 8.831892992943e-06, |
| "loss": 1.0112311363220214, |
| "step": 3230, |
| "token_acc": 0.6815932803989763 |
| }, |
| { |
| "epoch": 0.5613395800798195, |
| "grad_norm": 1.9609375, |
| "learning_rate": 8.803398700596208e-06, |
| "loss": 1.007247543334961, |
| "step": 3235, |
| "token_acc": 0.683875897072066 |
| }, |
| { |
| "epoch": 0.5622071837584591, |
| "grad_norm": 1.90625, |
| "learning_rate": 8.774914261495738e-06, |
| "loss": 1.0004298210144043, |
| "step": 3240, |
| "token_acc": 0.6841970344985766 |
| }, |
| { |
| "epoch": 0.5630747874370987, |
| "grad_norm": 1.8359375, |
| "learning_rate": 8.746439910192735e-06, |
| "loss": 0.9888349533081054, |
| "step": 3245, |
| "token_acc": 0.6879607213774719 |
| }, |
| { |
| "epoch": 0.5639423911157383, |
| "grad_norm": 1.890625, |
| "learning_rate": 8.717975881155261e-06, |
| "loss": 1.0053036689758301, |
| "step": 3250, |
| "token_acc": 0.6843277773304346 |
| }, |
| { |
| "epoch": 0.5648099947943779, |
| "grad_norm": 1.8125, |
| "learning_rate": 8.689522408766395e-06, |
| "loss": 1.006988525390625, |
| "step": 3255, |
| "token_acc": 0.6830830648001983 |
| }, |
| { |
| "epoch": 0.5656775984730176, |
| "grad_norm": 1.90625, |
| "learning_rate": 8.661079727322276e-06, |
| "loss": 1.0136844635009765, |
| "step": 3260, |
| "token_acc": 0.6810806425442155 |
| }, |
| { |
| "epoch": 0.5665452021516572, |
| "grad_norm": 1.8671875, |
| "learning_rate": 8.632648071030198e-06, |
| "loss": 1.0038190841674806, |
| "step": 3265, |
| "token_acc": 0.6849343777015168 |
| }, |
| { |
| "epoch": 0.5674128058302967, |
| "grad_norm": 1.9296875, |
| "learning_rate": 8.604227674006661e-06, |
| "loss": 0.9864459991455078, |
| "step": 3270, |
| "token_acc": 0.6862783616540615 |
| }, |
| { |
| "epoch": 0.5682804095089363, |
| "grad_norm": 1.8515625, |
| "learning_rate": 8.57581877027546e-06, |
| "loss": 0.9800386428833008, |
| "step": 3275, |
| "token_acc": 0.6911001694197374 |
| }, |
| { |
| "epoch": 0.5691480131875759, |
| "grad_norm": 1.8203125, |
| "learning_rate": 8.547421593765744e-06, |
| "loss": 0.9790647506713868, |
| "step": 3280, |
| "token_acc": 0.6886432619731929 |
| }, |
| { |
| "epoch": 0.5700156168662155, |
| "grad_norm": 1.9609375, |
| "learning_rate": 8.519036378310098e-06, |
| "loss": 0.9918664932250977, |
| "step": 3285, |
| "token_acc": 0.6854125633826426 |
| }, |
| { |
| "epoch": 0.5708832205448551, |
| "grad_norm": 1.8984375, |
| "learning_rate": 8.490663357642615e-06, |
| "loss": 0.9926240921020508, |
| "step": 3290, |
| "token_acc": 0.687613955720063 |
| }, |
| { |
| "epoch": 0.5717508242234947, |
| "grad_norm": 1.875, |
| "learning_rate": 8.462302765396975e-06, |
| "loss": 0.9821521759033203, |
| "step": 3295, |
| "token_acc": 0.6893415493905228 |
| }, |
| { |
| "epoch": 0.5726184279021344, |
| "grad_norm": 1.828125, |
| "learning_rate": 8.433954835104513e-06, |
| "loss": 1.0029169082641602, |
| "step": 3300, |
| "token_acc": 0.6836323546782512 |
| }, |
| { |
| "epoch": 0.5734860315807739, |
| "grad_norm": 1.921875, |
| "learning_rate": 8.4056198001923e-06, |
| "loss": 0.9930968284606934, |
| "step": 3305, |
| "token_acc": 0.6868556180002426 |
| }, |
| { |
| "epoch": 0.5743536352594135, |
| "grad_norm": 1.921875, |
| "learning_rate": 8.377297893981224e-06, |
| "loss": 0.9897697448730469, |
| "step": 3310, |
| "token_acc": 0.6850893984441819 |
| }, |
| { |
| "epoch": 0.5752212389380531, |
| "grad_norm": 1.8359375, |
| "learning_rate": 8.348989349684077e-06, |
| "loss": 1.0004033088684081, |
| "step": 3315, |
| "token_acc": 0.6834115743155585 |
| }, |
| { |
| "epoch": 0.5760888426166927, |
| "grad_norm": 1.953125, |
| "learning_rate": 8.320694400403608e-06, |
| "loss": 1.0031415939331054, |
| "step": 3320, |
| "token_acc": 0.6850924472948079 |
| }, |
| { |
| "epoch": 0.5769564462953323, |
| "grad_norm": 1.9140625, |
| "learning_rate": 8.292413279130625e-06, |
| "loss": 0.9991157531738282, |
| "step": 3325, |
| "token_acc": 0.6833071420830172 |
| }, |
| { |
| "epoch": 0.5778240499739719, |
| "grad_norm": 1.828125, |
| "learning_rate": 8.264146218742074e-06, |
| "loss": 1.0167976379394532, |
| "step": 3330, |
| "token_acc": 0.6805396906454517 |
| }, |
| { |
| "epoch": 0.5786916536526114, |
| "grad_norm": 2.046875, |
| "learning_rate": 8.235893451999118e-06, |
| "loss": 1.0147868156433106, |
| "step": 3335, |
| "token_acc": 0.6792890262751159 |
| }, |
| { |
| "epoch": 0.5795592573312511, |
| "grad_norm": 1.8515625, |
| "learning_rate": 8.207655211545218e-06, |
| "loss": 1.0142845153808593, |
| "step": 3340, |
| "token_acc": 0.6800431959683763 |
| }, |
| { |
| "epoch": 0.5804268610098907, |
| "grad_norm": 1.8125, |
| "learning_rate": 8.179431729904223e-06, |
| "loss": 1.012403964996338, |
| "step": 3345, |
| "token_acc": 0.6800115019148074 |
| }, |
| { |
| "epoch": 0.5812944646885303, |
| "grad_norm": 1.9609375, |
| "learning_rate": 8.151223239478453e-06, |
| "loss": 0.9996941566467286, |
| "step": 3350, |
| "token_acc": 0.6826174967983586 |
| }, |
| { |
| "epoch": 0.5821620683671699, |
| "grad_norm": 1.90625, |
| "learning_rate": 8.123029972546782e-06, |
| "loss": 1.0093581199645996, |
| "step": 3355, |
| "token_acc": 0.6819460251429169 |
| }, |
| { |
| "epoch": 0.5830296720458095, |
| "grad_norm": 1.8671875, |
| "learning_rate": 8.09485216126273e-06, |
| "loss": 1.0049400329589844, |
| "step": 3360, |
| "token_acc": 0.6827514040478966 |
| }, |
| { |
| "epoch": 0.583897275724449, |
| "grad_norm": 1.8828125, |
| "learning_rate": 8.066690037652552e-06, |
| "loss": 0.9991744995117188, |
| "step": 3365, |
| "token_acc": 0.6828673913638729 |
| }, |
| { |
| "epoch": 0.5847648794030886, |
| "grad_norm": 1.921875, |
| "learning_rate": 8.03854383361332e-06, |
| "loss": 0.9949298858642578, |
| "step": 3370, |
| "token_acc": 0.6877322396851174 |
| }, |
| { |
| "epoch": 0.5856324830817282, |
| "grad_norm": 1.8515625, |
| "learning_rate": 8.010413780911022e-06, |
| "loss": 1.0077406883239746, |
| "step": 3375, |
| "token_acc": 0.6826122846664953 |
| }, |
| { |
| "epoch": 0.5865000867603679, |
| "grad_norm": 1.9140625, |
| "learning_rate": 7.982300111178648e-06, |
| "loss": 1.0013755798339843, |
| "step": 3380, |
| "token_acc": 0.6858611685344359 |
| }, |
| { |
| "epoch": 0.5873676904390075, |
| "grad_norm": 1.921875, |
| "learning_rate": 7.954203055914289e-06, |
| "loss": 0.9829542160034179, |
| "step": 3385, |
| "token_acc": 0.6888979370249728 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "grad_norm": 1.8671875, |
| "learning_rate": 7.926122846479224e-06, |
| "loss": 0.993384838104248, |
| "step": 3390, |
| "token_acc": 0.6845540146288179 |
| }, |
| { |
| "epoch": 0.5891028977962867, |
| "grad_norm": 1.828125, |
| "learning_rate": 7.898059714096016e-06, |
| "loss": 0.956721305847168, |
| "step": 3395, |
| "token_acc": 0.698894211628116 |
| }, |
| { |
| "epoch": 0.5899705014749262, |
| "grad_norm": 1.84375, |
| "learning_rate": 7.870013889846608e-06, |
| "loss": 0.9920453071594239, |
| "step": 3400, |
| "token_acc": 0.686382917252157 |
| }, |
| { |
| "epoch": 0.5908381051535658, |
| "grad_norm": 1.84375, |
| "learning_rate": 7.841985604670427e-06, |
| "loss": 0.9913934707641602, |
| "step": 3405, |
| "token_acc": 0.6889823114142937 |
| }, |
| { |
| "epoch": 0.5917057088322054, |
| "grad_norm": 2.0, |
| "learning_rate": 7.81397508936247e-06, |
| "loss": 0.9880316734313965, |
| "step": 3410, |
| "token_acc": 0.6867218573075777 |
| }, |
| { |
| "epoch": 0.592573312510845, |
| "grad_norm": 1.9609375, |
| "learning_rate": 7.78598257457142e-06, |
| "loss": 0.9705442428588867, |
| "step": 3415, |
| "token_acc": 0.6910044977511245 |
| }, |
| { |
| "epoch": 0.5934409161894847, |
| "grad_norm": 1.8046875, |
| "learning_rate": 7.758008290797727e-06, |
| "loss": 0.9677356719970703, |
| "step": 3420, |
| "token_acc": 0.6959344774631571 |
| }, |
| { |
| "epoch": 0.5943085198681243, |
| "grad_norm": 1.8828125, |
| "learning_rate": 7.730052468391726e-06, |
| "loss": 0.9935931205749512, |
| "step": 3425, |
| "token_acc": 0.6881914107130855 |
| }, |
| { |
| "epoch": 0.5951761235467639, |
| "grad_norm": 1.921875, |
| "learning_rate": 7.702115337551733e-06, |
| "loss": 1.028738307952881, |
| "step": 3430, |
| "token_acc": 0.6780585491818397 |
| }, |
| { |
| "epoch": 0.5960437272254034, |
| "grad_norm": 1.921875, |
| "learning_rate": 7.674197128322151e-06, |
| "loss": 1.0033409118652343, |
| "step": 3435, |
| "token_acc": 0.6833868116036933 |
| }, |
| { |
| "epoch": 0.596911330904043, |
| "grad_norm": 1.8046875, |
| "learning_rate": 7.646298070591578e-06, |
| "loss": 1.0005316734313965, |
| "step": 3440, |
| "token_acc": 0.6868071389260162 |
| }, |
| { |
| "epoch": 0.5977789345826826, |
| "grad_norm": 1.7265625, |
| "learning_rate": 7.618418394090907e-06, |
| "loss": 0.9753083229064942, |
| "step": 3445, |
| "token_acc": 0.6916532970218273 |
| }, |
| { |
| "epoch": 0.5986465382613222, |
| "grad_norm": 1.8984375, |
| "learning_rate": 7.59055832839144e-06, |
| "loss": 0.9871037483215332, |
| "step": 3450, |
| "token_acc": 0.688853524302102 |
| }, |
| { |
| "epoch": 0.5995141419399618, |
| "grad_norm": 1.890625, |
| "learning_rate": 7.562718102903002e-06, |
| "loss": 0.9996206283569335, |
| "step": 3455, |
| "token_acc": 0.6837434616393171 |
| }, |
| { |
| "epoch": 0.6003817456186015, |
| "grad_norm": 1.875, |
| "learning_rate": 7.534897946872042e-06, |
| "loss": 1.0057412147521974, |
| "step": 3460, |
| "token_acc": 0.6834609861177597 |
| }, |
| { |
| "epoch": 0.601249349297241, |
| "grad_norm": 1.890625, |
| "learning_rate": 7.507098089379749e-06, |
| "loss": 0.990781593322754, |
| "step": 3465, |
| "token_acc": 0.6880597411570862 |
| }, |
| { |
| "epoch": 0.6021169529758806, |
| "grad_norm": 1.8125, |
| "learning_rate": 7.479318759340171e-06, |
| "loss": 0.9857464790344238, |
| "step": 3470, |
| "token_acc": 0.689186540346292 |
| }, |
| { |
| "epoch": 0.6029845566545202, |
| "grad_norm": 1.828125, |
| "learning_rate": 7.451560185498318e-06, |
| "loss": 0.9758604049682618, |
| "step": 3475, |
| "token_acc": 0.6898560948081264 |
| }, |
| { |
| "epoch": 0.6038521603331598, |
| "grad_norm": 1.8671875, |
| "learning_rate": 7.423822596428291e-06, |
| "loss": 0.9707001686096192, |
| "step": 3480, |
| "token_acc": 0.6923664838627496 |
| }, |
| { |
| "epoch": 0.6047197640117994, |
| "grad_norm": 1.796875, |
| "learning_rate": 7.396106220531398e-06, |
| "loss": 1.0107527732849122, |
| "step": 3485, |
| "token_acc": 0.6832125667742106 |
| }, |
| { |
| "epoch": 0.605587367690439, |
| "grad_norm": 1.9296875, |
| "learning_rate": 7.368411286034265e-06, |
| "loss": 1.011655330657959, |
| "step": 3490, |
| "token_acc": 0.6814160469354903 |
| }, |
| { |
| "epoch": 0.6064549713690786, |
| "grad_norm": 1.8515625, |
| "learning_rate": 7.340738020986961e-06, |
| "loss": 1.0010527610778808, |
| "step": 3495, |
| "token_acc": 0.6855910839856707 |
| }, |
| { |
| "epoch": 0.6073225750477182, |
| "grad_norm": 1.8359375, |
| "learning_rate": 7.313086653261126e-06, |
| "loss": 1.0003108024597167, |
| "step": 3500, |
| "token_acc": 0.6845823427706937 |
| }, |
| { |
| "epoch": 0.6081901787263578, |
| "grad_norm": 1.90625, |
| "learning_rate": 7.285457410548084e-06, |
| "loss": 1.0062461853027345, |
| "step": 3505, |
| "token_acc": 0.6841428111933098 |
| }, |
| { |
| "epoch": 0.6090577824049974, |
| "grad_norm": 1.8515625, |
| "learning_rate": 7.2578505203569775e-06, |
| "loss": 1.0086194038391114, |
| "step": 3510, |
| "token_acc": 0.6810051221539865 |
| }, |
| { |
| "epoch": 0.609925386083637, |
| "grad_norm": 1.8984375, |
| "learning_rate": 7.230266210012886e-06, |
| "loss": 0.9880249977111817, |
| "step": 3515, |
| "token_acc": 0.6883796750337954 |
| }, |
| { |
| "epoch": 0.6107929897622766, |
| "grad_norm": 1.828125, |
| "learning_rate": 7.20270470665497e-06, |
| "loss": 0.9953752517700195, |
| "step": 3520, |
| "token_acc": 0.6858969161328684 |
| }, |
| { |
| "epoch": 0.6116605934409162, |
| "grad_norm": 1.8359375, |
| "learning_rate": 7.1751662372345745e-06, |
| "loss": 1.0096102714538575, |
| "step": 3525, |
| "token_acc": 0.6835760199396563 |
| }, |
| { |
| "epoch": 0.6125281971195558, |
| "grad_norm": 1.9296875, |
| "learning_rate": 7.1476510285133824e-06, |
| "loss": 1.01358003616333, |
| "step": 3530, |
| "token_acc": 0.6808007033204843 |
| }, |
| { |
| "epoch": 0.6133958007981953, |
| "grad_norm": 1.8671875, |
| "learning_rate": 7.1201593070615385e-06, |
| "loss": 0.9893976211547851, |
| "step": 3535, |
| "token_acc": 0.6872642713325582 |
| }, |
| { |
| "epoch": 0.614263404476835, |
| "grad_norm": 1.859375, |
| "learning_rate": 7.0926912992557825e-06, |
| "loss": 1.0022952079772949, |
| "step": 3540, |
| "token_acc": 0.6836349718409973 |
| }, |
| { |
| "epoch": 0.6151310081554746, |
| "grad_norm": 1.875, |
| "learning_rate": 7.065247231277592e-06, |
| "loss": 0.9951557159423828, |
| "step": 3545, |
| "token_acc": 0.6851784290675207 |
| }, |
| { |
| "epoch": 0.6159986118341142, |
| "grad_norm": 1.828125, |
| "learning_rate": 7.037827329111313e-06, |
| "loss": 1.0185998916625976, |
| "step": 3550, |
| "token_acc": 0.6794846010484963 |
| }, |
| { |
| "epoch": 0.6168662155127538, |
| "grad_norm": 1.8671875, |
| "learning_rate": 7.010431818542298e-06, |
| "loss": 1.0109454154968263, |
| "step": 3555, |
| "token_acc": 0.682662396471839 |
| }, |
| { |
| "epoch": 0.6177338191913934, |
| "grad_norm": 1.78125, |
| "learning_rate": 6.983060925155056e-06, |
| "loss": 0.990286922454834, |
| "step": 3560, |
| "token_acc": 0.6862885957035297 |
| }, |
| { |
| "epoch": 0.618601422870033, |
| "grad_norm": 1.84375, |
| "learning_rate": 6.955714874331388e-06, |
| "loss": 0.9858268737792969, |
| "step": 3565, |
| "token_acc": 0.6888843703402467 |
| }, |
| { |
| "epoch": 0.6194690265486725, |
| "grad_norm": 1.8203125, |
| "learning_rate": 6.928393891248529e-06, |
| "loss": 1.0141701698303223, |
| "step": 3570, |
| "token_acc": 0.6834784012484361 |
| }, |
| { |
| "epoch": 0.6203366302273121, |
| "grad_norm": 1.8125, |
| "learning_rate": 6.901098200877301e-06, |
| "loss": 0.967597770690918, |
| "step": 3575, |
| "token_acc": 0.6944241377018675 |
| }, |
| { |
| "epoch": 0.6212042339059518, |
| "grad_norm": 2.015625, |
| "learning_rate": 6.873828027980256e-06, |
| "loss": 0.9855113983154297, |
| "step": 3580, |
| "token_acc": 0.6862524757342923 |
| }, |
| { |
| "epoch": 0.6220718375845914, |
| "grad_norm": 1.9140625, |
| "learning_rate": 6.846583597109817e-06, |
| "loss": 0.9973045349121094, |
| "step": 3585, |
| "token_acc": 0.684243293722762 |
| }, |
| { |
| "epoch": 0.622939441263231, |
| "grad_norm": 1.9140625, |
| "learning_rate": 6.819365132606459e-06, |
| "loss": 0.9799047470092773, |
| "step": 3590, |
| "token_acc": 0.6892774554748672 |
| }, |
| { |
| "epoch": 0.6238070449418706, |
| "grad_norm": 1.75, |
| "learning_rate": 6.7921728585968215e-06, |
| "loss": 1.0055973052978515, |
| "step": 3595, |
| "token_acc": 0.6808834120188846 |
| }, |
| { |
| "epoch": 0.6246746486205101, |
| "grad_norm": 1.7265625, |
| "learning_rate": 6.765006998991889e-06, |
| "loss": 0.9758973121643066, |
| "step": 3600, |
| "token_acc": 0.6907637655417407 |
| }, |
| { |
| "epoch": 0.6255422522991497, |
| "grad_norm": 1.890625, |
| "learning_rate": 6.737867777485136e-06, |
| "loss": 1.0146740913391112, |
| "step": 3605, |
| "token_acc": 0.6811284150100989 |
| }, |
| { |
| "epoch": 0.6264098559777893, |
| "grad_norm": 1.90625, |
| "learning_rate": 6.710755417550698e-06, |
| "loss": 0.9987593650817871, |
| "step": 3610, |
| "token_acc": 0.6847243880941081 |
| }, |
| { |
| "epoch": 0.6272774596564289, |
| "grad_norm": 1.953125, |
| "learning_rate": 6.683670142441514e-06, |
| "loss": 0.9979434013366699, |
| "step": 3615, |
| "token_acc": 0.6853054139387396 |
| }, |
| { |
| "epoch": 0.6281450633350686, |
| "grad_norm": 1.8203125, |
| "learning_rate": 6.6566121751875e-06, |
| "loss": 0.9827108383178711, |
| "step": 3620, |
| "token_acc": 0.6896419200084816 |
| }, |
| { |
| "epoch": 0.6290126670137082, |
| "grad_norm": 1.875, |
| "learning_rate": 6.6295817385937104e-06, |
| "loss": 0.9979496002197266, |
| "step": 3625, |
| "token_acc": 0.6842961073185775 |
| }, |
| { |
| "epoch": 0.6298802706923478, |
| "grad_norm": 1.921875, |
| "learning_rate": 6.602579055238501e-06, |
| "loss": 0.9886339187622071, |
| "step": 3630, |
| "token_acc": 0.6858520767782801 |
| }, |
| { |
| "epoch": 0.6307478743709873, |
| "grad_norm": 1.9375, |
| "learning_rate": 6.575604347471696e-06, |
| "loss": 1.0002639770507813, |
| "step": 3635, |
| "token_acc": 0.6843852893576651 |
| }, |
| { |
| "epoch": 0.6316154780496269, |
| "grad_norm": 1.8984375, |
| "learning_rate": 6.548657837412764e-06, |
| "loss": 0.9971570014953614, |
| "step": 3640, |
| "token_acc": 0.6848995111352526 |
| }, |
| { |
| "epoch": 0.6324830817282665, |
| "grad_norm": 1.90625, |
| "learning_rate": 6.5217397469489765e-06, |
| "loss": 0.9921416282653809, |
| "step": 3645, |
| "token_acc": 0.6852397462075014 |
| }, |
| { |
| "epoch": 0.6333506854069061, |
| "grad_norm": 1.7421875, |
| "learning_rate": 6.494850297733591e-06, |
| "loss": 1.0081979751586914, |
| "step": 3650, |
| "token_acc": 0.6827699225310147 |
| }, |
| { |
| "epoch": 0.6342182890855457, |
| "grad_norm": 1.953125, |
| "learning_rate": 6.467989711184021e-06, |
| "loss": 0.9944825172424316, |
| "step": 3655, |
| "token_acc": 0.6852892695976437 |
| }, |
| { |
| "epoch": 0.6350858927641854, |
| "grad_norm": 1.875, |
| "learning_rate": 6.4411582084800215e-06, |
| "loss": 0.9934005737304688, |
| "step": 3660, |
| "token_acc": 0.6851075268817204 |
| }, |
| { |
| "epoch": 0.635953496442825, |
| "grad_norm": 1.90625, |
| "learning_rate": 6.414356010561853e-06, |
| "loss": 0.9901107788085938, |
| "step": 3665, |
| "token_acc": 0.6862788024738656 |
| }, |
| { |
| "epoch": 0.6368211001214645, |
| "grad_norm": 1.8359375, |
| "learning_rate": 6.387583338128471e-06, |
| "loss": 1.0017055511474608, |
| "step": 3670, |
| "token_acc": 0.6829901814126799 |
| }, |
| { |
| "epoch": 0.6376887038001041, |
| "grad_norm": 1.84375, |
| "learning_rate": 6.3608404116357096e-06, |
| "loss": 1.0016436576843262, |
| "step": 3675, |
| "token_acc": 0.6823521311023893 |
| }, |
| { |
| "epoch": 0.6385563074787437, |
| "grad_norm": 1.8515625, |
| "learning_rate": 6.334127451294461e-06, |
| "loss": 0.995360279083252, |
| "step": 3680, |
| "token_acc": 0.6857756640635555 |
| }, |
| { |
| "epoch": 0.6394239111573833, |
| "grad_norm": 1.8984375, |
| "learning_rate": 6.307444677068869e-06, |
| "loss": 1.0071782112121581, |
| "step": 3685, |
| "token_acc": 0.6841917710589074 |
| }, |
| { |
| "epoch": 0.6402915148360229, |
| "grad_norm": 1.7890625, |
| "learning_rate": 6.280792308674512e-06, |
| "loss": 0.9938779830932617, |
| "step": 3690, |
| "token_acc": 0.6878277558523004 |
| }, |
| { |
| "epoch": 0.6411591185146625, |
| "grad_norm": 1.9140625, |
| "learning_rate": 6.254170565576596e-06, |
| "loss": 0.9867862701416016, |
| "step": 3695, |
| "token_acc": 0.6865127083902706 |
| }, |
| { |
| "epoch": 0.6420267221933021, |
| "grad_norm": 1.828125, |
| "learning_rate": 6.227579666988149e-06, |
| "loss": 0.9970829010009765, |
| "step": 3700, |
| "token_acc": 0.6850032654838358 |
| }, |
| { |
| "epoch": 0.6428943258719417, |
| "grad_norm": 1.84375, |
| "learning_rate": 6.201019831868209e-06, |
| "loss": 0.9995267868041993, |
| "step": 3705, |
| "token_acc": 0.6846874095894374 |
| }, |
| { |
| "epoch": 0.6437619295505813, |
| "grad_norm": 1.84375, |
| "learning_rate": 6.174491278920034e-06, |
| "loss": 0.9917936325073242, |
| "step": 3710, |
| "token_acc": 0.6880288247439375 |
| }, |
| { |
| "epoch": 0.6446295332292209, |
| "grad_norm": 1.8046875, |
| "learning_rate": 6.147994226589287e-06, |
| "loss": 0.9787176132202149, |
| "step": 3715, |
| "token_acc": 0.6913672458526614 |
| }, |
| { |
| "epoch": 0.6454971369078605, |
| "grad_norm": 1.8359375, |
| "learning_rate": 6.121528893062246e-06, |
| "loss": 1.0017691612243653, |
| "step": 3720, |
| "token_acc": 0.6832628692610052 |
| }, |
| { |
| "epoch": 0.6463647405865001, |
| "grad_norm": 1.859375, |
| "learning_rate": 6.095095496264001e-06, |
| "loss": 0.997169303894043, |
| "step": 3725, |
| "token_acc": 0.6856908315278095 |
| }, |
| { |
| "epoch": 0.6472323442651396, |
| "grad_norm": 1.828125, |
| "learning_rate": 6.068694253856675e-06, |
| "loss": 0.9935990333557129, |
| "step": 3730, |
| "token_acc": 0.6857996759957544 |
| }, |
| { |
| "epoch": 0.6480999479437792, |
| "grad_norm": 1.9296875, |
| "learning_rate": 6.04232538323761e-06, |
| "loss": 0.995047664642334, |
| "step": 3735, |
| "token_acc": 0.6855156587473002 |
| }, |
| { |
| "epoch": 0.6489675516224189, |
| "grad_norm": 1.8671875, |
| "learning_rate": 6.015989101537586e-06, |
| "loss": 0.9964488983154297, |
| "step": 3740, |
| "token_acc": 0.6852130600180629 |
| }, |
| { |
| "epoch": 0.6498351553010585, |
| "grad_norm": 1.8046875, |
| "learning_rate": 5.989685625619039e-06, |
| "loss": 1.001780128479004, |
| "step": 3745, |
| "token_acc": 0.6852903955410754 |
| }, |
| { |
| "epoch": 0.6507027589796981, |
| "grad_norm": 1.8359375, |
| "learning_rate": 5.963415172074272e-06, |
| "loss": 0.9760993003845215, |
| "step": 3750, |
| "token_acc": 0.6886531679352932 |
| }, |
| { |
| "epoch": 0.6515703626583377, |
| "grad_norm": 1.84375, |
| "learning_rate": 5.937177957223661e-06, |
| "loss": 0.9900795936584472, |
| "step": 3755, |
| "token_acc": 0.6872612410739596 |
| }, |
| { |
| "epoch": 0.6524379663369773, |
| "grad_norm": 1.84375, |
| "learning_rate": 5.910974197113892e-06, |
| "loss": 1.001762866973877, |
| "step": 3760, |
| "token_acc": 0.6836190449665084 |
| }, |
| { |
| "epoch": 0.6533055700156168, |
| "grad_norm": 1.765625, |
| "learning_rate": 5.884804107516169e-06, |
| "loss": 0.9720080375671387, |
| "step": 3765, |
| "token_acc": 0.6952360976377127 |
| }, |
| { |
| "epoch": 0.6541731736942564, |
| "grad_norm": 1.890625, |
| "learning_rate": 5.858667903924439e-06, |
| "loss": 0.984315013885498, |
| "step": 3770, |
| "token_acc": 0.6863298561396332 |
| }, |
| { |
| "epoch": 0.655040777372896, |
| "grad_norm": 1.796875, |
| "learning_rate": 5.8325658015536205e-06, |
| "loss": 1.002072525024414, |
| "step": 3775, |
| "token_acc": 0.6841665768774916 |
| }, |
| { |
| "epoch": 0.6559083810515357, |
| "grad_norm": 1.875, |
| "learning_rate": 5.8064980153378335e-06, |
| "loss": 0.9898612976074219, |
| "step": 3780, |
| "token_acc": 0.686063766347234 |
| }, |
| { |
| "epoch": 0.6567759847301753, |
| "grad_norm": 1.84375, |
| "learning_rate": 5.780464759928623e-06, |
| "loss": 1.0027915000915528, |
| "step": 3785, |
| "token_acc": 0.6840225269854513 |
| }, |
| { |
| "epoch": 0.6576435884088149, |
| "grad_norm": 1.8984375, |
| "learning_rate": 5.7544662496931935e-06, |
| "loss": 0.9923629760742188, |
| "step": 3790, |
| "token_acc": 0.6860443020793746 |
| }, |
| { |
| "epoch": 0.6585111920874545, |
| "grad_norm": 1.8828125, |
| "learning_rate": 5.7285026987126526e-06, |
| "loss": 1.0032987594604492, |
| "step": 3795, |
| "token_acc": 0.683890081813487 |
| }, |
| { |
| "epoch": 0.659378795766094, |
| "grad_norm": 1.8203125, |
| "learning_rate": 5.7025743207802345e-06, |
| "loss": 1.0057662963867187, |
| "step": 3800, |
| "token_acc": 0.6835548723113827 |
| }, |
| { |
| "epoch": 0.6602463994447336, |
| "grad_norm": 1.8125, |
| "learning_rate": 5.676681329399543e-06, |
| "loss": 0.9910049438476562, |
| "step": 3805, |
| "token_acc": 0.6876422267858134 |
| }, |
| { |
| "epoch": 0.6611140031233732, |
| "grad_norm": 1.8359375, |
| "learning_rate": 5.650823937782803e-06, |
| "loss": 1.0060483932495117, |
| "step": 3810, |
| "token_acc": 0.6838536439827497 |
| }, |
| { |
| "epoch": 0.6619816068020128, |
| "grad_norm": 1.7890625, |
| "learning_rate": 5.625002358849096e-06, |
| "loss": 0.9882902145385742, |
| "step": 3815, |
| "token_acc": 0.6876298080917173 |
| }, |
| { |
| "epoch": 0.6628492104806525, |
| "grad_norm": 1.7734375, |
| "learning_rate": 5.599216805222609e-06, |
| "loss": 0.9882322311401367, |
| "step": 3820, |
| "token_acc": 0.6853805976085054 |
| }, |
| { |
| "epoch": 0.6637168141592921, |
| "grad_norm": 1.8203125, |
| "learning_rate": 5.573467489230879e-06, |
| "loss": 1.0068046569824218, |
| "step": 3825, |
| "token_acc": 0.6833182949170152 |
| }, |
| { |
| "epoch": 0.6645844178379317, |
| "grad_norm": 1.796875, |
| "learning_rate": 5.547754622903059e-06, |
| "loss": 0.995240306854248, |
| "step": 3830, |
| "token_acc": 0.6859673775279661 |
| }, |
| { |
| "epoch": 0.6654520215165712, |
| "grad_norm": 1.84375, |
| "learning_rate": 5.522078417968151e-06, |
| "loss": 0.9991961479187011, |
| "step": 3835, |
| "token_acc": 0.6834129511677283 |
| }, |
| { |
| "epoch": 0.6663196251952108, |
| "grad_norm": 1.7421875, |
| "learning_rate": 5.496439085853282e-06, |
| "loss": 0.9904547691345215, |
| "step": 3840, |
| "token_acc": 0.6883501895504571 |
| }, |
| { |
| "epoch": 0.6671872288738504, |
| "grad_norm": 1.8515625, |
| "learning_rate": 5.470836837681955e-06, |
| "loss": 0.9769336700439453, |
| "step": 3845, |
| "token_acc": 0.6896110755886686 |
| }, |
| { |
| "epoch": 0.66805483255249, |
| "grad_norm": 1.8984375, |
| "learning_rate": 5.445271884272303e-06, |
| "loss": 1.0078944206237792, |
| "step": 3850, |
| "token_acc": 0.6813604508440128 |
| }, |
| { |
| "epoch": 0.6689224362311296, |
| "grad_norm": 1.8515625, |
| "learning_rate": 5.4197444361353675e-06, |
| "loss": 1.0107319831848145, |
| "step": 3855, |
| "token_acc": 0.681410079867805 |
| }, |
| { |
| "epoch": 0.6697900399097693, |
| "grad_norm": 1.828125, |
| "learning_rate": 5.394254703473354e-06, |
| "loss": 0.964967918395996, |
| "step": 3860, |
| "token_acc": 0.6943124165554072 |
| }, |
| { |
| "epoch": 0.6706576435884088, |
| "grad_norm": 1.890625, |
| "learning_rate": 5.368802896177911e-06, |
| "loss": 0.9789441108703614, |
| "step": 3865, |
| "token_acc": 0.6884867885627476 |
| }, |
| { |
| "epoch": 0.6715252472670484, |
| "grad_norm": 1.8828125, |
| "learning_rate": 5.343389223828392e-06, |
| "loss": 0.9796417236328125, |
| "step": 3870, |
| "token_acc": 0.6890180582340962 |
| }, |
| { |
| "epoch": 0.672392850945688, |
| "grad_norm": 1.8828125, |
| "learning_rate": 5.318013895690131e-06, |
| "loss": 0.9787491798400879, |
| "step": 3875, |
| "token_acc": 0.6898387987482578 |
| }, |
| { |
| "epoch": 0.6732604546243276, |
| "grad_norm": 1.9375, |
| "learning_rate": 5.292677120712726e-06, |
| "loss": 0.9852935791015625, |
| "step": 3880, |
| "token_acc": 0.6870102408889257 |
| }, |
| { |
| "epoch": 0.6741280583029672, |
| "grad_norm": 1.9921875, |
| "learning_rate": 5.267379107528311e-06, |
| "loss": 0.9924633026123046, |
| "step": 3885, |
| "token_acc": 0.6847909474491753 |
| }, |
| { |
| "epoch": 0.6749956619816068, |
| "grad_norm": 1.859375, |
| "learning_rate": 5.242120064449845e-06, |
| "loss": 0.9971447944641113, |
| "step": 3890, |
| "token_acc": 0.6849295083489171 |
| }, |
| { |
| "epoch": 0.6758632656602463, |
| "grad_norm": 1.7890625, |
| "learning_rate": 5.216900199469391e-06, |
| "loss": 0.9826061248779296, |
| "step": 3895, |
| "token_acc": 0.6904354672313623 |
| }, |
| { |
| "epoch": 0.676730869338886, |
| "grad_norm": 1.859375, |
| "learning_rate": 5.191719720256407e-06, |
| "loss": 0.9958490371704102, |
| "step": 3900, |
| "token_acc": 0.6858113156286083 |
| }, |
| { |
| "epoch": 0.6775984730175256, |
| "grad_norm": 1.796875, |
| "learning_rate": 5.166578834156031e-06, |
| "loss": 0.9950273513793946, |
| "step": 3905, |
| "token_acc": 0.6859179612865821 |
| }, |
| { |
| "epoch": 0.6784660766961652, |
| "grad_norm": 1.859375, |
| "learning_rate": 5.14147774818738e-06, |
| "loss": 0.990997314453125, |
| "step": 3910, |
| "token_acc": 0.6872399539201735 |
| }, |
| { |
| "epoch": 0.6793336803748048, |
| "grad_norm": 1.921875, |
| "learning_rate": 5.1164166690418435e-06, |
| "loss": 0.9976764678955078, |
| "step": 3915, |
| "token_acc": 0.6839109763660167 |
| }, |
| { |
| "epoch": 0.6802012840534444, |
| "grad_norm": 1.8125, |
| "learning_rate": 5.091395803081376e-06, |
| "loss": 0.980461311340332, |
| "step": 3920, |
| "token_acc": 0.6900414130464994 |
| }, |
| { |
| "epoch": 0.681068887732084, |
| "grad_norm": 1.8125, |
| "learning_rate": 5.066415356336807e-06, |
| "loss": 1.005615234375, |
| "step": 3925, |
| "token_acc": 0.681454565176126 |
| }, |
| { |
| "epoch": 0.6819364914107235, |
| "grad_norm": 2.015625, |
| "learning_rate": 5.041475534506131e-06, |
| "loss": 0.993968391418457, |
| "step": 3930, |
| "token_acc": 0.6851136910077625 |
| }, |
| { |
| "epoch": 0.6828040950893631, |
| "grad_norm": 1.9453125, |
| "learning_rate": 5.01657654295284e-06, |
| "loss": 1.0097810745239257, |
| "step": 3935, |
| "token_acc": 0.6827556629888105 |
| }, |
| { |
| "epoch": 0.6836716987680028, |
| "grad_norm": 1.6953125, |
| "learning_rate": 4.991718586704196e-06, |
| "loss": 0.9924948692321778, |
| "step": 3940, |
| "token_acc": 0.6904830287206266 |
| }, |
| { |
| "epoch": 0.6845393024466424, |
| "grad_norm": 1.9296875, |
| "learning_rate": 4.9669018704495696e-06, |
| "loss": 0.9993215560913086, |
| "step": 3945, |
| "token_acc": 0.6851340222617751 |
| }, |
| { |
| "epoch": 0.685406906125282, |
| "grad_norm": 1.796875, |
| "learning_rate": 4.9421265985387475e-06, |
| "loss": 0.9833191871643067, |
| "step": 3950, |
| "token_acc": 0.6883911507101707 |
| }, |
| { |
| "epoch": 0.6862745098039216, |
| "grad_norm": 1.9296875, |
| "learning_rate": 4.9173929749802465e-06, |
| "loss": 1.0078816413879395, |
| "step": 3955, |
| "token_acc": 0.6850704225352112 |
| }, |
| { |
| "epoch": 0.6871421134825612, |
| "grad_norm": 1.875, |
| "learning_rate": 4.892701203439635e-06, |
| "loss": 1.0204105377197266, |
| "step": 3960, |
| "token_acc": 0.6796032157676348 |
| }, |
| { |
| "epoch": 0.6880097171612007, |
| "grad_norm": 1.9453125, |
| "learning_rate": 4.868051487237858e-06, |
| "loss": 0.973170280456543, |
| "step": 3965, |
| "token_acc": 0.6899492217684071 |
| }, |
| { |
| "epoch": 0.6888773208398403, |
| "grad_norm": 1.7421875, |
| "learning_rate": 4.843444029349564e-06, |
| "loss": 0.9647638320922851, |
| "step": 3970, |
| "token_acc": 0.6957806900520547 |
| }, |
| { |
| "epoch": 0.6897449245184799, |
| "grad_norm": 1.890625, |
| "learning_rate": 4.8188790324014274e-06, |
| "loss": 0.9891746520996094, |
| "step": 3975, |
| "token_acc": 0.685054815133276 |
| }, |
| { |
| "epoch": 0.6906125281971196, |
| "grad_norm": 1.8828125, |
| "learning_rate": 4.794356698670488e-06, |
| "loss": 0.9468636512756348, |
| "step": 3980, |
| "token_acc": 0.6995367131713369 |
| }, |
| { |
| "epoch": 0.6914801318757592, |
| "grad_norm": 1.8359375, |
| "learning_rate": 4.769877230082476e-06, |
| "loss": 0.9977554321289063, |
| "step": 3985, |
| "token_acc": 0.6852840924340428 |
| }, |
| { |
| "epoch": 0.6923477355543988, |
| "grad_norm": 1.9140625, |
| "learning_rate": 4.74544082821016e-06, |
| "loss": 1.004736328125, |
| "step": 3990, |
| "token_acc": 0.6820171598669235 |
| }, |
| { |
| "epoch": 0.6932153392330384, |
| "grad_norm": 1.8828125, |
| "learning_rate": 4.721047694271676e-06, |
| "loss": 1.0017391204833985, |
| "step": 3995, |
| "token_acc": 0.684533952315144 |
| }, |
| { |
| "epoch": 0.6940829429116779, |
| "grad_norm": 1.8828125, |
| "learning_rate": 4.69669802912888e-06, |
| "loss": 0.9762969970703125, |
| "step": 4000, |
| "token_acc": 0.6934091245841135 |
| }, |
| { |
| "epoch": 0.6949505465903175, |
| "grad_norm": 1.7890625, |
| "learning_rate": 4.672392033285695e-06, |
| "loss": 1.0025498390197753, |
| "step": 4005, |
| "token_acc": 0.6851401316784188 |
| }, |
| { |
| "epoch": 0.6958181502689571, |
| "grad_norm": 1.875, |
| "learning_rate": 4.648129906886445e-06, |
| "loss": 1.0146098136901855, |
| "step": 4010, |
| "token_acc": 0.6792288989232372 |
| }, |
| { |
| "epoch": 0.6966857539475967, |
| "grad_norm": 1.78125, |
| "learning_rate": 4.623911849714226e-06, |
| "loss": 1.0010202407836915, |
| "step": 4015, |
| "token_acc": 0.6856064118699079 |
| }, |
| { |
| "epoch": 0.6975533576262364, |
| "grad_norm": 1.8671875, |
| "learning_rate": 4.599738061189244e-06, |
| "loss": 1.0105598449707032, |
| "step": 4020, |
| "token_acc": 0.682280948032655 |
| }, |
| { |
| "epoch": 0.698420961304876, |
| "grad_norm": 1.890625, |
| "learning_rate": 4.575608740367189e-06, |
| "loss": 0.9960094451904297, |
| "step": 4025, |
| "token_acc": 0.6854422794662214 |
| }, |
| { |
| "epoch": 0.6992885649835155, |
| "grad_norm": 1.875, |
| "learning_rate": 4.551524085937582e-06, |
| "loss": 0.9695888519287109, |
| "step": 4030, |
| "token_acc": 0.6933236382866208 |
| }, |
| { |
| "epoch": 0.7001561686621551, |
| "grad_norm": 1.796875, |
| "learning_rate": 4.527484296222149e-06, |
| "loss": 0.9828217506408692, |
| "step": 4035, |
| "token_acc": 0.6878763576059919 |
| }, |
| { |
| "epoch": 0.7010237723407947, |
| "grad_norm": 1.8984375, |
| "learning_rate": 4.503489569173179e-06, |
| "loss": 0.9933969497680664, |
| "step": 4040, |
| "token_acc": 0.6868701758147513 |
| }, |
| { |
| "epoch": 0.7018913760194343, |
| "grad_norm": 1.8203125, |
| "learning_rate": 4.479540102371904e-06, |
| "loss": 1.0078033447265624, |
| "step": 4045, |
| "token_acc": 0.6846505259554748 |
| }, |
| { |
| "epoch": 0.7027589796980739, |
| "grad_norm": 1.7578125, |
| "learning_rate": 4.455636093026865e-06, |
| "loss": 0.9774109840393066, |
| "step": 4050, |
| "token_acc": 0.69020612269789 |
| }, |
| { |
| "epoch": 0.7036265833767135, |
| "grad_norm": 1.8125, |
| "learning_rate": 4.431777737972287e-06, |
| "loss": 0.9925678253173829, |
| "step": 4055, |
| "token_acc": 0.6882775426446069 |
| }, |
| { |
| "epoch": 0.7044941870553532, |
| "grad_norm": 1.796875, |
| "learning_rate": 4.4079652336664645e-06, |
| "loss": 0.9903898239135742, |
| "step": 4060, |
| "token_acc": 0.6858611892801725 |
| }, |
| { |
| "epoch": 0.7053617907339927, |
| "grad_norm": 1.7734375, |
| "learning_rate": 4.384198776190137e-06, |
| "loss": 0.9989794731140137, |
| "step": 4065, |
| "token_acc": 0.6841588232951453 |
| }, |
| { |
| "epoch": 0.7062293944126323, |
| "grad_norm": 1.859375, |
| "learning_rate": 4.360478561244885e-06, |
| "loss": 0.982159423828125, |
| "step": 4070, |
| "token_acc": 0.6892994694174365 |
| }, |
| { |
| "epoch": 0.7070969980912719, |
| "grad_norm": 1.84375, |
| "learning_rate": 4.336804784151505e-06, |
| "loss": 0.9847228050231933, |
| "step": 4075, |
| "token_acc": 0.6867390010281942 |
| }, |
| { |
| "epoch": 0.7079646017699115, |
| "grad_norm": 1.8359375, |
| "learning_rate": 4.313177639848408e-06, |
| "loss": 1.0108787536621093, |
| "step": 4080, |
| "token_acc": 0.6838802388894016 |
| }, |
| { |
| "epoch": 0.7088322054485511, |
| "grad_norm": 1.90625, |
| "learning_rate": 4.2895973228900154e-06, |
| "loss": 0.9985545158386231, |
| "step": 4085, |
| "token_acc": 0.6844401828768361 |
| }, |
| { |
| "epoch": 0.7096998091271907, |
| "grad_norm": 1.859375, |
| "learning_rate": 4.2660640274451545e-06, |
| "loss": 0.9826979637145996, |
| "step": 4090, |
| "token_acc": 0.6863773965691221 |
| }, |
| { |
| "epoch": 0.7105674128058302, |
| "grad_norm": 1.84375, |
| "learning_rate": 4.242577947295462e-06, |
| "loss": 0.9989730834960937, |
| "step": 4095, |
| "token_acc": 0.6846098407914565 |
| }, |
| { |
| "epoch": 0.7114350164844699, |
| "grad_norm": 1.828125, |
| "learning_rate": 4.219139275833783e-06, |
| "loss": 1.000558090209961, |
| "step": 4100, |
| "token_acc": 0.6841488044823767 |
| }, |
| { |
| "epoch": 0.7123026201631095, |
| "grad_norm": 1.78125, |
| "learning_rate": 4.1957482060625865e-06, |
| "loss": 0.9966065406799316, |
| "step": 4105, |
| "token_acc": 0.6879207664422579 |
| }, |
| { |
| "epoch": 0.7131702238417491, |
| "grad_norm": 1.8203125, |
| "learning_rate": 4.172404930592372e-06, |
| "loss": 0.9852560997009278, |
| "step": 4110, |
| "token_acc": 0.6896971139227118 |
| }, |
| { |
| "epoch": 0.7140378275203887, |
| "grad_norm": 1.765625, |
| "learning_rate": 4.149109641640079e-06, |
| "loss": 1.001215362548828, |
| "step": 4115, |
| "token_acc": 0.6853088591189812 |
| }, |
| { |
| "epoch": 0.7149054311990283, |
| "grad_norm": 1.78125, |
| "learning_rate": 4.1258625310275145e-06, |
| "loss": 1.0101828575134277, |
| "step": 4120, |
| "token_acc": 0.681881495767706 |
| }, |
| { |
| "epoch": 0.7157730348776679, |
| "grad_norm": 1.8828125, |
| "learning_rate": 4.102663790179764e-06, |
| "loss": 0.9940977096557617, |
| "step": 4125, |
| "token_acc": 0.6862203534229258 |
| }, |
| { |
| "epoch": 0.7166406385563074, |
| "grad_norm": 1.8984375, |
| "learning_rate": 4.079513610123619e-06, |
| "loss": 0.9920468330383301, |
| "step": 4130, |
| "token_acc": 0.6872316721917288 |
| }, |
| { |
| "epoch": 0.717508242234947, |
| "grad_norm": 1.7734375, |
| "learning_rate": 4.056412181486003e-06, |
| "loss": 0.9854813575744629, |
| "step": 4135, |
| "token_acc": 0.6900252525252525 |
| }, |
| { |
| "epoch": 0.7183758459135867, |
| "grad_norm": 1.953125, |
| "learning_rate": 4.033359694492411e-06, |
| "loss": 0.9985934257507324, |
| "step": 4140, |
| "token_acc": 0.6840057676088909 |
| }, |
| { |
| "epoch": 0.7192434495922263, |
| "grad_norm": 1.8046875, |
| "learning_rate": 4.010356338965323e-06, |
| "loss": 0.9948851585388183, |
| "step": 4145, |
| "token_acc": 0.6854739461477084 |
| }, |
| { |
| "epoch": 0.7201110532708659, |
| "grad_norm": 1.96875, |
| "learning_rate": 3.98740230432266e-06, |
| "loss": 0.9762655258178711, |
| "step": 4150, |
| "token_acc": 0.6923338872694581 |
| }, |
| { |
| "epoch": 0.7209786569495055, |
| "grad_norm": 1.9140625, |
| "learning_rate": 3.9644977795762175e-06, |
| "loss": 0.988780403137207, |
| "step": 4155, |
| "token_acc": 0.6866318047733977 |
| }, |
| { |
| "epoch": 0.721846260628145, |
| "grad_norm": 1.953125, |
| "learning_rate": 3.941642953330102e-06, |
| "loss": 0.9889546394348144, |
| "step": 4160, |
| "token_acc": 0.6865713642503377 |
| }, |
| { |
| "epoch": 0.7227138643067846, |
| "grad_norm": 1.859375, |
| "learning_rate": 3.9188380137791934e-06, |
| "loss": 0.9839936256408691, |
| "step": 4165, |
| "token_acc": 0.6882079424724933 |
| }, |
| { |
| "epoch": 0.7235814679854242, |
| "grad_norm": 1.8984375, |
| "learning_rate": 3.896083148707579e-06, |
| "loss": 0.9844943046569824, |
| "step": 4170, |
| "token_acc": 0.6876788477073265 |
| }, |
| { |
| "epoch": 0.7244490716640638, |
| "grad_norm": 1.8515625, |
| "learning_rate": 3.87337854548702e-06, |
| "loss": 0.9963854789733887, |
| "step": 4175, |
| "token_acc": 0.6848597774936757 |
| }, |
| { |
| "epoch": 0.7253166753427035, |
| "grad_norm": 1.8515625, |
| "learning_rate": 3.8507243910754015e-06, |
| "loss": 1.0020368576049805, |
| "step": 4180, |
| "token_acc": 0.6843103494659816 |
| }, |
| { |
| "epoch": 0.7261842790213431, |
| "grad_norm": 1.8671875, |
| "learning_rate": 3.828120872015193e-06, |
| "loss": 1.0066667556762696, |
| "step": 4185, |
| "token_acc": 0.6836107139967091 |
| }, |
| { |
| "epoch": 0.7270518826999827, |
| "grad_norm": 1.9140625, |
| "learning_rate": 3.8055681744319173e-06, |
| "loss": 1.0011329650878906, |
| "step": 4190, |
| "token_acc": 0.6850806824639539 |
| }, |
| { |
| "epoch": 0.7279194863786222, |
| "grad_norm": 1.8828125, |
| "learning_rate": 3.783066484032615e-06, |
| "loss": 1.0011292457580567, |
| "step": 4195, |
| "token_acc": 0.6820659087561429 |
| }, |
| { |
| "epoch": 0.7287870900572618, |
| "grad_norm": 1.8203125, |
| "learning_rate": 3.7606159861043123e-06, |
| "loss": 1.0115188598632812, |
| "step": 4200, |
| "token_acc": 0.6830158518715865 |
| }, |
| { |
| "epoch": 0.7296546937359014, |
| "grad_norm": 1.8125, |
| "learning_rate": 3.738216865512496e-06, |
| "loss": 0.9878059387207031, |
| "step": 4205, |
| "token_acc": 0.6888757571280839 |
| }, |
| { |
| "epoch": 0.730522297414541, |
| "grad_norm": 1.8046875, |
| "learning_rate": 3.7158693066996066e-06, |
| "loss": 0.9820815086364746, |
| "step": 4210, |
| "token_acc": 0.6891283735961433 |
| }, |
| { |
| "epoch": 0.7313899010931806, |
| "grad_norm": 1.8515625, |
| "learning_rate": 3.69357349368349e-06, |
| "loss": 1.001258373260498, |
| "step": 4215, |
| "token_acc": 0.6836823676196354 |
| }, |
| { |
| "epoch": 0.7322575047718203, |
| "grad_norm": 1.796875, |
| "learning_rate": 3.6713296100559084e-06, |
| "loss": 1.0037827491760254, |
| "step": 4220, |
| "token_acc": 0.6847890011370023 |
| }, |
| { |
| "epoch": 0.7331251084504599, |
| "grad_norm": 1.9296875, |
| "learning_rate": 3.649137838981014e-06, |
| "loss": 0.9784846305847168, |
| "step": 4225, |
| "token_acc": 0.6925998220664197 |
| }, |
| { |
| "epoch": 0.7339927121290994, |
| "grad_norm": 1.9453125, |
| "learning_rate": 3.6269983631938476e-06, |
| "loss": 0.98970947265625, |
| "step": 4230, |
| "token_acc": 0.6865225040519761 |
| }, |
| { |
| "epoch": 0.734860315807739, |
| "grad_norm": 1.828125, |
| "learning_rate": 3.604911364998832e-06, |
| "loss": 1.0065629005432128, |
| "step": 4235, |
| "token_acc": 0.6821462879099767 |
| }, |
| { |
| "epoch": 0.7357279194863786, |
| "grad_norm": 1.8203125, |
| "learning_rate": 3.582877026268269e-06, |
| "loss": 1.0006741523742675, |
| "step": 4240, |
| "token_acc": 0.6835158700622043 |
| }, |
| { |
| "epoch": 0.7365955231650182, |
| "grad_norm": 1.875, |
| "learning_rate": 3.560895528440844e-06, |
| "loss": 0.9968295097351074, |
| "step": 4245, |
| "token_acc": 0.6860261131570137 |
| }, |
| { |
| "epoch": 0.7374631268436578, |
| "grad_norm": 1.8984375, |
| "learning_rate": 3.5389670525201335e-06, |
| "loss": 0.994806957244873, |
| "step": 4250, |
| "token_acc": 0.6844711335861778 |
| }, |
| { |
| "epoch": 0.7383307305222974, |
| "grad_norm": 1.9296875, |
| "learning_rate": 3.5170917790731084e-06, |
| "loss": 0.9853558540344238, |
| "step": 4255, |
| "token_acc": 0.690081677065686 |
| }, |
| { |
| "epoch": 0.7391983342009371, |
| "grad_norm": 1.796875, |
| "learning_rate": 3.4952698882286564e-06, |
| "loss": 1.002675437927246, |
| "step": 4260, |
| "token_acc": 0.6839060402684564 |
| }, |
| { |
| "epoch": 0.7400659378795766, |
| "grad_norm": 1.890625, |
| "learning_rate": 3.473501559676088e-06, |
| "loss": 1.006124496459961, |
| "step": 4265, |
| "token_acc": 0.6798628939749822 |
| }, |
| { |
| "epoch": 0.7409335415582162, |
| "grad_norm": 1.875, |
| "learning_rate": 3.4517869726636667e-06, |
| "loss": 0.9663874626159668, |
| "step": 4270, |
| "token_acc": 0.6933329657757991 |
| }, |
| { |
| "epoch": 0.7418011452368558, |
| "grad_norm": 1.90625, |
| "learning_rate": 3.4301263059971234e-06, |
| "loss": 0.9783464431762695, |
| "step": 4275, |
| "token_acc": 0.690311533509431 |
| }, |
| { |
| "epoch": 0.7426687489154954, |
| "grad_norm": 1.90625, |
| "learning_rate": 3.408519738038202e-06, |
| "loss": 0.9907986640930175, |
| "step": 4280, |
| "token_acc": 0.6858886450905102 |
| }, |
| { |
| "epoch": 0.743536352594135, |
| "grad_norm": 1.8515625, |
| "learning_rate": 3.3869674467031633e-06, |
| "loss": 0.9949624061584472, |
| "step": 4285, |
| "token_acc": 0.6854110544056531 |
| }, |
| { |
| "epoch": 0.7444039562727746, |
| "grad_norm": 1.859375, |
| "learning_rate": 3.3654696094613424e-06, |
| "loss": 1.0062894821166992, |
| "step": 4290, |
| "token_acc": 0.68370965995235 |
| }, |
| { |
| "epoch": 0.7452715599514141, |
| "grad_norm": 1.890625, |
| "learning_rate": 3.3440264033336787e-06, |
| "loss": 0.9806596755981445, |
| "step": 4295, |
| "token_acc": 0.6898993765722411 |
| }, |
| { |
| "epoch": 0.7461391636300538, |
| "grad_norm": 1.859375, |
| "learning_rate": 3.3226380048912586e-06, |
| "loss": 0.9737249374389648, |
| "step": 4300, |
| "token_acc": 0.6907929820819113 |
| }, |
| { |
| "epoch": 0.7470067673086934, |
| "grad_norm": 1.765625, |
| "learning_rate": 3.3013045902538634e-06, |
| "loss": 0.975331974029541, |
| "step": 4305, |
| "token_acc": 0.6905592319015476 |
| }, |
| { |
| "epoch": 0.747874370987333, |
| "grad_norm": 1.9609375, |
| "learning_rate": 3.2800263350885165e-06, |
| "loss": 0.9860298156738281, |
| "step": 4310, |
| "token_acc": 0.6891362690327527 |
| }, |
| { |
| "epoch": 0.7487419746659726, |
| "grad_norm": 1.859375, |
| "learning_rate": 3.2588034146080404e-06, |
| "loss": 0.9883022308349609, |
| "step": 4315, |
| "token_acc": 0.6857588710224575 |
| }, |
| { |
| "epoch": 0.7496095783446122, |
| "grad_norm": 1.828125, |
| "learning_rate": 3.2376360035696085e-06, |
| "loss": 1.0138681411743165, |
| "step": 4320, |
| "token_acc": 0.6818151051185206 |
| }, |
| { |
| "epoch": 0.7504771820232518, |
| "grad_norm": 1.8203125, |
| "learning_rate": 3.216524276273313e-06, |
| "loss": 1.0137529373168945, |
| "step": 4325, |
| "token_acc": 0.6836542657647829 |
| }, |
| { |
| "epoch": 0.7513447857018913, |
| "grad_norm": 1.859375, |
| "learning_rate": 3.1954684065607232e-06, |
| "loss": 0.9806119918823242, |
| "step": 4330, |
| "token_acc": 0.6892710892710893 |
| }, |
| { |
| "epoch": 0.7522123893805309, |
| "grad_norm": 1.8828125, |
| "learning_rate": 3.174468567813461e-06, |
| "loss": 1.0116167068481445, |
| "step": 4335, |
| "token_acc": 0.6821417273014869 |
| }, |
| { |
| "epoch": 0.7530799930591706, |
| "grad_norm": 1.921875, |
| "learning_rate": 3.1535249329517603e-06, |
| "loss": 1.0085960388183595, |
| "step": 4340, |
| "token_acc": 0.6825007871428767 |
| }, |
| { |
| "epoch": 0.7539475967378102, |
| "grad_norm": 1.8984375, |
| "learning_rate": 3.1326376744330667e-06, |
| "loss": 0.9790970802307128, |
| "step": 4345, |
| "token_acc": 0.689821249191562 |
| }, |
| { |
| "epoch": 0.7548152004164498, |
| "grad_norm": 1.8203125, |
| "learning_rate": 3.1118069642505886e-06, |
| "loss": 0.9997638702392578, |
| "step": 4350, |
| "token_acc": 0.6841588385994876 |
| }, |
| { |
| "epoch": 0.7556828040950894, |
| "grad_norm": 1.8125, |
| "learning_rate": 3.0910329739319033e-06, |
| "loss": 0.9993162155151367, |
| "step": 4355, |
| "token_acc": 0.6841426321221009 |
| }, |
| { |
| "epoch": 0.756550407773729, |
| "grad_norm": 1.9140625, |
| "learning_rate": 3.0703158745375316e-06, |
| "loss": 0.9740482330322265, |
| "step": 4360, |
| "token_acc": 0.6929492242406393 |
| }, |
| { |
| "epoch": 0.7574180114523685, |
| "grad_norm": 1.9140625, |
| "learning_rate": 3.0496558366595364e-06, |
| "loss": 0.9911387443542481, |
| "step": 4365, |
| "token_acc": 0.6877242474870587 |
| }, |
| { |
| "epoch": 0.7582856151310081, |
| "grad_norm": 1.8359375, |
| "learning_rate": 3.029053030420115e-06, |
| "loss": 1.001497173309326, |
| "step": 4370, |
| "token_acc": 0.6854735659622271 |
| }, |
| { |
| "epoch": 0.7591532188096477, |
| "grad_norm": 1.8828125, |
| "learning_rate": 3.0085076254701983e-06, |
| "loss": 0.9972357749938965, |
| "step": 4375, |
| "token_acc": 0.6851984268859492 |
| }, |
| { |
| "epoch": 0.7600208224882874, |
| "grad_norm": 1.8125, |
| "learning_rate": 2.988019790988056e-06, |
| "loss": 0.990943431854248, |
| "step": 4380, |
| "token_acc": 0.6880961127665075 |
| }, |
| { |
| "epoch": 0.760888426166927, |
| "grad_norm": 1.8359375, |
| "learning_rate": 2.9675896956778984e-06, |
| "loss": 0.9964810371398926, |
| "step": 4385, |
| "token_acc": 0.6846022286951177 |
| }, |
| { |
| "epoch": 0.7617560298455666, |
| "grad_norm": 1.8671875, |
| "learning_rate": 2.947217507768495e-06, |
| "loss": 0.9866546630859375, |
| "step": 4390, |
| "token_acc": 0.6860718843921116 |
| }, |
| { |
| "epoch": 0.7626236335242061, |
| "grad_norm": 1.796875, |
| "learning_rate": 2.926903395011781e-06, |
| "loss": 0.9983717918395996, |
| "step": 4395, |
| "token_acc": 0.6877645635960492 |
| }, |
| { |
| "epoch": 0.7634912372028457, |
| "grad_norm": 1.875, |
| "learning_rate": 2.9066475246814828e-06, |
| "loss": 1.0109498977661133, |
| "step": 4400, |
| "token_acc": 0.6828291696597227 |
| }, |
| { |
| "epoch": 0.7643588408814853, |
| "grad_norm": 1.8046875, |
| "learning_rate": 2.886450063571735e-06, |
| "loss": 0.9692567825317383, |
| "step": 4405, |
| "token_acc": 0.6919382320189973 |
| }, |
| { |
| "epoch": 0.7652264445601249, |
| "grad_norm": 1.890625, |
| "learning_rate": 2.86631117799571e-06, |
| "loss": 0.9896286964416504, |
| "step": 4410, |
| "token_acc": 0.6858808026192449 |
| }, |
| { |
| "epoch": 0.7660940482387645, |
| "grad_norm": 1.8515625, |
| "learning_rate": 2.8462310337842523e-06, |
| "loss": 0.9920248031616211, |
| "step": 4415, |
| "token_acc": 0.6869760785115518 |
| }, |
| { |
| "epoch": 0.7669616519174042, |
| "grad_norm": 1.921875, |
| "learning_rate": 2.8262097962845058e-06, |
| "loss": 1.0015531539916993, |
| "step": 4420, |
| "token_acc": 0.6852368826004658 |
| }, |
| { |
| "epoch": 0.7678292555960438, |
| "grad_norm": 1.8671875, |
| "learning_rate": 2.806247630358554e-06, |
| "loss": 1.0034663200378418, |
| "step": 4425, |
| "token_acc": 0.684781729446976 |
| }, |
| { |
| "epoch": 0.7686968592746833, |
| "grad_norm": 1.8828125, |
| "learning_rate": 2.7863447003820642e-06, |
| "loss": 0.9939127922058105, |
| "step": 4430, |
| "token_acc": 0.6864493951901668 |
| }, |
| { |
| "epoch": 0.7695644629533229, |
| "grad_norm": 1.84375, |
| "learning_rate": 2.7665011702429357e-06, |
| "loss": 0.9952418327331543, |
| "step": 4435, |
| "token_acc": 0.6857203881939902 |
| }, |
| { |
| "epoch": 0.7704320666319625, |
| "grad_norm": 1.890625, |
| "learning_rate": 2.746717203339946e-06, |
| "loss": 0.9777667045593261, |
| "step": 4440, |
| "token_acc": 0.6909912776054509 |
| }, |
| { |
| "epoch": 0.7712996703106021, |
| "grad_norm": 1.8671875, |
| "learning_rate": 2.7269929625814085e-06, |
| "loss": 1.0063211441040039, |
| "step": 4445, |
| "token_acc": 0.6833169581450187 |
| }, |
| { |
| "epoch": 0.7721672739892417, |
| "grad_norm": 1.859375, |
| "learning_rate": 2.7073286103838293e-06, |
| "loss": 1.0100595474243164, |
| "step": 4450, |
| "token_acc": 0.6820285638719915 |
| }, |
| { |
| "epoch": 0.7730348776678813, |
| "grad_norm": 1.9140625, |
| "learning_rate": 2.6877243086705716e-06, |
| "loss": 0.9833673477172852, |
| "step": 4455, |
| "token_acc": 0.6882294325611871 |
| }, |
| { |
| "epoch": 0.773902481346521, |
| "grad_norm": 1.859375, |
| "learning_rate": 2.6681802188705196e-06, |
| "loss": 0.9901654243469238, |
| "step": 4460, |
| "token_acc": 0.6868834587465766 |
| }, |
| { |
| "epoch": 0.7747700850251605, |
| "grad_norm": 1.921875, |
| "learning_rate": 2.6486965019167544e-06, |
| "loss": 0.9956707000732422, |
| "step": 4465, |
| "token_acc": 0.6848234865946864 |
| }, |
| { |
| "epoch": 0.7756376887038001, |
| "grad_norm": 1.8515625, |
| "learning_rate": 2.629273318245219e-06, |
| "loss": 0.9965853691101074, |
| "step": 4470, |
| "token_acc": 0.6839174626010737 |
| }, |
| { |
| "epoch": 0.7765052923824397, |
| "grad_norm": 1.890625, |
| "learning_rate": 2.6099108277934105e-06, |
| "loss": 0.9861255645751953, |
| "step": 4475, |
| "token_acc": 0.6883442417490601 |
| }, |
| { |
| "epoch": 0.7773728960610793, |
| "grad_norm": 1.9453125, |
| "learning_rate": 2.590609189999049e-06, |
| "loss": 1.0013001441955567, |
| "step": 4480, |
| "token_acc": 0.6833337796941535 |
| }, |
| { |
| "epoch": 0.7782404997397189, |
| "grad_norm": 1.9453125, |
| "learning_rate": 2.5713685637987818e-06, |
| "loss": 0.9976703643798828, |
| "step": 4485, |
| "token_acc": 0.6877284595300261 |
| }, |
| { |
| "epoch": 0.7791081034183585, |
| "grad_norm": 1.953125, |
| "learning_rate": 2.5521891076268555e-06, |
| "loss": 0.9790729522705078, |
| "step": 4490, |
| "token_acc": 0.6894264797255315 |
| }, |
| { |
| "epoch": 0.779975707096998, |
| "grad_norm": 1.765625, |
| "learning_rate": 2.5330709794138254e-06, |
| "loss": 0.9921565055847168, |
| "step": 4495, |
| "token_acc": 0.684914119045047 |
| }, |
| { |
| "epoch": 0.7808433107756377, |
| "grad_norm": 1.84375, |
| "learning_rate": 2.5140143365852476e-06, |
| "loss": 0.9999216079711915, |
| "step": 4500, |
| "token_acc": 0.6869082423624708 |
| }, |
| { |
| "epoch": 0.7817109144542773, |
| "grad_norm": 1.828125, |
| "learning_rate": 2.4950193360603868e-06, |
| "loss": 0.9970880508422851, |
| "step": 4505, |
| "token_acc": 0.6865171230142891 |
| }, |
| { |
| "epoch": 0.7825785181329169, |
| "grad_norm": 1.8828125, |
| "learning_rate": 2.4760861342509235e-06, |
| "loss": 0.9840543746948243, |
| "step": 4510, |
| "token_acc": 0.6897681822438032 |
| }, |
| { |
| "epoch": 0.7834461218115565, |
| "grad_norm": 1.8203125, |
| "learning_rate": 2.4572148870596636e-06, |
| "loss": 1.0138338088989258, |
| "step": 4515, |
| "token_acc": 0.6816099820996104 |
| }, |
| { |
| "epoch": 0.7843137254901961, |
| "grad_norm": 1.84375, |
| "learning_rate": 2.438405749879258e-06, |
| "loss": 1.0032525062561035, |
| "step": 4520, |
| "token_acc": 0.6844471121782046 |
| }, |
| { |
| "epoch": 0.7851813291688357, |
| "grad_norm": 1.8359375, |
| "learning_rate": 2.4196588775909204e-06, |
| "loss": 1.0106260299682617, |
| "step": 4525, |
| "token_acc": 0.6811335272101303 |
| }, |
| { |
| "epoch": 0.7860489328474752, |
| "grad_norm": 1.84375, |
| "learning_rate": 2.4009744245631515e-06, |
| "loss": 0.9920726776123047, |
| "step": 4530, |
| "token_acc": 0.6867518931683088 |
| }, |
| { |
| "epoch": 0.7869165365261148, |
| "grad_norm": 1.890625, |
| "learning_rate": 2.3823525446504735e-06, |
| "loss": 0.985197639465332, |
| "step": 4535, |
| "token_acc": 0.6879341219882787 |
| }, |
| { |
| "epoch": 0.7877841402047545, |
| "grad_norm": 1.9453125, |
| "learning_rate": 2.363793391192155e-06, |
| "loss": 0.9904392242431641, |
| "step": 4540, |
| "token_acc": 0.6878958715534904 |
| }, |
| { |
| "epoch": 0.7886517438833941, |
| "grad_norm": 1.8984375, |
| "learning_rate": 2.345297117010954e-06, |
| "loss": 0.9871200561523438, |
| "step": 4545, |
| "token_acc": 0.687506753106429 |
| }, |
| { |
| "epoch": 0.7895193475620337, |
| "grad_norm": 1.8359375, |
| "learning_rate": 2.3268638744118555e-06, |
| "loss": 0.9928851127624512, |
| "step": 4550, |
| "token_acc": 0.6844927026075655 |
| }, |
| { |
| "epoch": 0.7903869512406733, |
| "grad_norm": 1.8046875, |
| "learning_rate": 2.308493815180827e-06, |
| "loss": 0.974305534362793, |
| "step": 4555, |
| "token_acc": 0.6914228654424733 |
| }, |
| { |
| "epoch": 0.7912545549193128, |
| "grad_norm": 1.8359375, |
| "learning_rate": 2.2901870905835533e-06, |
| "loss": 1.0047635078430175, |
| "step": 4560, |
| "token_acc": 0.6855282218262464 |
| }, |
| { |
| "epoch": 0.7921221585979524, |
| "grad_norm": 1.8828125, |
| "learning_rate": 2.2719438513642023e-06, |
| "loss": 1.0162674903869628, |
| "step": 4565, |
| "token_acc": 0.6807637282560736 |
| }, |
| { |
| "epoch": 0.792989762276592, |
| "grad_norm": 1.859375, |
| "learning_rate": 2.25376424774418e-06, |
| "loss": 1.004638671875, |
| "step": 4570, |
| "token_acc": 0.6815402254920696 |
| }, |
| { |
| "epoch": 0.7938573659552316, |
| "grad_norm": 1.8515625, |
| "learning_rate": 2.2356484294208945e-06, |
| "loss": 0.9928275108337402, |
| "step": 4575, |
| "token_acc": 0.6833461637156787 |
| }, |
| { |
| "epoch": 0.7947249696338713, |
| "grad_norm": 1.8359375, |
| "learning_rate": 2.2175965455665225e-06, |
| "loss": 0.9946788787841797, |
| "step": 4580, |
| "token_acc": 0.6848246423935879 |
| }, |
| { |
| "epoch": 0.7955925733125109, |
| "grad_norm": 1.8515625, |
| "learning_rate": 2.1996087448267813e-06, |
| "loss": 0.9975082397460937, |
| "step": 4585, |
| "token_acc": 0.6866429591314421 |
| }, |
| { |
| "epoch": 0.7964601769911505, |
| "grad_norm": 1.8828125, |
| "learning_rate": 2.1816851753197023e-06, |
| "loss": 0.9974835395812989, |
| "step": 4590, |
| "token_acc": 0.6866237463087606 |
| }, |
| { |
| "epoch": 0.79732778066979, |
| "grad_norm": 1.890625, |
| "learning_rate": 2.163825984634419e-06, |
| "loss": 1.0059508323669433, |
| "step": 4595, |
| "token_acc": 0.681298324742268 |
| }, |
| { |
| "epoch": 0.7981953843484296, |
| "grad_norm": 1.859375, |
| "learning_rate": 2.146031319829942e-06, |
| "loss": 0.9988635063171387, |
| "step": 4600, |
| "token_acc": 0.6838742790766779 |
| }, |
| { |
| "epoch": 0.7990629880270692, |
| "grad_norm": 1.765625, |
| "learning_rate": 2.1283013274339535e-06, |
| "loss": 0.9845050811767578, |
| "step": 4605, |
| "token_acc": 0.6879293681268448 |
| }, |
| { |
| "epoch": 0.7999305917057088, |
| "grad_norm": 1.921875, |
| "learning_rate": 2.110636153441602e-06, |
| "loss": 0.9597654342651367, |
| "step": 4610, |
| "token_acc": 0.696718661601875 |
| }, |
| { |
| "epoch": 0.8007981953843484, |
| "grad_norm": 1.875, |
| "learning_rate": 2.0930359433142934e-06, |
| "loss": 1.0043936729431153, |
| "step": 4615, |
| "token_acc": 0.6844164224450837 |
| }, |
| { |
| "epoch": 0.8016657990629881, |
| "grad_norm": 1.8359375, |
| "learning_rate": 2.0755008419785037e-06, |
| "loss": 1.0047181129455567, |
| "step": 4620, |
| "token_acc": 0.6838641217477737 |
| }, |
| { |
| "epoch": 0.8025334027416277, |
| "grad_norm": 1.90625, |
| "learning_rate": 2.058030993824577e-06, |
| "loss": 0.9849211692810058, |
| "step": 4625, |
| "token_acc": 0.6874467883833129 |
| }, |
| { |
| "epoch": 0.8034010064202672, |
| "grad_norm": 1.953125, |
| "learning_rate": 2.040626542705536e-06, |
| "loss": 0.9540246963500977, |
| "step": 4630, |
| "token_acc": 0.6967229009113826 |
| }, |
| { |
| "epoch": 0.8042686100989068, |
| "grad_norm": 1.8046875, |
| "learning_rate": 2.023287631935904e-06, |
| "loss": 1.00880708694458, |
| "step": 4635, |
| "token_acc": 0.6815457263858633 |
| }, |
| { |
| "epoch": 0.8051362137775464, |
| "grad_norm": 1.6796875, |
| "learning_rate": 2.0060144042905227e-06, |
| "loss": 0.9796277999877929, |
| "step": 4640, |
| "token_acc": 0.690756012376665 |
| }, |
| { |
| "epoch": 0.806003817456186, |
| "grad_norm": 1.8984375, |
| "learning_rate": 1.9888070020033713e-06, |
| "loss": 0.9896170616149902, |
| "step": 4645, |
| "token_acc": 0.6856324413853025 |
| }, |
| { |
| "epoch": 0.8068714211348256, |
| "grad_norm": 1.828125, |
| "learning_rate": 1.971665566766401e-06, |
| "loss": 1.0029498100280763, |
| "step": 4650, |
| "token_acc": 0.6831084917137528 |
| }, |
| { |
| "epoch": 0.8077390248134652, |
| "grad_norm": 1.9921875, |
| "learning_rate": 1.954590239728369e-06, |
| "loss": 0.9786740303039551, |
| "step": 4655, |
| "token_acc": 0.6886030063097072 |
| }, |
| { |
| "epoch": 0.8086066284921049, |
| "grad_norm": 1.9296875, |
| "learning_rate": 1.9375811614936703e-06, |
| "loss": 1.0019638061523437, |
| "step": 4660, |
| "token_acc": 0.6835257720567924 |
| }, |
| { |
| "epoch": 0.8094742321707444, |
| "grad_norm": 1.8984375, |
| "learning_rate": 1.9206384721211847e-06, |
| "loss": 0.9825675010681152, |
| "step": 4665, |
| "token_acc": 0.6896243896500885 |
| }, |
| { |
| "epoch": 0.810341835849384, |
| "grad_norm": 1.875, |
| "learning_rate": 1.9037623111231229e-06, |
| "loss": 1.002269172668457, |
| "step": 4670, |
| "token_acc": 0.6822305407169595 |
| }, |
| { |
| "epoch": 0.8112094395280236, |
| "grad_norm": 1.921875, |
| "learning_rate": 1.8869528174638752e-06, |
| "loss": 0.9967728614807129, |
| "step": 4675, |
| "token_acc": 0.683384136015715 |
| }, |
| { |
| "epoch": 0.8120770432066632, |
| "grad_norm": 1.875, |
| "learning_rate": 1.8702101295588714e-06, |
| "loss": 0.9936102867126465, |
| "step": 4680, |
| "token_acc": 0.6876304142688162 |
| }, |
| { |
| "epoch": 0.8129446468853028, |
| "grad_norm": 1.8359375, |
| "learning_rate": 1.8535343852734333e-06, |
| "loss": 0.9896058082580567, |
| "step": 4685, |
| "token_acc": 0.6851116625310174 |
| }, |
| { |
| "epoch": 0.8138122505639424, |
| "grad_norm": 1.890625, |
| "learning_rate": 1.8369257219216563e-06, |
| "loss": 0.999512004852295, |
| "step": 4690, |
| "token_acc": 0.6847072393860351 |
| }, |
| { |
| "epoch": 0.8146798542425819, |
| "grad_norm": 1.90625, |
| "learning_rate": 1.8203842762652546e-06, |
| "loss": 0.9870369911193848, |
| "step": 4695, |
| "token_acc": 0.6873048561748488 |
| }, |
| { |
| "epoch": 0.8155474579212216, |
| "grad_norm": 1.8828125, |
| "learning_rate": 1.8039101845124552e-06, |
| "loss": 1.0059050559997558, |
| "step": 4700, |
| "token_acc": 0.6838551420357133 |
| }, |
| { |
| "epoch": 0.8164150615998612, |
| "grad_norm": 1.8515625, |
| "learning_rate": 1.7875035823168641e-06, |
| "loss": 1.0021234512329102, |
| "step": 4705, |
| "token_acc": 0.6836149967576725 |
| }, |
| { |
| "epoch": 0.8172826652785008, |
| "grad_norm": 1.8671875, |
| "learning_rate": 1.7711646047763586e-06, |
| "loss": 1.0115555763244628, |
| "step": 4710, |
| "token_acc": 0.6826202404154295 |
| }, |
| { |
| "epoch": 0.8181502689571404, |
| "grad_norm": 1.8828125, |
| "learning_rate": 1.7548933864319661e-06, |
| "loss": 0.9789422035217286, |
| "step": 4715, |
| "token_acc": 0.6882753373099447 |
| }, |
| { |
| "epoch": 0.81901787263578, |
| "grad_norm": 1.8046875, |
| "learning_rate": 1.7386900612667635e-06, |
| "loss": 0.992159366607666, |
| "step": 4720, |
| "token_acc": 0.6854386416259326 |
| }, |
| { |
| "epoch": 0.8198854763144195, |
| "grad_norm": 1.875, |
| "learning_rate": 1.722554762704769e-06, |
| "loss": 0.9974750518798828, |
| "step": 4725, |
| "token_acc": 0.6872423661616824 |
| }, |
| { |
| "epoch": 0.8207530799930591, |
| "grad_norm": 1.921875, |
| "learning_rate": 1.706487623609846e-06, |
| "loss": 1.0016369819641113, |
| "step": 4730, |
| "token_acc": 0.6850419346958717 |
| }, |
| { |
| "epoch": 0.8216206836716987, |
| "grad_norm": 1.7890625, |
| "learning_rate": 1.6904887762846068e-06, |
| "loss": 0.9975146293640137, |
| "step": 4735, |
| "token_acc": 0.6852138558677419 |
| }, |
| { |
| "epoch": 0.8224882873503384, |
| "grad_norm": 1.796875, |
| "learning_rate": 1.6745583524693275e-06, |
| "loss": 0.9930521965026855, |
| "step": 4740, |
| "token_acc": 0.6858060739712228 |
| }, |
| { |
| "epoch": 0.823355891028978, |
| "grad_norm": 1.828125, |
| "learning_rate": 1.658696483340858e-06, |
| "loss": 0.990367317199707, |
| "step": 4745, |
| "token_acc": 0.690088659520974 |
| }, |
| { |
| "epoch": 0.8242234947076176, |
| "grad_norm": 1.890625, |
| "learning_rate": 1.6429032995115446e-06, |
| "loss": 0.9985919952392578, |
| "step": 4750, |
| "token_acc": 0.6834251915580051 |
| }, |
| { |
| "epoch": 0.8250910983862572, |
| "grad_norm": 1.859375, |
| "learning_rate": 1.6271789310281515e-06, |
| "loss": 0.9976622581481933, |
| "step": 4755, |
| "token_acc": 0.6856444289207125 |
| }, |
| { |
| "epoch": 0.8259587020648967, |
| "grad_norm": 1.8984375, |
| "learning_rate": 1.6115235073708024e-06, |
| "loss": 0.9920053482055664, |
| "step": 4760, |
| "token_acc": 0.6863879817112998 |
| }, |
| { |
| "epoch": 0.8268263057435363, |
| "grad_norm": 1.8984375, |
| "learning_rate": 1.5959371574518934e-06, |
| "loss": 1.0084431648254395, |
| "step": 4765, |
| "token_acc": 0.6817347253306887 |
| }, |
| { |
| "epoch": 0.8276939094221759, |
| "grad_norm": 1.84375, |
| "learning_rate": 1.580420009615048e-06, |
| "loss": 0.9829930305480957, |
| "step": 4770, |
| "token_acc": 0.690848938000397 |
| }, |
| { |
| "epoch": 0.8285615131008155, |
| "grad_norm": 1.8359375, |
| "learning_rate": 1.564972191634051e-06, |
| "loss": 0.9950210571289062, |
| "step": 4775, |
| "token_acc": 0.6865175616096914 |
| }, |
| { |
| "epoch": 0.8294291167794552, |
| "grad_norm": 1.859375, |
| "learning_rate": 1.5495938307118052e-06, |
| "loss": 0.9901968002319336, |
| "step": 4780, |
| "token_acc": 0.686112085480694 |
| }, |
| { |
| "epoch": 0.8302967204580948, |
| "grad_norm": 1.8203125, |
| "learning_rate": 1.5342850534792753e-06, |
| "loss": 0.9941259384155273, |
| "step": 4785, |
| "token_acc": 0.6846874957604699 |
| }, |
| { |
| "epoch": 0.8311643241367344, |
| "grad_norm": 1.8203125, |
| "learning_rate": 1.5190459859944506e-06, |
| "loss": 0.9872735977172852, |
| "step": 4790, |
| "token_acc": 0.6862535758770308 |
| }, |
| { |
| "epoch": 0.8320319278153739, |
| "grad_norm": 1.921875, |
| "learning_rate": 1.5038767537413035e-06, |
| "loss": 0.9889012336730957, |
| "step": 4795, |
| "token_acc": 0.6895972043893206 |
| }, |
| { |
| "epoch": 0.8328995314940135, |
| "grad_norm": 1.7890625, |
| "learning_rate": 1.4887774816287604e-06, |
| "loss": 0.9911365509033203, |
| "step": 4800, |
| "token_acc": 0.6869375511317153 |
| }, |
| { |
| "epoch": 0.8337671351726531, |
| "grad_norm": 1.8515625, |
| "learning_rate": 1.4737482939896675e-06, |
| "loss": 1.0037782669067383, |
| "step": 4805, |
| "token_acc": 0.6832780223501523 |
| }, |
| { |
| "epoch": 0.8346347388512927, |
| "grad_norm": 1.796875, |
| "learning_rate": 1.4587893145797738e-06, |
| "loss": 0.9940081596374511, |
| "step": 4810, |
| "token_acc": 0.6838530744774403 |
| }, |
| { |
| "epoch": 0.8355023425299323, |
| "grad_norm": 1.8359375, |
| "learning_rate": 1.4439006665767042e-06, |
| "loss": 0.9779527664184571, |
| "step": 4815, |
| "token_acc": 0.6904385373836274 |
| }, |
| { |
| "epoch": 0.836369946208572, |
| "grad_norm": 1.9609375, |
| "learning_rate": 1.4290824725789542e-06, |
| "loss": 0.9910070419311523, |
| "step": 4820, |
| "token_acc": 0.6858737818147412 |
| }, |
| { |
| "epoch": 0.8372375498872116, |
| "grad_norm": 1.859375, |
| "learning_rate": 1.4143348546048706e-06, |
| "loss": 1.0061234474182128, |
| "step": 4825, |
| "token_acc": 0.6816261604255763 |
| }, |
| { |
| "epoch": 0.8381051535658511, |
| "grad_norm": 1.90625, |
| "learning_rate": 1.3996579340916583e-06, |
| "loss": 1.0056955337524414, |
| "step": 4830, |
| "token_acc": 0.6805094883366865 |
| }, |
| { |
| "epoch": 0.8389727572444907, |
| "grad_norm": 1.875, |
| "learning_rate": 1.3850518318943685e-06, |
| "loss": 0.9870254516601562, |
| "step": 4835, |
| "token_acc": 0.6884185128317614 |
| }, |
| { |
| "epoch": 0.8398403609231303, |
| "grad_norm": 1.78125, |
| "learning_rate": 1.3705166682849103e-06, |
| "loss": 0.9996889114379883, |
| "step": 4840, |
| "token_acc": 0.6827413811061348 |
| }, |
| { |
| "epoch": 0.8407079646017699, |
| "grad_norm": 1.796875, |
| "learning_rate": 1.3560525629510567e-06, |
| "loss": 1.0011041641235352, |
| "step": 4845, |
| "token_acc": 0.6841137322872151 |
| }, |
| { |
| "epoch": 0.8415755682804095, |
| "grad_norm": 1.8671875, |
| "learning_rate": 1.341659634995467e-06, |
| "loss": 0.9982816696166992, |
| "step": 4850, |
| "token_acc": 0.6866214715232295 |
| }, |
| { |
| "epoch": 0.842443171959049, |
| "grad_norm": 1.8671875, |
| "learning_rate": 1.327338002934695e-06, |
| "loss": 1.0017461776733398, |
| "step": 4855, |
| "token_acc": 0.6838751233417388 |
| }, |
| { |
| "epoch": 0.8433107756376887, |
| "grad_norm": 1.9453125, |
| "learning_rate": 1.3130877846982204e-06, |
| "loss": 0.9614505767822266, |
| "step": 4860, |
| "token_acc": 0.6950054819491962 |
| }, |
| { |
| "epoch": 0.8441783793163283, |
| "grad_norm": 1.8984375, |
| "learning_rate": 1.2989090976274765e-06, |
| "loss": 1.0008953094482422, |
| "step": 4865, |
| "token_acc": 0.6828094757789712 |
| }, |
| { |
| "epoch": 0.8450459829949679, |
| "grad_norm": 1.7890625, |
| "learning_rate": 1.28480205847488e-06, |
| "loss": 0.987119197845459, |
| "step": 4870, |
| "token_acc": 0.6882399580718422 |
| }, |
| { |
| "epoch": 0.8459135866736075, |
| "grad_norm": 1.8125, |
| "learning_rate": 1.2707667834028782e-06, |
| "loss": 1.0048983573913575, |
| "step": 4875, |
| "token_acc": 0.6831559340074508 |
| }, |
| { |
| "epoch": 0.8467811903522471, |
| "grad_norm": 1.8984375, |
| "learning_rate": 1.256803387982981e-06, |
| "loss": 0.9872228622436523, |
| "step": 4880, |
| "token_acc": 0.6871233979735624 |
| }, |
| { |
| "epoch": 0.8476487940308867, |
| "grad_norm": 1.921875, |
| "learning_rate": 1.2429119871948203e-06, |
| "loss": 0.9801000595092774, |
| "step": 4885, |
| "token_acc": 0.690729556130764 |
| }, |
| { |
| "epoch": 0.8485163977095262, |
| "grad_norm": 1.8203125, |
| "learning_rate": 1.2290926954251937e-06, |
| "loss": 0.9848250389099121, |
| "step": 4890, |
| "token_acc": 0.6876951737632853 |
| }, |
| { |
| "epoch": 0.8493840013881658, |
| "grad_norm": 1.921875, |
| "learning_rate": 1.2153456264671337e-06, |
| "loss": 0.986370849609375, |
| "step": 4895, |
| "token_acc": 0.6873767258382643 |
| }, |
| { |
| "epoch": 0.8502516050668055, |
| "grad_norm": 1.8828125, |
| "learning_rate": 1.2016708935189591e-06, |
| "loss": 0.9943758010864258, |
| "step": 4900, |
| "token_acc": 0.6850112466771181 |
| }, |
| { |
| "epoch": 0.8511192087454451, |
| "grad_norm": 1.828125, |
| "learning_rate": 1.1880686091833482e-06, |
| "loss": 1.000884437561035, |
| "step": 4905, |
| "token_acc": 0.683370710159701 |
| }, |
| { |
| "epoch": 0.8519868124240847, |
| "grad_norm": 1.8359375, |
| "learning_rate": 1.174538885466412e-06, |
| "loss": 0.9865160942077636, |
| "step": 4910, |
| "token_acc": 0.6890235069467308 |
| }, |
| { |
| "epoch": 0.8528544161027243, |
| "grad_norm": 1.828125, |
| "learning_rate": 1.1610818337767716e-06, |
| "loss": 0.9991436004638672, |
| "step": 4915, |
| "token_acc": 0.6858561584726297 |
| }, |
| { |
| "epoch": 0.8537220197813639, |
| "grad_norm": 1.765625, |
| "learning_rate": 1.147697564924639e-06, |
| "loss": 0.9727308273315429, |
| "step": 4920, |
| "token_acc": 0.6912715446298077 |
| }, |
| { |
| "epoch": 0.8545896234600034, |
| "grad_norm": 1.8515625, |
| "learning_rate": 1.1343861891209106e-06, |
| "loss": 1.0181291580200196, |
| "step": 4925, |
| "token_acc": 0.6820682501542784 |
| }, |
| { |
| "epoch": 0.855457227138643, |
| "grad_norm": 1.8359375, |
| "learning_rate": 1.121147815976248e-06, |
| "loss": 1.0026049613952637, |
| "step": 4930, |
| "token_acc": 0.683598010267869 |
| }, |
| { |
| "epoch": 0.8563248308172826, |
| "grad_norm": 1.7734375, |
| "learning_rate": 1.1079825545001887e-06, |
| "loss": 0.9865102767944336, |
| "step": 4935, |
| "token_acc": 0.6898536402969927 |
| }, |
| { |
| "epoch": 0.8571924344959223, |
| "grad_norm": 1.9140625, |
| "learning_rate": 1.0948905131002407e-06, |
| "loss": 1.0127381324768066, |
| "step": 4940, |
| "token_acc": 0.6825190010857763 |
| }, |
| { |
| "epoch": 0.8580600381745619, |
| "grad_norm": 1.84375, |
| "learning_rate": 1.081871799580989e-06, |
| "loss": 0.9863951683044434, |
| "step": 4945, |
| "token_acc": 0.6883122286792139 |
| }, |
| { |
| "epoch": 0.8589276418532015, |
| "grad_norm": 1.8828125, |
| "learning_rate": 1.0689265211432132e-06, |
| "loss": 0.9868002891540527, |
| "step": 4950, |
| "token_acc": 0.6885490091767522 |
| }, |
| { |
| "epoch": 0.8597952455318411, |
| "grad_norm": 1.75, |
| "learning_rate": 1.0560547843830016e-06, |
| "loss": 0.9947976112365723, |
| "step": 4955, |
| "token_acc": 0.6865387356336733 |
| }, |
| { |
| "epoch": 0.8606628492104806, |
| "grad_norm": 1.859375, |
| "learning_rate": 1.0432566952908696e-06, |
| "loss": 1.0024614334106445, |
| "step": 4960, |
| "token_acc": 0.683781453319746 |
| }, |
| { |
| "epoch": 0.8615304528891202, |
| "grad_norm": 1.8359375, |
| "learning_rate": 1.030532359250901e-06, |
| "loss": 0.9844224929809571, |
| "step": 4965, |
| "token_acc": 0.6905729592779288 |
| }, |
| { |
| "epoch": 0.8623980565677598, |
| "grad_norm": 1.921875, |
| "learning_rate": 1.0178818810398616e-06, |
| "loss": 1.004835605621338, |
| "step": 4970, |
| "token_acc": 0.6851532852387675 |
| }, |
| { |
| "epoch": 0.8632656602463994, |
| "grad_norm": 1.890625, |
| "learning_rate": 1.0053053648263477e-06, |
| "loss": 0.9801043510437012, |
| "step": 4975, |
| "token_acc": 0.6884155757432423 |
| }, |
| { |
| "epoch": 0.8641332639250391, |
| "grad_norm": 1.96875, |
| "learning_rate": 9.92802914169927e-07, |
| "loss": 0.9909211158752441, |
| "step": 4980, |
| "token_acc": 0.6887015132838522 |
| }, |
| { |
| "epoch": 0.8650008676036787, |
| "grad_norm": 1.9296875, |
| "learning_rate": 9.803746320202812e-07, |
| "loss": 1.0029238700866698, |
| "step": 4985, |
| "token_acc": 0.6833788400406263 |
| }, |
| { |
| "epoch": 0.8658684712823183, |
| "grad_norm": 1.8046875, |
| "learning_rate": 9.680206207163666e-07, |
| "loss": 0.990473747253418, |
| "step": 4990, |
| "token_acc": 0.6859917435513812 |
| }, |
| { |
| "epoch": 0.8667360749609578, |
| "grad_norm": 1.8671875, |
| "learning_rate": 9.557409819855645e-07, |
| "loss": 0.9845627784729004, |
| "step": 4995, |
| "token_acc": 0.6892265928567627 |
| }, |
| { |
| "epoch": 0.8676036786395974, |
| "grad_norm": 1.7890625, |
| "learning_rate": 9.435358169428444e-07, |
| "loss": 0.9981782913208008, |
| "step": 5000, |
| "token_acc": 0.6857057648919893 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 5763, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.988905759887589e+18, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|