Text Generation
PEFT
Safetensors
English
reranking
information-retrieval
listwise
lora
generative
conversational
Instructions to use abdoelsayed/dear-8b-reranker-listwise-lora-v1 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use abdoelsayed/dear-8b-reranker-listwise-lora-v1 with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B-Instruct")
model = PeftModel.from_pretrained(base_model, "abdoelsayed/dear-8b-reranker-listwise-lora-v1")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.8057726999398676, | |
| "eval_steps": 500, | |
| "global_step": 7000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.004008819402685909, | |
| "grad_norm": 0.7079163193702698, | |
| "learning_rate": 1.2016021361815755e-06, | |
| "loss": 0.4167, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.008017638805371818, | |
| "grad_norm": 0.7350050210952759, | |
| "learning_rate": 2.5367156208277703e-06, | |
| "loss": 0.4259, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.012026458208057728, | |
| "grad_norm": 0.8004947900772095, | |
| "learning_rate": 3.871829105473966e-06, | |
| "loss": 0.4058, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.016035277610743637, | |
| "grad_norm": 0.7146145701408386, | |
| "learning_rate": 5.206942590120161e-06, | |
| "loss": 0.3923, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.020044097013429546, | |
| "grad_norm": 0.7212072610855103, | |
| "learning_rate": 6.542056074766355e-06, | |
| "loss": 0.3844, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.024052916416115455, | |
| "grad_norm": 0.6941127181053162, | |
| "learning_rate": 7.87716955941255e-06, | |
| "loss": 0.3679, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.028061735818801364, | |
| "grad_norm": 0.5748901963233948, | |
| "learning_rate": 9.212283044058744e-06, | |
| "loss": 0.3518, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.032070555221487274, | |
| "grad_norm": 0.5205843448638916, | |
| "learning_rate": 1.054739652870494e-05, | |
| "loss": 0.3631, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.03607937462417318, | |
| "grad_norm": 0.5671967267990112, | |
| "learning_rate": 1.1882510013351136e-05, | |
| "loss": 0.3526, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.04008819402685909, | |
| "grad_norm": 0.6912459135055542, | |
| "learning_rate": 1.321762349799733e-05, | |
| "loss": 0.3186, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.044097013429545, | |
| "grad_norm": 0.6880580186843872, | |
| "learning_rate": 1.4552736982643526e-05, | |
| "loss": 0.3127, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.04810583283223091, | |
| "grad_norm": 0.7009742259979248, | |
| "learning_rate": 1.588785046728972e-05, | |
| "loss": 0.3092, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.05211465223491682, | |
| "grad_norm": 0.7727891802787781, | |
| "learning_rate": 1.7222963951935918e-05, | |
| "loss": 0.3138, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.05612347163760273, | |
| "grad_norm": 0.6901352405548096, | |
| "learning_rate": 1.855807743658211e-05, | |
| "loss": 0.3014, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.06013229104028864, | |
| "grad_norm": 0.8163438439369202, | |
| "learning_rate": 1.9893190921228304e-05, | |
| "loss": 0.312, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.06414111044297455, | |
| "grad_norm": 0.8670288324356079, | |
| "learning_rate": 2.12283044058745e-05, | |
| "loss": 0.2875, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.06814992984566046, | |
| "grad_norm": 1.0734412670135498, | |
| "learning_rate": 2.2563417890520698e-05, | |
| "loss": 0.2919, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.07215874924834637, | |
| "grad_norm": 0.704152524471283, | |
| "learning_rate": 2.389853137516689e-05, | |
| "loss": 0.3094, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.07616756865103227, | |
| "grad_norm": 0.7793599963188171, | |
| "learning_rate": 2.5233644859813084e-05, | |
| "loss": 0.2992, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.08017638805371818, | |
| "grad_norm": 0.8480731248855591, | |
| "learning_rate": 2.656875834445928e-05, | |
| "loss": 0.2957, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.0841852074564041, | |
| "grad_norm": 0.8737421631813049, | |
| "learning_rate": 2.7903871829105478e-05, | |
| "loss": 0.2794, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.08819402685909, | |
| "grad_norm": 0.8049966096878052, | |
| "learning_rate": 2.923898531375167e-05, | |
| "loss": 0.2767, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.09220284626177591, | |
| "grad_norm": 0.8555333614349365, | |
| "learning_rate": 3.0574098798397864e-05, | |
| "loss": 0.2899, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.09621166566446182, | |
| "grad_norm": 0.8982564806938171, | |
| "learning_rate": 3.190921228304406e-05, | |
| "loss": 0.2661, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.10022048506714773, | |
| "grad_norm": 0.7863436937332153, | |
| "learning_rate": 3.324432576769025e-05, | |
| "loss": 0.284, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.10422930446983364, | |
| "grad_norm": 0.861031711101532, | |
| "learning_rate": 3.457943925233645e-05, | |
| "loss": 0.266, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.10823812387251955, | |
| "grad_norm": 0.7962524890899658, | |
| "learning_rate": 3.5914552736982644e-05, | |
| "loss": 0.2589, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.11224694327520546, | |
| "grad_norm": 0.7888882756233215, | |
| "learning_rate": 3.7249666221628844e-05, | |
| "loss": 0.2692, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.11625576267789137, | |
| "grad_norm": 1.0324134826660156, | |
| "learning_rate": 3.858477970627504e-05, | |
| "loss": 0.2785, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.12026458208057728, | |
| "grad_norm": 0.8519011735916138, | |
| "learning_rate": 3.991989319092123e-05, | |
| "loss": 0.2837, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.12427340148326319, | |
| "grad_norm": 0.9894006848335266, | |
| "learning_rate": 4.1255006675567424e-05, | |
| "loss": 0.2729, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.1282822208859491, | |
| "grad_norm": 0.8675716519355774, | |
| "learning_rate": 4.259012016021362e-05, | |
| "loss": 0.2637, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.132291040288635, | |
| "grad_norm": 0.8296481370925903, | |
| "learning_rate": 4.392523364485982e-05, | |
| "loss": 0.2597, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.1362998596913209, | |
| "grad_norm": 0.7083739042282104, | |
| "learning_rate": 4.526034712950601e-05, | |
| "loss": 0.2731, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.14030867909400682, | |
| "grad_norm": 0.8215944766998291, | |
| "learning_rate": 4.6595460614152204e-05, | |
| "loss": 0.2655, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.14431749849669273, | |
| "grad_norm": 0.7739771008491516, | |
| "learning_rate": 4.79305740987984e-05, | |
| "loss": 0.2661, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.14832631789937864, | |
| "grad_norm": 0.6515551805496216, | |
| "learning_rate": 4.92656875834446e-05, | |
| "loss": 0.272, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.15233513730206455, | |
| "grad_norm": 0.7235403060913086, | |
| "learning_rate": 5.0600801068090784e-05, | |
| "loss": 0.295, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.15634395670475046, | |
| "grad_norm": 0.7624292373657227, | |
| "learning_rate": 5.1935914552736984e-05, | |
| "loss": 0.2622, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.16035277610743637, | |
| "grad_norm": 0.7019667029380798, | |
| "learning_rate": 5.327102803738318e-05, | |
| "loss": 0.2562, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.16436159551012228, | |
| "grad_norm": 0.7686800360679626, | |
| "learning_rate": 5.460614152202938e-05, | |
| "loss": 0.2726, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.1683704149128082, | |
| "grad_norm": 0.6799090504646301, | |
| "learning_rate": 5.594125500667558e-05, | |
| "loss": 0.266, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.1723792343154941, | |
| "grad_norm": 0.6165328025817871, | |
| "learning_rate": 5.7276368491321764e-05, | |
| "loss": 0.2706, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.17638805371818, | |
| "grad_norm": 2.7531023025512695, | |
| "learning_rate": 5.8611481975967965e-05, | |
| "loss": 0.2645, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.1803968731208659, | |
| "grad_norm": 0.7134599685668945, | |
| "learning_rate": 5.994659546061415e-05, | |
| "loss": 0.2823, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.18440569252355182, | |
| "grad_norm": 0.8196555376052856, | |
| "learning_rate": 6.128170894526035e-05, | |
| "loss": 0.2568, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.18841451192623773, | |
| "grad_norm": 0.7205436825752258, | |
| "learning_rate": 6.261682242990654e-05, | |
| "loss": 0.2638, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.19242333132892364, | |
| "grad_norm": 0.6776229739189148, | |
| "learning_rate": 6.395193591455274e-05, | |
| "loss": 0.2565, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.19643215073160955, | |
| "grad_norm": 0.5640079379081726, | |
| "learning_rate": 6.528704939919892e-05, | |
| "loss": 0.2652, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.20044097013429546, | |
| "grad_norm": 0.6904841661453247, | |
| "learning_rate": 6.662216288384512e-05, | |
| "loss": 0.2761, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.20444978953698137, | |
| "grad_norm": 0.602130651473999, | |
| "learning_rate": 6.795727636849132e-05, | |
| "loss": 0.2628, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.20845860893966728, | |
| "grad_norm": 0.5913508534431458, | |
| "learning_rate": 6.929238985313752e-05, | |
| "loss": 0.2655, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.2124674283423532, | |
| "grad_norm": 0.6655270457267761, | |
| "learning_rate": 7.062750333778372e-05, | |
| "loss": 0.2453, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.2164762477450391, | |
| "grad_norm": 0.7957231998443604, | |
| "learning_rate": 7.196261682242991e-05, | |
| "loss": 0.2572, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.220485067147725, | |
| "grad_norm": 0.6448558568954468, | |
| "learning_rate": 7.329773030707611e-05, | |
| "loss": 0.2674, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.22449388655041091, | |
| "grad_norm": 0.6882309317588806, | |
| "learning_rate": 7.46328437917223e-05, | |
| "loss": 0.2634, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.22850270595309682, | |
| "grad_norm": 0.5508169531822205, | |
| "learning_rate": 7.59679572763685e-05, | |
| "loss": 0.2821, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.23251152535578273, | |
| "grad_norm": 0.567504346370697, | |
| "learning_rate": 7.73030707610147e-05, | |
| "loss": 0.2623, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.23652034475846864, | |
| "grad_norm": 0.5799248218536377, | |
| "learning_rate": 7.863818424566088e-05, | |
| "loss": 0.2574, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.24052916416115455, | |
| "grad_norm": 0.6579704880714417, | |
| "learning_rate": 7.997329773030708e-05, | |
| "loss": 0.2639, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.24453798356384046, | |
| "grad_norm": 0.6886210441589355, | |
| "learning_rate": 8.130841121495327e-05, | |
| "loss": 0.2604, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.24854680296652637, | |
| "grad_norm": 0.702531635761261, | |
| "learning_rate": 8.264352469959947e-05, | |
| "loss": 0.2549, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.25255562236921225, | |
| "grad_norm": 0.605786144733429, | |
| "learning_rate": 8.397863818424566e-05, | |
| "loss": 0.2515, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.2565644417718982, | |
| "grad_norm": 0.7157173752784729, | |
| "learning_rate": 8.531375166889186e-05, | |
| "loss": 0.2565, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.26057326117458407, | |
| "grad_norm": 0.552195131778717, | |
| "learning_rate": 8.664886515353804e-05, | |
| "loss": 0.2675, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.26458208057727, | |
| "grad_norm": 0.7387903928756714, | |
| "learning_rate": 8.798397863818424e-05, | |
| "loss": 0.2593, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.2685908999799559, | |
| "grad_norm": 0.5697731971740723, | |
| "learning_rate": 8.931909212283044e-05, | |
| "loss": 0.2525, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.2725997193826418, | |
| "grad_norm": 0.5313478708267212, | |
| "learning_rate": 9.065420560747664e-05, | |
| "loss": 0.2503, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.2766085387853277, | |
| "grad_norm": 0.5595772862434387, | |
| "learning_rate": 9.198931909212284e-05, | |
| "loss": 0.2455, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.28061735818801364, | |
| "grad_norm": 0.6393229365348816, | |
| "learning_rate": 9.332443257676903e-05, | |
| "loss": 0.2541, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.2846261775906995, | |
| "grad_norm": 0.6859897375106812, | |
| "learning_rate": 9.465954606141523e-05, | |
| "loss": 0.2552, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.28863499699338546, | |
| "grad_norm": 0.5158377289772034, | |
| "learning_rate": 9.599465954606142e-05, | |
| "loss": 0.2658, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.29264381639607134, | |
| "grad_norm": 0.5928187966346741, | |
| "learning_rate": 9.732977303070762e-05, | |
| "loss": 0.2515, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.2966526357987573, | |
| "grad_norm": 0.5400727391242981, | |
| "learning_rate": 9.86648865153538e-05, | |
| "loss": 0.255, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.30066145520144316, | |
| "grad_norm": 0.6557461023330688, | |
| "learning_rate": 0.0001, | |
| "loss": 0.266, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.3046702746041291, | |
| "grad_norm": 0.5242008566856384, | |
| "learning_rate": 9.999945572080073e-05, | |
| "loss": 0.2581, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.308679094006815, | |
| "grad_norm": 0.6318579912185669, | |
| "learning_rate": 9.999782289505249e-05, | |
| "loss": 0.2487, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.3126879134095009, | |
| "grad_norm": 0.5616021156311035, | |
| "learning_rate": 9.999510155830382e-05, | |
| "loss": 0.2477, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.3166967328121868, | |
| "grad_norm": 0.6462226510047913, | |
| "learning_rate": 9.999129176980139e-05, | |
| "loss": 0.2534, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.32070555221487274, | |
| "grad_norm": 0.6198021769523621, | |
| "learning_rate": 9.998639361248875e-05, | |
| "loss": 0.2526, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.3247143716175586, | |
| "grad_norm": 0.7665322422981262, | |
| "learning_rate": 9.99804071930045e-05, | |
| "loss": 0.243, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.32872319102024455, | |
| "grad_norm": 0.4766557514667511, | |
| "learning_rate": 9.997333264168e-05, | |
| "loss": 0.2535, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.33273201042293044, | |
| "grad_norm": 0.5588070154190063, | |
| "learning_rate": 9.996517011253648e-05, | |
| "loss": 0.2597, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.3367408298256164, | |
| "grad_norm": 0.5454280376434326, | |
| "learning_rate": 9.995591978328171e-05, | |
| "loss": 0.244, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.34074964922830225, | |
| "grad_norm": 0.49733200669288635, | |
| "learning_rate": 9.994558185530623e-05, | |
| "loss": 0.2537, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.3447584686309882, | |
| "grad_norm": 0.5581687688827515, | |
| "learning_rate": 9.993415655367875e-05, | |
| "loss": 0.2404, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.34876728803367407, | |
| "grad_norm": 0.6027563810348511, | |
| "learning_rate": 9.992164412714143e-05, | |
| "loss": 0.2482, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.35277610743636, | |
| "grad_norm": 0.5277743935585022, | |
| "learning_rate": 9.990804484810444e-05, | |
| "loss": 0.2495, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.3567849268390459, | |
| "grad_norm": 0.4644189774990082, | |
| "learning_rate": 9.989335901263996e-05, | |
| "loss": 0.2484, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.3607937462417318, | |
| "grad_norm": 0.5761633515357971, | |
| "learning_rate": 9.987758694047575e-05, | |
| "loss": 0.2449, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.3648025656444177, | |
| "grad_norm": 0.5238028168678284, | |
| "learning_rate": 9.986072897498829e-05, | |
| "loss": 0.2492, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.36881138504710365, | |
| "grad_norm": 0.7435324788093567, | |
| "learning_rate": 9.984278548319515e-05, | |
| "loss": 0.2329, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.3728202044497895, | |
| "grad_norm": 0.5836918950080872, | |
| "learning_rate": 9.982375685574712e-05, | |
| "loss": 0.2366, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.37682902385247546, | |
| "grad_norm": 0.5967078804969788, | |
| "learning_rate": 9.980364350691962e-05, | |
| "loss": 0.2323, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.38083784325516135, | |
| "grad_norm": 0.5183435082435608, | |
| "learning_rate": 9.978244587460376e-05, | |
| "loss": 0.2496, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.3848466626578473, | |
| "grad_norm": 0.5637623071670532, | |
| "learning_rate": 9.976016442029675e-05, | |
| "loss": 0.2469, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.38885548206053316, | |
| "grad_norm": 0.5092435479164124, | |
| "learning_rate": 9.973679962909189e-05, | |
| "loss": 0.2423, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.3928643014632191, | |
| "grad_norm": 0.589830219745636, | |
| "learning_rate": 9.971235200966795e-05, | |
| "loss": 0.2327, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.396873120865905, | |
| "grad_norm": 0.47097843885421753, | |
| "learning_rate": 9.968682209427817e-05, | |
| "loss": 0.2597, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.4008819402685909, | |
| "grad_norm": 0.5486937761306763, | |
| "learning_rate": 9.966021043873864e-05, | |
| "loss": 0.2471, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.4048907596712768, | |
| "grad_norm": 0.7114580273628235, | |
| "learning_rate": 9.963251762241616e-05, | |
| "loss": 0.2438, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.40889957907396274, | |
| "grad_norm": 0.6333823800086975, | |
| "learning_rate": 9.96037442482157e-05, | |
| "loss": 0.2264, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.4129083984766486, | |
| "grad_norm": 0.6404210329055786, | |
| "learning_rate": 9.95738909425672e-05, | |
| "loss": 0.2388, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.41691721787933456, | |
| "grad_norm": 0.4633651673793793, | |
| "learning_rate": 9.954295835541203e-05, | |
| "loss": 0.2438, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.42092603728202044, | |
| "grad_norm": 0.5816488265991211, | |
| "learning_rate": 9.951094716018871e-05, | |
| "loss": 0.2397, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.4249348566847064, | |
| "grad_norm": 0.4773317277431488, | |
| "learning_rate": 9.947785805381836e-05, | |
| "loss": 0.2549, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.42894367608739226, | |
| "grad_norm": 0.6512565016746521, | |
| "learning_rate": 9.944369175668948e-05, | |
| "loss": 0.2341, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.4329524954900782, | |
| "grad_norm": 0.5367722511291504, | |
| "learning_rate": 9.940844901264225e-05, | |
| "loss": 0.2331, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.4369613148927641, | |
| "grad_norm": 0.46036213636398315, | |
| "learning_rate": 9.937213058895237e-05, | |
| "loss": 0.2506, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.44097013429545, | |
| "grad_norm": 0.5731292366981506, | |
| "learning_rate": 9.933473727631435e-05, | |
| "loss": 0.2458, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.4449789536981359, | |
| "grad_norm": 0.4861133396625519, | |
| "learning_rate": 9.929626988882428e-05, | |
| "loss": 0.2385, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.44898777310082183, | |
| "grad_norm": 0.5132921934127808, | |
| "learning_rate": 9.925672926396212e-05, | |
| "loss": 0.239, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.4529965925035077, | |
| "grad_norm": 0.5186300873756409, | |
| "learning_rate": 9.921611626257344e-05, | |
| "loss": 0.2342, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.45700541190619365, | |
| "grad_norm": 0.5068562626838684, | |
| "learning_rate": 9.917443176885073e-05, | |
| "loss": 0.2377, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.46101423130887953, | |
| "grad_norm": 0.5708321928977966, | |
| "learning_rate": 9.913167669031409e-05, | |
| "loss": 0.2245, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.46502305071156547, | |
| "grad_norm": 0.5989340543746948, | |
| "learning_rate": 9.908785195779153e-05, | |
| "loss": 0.2235, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.46903187011425135, | |
| "grad_norm": 0.5365070700645447, | |
| "learning_rate": 9.904295852539867e-05, | |
| "loss": 0.2434, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.4730406895169373, | |
| "grad_norm": 0.625773549079895, | |
| "learning_rate": 9.899699737051793e-05, | |
| "loss": 0.23, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.47704950891962317, | |
| "grad_norm": 0.5225462317466736, | |
| "learning_rate": 9.894996949377738e-05, | |
| "loss": 0.2219, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.4810583283223091, | |
| "grad_norm": 0.6289816498756409, | |
| "learning_rate": 9.890187591902879e-05, | |
| "loss": 0.2428, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.485067147724995, | |
| "grad_norm": 0.5729430317878723, | |
| "learning_rate": 9.885271769332547e-05, | |
| "loss": 0.2267, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.4890759671276809, | |
| "grad_norm": 0.4762110412120819, | |
| "learning_rate": 9.880249588689941e-05, | |
| "loss": 0.2306, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.4930847865303668, | |
| "grad_norm": 0.5188407301902771, | |
| "learning_rate": 9.875121159313797e-05, | |
| "loss": 0.2389, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.49709360593305274, | |
| "grad_norm": 0.630754828453064, | |
| "learning_rate": 9.869886592856016e-05, | |
| "loss": 0.2262, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.5011024253357387, | |
| "grad_norm": 0.5617574453353882, | |
| "learning_rate": 9.864546003279222e-05, | |
| "loss": 0.2362, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.5051112447384245, | |
| "grad_norm": 0.6316712498664856, | |
| "learning_rate": 9.859099506854285e-05, | |
| "loss": 0.2265, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.5091200641411104, | |
| "grad_norm": 0.4762302339076996, | |
| "learning_rate": 9.8535472221578e-05, | |
| "loss": 0.2286, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.5131288835437964, | |
| "grad_norm": 0.6005476117134094, | |
| "learning_rate": 9.847889270069483e-05, | |
| "loss": 0.2217, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.5171377029464823, | |
| "grad_norm": 0.5312756299972534, | |
| "learning_rate": 9.842125773769563e-05, | |
| "loss": 0.2285, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.5211465223491681, | |
| "grad_norm": 0.7248687744140625, | |
| "learning_rate": 9.836256858736086e-05, | |
| "loss": 0.2354, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.5251553417518541, | |
| "grad_norm": 0.5596895813941956, | |
| "learning_rate": 9.830282652742186e-05, | |
| "loss": 0.2286, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.52916416115454, | |
| "grad_norm": 0.6484787464141846, | |
| "learning_rate": 9.824203285853305e-05, | |
| "loss": 0.2325, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.533172980557226, | |
| "grad_norm": 0.5286840200424194, | |
| "learning_rate": 9.81801889042436e-05, | |
| "loss": 0.2213, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.5371817999599118, | |
| "grad_norm": 0.5632983446121216, | |
| "learning_rate": 9.811729601096865e-05, | |
| "loss": 0.2262, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.5411906193625977, | |
| "grad_norm": 0.6314755082130432, | |
| "learning_rate": 9.805335554795993e-05, | |
| "loss": 0.226, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.5451994387652837, | |
| "grad_norm": 0.5536089539527893, | |
| "learning_rate": 9.798836890727601e-05, | |
| "loss": 0.2363, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.5492082581679696, | |
| "grad_norm": 0.5642661452293396, | |
| "learning_rate": 9.792233750375193e-05, | |
| "loss": 0.2367, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.5532170775706554, | |
| "grad_norm": 0.4720064103603363, | |
| "learning_rate": 9.785526277496851e-05, | |
| "loss": 0.2278, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.5572258969733414, | |
| "grad_norm": 0.568137526512146, | |
| "learning_rate": 9.778714618122091e-05, | |
| "loss": 0.2135, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.5612347163760273, | |
| "grad_norm": 0.5233467221260071, | |
| "learning_rate": 9.771798920548693e-05, | |
| "loss": 0.2243, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.5652435357787132, | |
| "grad_norm": 0.5088178515434265, | |
| "learning_rate": 9.764779335339473e-05, | |
| "loss": 0.2438, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.569252355181399, | |
| "grad_norm": 0.6083818078041077, | |
| "learning_rate": 9.757656015318998e-05, | |
| "loss": 0.2223, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.573261174584085, | |
| "grad_norm": 0.5877081155776978, | |
| "learning_rate": 9.750429115570264e-05, | |
| "loss": 0.2298, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.5772699939867709, | |
| "grad_norm": 0.6110019683837891, | |
| "learning_rate": 9.743098793431321e-05, | |
| "loss": 0.2323, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.5812788133894567, | |
| "grad_norm": 0.5051080584526062, | |
| "learning_rate": 9.735665208491842e-05, | |
| "loss": 0.2436, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.5852876327921427, | |
| "grad_norm": 0.5243321657180786, | |
| "learning_rate": 9.728128522589655e-05, | |
| "loss": 0.2338, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.5892964521948286, | |
| "grad_norm": 0.6249774694442749, | |
| "learning_rate": 9.720488899807214e-05, | |
| "loss": 0.226, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.5933052715975146, | |
| "grad_norm": 0.5004896521568298, | |
| "learning_rate": 9.71274650646803e-05, | |
| "loss": 0.2144, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.5973140910002004, | |
| "grad_norm": 0.6254176497459412, | |
| "learning_rate": 9.704901511133048e-05, | |
| "loss": 0.219, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.6013229104028863, | |
| "grad_norm": 0.5976850390434265, | |
| "learning_rate": 9.696954084596979e-05, | |
| "loss": 0.2323, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.6053317298055723, | |
| "grad_norm": 0.588320791721344, | |
| "learning_rate": 9.688904399884583e-05, | |
| "loss": 0.2049, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.6093405492082582, | |
| "grad_norm": 0.655425488948822, | |
| "learning_rate": 9.680752632246896e-05, | |
| "loss": 0.224, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.613349368610944, | |
| "grad_norm": 0.6558622121810913, | |
| "learning_rate": 9.672498959157422e-05, | |
| "loss": 0.2201, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.61735818801363, | |
| "grad_norm": 0.6564059853553772, | |
| "learning_rate": 9.664143560308263e-05, | |
| "loss": 0.2075, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.6213670074163159, | |
| "grad_norm": 0.573246419429779, | |
| "learning_rate": 9.655686617606212e-05, | |
| "loss": 0.2091, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.6253758268190018, | |
| "grad_norm": 0.6015535593032837, | |
| "learning_rate": 9.647128315168788e-05, | |
| "loss": 0.2221, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.6293846462216877, | |
| "grad_norm": 0.6874203085899353, | |
| "learning_rate": 9.638468839320232e-05, | |
| "loss": 0.213, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.6333934656243736, | |
| "grad_norm": 0.5189663171768188, | |
| "learning_rate": 9.629708378587445e-05, | |
| "loss": 0.2161, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.6374022850270595, | |
| "grad_norm": 0.571725070476532, | |
| "learning_rate": 9.62084712369589e-05, | |
| "loss": 0.2236, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.6414111044297455, | |
| "grad_norm": 0.6262040734291077, | |
| "learning_rate": 9.61188526756544e-05, | |
| "loss": 0.2346, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.6454199238324313, | |
| "grad_norm": 0.6156971454620361, | |
| "learning_rate": 9.602823005306164e-05, | |
| "loss": 0.2089, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.6494287432351172, | |
| "grad_norm": 0.5515331625938416, | |
| "learning_rate": 9.5936605342141e-05, | |
| "loss": 0.2225, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.6534375626378032, | |
| "grad_norm": 0.6686428785324097, | |
| "learning_rate": 9.584398053766941e-05, | |
| "loss": 0.2189, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.6574463820404891, | |
| "grad_norm": 0.5298424959182739, | |
| "learning_rate": 9.575035765619708e-05, | |
| "loss": 0.2297, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.6614552014431749, | |
| "grad_norm": 0.6391364932060242, | |
| "learning_rate": 9.565573873600349e-05, | |
| "loss": 0.2441, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.6654640208458609, | |
| "grad_norm": 0.6574255228042603, | |
| "learning_rate": 9.556012583705303e-05, | |
| "loss": 0.2329, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.6694728402485468, | |
| "grad_norm": 0.5856221914291382, | |
| "learning_rate": 9.546352104095019e-05, | |
| "loss": 0.2001, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.6734816596512327, | |
| "grad_norm": 0.6181838512420654, | |
| "learning_rate": 9.536592645089421e-05, | |
| "loss": 0.2255, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.6774904790539186, | |
| "grad_norm": 0.635492742061615, | |
| "learning_rate": 9.52673441916333e-05, | |
| "loss": 0.1973, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.6814992984566045, | |
| "grad_norm": 0.6166744232177734, | |
| "learning_rate": 9.51677764094184e-05, | |
| "loss": 0.2248, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.6855081178592904, | |
| "grad_norm": 0.6294150352478027, | |
| "learning_rate": 9.506722527195639e-05, | |
| "loss": 0.2123, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.6895169372619764, | |
| "grad_norm": 0.6106050610542297, | |
| "learning_rate": 9.496569296836301e-05, | |
| "loss": 0.208, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.6935257566646622, | |
| "grad_norm": 0.6652440428733826, | |
| "learning_rate": 9.486318170911508e-05, | |
| "loss": 0.2112, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.6975345760673481, | |
| "grad_norm": 0.5508642792701721, | |
| "learning_rate": 9.475969372600246e-05, | |
| "loss": 0.2299, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.7015433954700341, | |
| "grad_norm": 0.5851196050643921, | |
| "learning_rate": 9.465523127207938e-05, | |
| "loss": 0.2283, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.70555221487272, | |
| "grad_norm": 0.6574164628982544, | |
| "learning_rate": 9.454979662161547e-05, | |
| "loss": 0.2149, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.7095610342754058, | |
| "grad_norm": 0.562202513217926, | |
| "learning_rate": 9.444339207004626e-05, | |
| "loss": 0.2162, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.7135698536780918, | |
| "grad_norm": 0.5654606223106384, | |
| "learning_rate": 9.433601993392308e-05, | |
| "loss": 0.2283, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.7175786730807777, | |
| "grad_norm": 0.5194072127342224, | |
| "learning_rate": 9.422768255086274e-05, | |
| "loss": 0.2266, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.7215874924834637, | |
| "grad_norm": 0.651335597038269, | |
| "learning_rate": 9.411838227949663e-05, | |
| "loss": 0.1999, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.7255963118861495, | |
| "grad_norm": 0.6659877300262451, | |
| "learning_rate": 9.400812149941932e-05, | |
| "loss": 0.2148, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.7296051312888354, | |
| "grad_norm": 0.6771412491798401, | |
| "learning_rate": 9.389690261113672e-05, | |
| "loss": 0.2233, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.7336139506915214, | |
| "grad_norm": 0.8170326948165894, | |
| "learning_rate": 9.378472803601397e-05, | |
| "loss": 0.2282, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.7376227700942073, | |
| "grad_norm": 0.6430959701538086, | |
| "learning_rate": 9.36716002162226e-05, | |
| "loss": 0.2036, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.7416315894968931, | |
| "grad_norm": 0.6288866996765137, | |
| "learning_rate": 9.355752161468731e-05, | |
| "loss": 0.2223, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.745640408899579, | |
| "grad_norm": 0.7772784233093262, | |
| "learning_rate": 9.344249471503259e-05, | |
| "loss": 0.2183, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.749649228302265, | |
| "grad_norm": 0.6505045890808105, | |
| "learning_rate": 9.332652202152833e-05, | |
| "loss": 0.2126, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.7536580477049509, | |
| "grad_norm": 0.5706261992454529, | |
| "learning_rate": 9.320960605903553e-05, | |
| "loss": 0.2107, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.7576668671076368, | |
| "grad_norm": 0.5667653679847717, | |
| "learning_rate": 9.309174937295126e-05, | |
| "loss": 0.2036, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.7616756865103227, | |
| "grad_norm": 0.6292815208435059, | |
| "learning_rate": 9.297295452915323e-05, | |
| "loss": 0.2038, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.7656845059130086, | |
| "grad_norm": 0.6061923503875732, | |
| "learning_rate": 9.285322411394393e-05, | |
| "loss": 0.2183, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.7696933253156946, | |
| "grad_norm": 0.7514089941978455, | |
| "learning_rate": 9.273256073399434e-05, | |
| "loss": 0.2135, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.7737021447183804, | |
| "grad_norm": 0.6030351519584656, | |
| "learning_rate": 9.261096701628718e-05, | |
| "loss": 0.2098, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.7777109641210663, | |
| "grad_norm": 0.7148683667182922, | |
| "learning_rate": 9.248844560805969e-05, | |
| "loss": 0.2085, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.7817197835237523, | |
| "grad_norm": 0.7136949300765991, | |
| "learning_rate": 9.236499917674606e-05, | |
| "loss": 0.1998, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.7857286029264382, | |
| "grad_norm": 0.7132196426391602, | |
| "learning_rate": 9.224063040991924e-05, | |
| "loss": 0.2082, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.789737422329124, | |
| "grad_norm": 0.5503913164138794, | |
| "learning_rate": 9.211534201523255e-05, | |
| "loss": 0.2238, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.79374624173181, | |
| "grad_norm": 0.7679104804992676, | |
| "learning_rate": 9.198913672036072e-05, | |
| "loss": 0.1971, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.7977550611344959, | |
| "grad_norm": 0.9002260565757751, | |
| "learning_rate": 9.186201727294036e-05, | |
| "loss": 0.1998, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.8017638805371818, | |
| "grad_norm": 0.5790923833847046, | |
| "learning_rate": 9.173398644051035e-05, | |
| "loss": 0.2113, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.8057726999398677, | |
| "grad_norm": 0.6548293828964233, | |
| "learning_rate": 9.160504701045145e-05, | |
| "loss": 0.1969, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.8097815193425536, | |
| "grad_norm": 0.6647776961326599, | |
| "learning_rate": 9.147520178992563e-05, | |
| "loss": 0.1979, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.8137903387452395, | |
| "grad_norm": 0.6299743056297302, | |
| "learning_rate": 9.134445360581503e-05, | |
| "loss": 0.206, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.8177991581479255, | |
| "grad_norm": 0.6221920847892761, | |
| "learning_rate": 9.121280530466027e-05, | |
| "loss": 0.1889, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.8218079775506113, | |
| "grad_norm": 0.6568713784217834, | |
| "learning_rate": 9.108025975259869e-05, | |
| "loss": 0.2094, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.8258167969532972, | |
| "grad_norm": 0.8146998882293701, | |
| "learning_rate": 9.094681983530173e-05, | |
| "loss": 0.2159, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.8298256163559832, | |
| "grad_norm": 0.6871969103813171, | |
| "learning_rate": 9.081248845791227e-05, | |
| "loss": 0.1827, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.8338344357586691, | |
| "grad_norm": 0.7771655321121216, | |
| "learning_rate": 9.067726854498127e-05, | |
| "loss": 0.1995, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.8378432551613549, | |
| "grad_norm": 0.8692470192909241, | |
| "learning_rate": 9.054116304040416e-05, | |
| "loss": 0.202, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.8418520745640409, | |
| "grad_norm": 0.5309840440750122, | |
| "learning_rate": 9.040417490735676e-05, | |
| "loss": 0.2159, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.8458608939667268, | |
| "grad_norm": 0.6645334362983704, | |
| "learning_rate": 9.026630712823072e-05, | |
| "loss": 0.2175, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.8498697133694127, | |
| "grad_norm": 0.613962709903717, | |
| "learning_rate": 9.012756270456861e-05, | |
| "loss": 0.2081, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.8538785327720986, | |
| "grad_norm": 0.6764446496963501, | |
| "learning_rate": 8.99879446569986e-05, | |
| "loss": 0.213, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.8578873521747845, | |
| "grad_norm": 0.6048487424850464, | |
| "learning_rate": 8.984745602516865e-05, | |
| "loss": 0.1879, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.8618961715774704, | |
| "grad_norm": 0.5892179608345032, | |
| "learning_rate": 8.970609986768035e-05, | |
| "loss": 0.1827, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.8659049909801564, | |
| "grad_norm": 0.7431573867797852, | |
| "learning_rate": 8.956387926202234e-05, | |
| "loss": 0.2055, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.8699138103828422, | |
| "grad_norm": 0.6326702833175659, | |
| "learning_rate": 8.942079730450335e-05, | |
| "loss": 0.206, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.8739226297855281, | |
| "grad_norm": 0.6847805976867676, | |
| "learning_rate": 8.927685711018467e-05, | |
| "loss": 0.2161, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.8779314491882141, | |
| "grad_norm": 0.636877179145813, | |
| "learning_rate": 8.913206181281248e-05, | |
| "loss": 0.2014, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.8819402685909, | |
| "grad_norm": 0.756361722946167, | |
| "learning_rate": 8.89864145647495e-05, | |
| "loss": 0.2063, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.8859490879935858, | |
| "grad_norm": 0.5681055784225464, | |
| "learning_rate": 8.883991853690646e-05, | |
| "loss": 0.1997, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.8899579073962718, | |
| "grad_norm": 0.6439403891563416, | |
| "learning_rate": 8.869257691867296e-05, | |
| "loss": 0.2029, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.8939667267989577, | |
| "grad_norm": 0.6258695721626282, | |
| "learning_rate": 8.854439291784813e-05, | |
| "loss": 0.2062, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.8979755462016437, | |
| "grad_norm": 0.6915255188941956, | |
| "learning_rate": 8.839536976057075e-05, | |
| "loss": 0.2008, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.9019843656043295, | |
| "grad_norm": 0.7225965857505798, | |
| "learning_rate": 8.824551069124898e-05, | |
| "loss": 0.1915, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.9059931850070154, | |
| "grad_norm": 0.784816563129425, | |
| "learning_rate": 8.809481897248983e-05, | |
| "loss": 0.1897, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.9100020044097014, | |
| "grad_norm": 0.7496415972709656, | |
| "learning_rate": 8.7943297885028e-05, | |
| "loss": 0.198, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.9140108238123873, | |
| "grad_norm": 0.6198856830596924, | |
| "learning_rate": 8.779095072765453e-05, | |
| "loss": 0.2055, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.9180196432150731, | |
| "grad_norm": 0.6876329183578491, | |
| "learning_rate": 8.763778081714498e-05, | |
| "loss": 0.1969, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.9220284626177591, | |
| "grad_norm": 0.7026522159576416, | |
| "learning_rate": 8.748379148818722e-05, | |
| "loss": 0.1811, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.926037282020445, | |
| "grad_norm": 0.6701675057411194, | |
| "learning_rate": 8.732898609330875e-05, | |
| "loss": 0.1902, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.9300461014231309, | |
| "grad_norm": 0.6713166236877441, | |
| "learning_rate": 8.717336800280386e-05, | |
| "loss": 0.2093, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.9340549208258168, | |
| "grad_norm": 0.7247043251991272, | |
| "learning_rate": 8.701694060466014e-05, | |
| "loss": 0.1916, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.9380637402285027, | |
| "grad_norm": 0.6550298929214478, | |
| "learning_rate": 8.685970730448475e-05, | |
| "loss": 0.2034, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.9420725596311886, | |
| "grad_norm": 0.7075363397598267, | |
| "learning_rate": 8.670167152543026e-05, | |
| "loss": 0.1823, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.9460813790338746, | |
| "grad_norm": 0.7122249603271484, | |
| "learning_rate": 8.654283670812017e-05, | |
| "loss": 0.1941, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.9500901984365604, | |
| "grad_norm": 0.6687220335006714, | |
| "learning_rate": 8.638320631057397e-05, | |
| "loss": 0.1933, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.9540990178392463, | |
| "grad_norm": 0.635455310344696, | |
| "learning_rate": 8.622278380813186e-05, | |
| "loss": 0.1967, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.9581078372419323, | |
| "grad_norm": 0.7970702052116394, | |
| "learning_rate": 8.606157269337906e-05, | |
| "loss": 0.1901, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.9621166566446182, | |
| "grad_norm": 0.7364137768745422, | |
| "learning_rate": 8.589957647606988e-05, | |
| "loss": 0.1945, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.966125476047304, | |
| "grad_norm": 0.7844299674034119, | |
| "learning_rate": 8.573679868305114e-05, | |
| "loss": 0.1821, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.97013429544999, | |
| "grad_norm": 0.8092600703239441, | |
| "learning_rate": 8.557324285818552e-05, | |
| "loss": 0.1934, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.9741431148526759, | |
| "grad_norm": 0.66877281665802, | |
| "learning_rate": 8.540891256227437e-05, | |
| "loss": 0.2021, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.9781519342553618, | |
| "grad_norm": 0.7711961269378662, | |
| "learning_rate": 8.524381137298014e-05, | |
| "loss": 0.1801, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.9821607536580477, | |
| "grad_norm": 0.6817704439163208, | |
| "learning_rate": 8.507794288474856e-05, | |
| "loss": 0.1928, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.9861695730607336, | |
| "grad_norm": 0.8401746153831482, | |
| "learning_rate": 8.491131070873038e-05, | |
| "loss": 0.1884, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.9901783924634195, | |
| "grad_norm": 0.7808353900909424, | |
| "learning_rate": 8.474391847270265e-05, | |
| "loss": 0.1966, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.9941872118661055, | |
| "grad_norm": 0.6367965340614319, | |
| "learning_rate": 8.45757698209899e-05, | |
| "loss": 0.1892, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.9981960312687913, | |
| "grad_norm": 0.7107962369918823, | |
| "learning_rate": 8.440686841438462e-05, | |
| "loss": 0.1961, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 1.002004409701343, | |
| "grad_norm": 0.593016505241394, | |
| "learning_rate": 8.423721793006775e-05, | |
| "loss": 0.1773, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.006013229104029, | |
| "grad_norm": 0.7551445364952087, | |
| "learning_rate": 8.406682206152845e-05, | |
| "loss": 0.1733, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 1.0100220485067148, | |
| "grad_norm": 0.8561877608299255, | |
| "learning_rate": 8.389568451848382e-05, | |
| "loss": 0.1594, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 1.0140308679094008, | |
| "grad_norm": 0.8644078969955444, | |
| "learning_rate": 8.372380902679804e-05, | |
| "loss": 0.179, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 1.0180396873120865, | |
| "grad_norm": 0.778167188167572, | |
| "learning_rate": 8.355119932840129e-05, | |
| "loss": 0.1616, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 1.0220485067147724, | |
| "grad_norm": 0.7065404057502747, | |
| "learning_rate": 8.337785918120837e-05, | |
| "loss": 0.1768, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.0260573261174584, | |
| "grad_norm": 0.8743630051612854, | |
| "learning_rate": 8.320379235903668e-05, | |
| "loss": 0.1687, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 1.0300661455201443, | |
| "grad_norm": 1.0897860527038574, | |
| "learning_rate": 8.302900265152427e-05, | |
| "loss": 0.1558, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 1.0340749649228302, | |
| "grad_norm": 0.7313379645347595, | |
| "learning_rate": 8.285349386404722e-05, | |
| "loss": 0.16, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 1.0380837843255162, | |
| "grad_norm": 0.8040058016777039, | |
| "learning_rate": 8.267726981763682e-05, | |
| "loss": 0.1571, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 1.0420926037282021, | |
| "grad_norm": 0.8637468218803406, | |
| "learning_rate": 8.250033434889637e-05, | |
| "loss": 0.16, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.046101423130888, | |
| "grad_norm": 0.7505359053611755, | |
| "learning_rate": 8.232269130991769e-05, | |
| "loss": 0.1597, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 1.0501102425335738, | |
| "grad_norm": 0.8430061340332031, | |
| "learning_rate": 8.214434456819725e-05, | |
| "loss": 0.1723, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 1.0541190619362597, | |
| "grad_norm": 0.66597580909729, | |
| "learning_rate": 8.196529800655188e-05, | |
| "loss": 0.1751, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 1.0581278813389456, | |
| "grad_norm": 0.8823577761650085, | |
| "learning_rate": 8.178555552303437e-05, | |
| "loss": 0.1701, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 1.0621367007416316, | |
| "grad_norm": 0.9401513338088989, | |
| "learning_rate": 8.160512103084851e-05, | |
| "loss": 0.1564, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.0661455201443175, | |
| "grad_norm": 0.7342818379402161, | |
| "learning_rate": 8.142399845826394e-05, | |
| "loss": 0.1507, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 1.0701543395470035, | |
| "grad_norm": 0.8487102389335632, | |
| "learning_rate": 8.12421917485306e-05, | |
| "loss": 0.1633, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 1.0741631589496894, | |
| "grad_norm": 0.8836720585823059, | |
| "learning_rate": 8.105970485979295e-05, | |
| "loss": 0.1682, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 1.0781719783523753, | |
| "grad_norm": 0.6858396530151367, | |
| "learning_rate": 8.087654176500366e-05, | |
| "loss": 0.1723, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 1.082180797755061, | |
| "grad_norm": 1.028981328010559, | |
| "learning_rate": 8.069270645183722e-05, | |
| "loss": 0.1555, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.086189617157747, | |
| "grad_norm": 0.9475600719451904, | |
| "learning_rate": 8.050820292260313e-05, | |
| "loss": 0.1591, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 1.090198436560433, | |
| "grad_norm": 0.683160126209259, | |
| "learning_rate": 8.032303519415874e-05, | |
| "loss": 0.1703, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 1.0942072559631189, | |
| "grad_norm": 0.8751930594444275, | |
| "learning_rate": 8.013720729782173e-05, | |
| "loss": 0.1489, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 1.0982160753658048, | |
| "grad_norm": 0.8032315373420715, | |
| "learning_rate": 7.995072327928243e-05, | |
| "loss": 0.1439, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 1.1022248947684907, | |
| "grad_norm": 0.7631738185882568, | |
| "learning_rate": 7.976358719851579e-05, | |
| "loss": 0.1676, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.1062337141711767, | |
| "grad_norm": 0.7207862734794617, | |
| "learning_rate": 7.957580312969283e-05, | |
| "loss": 0.1494, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 1.1102425335738626, | |
| "grad_norm": 0.6857604384422302, | |
| "learning_rate": 7.938737516109207e-05, | |
| "loss": 0.1594, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 1.1142513529765483, | |
| "grad_norm": 1.0340170860290527, | |
| "learning_rate": 7.919830739501043e-05, | |
| "loss": 0.1621, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 1.1182601723792343, | |
| "grad_norm": 0.7190383672714233, | |
| "learning_rate": 7.900860394767402e-05, | |
| "loss": 0.1638, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 1.1222689917819202, | |
| "grad_norm": 0.8485333919525146, | |
| "learning_rate": 7.881826894914846e-05, | |
| "loss": 0.1619, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.1262778111846061, | |
| "grad_norm": 0.8466002345085144, | |
| "learning_rate": 7.862730654324899e-05, | |
| "loss": 0.1448, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 1.130286630587292, | |
| "grad_norm": 0.7490071058273315, | |
| "learning_rate": 7.843572088745019e-05, | |
| "loss": 0.1649, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 1.134295449989978, | |
| "grad_norm": 0.7291231751441956, | |
| "learning_rate": 7.824351615279557e-05, | |
| "loss": 0.1604, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 1.138304269392664, | |
| "grad_norm": 1.1249662637710571, | |
| "learning_rate": 7.80506965238067e-05, | |
| "loss": 0.1383, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 1.1423130887953499, | |
| "grad_norm": 0.8020785450935364, | |
| "learning_rate": 7.785726619839212e-05, | |
| "loss": 0.1565, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.1463219081980356, | |
| "grad_norm": 0.9652583599090576, | |
| "learning_rate": 7.766322938775589e-05, | |
| "loss": 0.1513, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 1.1503307276007215, | |
| "grad_norm": 0.8806086182594299, | |
| "learning_rate": 7.746859031630605e-05, | |
| "loss": 0.1607, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 1.1543395470034075, | |
| "grad_norm": 0.9319799542427063, | |
| "learning_rate": 7.72733532215625e-05, | |
| "loss": 0.1588, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 1.1583483664060934, | |
| "grad_norm": 0.9107722640037537, | |
| "learning_rate": 7.707752235406485e-05, | |
| "loss": 0.1445, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 1.1623571858087793, | |
| "grad_norm": 0.9413526654243469, | |
| "learning_rate": 7.688110197727975e-05, | |
| "loss": 0.1589, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.1663660052114653, | |
| "grad_norm": 0.9594728350639343, | |
| "learning_rate": 7.668409636750828e-05, | |
| "loss": 0.1584, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 1.1703748246141512, | |
| "grad_norm": 0.7484379410743713, | |
| "learning_rate": 7.648650981379264e-05, | |
| "loss": 0.1582, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 1.174383644016837, | |
| "grad_norm": 0.840965747833252, | |
| "learning_rate": 7.628834661782288e-05, | |
| "loss": 0.1563, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 1.1783924634195229, | |
| "grad_norm": 0.7504467368125916, | |
| "learning_rate": 7.608961109384321e-05, | |
| "loss": 0.145, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 1.1824012828222088, | |
| "grad_norm": 1.0212056636810303, | |
| "learning_rate": 7.589030756855813e-05, | |
| "loss": 0.1562, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.1864101022248947, | |
| "grad_norm": 0.9360294342041016, | |
| "learning_rate": 7.569044038103813e-05, | |
| "loss": 0.156, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 1.1904189216275807, | |
| "grad_norm": 0.649131178855896, | |
| "learning_rate": 7.549001388262535e-05, | |
| "loss": 0.1713, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 1.1944277410302666, | |
| "grad_norm": 1.106505274772644, | |
| "learning_rate": 7.528903243683874e-05, | |
| "loss": 0.1475, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 1.1984365604329525, | |
| "grad_norm": 0.8083673119544983, | |
| "learning_rate": 7.508750041927914e-05, | |
| "loss": 0.1512, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 1.2024453798356385, | |
| "grad_norm": 0.7395840287208557, | |
| "learning_rate": 7.488542221753394e-05, | |
| "loss": 0.1481, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.2064541992383244, | |
| "grad_norm": 0.923462986946106, | |
| "learning_rate": 7.46828022310816e-05, | |
| "loss": 0.1537, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 1.2104630186410101, | |
| "grad_norm": 0.8510660529136658, | |
| "learning_rate": 7.44796448711959e-05, | |
| "loss": 0.1525, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 1.214471838043696, | |
| "grad_norm": 0.881767749786377, | |
| "learning_rate": 7.427595456084981e-05, | |
| "loss": 0.1641, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 1.218480657446382, | |
| "grad_norm": 0.8366743326187134, | |
| "learning_rate": 7.407173573461934e-05, | |
| "loss": 0.1502, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 1.222489476849068, | |
| "grad_norm": 0.8755321502685547, | |
| "learning_rate": 7.386699283858683e-05, | |
| "loss": 0.1495, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.2264982962517539, | |
| "grad_norm": 0.841222882270813, | |
| "learning_rate": 7.366173033024428e-05, | |
| "loss": 0.1423, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 1.2305071156544398, | |
| "grad_norm": 0.8285235166549683, | |
| "learning_rate": 7.345595267839621e-05, | |
| "loss": 0.1632, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 1.2345159350571258, | |
| "grad_norm": 0.764156699180603, | |
| "learning_rate": 7.324966436306246e-05, | |
| "loss": 0.1466, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 1.2385247544598115, | |
| "grad_norm": 1.1134533882141113, | |
| "learning_rate": 7.30428698753806e-05, | |
| "loss": 0.1393, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 1.2425335738624974, | |
| "grad_norm": 0.8127875328063965, | |
| "learning_rate": 7.283557371750813e-05, | |
| "loss": 0.1597, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.2465423932651833, | |
| "grad_norm": 0.8257074356079102, | |
| "learning_rate": 7.262778040252455e-05, | |
| "loss": 0.1659, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 1.2505512126678693, | |
| "grad_norm": 0.7807098031044006, | |
| "learning_rate": 7.2419494454333e-05, | |
| "loss": 0.1476, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 1.2545600320705552, | |
| "grad_norm": 0.7114003300666809, | |
| "learning_rate": 7.221072040756188e-05, | |
| "loss": 0.1467, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 1.2585688514732412, | |
| "grad_norm": 0.7870392203330994, | |
| "learning_rate": 7.2001462807466e-05, | |
| "loss": 0.1471, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 1.262577670875927, | |
| "grad_norm": 0.6909427046775818, | |
| "learning_rate": 7.179172620982774e-05, | |
| "loss": 0.1575, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.266586490278613, | |
| "grad_norm": 0.8754594922065735, | |
| "learning_rate": 7.158151518085776e-05, | |
| "loss": 0.155, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 1.270595309681299, | |
| "grad_norm": 0.7454276084899902, | |
| "learning_rate": 7.137083429709573e-05, | |
| "loss": 0.1431, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 1.274604129083985, | |
| "grad_norm": 0.9142866134643555, | |
| "learning_rate": 7.115968814531052e-05, | |
| "loss": 0.1342, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 1.2786129484866706, | |
| "grad_norm": 0.8666753768920898, | |
| "learning_rate": 7.09480813224005e-05, | |
| "loss": 0.142, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 1.2826217678893566, | |
| "grad_norm": 0.8461101651191711, | |
| "learning_rate": 7.073601843529333e-05, | |
| "loss": 0.1396, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.2866305872920425, | |
| "grad_norm": 0.8602980375289917, | |
| "learning_rate": 7.052350410084574e-05, | |
| "loss": 0.1435, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 1.2906394066947284, | |
| "grad_norm": 1.0527535676956177, | |
| "learning_rate": 7.031054294574303e-05, | |
| "loss": 0.1474, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 1.2946482260974144, | |
| "grad_norm": 0.84455806016922, | |
| "learning_rate": 7.009713960639826e-05, | |
| "loss": 0.1565, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 1.2986570455001, | |
| "grad_norm": 0.7223050594329834, | |
| "learning_rate": 6.98832987288514e-05, | |
| "loss": 0.1482, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 1.302665864902786, | |
| "grad_norm": 0.8750767111778259, | |
| "learning_rate": 6.966902496866807e-05, | |
| "loss": 0.1611, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.306674684305472, | |
| "grad_norm": 0.7444009184837341, | |
| "learning_rate": 6.945432299083834e-05, | |
| "loss": 0.1647, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 1.310683503708158, | |
| "grad_norm": 1.013881802558899, | |
| "learning_rate": 6.9239197469675e-05, | |
| "loss": 0.1412, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 1.3146923231108438, | |
| "grad_norm": 0.8479213118553162, | |
| "learning_rate": 6.902365308871193e-05, | |
| "loss": 0.1369, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 1.3187011425135298, | |
| "grad_norm": 0.8772777318954468, | |
| "learning_rate": 6.880769454060201e-05, | |
| "loss": 0.1501, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 1.3227099619162157, | |
| "grad_norm": 0.8388547301292419, | |
| "learning_rate": 6.859132652701514e-05, | |
| "loss": 0.1402, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.3267187813189016, | |
| "grad_norm": 0.8197916746139526, | |
| "learning_rate": 6.837455375853561e-05, | |
| "loss": 0.1351, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 1.3307276007215876, | |
| "grad_norm": 0.9061885476112366, | |
| "learning_rate": 6.815738095455984e-05, | |
| "loss": 0.139, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 1.3347364201242735, | |
| "grad_norm": 0.721653938293457, | |
| "learning_rate": 6.793981284319339e-05, | |
| "loss": 0.1556, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 1.3387452395269592, | |
| "grad_norm": 0.9494278430938721, | |
| "learning_rate": 6.772185416114814e-05, | |
| "loss": 0.1423, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 1.3427540589296452, | |
| "grad_norm": 0.8513092994689941, | |
| "learning_rate": 6.750350965363919e-05, | |
| "loss": 0.1393, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.346762878332331, | |
| "grad_norm": 0.8258860111236572, | |
| "learning_rate": 6.728478407428151e-05, | |
| "loss": 0.146, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 1.350771697735017, | |
| "grad_norm": 0.8146616220474243, | |
| "learning_rate": 6.706568218498639e-05, | |
| "loss": 0.148, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 1.354780517137703, | |
| "grad_norm": 0.9726580381393433, | |
| "learning_rate": 6.684620875585787e-05, | |
| "loss": 0.1404, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 1.358789336540389, | |
| "grad_norm": 1.0220385789871216, | |
| "learning_rate": 6.662636856508887e-05, | |
| "loss": 0.1504, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 1.3627981559430746, | |
| "grad_norm": 0.9221115708351135, | |
| "learning_rate": 6.640616639885708e-05, | |
| "loss": 0.1407, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.3668069753457606, | |
| "grad_norm": 0.9321884512901306, | |
| "learning_rate": 6.618560705122086e-05, | |
| "loss": 0.1286, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 1.3708157947484465, | |
| "grad_norm": 0.8789135217666626, | |
| "learning_rate": 6.596469532401483e-05, | |
| "loss": 0.1478, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 1.3748246141511324, | |
| "grad_norm": 0.8220512270927429, | |
| "learning_rate": 6.574343602674528e-05, | |
| "loss": 0.1439, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 1.3788334335538184, | |
| "grad_norm": 1.0369560718536377, | |
| "learning_rate": 6.552183397648555e-05, | |
| "loss": 0.1323, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 1.3828422529565043, | |
| "grad_norm": 1.0133991241455078, | |
| "learning_rate": 6.529989399777109e-05, | |
| "loss": 0.1472, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.3868510723591903, | |
| "grad_norm": 0.9306389093399048, | |
| "learning_rate": 6.507762092249448e-05, | |
| "loss": 0.1446, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 1.3908598917618762, | |
| "grad_norm": 1.021039366722107, | |
| "learning_rate": 6.485501958980016e-05, | |
| "loss": 0.1341, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 1.3948687111645621, | |
| "grad_norm": 0.7612369656562805, | |
| "learning_rate": 6.463209484597913e-05, | |
| "loss": 0.1437, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 1.398877530567248, | |
| "grad_norm": 0.7720378041267395, | |
| "learning_rate": 6.440885154436344e-05, | |
| "loss": 0.1184, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 1.4028863499699338, | |
| "grad_norm": 0.9269343614578247, | |
| "learning_rate": 6.418529454522051e-05, | |
| "loss": 0.1474, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.4068951693726197, | |
| "grad_norm": 0.8597378730773926, | |
| "learning_rate": 6.396142871564731e-05, | |
| "loss": 0.1395, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 1.4109039887753057, | |
| "grad_norm": 0.9362756013870239, | |
| "learning_rate": 6.373725892946443e-05, | |
| "loss": 0.1476, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 1.4149128081779916, | |
| "grad_norm": 0.8636417388916016, | |
| "learning_rate": 6.351279006710994e-05, | |
| "loss": 0.1333, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 1.4189216275806775, | |
| "grad_norm": 0.9320933818817139, | |
| "learning_rate": 6.328802701553313e-05, | |
| "loss": 0.1464, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 1.4229304469833635, | |
| "grad_norm": 1.1692008972167969, | |
| "learning_rate": 6.306297466808818e-05, | |
| "loss": 0.1515, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.4269392663860492, | |
| "grad_norm": 0.7800849676132202, | |
| "learning_rate": 6.283763792442751e-05, | |
| "loss": 0.1414, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 1.4309480857887351, | |
| "grad_norm": 1.0798330307006836, | |
| "learning_rate": 6.261202169039526e-05, | |
| "loss": 0.1478, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 1.434956905191421, | |
| "grad_norm": 0.8681895136833191, | |
| "learning_rate": 6.23861308779203e-05, | |
| "loss": 0.1413, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 1.438965724594107, | |
| "grad_norm": 1.3371766805648804, | |
| "learning_rate": 6.21599704049095e-05, | |
| "loss": 0.132, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 1.442974543996793, | |
| "grad_norm": 0.923513650894165, | |
| "learning_rate": 6.19335451951405e-05, | |
| "loss": 0.1435, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.4469833633994789, | |
| "grad_norm": 0.9107206463813782, | |
| "learning_rate": 6.170686017815456e-05, | |
| "loss": 0.1219, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 1.4509921828021648, | |
| "grad_norm": 0.9753092527389526, | |
| "learning_rate": 6.147992028914926e-05, | |
| "loss": 0.1426, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 1.4550010022048507, | |
| "grad_norm": 0.9150570631027222, | |
| "learning_rate": 6.125273046887106e-05, | |
| "loss": 0.1342, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 1.4590098216075367, | |
| "grad_norm": 1.0572060346603394, | |
| "learning_rate": 6.10252956635077e-05, | |
| "loss": 0.1274, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 1.4630186410102226, | |
| "grad_norm": 0.7989734411239624, | |
| "learning_rate": 6.079762082458049e-05, | |
| "loss": 0.1385, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.4670274604129083, | |
| "grad_norm": 0.8875731229782104, | |
| "learning_rate": 6.056971090883665e-05, | |
| "loss": 0.1413, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 1.4710362798155943, | |
| "grad_norm": 0.9534810185432434, | |
| "learning_rate": 6.0341570878141184e-05, | |
| "loss": 0.1267, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 1.4750450992182802, | |
| "grad_norm": 0.7729069590568542, | |
| "learning_rate": 6.0113205699369056e-05, | |
| "loss": 0.1469, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 1.4790539186209661, | |
| "grad_norm": 0.6528967022895813, | |
| "learning_rate": 5.988462034429692e-05, | |
| "loss": 0.1314, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 1.483062738023652, | |
| "grad_norm": 1.0471932888031006, | |
| "learning_rate": 5.965581978949494e-05, | |
| "loss": 0.1294, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.487071557426338, | |
| "grad_norm": 0.8370137810707092, | |
| "learning_rate": 5.942680901621842e-05, | |
| "loss": 0.1507, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 1.4910803768290237, | |
| "grad_norm": 0.7025067210197449, | |
| "learning_rate": 5.9197593010299377e-05, | |
| "loss": 0.1386, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 1.4950891962317097, | |
| "grad_norm": 0.9664121866226196, | |
| "learning_rate": 5.8968176762037985e-05, | |
| "loss": 0.145, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 1.4990980156343956, | |
| "grad_norm": 0.8898931741714478, | |
| "learning_rate": 5.87385652660939e-05, | |
| "loss": 0.1386, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 1.5031068350370815, | |
| "grad_norm": 0.750616192817688, | |
| "learning_rate": 5.850876352137759e-05, | |
| "loss": 0.153, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.5071156544397675, | |
| "grad_norm": 1.0957409143447876, | |
| "learning_rate": 5.827877653094144e-05, | |
| "loss": 0.1329, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 1.5111244738424534, | |
| "grad_norm": 0.8789597749710083, | |
| "learning_rate": 5.8048609301870816e-05, | |
| "loss": 0.1329, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 1.5151332932451393, | |
| "grad_norm": 0.7944477200508118, | |
| "learning_rate": 5.781826684517515e-05, | |
| "loss": 0.1256, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 1.5191421126478253, | |
| "grad_norm": 0.8657981753349304, | |
| "learning_rate": 5.758775417567878e-05, | |
| "loss": 0.1266, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 1.5231509320505112, | |
| "grad_norm": 0.8267760276794434, | |
| "learning_rate": 5.73570763119117e-05, | |
| "loss": 0.1269, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.5271597514531972, | |
| "grad_norm": 0.9449699521064758, | |
| "learning_rate": 5.7126238276000474e-05, | |
| "loss": 0.1331, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 1.531168570855883, | |
| "grad_norm": 1.0582398176193237, | |
| "learning_rate": 5.689524509355873e-05, | |
| "loss": 0.1277, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 1.5351773902585688, | |
| "grad_norm": 0.8139535784721375, | |
| "learning_rate": 5.6664101793577865e-05, | |
| "loss": 0.1275, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 1.5391862096612547, | |
| "grad_norm": 0.7074098587036133, | |
| "learning_rate": 5.643281340831745e-05, | |
| "loss": 0.1307, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 1.5431950290639407, | |
| "grad_norm": 0.858897864818573, | |
| "learning_rate": 5.6201384973195825e-05, | |
| "loss": 0.1296, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.5472038484666266, | |
| "grad_norm": 0.984902560710907, | |
| "learning_rate": 5.596982152668029e-05, | |
| "loss": 0.1315, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 1.5512126678693123, | |
| "grad_norm": 0.9450563192367554, | |
| "learning_rate": 5.5738128110177523e-05, | |
| "loss": 0.1275, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 1.5552214872719983, | |
| "grad_norm": 1.13248610496521, | |
| "learning_rate": 5.550630976792385e-05, | |
| "loss": 0.1364, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 1.5592303066746842, | |
| "grad_norm": 0.9023851752281189, | |
| "learning_rate": 5.5274371546875304e-05, | |
| "loss": 0.1262, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 1.5632391260773701, | |
| "grad_norm": 0.9542123079299927, | |
| "learning_rate": 5.5042318496597876e-05, | |
| "loss": 0.1398, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.567247945480056, | |
| "grad_norm": 0.8645676374435425, | |
| "learning_rate": 5.4810155669157495e-05, | |
| "loss": 0.1356, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 1.571256764882742, | |
| "grad_norm": 0.8348353505134583, | |
| "learning_rate": 5.457788811901008e-05, | |
| "loss": 0.1431, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 1.575265584285428, | |
| "grad_norm": 0.8592683672904968, | |
| "learning_rate": 5.434552090289145e-05, | |
| "loss": 0.1243, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 1.579274403688114, | |
| "grad_norm": 0.9037445187568665, | |
| "learning_rate": 5.411305907970734e-05, | |
| "loss": 0.1201, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 1.5832832230907998, | |
| "grad_norm": 0.7110516428947449, | |
| "learning_rate": 5.3880507710423134e-05, | |
| "loss": 0.1331, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 1.5872920424934858, | |
| "grad_norm": 0.8847816586494446, | |
| "learning_rate": 5.3647871857953735e-05, | |
| "loss": 0.1224, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 1.5913008618961717, | |
| "grad_norm": 0.9340296983718872, | |
| "learning_rate": 5.341515658705339e-05, | |
| "loss": 0.1315, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 1.5953096812988576, | |
| "grad_norm": 0.9499775767326355, | |
| "learning_rate": 5.318236696420534e-05, | |
| "loss": 0.1338, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 1.5993185007015434, | |
| "grad_norm": 0.9325523972511292, | |
| "learning_rate": 5.294950805751158e-05, | |
| "loss": 0.1277, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 1.6033273201042293, | |
| "grad_norm": 0.9514039158821106, | |
| "learning_rate": 5.271658493658245e-05, | |
| "loss": 0.1287, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.6073361395069152, | |
| "grad_norm": 1.022368311882019, | |
| "learning_rate": 5.248360267242637e-05, | |
| "loss": 0.1363, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 1.6113449589096012, | |
| "grad_norm": 0.8409161567687988, | |
| "learning_rate": 5.2250566337339326e-05, | |
| "loss": 0.1341, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 1.6153537783122869, | |
| "grad_norm": 1.0613347291946411, | |
| "learning_rate": 5.201748100479452e-05, | |
| "loss": 0.1329, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 1.6193625977149728, | |
| "grad_norm": 0.8661359548568726, | |
| "learning_rate": 5.178435174933188e-05, | |
| "loss": 0.119, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 1.6233714171176588, | |
| "grad_norm": 0.9642584919929504, | |
| "learning_rate": 5.15511836464476e-05, | |
| "loss": 0.1279, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 1.6273802365203447, | |
| "grad_norm": 0.9616632461547852, | |
| "learning_rate": 5.131798177248357e-05, | |
| "loss": 0.1294, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 1.6313890559230306, | |
| "grad_norm": 1.1416373252868652, | |
| "learning_rate": 5.108475120451702e-05, | |
| "loss": 0.1394, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 1.6353978753257166, | |
| "grad_norm": 0.9488154649734497, | |
| "learning_rate": 5.085149702024977e-05, | |
| "loss": 0.1222, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 1.6394066947284025, | |
| "grad_norm": 1.030707597732544, | |
| "learning_rate": 5.061822429789788e-05, | |
| "loss": 0.1304, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 1.6434155141310884, | |
| "grad_norm": 1.0803980827331543, | |
| "learning_rate": 5.038493811608095e-05, | |
| "loss": 0.1326, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.6474243335337744, | |
| "grad_norm": 0.8971238136291504, | |
| "learning_rate": 5.015164355371164e-05, | |
| "loss": 0.1163, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 1.6514331529364603, | |
| "grad_norm": 0.7943403124809265, | |
| "learning_rate": 4.9918345689885035e-05, | |
| "loss": 0.1268, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 1.6554419723391463, | |
| "grad_norm": 1.109113097190857, | |
| "learning_rate": 4.968504960376815e-05, | |
| "loss": 0.1289, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 1.6594507917418322, | |
| "grad_norm": 1.1698325872421265, | |
| "learning_rate": 4.945176037448923e-05, | |
| "loss": 0.1138, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 1.663459611144518, | |
| "grad_norm": 1.1132344007492065, | |
| "learning_rate": 4.9218483081027284e-05, | |
| "loss": 0.1244, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 1.6674684305472038, | |
| "grad_norm": 0.8619892001152039, | |
| "learning_rate": 4.8985222802101475e-05, | |
| "loss": 0.1296, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 1.6714772499498898, | |
| "grad_norm": 1.010392427444458, | |
| "learning_rate": 4.875198461606047e-05, | |
| "loss": 0.1307, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 1.6754860693525757, | |
| "grad_norm": 0.8872926831245422, | |
| "learning_rate": 4.851877360077203e-05, | |
| "loss": 0.1241, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 1.6794948887552614, | |
| "grad_norm": 1.035994052886963, | |
| "learning_rate": 4.828559483351233e-05, | |
| "loss": 0.112, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 1.6835037081579474, | |
| "grad_norm": 1.1755554676055908, | |
| "learning_rate": 4.805245339085548e-05, | |
| "loss": 0.1198, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.6875125275606333, | |
| "grad_norm": 1.008541226387024, | |
| "learning_rate": 4.781935434856299e-05, | |
| "loss": 0.1348, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 1.6915213469633192, | |
| "grad_norm": 1.0429742336273193, | |
| "learning_rate": 4.758630278147327e-05, | |
| "loss": 0.1205, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 1.6955301663660052, | |
| "grad_norm": 0.8936703205108643, | |
| "learning_rate": 4.735330376339111e-05, | |
| "loss": 0.119, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 1.6995389857686911, | |
| "grad_norm": 0.9886868596076965, | |
| "learning_rate": 4.712036236697728e-05, | |
| "loss": 0.1084, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 1.703547805171377, | |
| "grad_norm": 0.9149814248085022, | |
| "learning_rate": 4.6887483663638084e-05, | |
| "loss": 0.1303, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 1.707556624574063, | |
| "grad_norm": 0.9031015634536743, | |
| "learning_rate": 4.665467272341484e-05, | |
| "loss": 0.109, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 1.711565443976749, | |
| "grad_norm": 1.041288137435913, | |
| "learning_rate": 4.6421934614873654e-05, | |
| "loss": 0.1246, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 1.7155742633794349, | |
| "grad_norm": 0.9827173352241516, | |
| "learning_rate": 4.6189274404994984e-05, | |
| "loss": 0.1252, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 1.7195830827821208, | |
| "grad_norm": 1.0415915250778198, | |
| "learning_rate": 4.595669715906333e-05, | |
| "loss": 0.1122, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 1.7235919021848067, | |
| "grad_norm": 1.0126681327819824, | |
| "learning_rate": 4.572420794055698e-05, | |
| "loss": 0.1213, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.7276007215874924, | |
| "grad_norm": 0.9639745354652405, | |
| "learning_rate": 4.549181181103778e-05, | |
| "loss": 0.1279, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 1.7316095409901784, | |
| "grad_norm": 1.1144078969955444, | |
| "learning_rate": 4.5259513830040875e-05, | |
| "loss": 0.1189, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 1.7356183603928643, | |
| "grad_norm": 1.139124870300293, | |
| "learning_rate": 4.502731905496463e-05, | |
| "loss": 0.1112, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 1.7396271797955503, | |
| "grad_norm": 1.0518343448638916, | |
| "learning_rate": 4.479523254096055e-05, | |
| "loss": 0.1321, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 1.743635999198236, | |
| "grad_norm": 0.7808403968811035, | |
| "learning_rate": 4.456325934082302e-05, | |
| "loss": 0.1391, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 1.747644818600922, | |
| "grad_norm": 1.047770619392395, | |
| "learning_rate": 4.433140450487962e-05, | |
| "loss": 0.1302, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 1.7516536380036078, | |
| "grad_norm": 0.9837223291397095, | |
| "learning_rate": 4.409967308088091e-05, | |
| "loss": 0.1193, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 1.7556624574062938, | |
| "grad_norm": 1.0093597173690796, | |
| "learning_rate": 4.3868070113890626e-05, | |
| "loss": 0.1163, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 1.7596712768089797, | |
| "grad_norm": 1.1313358545303345, | |
| "learning_rate": 4.36366006461759e-05, | |
| "loss": 0.1274, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 1.7636800962116657, | |
| "grad_norm": 0.9579795598983765, | |
| "learning_rate": 4.340526971709735e-05, | |
| "loss": 0.1103, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.7676889156143516, | |
| "grad_norm": 1.0444706678390503, | |
| "learning_rate": 4.317408236299952e-05, | |
| "loss": 0.1121, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 1.7716977350170375, | |
| "grad_norm": 0.9483968019485474, | |
| "learning_rate": 4.2943043617101134e-05, | |
| "loss": 0.1086, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 1.7757065544197235, | |
| "grad_norm": 1.0954207181930542, | |
| "learning_rate": 4.2712158509385495e-05, | |
| "loss": 0.1166, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 1.7797153738224094, | |
| "grad_norm": 1.169009804725647, | |
| "learning_rate": 4.2481432066491114e-05, | |
| "loss": 0.1164, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 1.7837241932250953, | |
| "grad_norm": 0.9690777063369751, | |
| "learning_rate": 4.2250869311602124e-05, | |
| "loss": 0.1237, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 1.7877330126277813, | |
| "grad_norm": 1.0763111114501953, | |
| "learning_rate": 4.2020475264338966e-05, | |
| "loss": 0.1382, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 1.791741832030467, | |
| "grad_norm": 0.924728274345398, | |
| "learning_rate": 4.179025494064916e-05, | |
| "loss": 0.104, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 1.795750651433153, | |
| "grad_norm": 0.9748139977455139, | |
| "learning_rate": 4.156021335269806e-05, | |
| "loss": 0.1071, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 1.7997594708358389, | |
| "grad_norm": 1.1556870937347412, | |
| "learning_rate": 4.133035550875968e-05, | |
| "loss": 0.1137, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 1.8037682902385248, | |
| "grad_norm": 1.1552350521087646, | |
| "learning_rate": 4.110068641310775e-05, | |
| "loss": 0.1207, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.8077771096412105, | |
| "grad_norm": 1.115271806716919, | |
| "learning_rate": 4.0871211065906786e-05, | |
| "loss": 0.1205, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 1.8117859290438965, | |
| "grad_norm": 0.9051127433776855, | |
| "learning_rate": 4.0641934463103054e-05, | |
| "loss": 0.1123, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 1.8157947484465824, | |
| "grad_norm": 1.0964293479919434, | |
| "learning_rate": 4.0412861596316013e-05, | |
| "loss": 0.1092, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 1.8198035678492683, | |
| "grad_norm": 1.308677315711975, | |
| "learning_rate": 4.0183997452729534e-05, | |
| "loss": 0.1182, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 1.8238123872519543, | |
| "grad_norm": 0.9863505959510803, | |
| "learning_rate": 3.99553470149833e-05, | |
| "loss": 0.1138, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 1.8278212066546402, | |
| "grad_norm": 0.9477949142456055, | |
| "learning_rate": 3.9726915261064426e-05, | |
| "loss": 0.123, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 1.8318300260573261, | |
| "grad_norm": 1.130746841430664, | |
| "learning_rate": 3.9498707164198984e-05, | |
| "loss": 0.1096, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 1.835838845460012, | |
| "grad_norm": 1.0901241302490234, | |
| "learning_rate": 3.927072769274377e-05, | |
| "loss": 0.1062, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 1.839847664862698, | |
| "grad_norm": 0.79862380027771, | |
| "learning_rate": 3.904298181007817e-05, | |
| "loss": 0.1117, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 1.843856484265384, | |
| "grad_norm": 0.8396957516670227, | |
| "learning_rate": 3.881547447449606e-05, | |
| "loss": 0.1247, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.84786530366807, | |
| "grad_norm": 1.0613499879837036, | |
| "learning_rate": 3.858821063909782e-05, | |
| "loss": 0.1101, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 1.8518741230707558, | |
| "grad_norm": 1.147533655166626, | |
| "learning_rate": 3.8361195251682614e-05, | |
| "loss": 0.1141, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 1.8558829424734415, | |
| "grad_norm": 1.1135718822479248, | |
| "learning_rate": 3.8134433254640576e-05, | |
| "loss": 0.1266, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 1.8598917618761275, | |
| "grad_norm": 1.0798869132995605, | |
| "learning_rate": 3.790792958484522e-05, | |
| "loss": 0.1132, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 1.8639005812788134, | |
| "grad_norm": 0.9285503029823303, | |
| "learning_rate": 3.7681689173545984e-05, | |
| "loss": 0.1059, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 1.8679094006814991, | |
| "grad_norm": 1.1934738159179688, | |
| "learning_rate": 3.745571694626088e-05, | |
| "loss": 0.1013, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 1.871918220084185, | |
| "grad_norm": 1.0734087228775024, | |
| "learning_rate": 3.7230017822669204e-05, | |
| "loss": 0.1056, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 1.875927039486871, | |
| "grad_norm": 0.9423579573631287, | |
| "learning_rate": 3.700459671650452e-05, | |
| "loss": 0.1193, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 1.879935858889557, | |
| "grad_norm": 0.9041392803192139, | |
| "learning_rate": 3.677945853544755e-05, | |
| "loss": 0.1098, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 1.8839446782922429, | |
| "grad_norm": 1.1040509939193726, | |
| "learning_rate": 3.6554608181019465e-05, | |
| "loss": 0.1195, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 1.8879534976949288, | |
| "grad_norm": 1.2079628705978394, | |
| "learning_rate": 3.633005054847514e-05, | |
| "loss": 0.12, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 1.8919623170976148, | |
| "grad_norm": 0.9661321640014648, | |
| "learning_rate": 3.6105790526696445e-05, | |
| "loss": 0.1128, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 1.8959711365003007, | |
| "grad_norm": 1.2310171127319336, | |
| "learning_rate": 3.588183299808604e-05, | |
| "loss": 0.1165, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 1.8999799559029866, | |
| "grad_norm": 0.9907431602478027, | |
| "learning_rate": 3.565818283846089e-05, | |
| "loss": 0.1037, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 1.9039887753056726, | |
| "grad_norm": 0.9235789775848389, | |
| "learning_rate": 3.543484491694615e-05, | |
| "loss": 0.0974, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 1.9079975947083585, | |
| "grad_norm": 1.1032791137695312, | |
| "learning_rate": 3.521182409586925e-05, | |
| "loss": 0.1223, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 1.9120064141110444, | |
| "grad_norm": 1.138131856918335, | |
| "learning_rate": 3.4989125230653965e-05, | |
| "loss": 0.1085, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 1.9160152335137302, | |
| "grad_norm": 1.0244325399398804, | |
| "learning_rate": 3.476675316971466e-05, | |
| "loss": 0.0997, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 1.920024052916416, | |
| "grad_norm": 1.141847014427185, | |
| "learning_rate": 3.454471275435083e-05, | |
| "loss": 0.1054, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 1.924032872319102, | |
| "grad_norm": 0.9330345988273621, | |
| "learning_rate": 3.4323008818641696e-05, | |
| "loss": 0.1065, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.928041691721788, | |
| "grad_norm": 0.9627101421356201, | |
| "learning_rate": 3.410164618934082e-05, | |
| "loss": 0.0913, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 1.9320505111244737, | |
| "grad_norm": 0.9817176461219788, | |
| "learning_rate": 3.388062968577124e-05, | |
| "loss": 0.1243, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 1.9360593305271596, | |
| "grad_norm": 1.1931806802749634, | |
| "learning_rate": 3.3659964119720356e-05, | |
| "loss": 0.1068, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 1.9400681499298456, | |
| "grad_norm": 1.1554603576660156, | |
| "learning_rate": 3.3439654295335274e-05, | |
| "loss": 0.1116, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 1.9440769693325315, | |
| "grad_norm": 1.0284534692764282, | |
| "learning_rate": 3.321970500901819e-05, | |
| "loss": 0.1021, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 1.9480857887352174, | |
| "grad_norm": 0.9820400476455688, | |
| "learning_rate": 3.3000121049321956e-05, | |
| "loss": 0.093, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 1.9520946081379034, | |
| "grad_norm": 0.9649590849876404, | |
| "learning_rate": 3.2780907196845845e-05, | |
| "loss": 0.105, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 1.9561034275405893, | |
| "grad_norm": 1.1404318809509277, | |
| "learning_rate": 3.256206822413145e-05, | |
| "loss": 0.1028, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 1.9601122469432752, | |
| "grad_norm": 0.8916597366333008, | |
| "learning_rate": 3.234360889555884e-05, | |
| "loss": 0.114, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 1.9641210663459612, | |
| "grad_norm": 1.0824633836746216, | |
| "learning_rate": 3.2125533967242704e-05, | |
| "loss": 0.1047, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 1.9681298857486471, | |
| "grad_norm": 1.3187285661697388, | |
| "learning_rate": 3.190784818692897e-05, | |
| "loss": 0.1035, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 1.972138705151333, | |
| "grad_norm": 1.2455309629440308, | |
| "learning_rate": 3.169055629389132e-05, | |
| "loss": 0.1032, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 1.976147524554019, | |
| "grad_norm": 0.8298673629760742, | |
| "learning_rate": 3.147366301882805e-05, | |
| "loss": 0.1028, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 1.9801563439567047, | |
| "grad_norm": 1.0020873546600342, | |
| "learning_rate": 3.1257173083759086e-05, | |
| "loss": 0.1167, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 1.9841651633593906, | |
| "grad_norm": 1.1114490032196045, | |
| "learning_rate": 3.104109120192317e-05, | |
| "loss": 0.0998, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 1.9881739827620766, | |
| "grad_norm": 1.0216310024261475, | |
| "learning_rate": 3.082542207767523e-05, | |
| "loss": 0.1189, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 1.9921828021647625, | |
| "grad_norm": 1.2210197448730469, | |
| "learning_rate": 3.0610170406384045e-05, | |
| "loss": 0.1088, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 1.9961916215674482, | |
| "grad_norm": 1.0631357431411743, | |
| "learning_rate": 3.0395340874329837e-05, | |
| "loss": 0.1098, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 1.2607094049453735, | |
| "learning_rate": 3.0180938158602483e-05, | |
| "loss": 0.1112, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 2.004008819402686, | |
| "grad_norm": 1.0189579725265503, | |
| "learning_rate": 2.996696692699952e-05, | |
| "loss": 0.0646, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.008017638805372, | |
| "grad_norm": 1.3740028142929077, | |
| "learning_rate": 2.9753431837924545e-05, | |
| "loss": 0.083, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 2.012026458208058, | |
| "grad_norm": 1.5331498384475708, | |
| "learning_rate": 2.9540337540285868e-05, | |
| "loss": 0.0717, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 2.0160352776107437, | |
| "grad_norm": 1.4585199356079102, | |
| "learning_rate": 2.9327688673395236e-05, | |
| "loss": 0.071, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 2.0200440970134297, | |
| "grad_norm": 1.2551562786102295, | |
| "learning_rate": 2.911548986686683e-05, | |
| "loss": 0.0805, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 2.0240529164161156, | |
| "grad_norm": 0.9197141528129578, | |
| "learning_rate": 2.890374574051654e-05, | |
| "loss": 0.0747, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 2.0280617358188016, | |
| "grad_norm": 1.359002947807312, | |
| "learning_rate": 2.869246090426131e-05, | |
| "loss": 0.0746, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 2.0320705552214875, | |
| "grad_norm": 1.1969698667526245, | |
| "learning_rate": 2.8481639958018758e-05, | |
| "loss": 0.0703, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 2.036079374624173, | |
| "grad_norm": 1.179650902748108, | |
| "learning_rate": 2.827128749160715e-05, | |
| "loss": 0.0744, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 2.040088194026859, | |
| "grad_norm": 1.3521727323532104, | |
| "learning_rate": 2.8061408084645358e-05, | |
| "loss": 0.0712, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 2.044097013429545, | |
| "grad_norm": 1.1871998310089111, | |
| "learning_rate": 2.78520063064532e-05, | |
| "loss": 0.0617, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 2.048105832832231, | |
| "grad_norm": 1.5966202020645142, | |
| "learning_rate": 2.7643086715951964e-05, | |
| "loss": 0.0822, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 2.0521146522349167, | |
| "grad_norm": 1.5227017402648926, | |
| "learning_rate": 2.7434653861565175e-05, | |
| "loss": 0.0782, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 2.0561234716376027, | |
| "grad_norm": 1.2805331945419312, | |
| "learning_rate": 2.7226712281119448e-05, | |
| "loss": 0.065, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 2.0601322910402886, | |
| "grad_norm": 1.0091133117675781, | |
| "learning_rate": 2.701926650174592e-05, | |
| "loss": 0.0771, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 2.0641411104429745, | |
| "grad_norm": 1.114707589149475, | |
| "learning_rate": 2.6812321039781507e-05, | |
| "loss": 0.0796, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 2.0681499298456605, | |
| "grad_norm": 1.2408510446548462, | |
| "learning_rate": 2.6605880400670573e-05, | |
| "loss": 0.0624, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 2.0721587492483464, | |
| "grad_norm": 1.6427772045135498, | |
| "learning_rate": 2.639994907886697e-05, | |
| "loss": 0.0682, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 2.0761675686510324, | |
| "grad_norm": 1.5963749885559082, | |
| "learning_rate": 2.61945315577361e-05, | |
| "loss": 0.0594, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 2.0801763880537183, | |
| "grad_norm": 1.3586746454238892, | |
| "learning_rate": 2.5989632309457318e-05, | |
| "loss": 0.0764, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 2.0841852074564042, | |
| "grad_norm": 1.2391273975372314, | |
| "learning_rate": 2.5785255794926573e-05, | |
| "loss": 0.0554, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 2.08819402685909, | |
| "grad_norm": 1.079005241394043, | |
| "learning_rate": 2.558140646365929e-05, | |
| "loss": 0.0618, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 2.092202846261776, | |
| "grad_norm": 1.494195580482483, | |
| "learning_rate": 2.537808875369351e-05, | |
| "loss": 0.0745, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 2.0962116656644616, | |
| "grad_norm": 1.301558256149292, | |
| "learning_rate": 2.5175307091493255e-05, | |
| "loss": 0.0661, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 2.1002204850671475, | |
| "grad_norm": 1.9365872144699097, | |
| "learning_rate": 2.497306589185212e-05, | |
| "loss": 0.0726, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 2.1042293044698335, | |
| "grad_norm": 1.3354028463363647, | |
| "learning_rate": 2.4771369557797264e-05, | |
| "loss": 0.0742, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 2.1082381238725194, | |
| "grad_norm": 1.2166097164154053, | |
| "learning_rate": 2.4570222480493437e-05, | |
| "loss": 0.0763, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 2.1122469432752053, | |
| "grad_norm": 1.8834477663040161, | |
| "learning_rate": 2.4369629039147458e-05, | |
| "loss": 0.0657, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 2.1162557626778913, | |
| "grad_norm": 1.174580693244934, | |
| "learning_rate": 2.416959360091283e-05, | |
| "loss": 0.0725, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 2.120264582080577, | |
| "grad_norm": 1.636049747467041, | |
| "learning_rate": 2.397012052079469e-05, | |
| "loss": 0.0677, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 2.124273401483263, | |
| "grad_norm": 1.0291727781295776, | |
| "learning_rate": 2.3771214141554932e-05, | |
| "loss": 0.072, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 2.128282220885949, | |
| "grad_norm": 1.4774961471557617, | |
| "learning_rate": 2.3572878793617785e-05, | |
| "loss": 0.0626, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 2.132291040288635, | |
| "grad_norm": 1.3647609949111938, | |
| "learning_rate": 2.3375118794975436e-05, | |
| "loss": 0.0822, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 2.136299859691321, | |
| "grad_norm": 1.2686548233032227, | |
| "learning_rate": 2.3177938451093994e-05, | |
| "loss": 0.0654, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 2.140308679094007, | |
| "grad_norm": 1.0904122591018677, | |
| "learning_rate": 2.298134205481986e-05, | |
| "loss": 0.0788, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 2.144317498496693, | |
| "grad_norm": 1.5554347038269043, | |
| "learning_rate": 2.278533388628621e-05, | |
| "loss": 0.0618, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 2.148326317899379, | |
| "grad_norm": 1.4490818977355957, | |
| "learning_rate": 2.2589918212819787e-05, | |
| "loss": 0.0714, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 2.1523351373020647, | |
| "grad_norm": 1.056925892829895, | |
| "learning_rate": 2.2395099288848066e-05, | |
| "loss": 0.0787, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 2.1563439567047507, | |
| "grad_norm": 1.642364740371704, | |
| "learning_rate": 2.2200881355806565e-05, | |
| "loss": 0.0766, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 2.1603527761074366, | |
| "grad_norm": 1.0628970861434937, | |
| "learning_rate": 2.2007268642046476e-05, | |
| "loss": 0.0557, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 2.164361595510122, | |
| "grad_norm": 0.9546886086463928, | |
| "learning_rate": 2.181426536274277e-05, | |
| "loss": 0.0591, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 2.168370414912808, | |
| "grad_norm": 1.6997793912887573, | |
| "learning_rate": 2.1621875719802258e-05, | |
| "loss": 0.069, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 2.172379234315494, | |
| "grad_norm": 1.2331558465957642, | |
| "learning_rate": 2.1430103901772135e-05, | |
| "loss": 0.0765, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 2.17638805371818, | |
| "grad_norm": 1.4007397890090942, | |
| "learning_rate": 2.1238954083748887e-05, | |
| "loss": 0.0759, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 2.180396873120866, | |
| "grad_norm": 1.1436303853988647, | |
| "learning_rate": 2.1048430427287304e-05, | |
| "loss": 0.0681, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 2.1844056925235518, | |
| "grad_norm": 0.89713454246521, | |
| "learning_rate": 2.085853708030991e-05, | |
| "loss": 0.0701, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 2.1884145119262377, | |
| "grad_norm": 1.5042600631713867, | |
| "learning_rate": 2.0669278177016664e-05, | |
| "loss": 0.0654, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 2.1924233313289236, | |
| "grad_norm": 1.211078405380249, | |
| "learning_rate": 2.0480657837794963e-05, | |
| "loss": 0.069, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 2.1964321507316096, | |
| "grad_norm": 0.9574674367904663, | |
| "learning_rate": 2.0292680169129828e-05, | |
| "loss": 0.0623, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 2.2004409701342955, | |
| "grad_norm": 1.1876091957092285, | |
| "learning_rate": 2.0105349263514728e-05, | |
| "loss": 0.0637, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 2.2044497895369815, | |
| "grad_norm": 1.3990014791488647, | |
| "learning_rate": 1.991866919936226e-05, | |
| "loss": 0.0659, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.2084586089396674, | |
| "grad_norm": 0.9827179312705994, | |
| "learning_rate": 1.9732644040915427e-05, | |
| "loss": 0.0603, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 2.2124674283423533, | |
| "grad_norm": 1.0140836238861084, | |
| "learning_rate": 1.9547277838159222e-05, | |
| "loss": 0.0574, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 2.2164762477450393, | |
| "grad_norm": 1.0066441297531128, | |
| "learning_rate": 1.936257462673238e-05, | |
| "loss": 0.0693, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 2.220485067147725, | |
| "grad_norm": 1.0355478525161743, | |
| "learning_rate": 1.9178538427839537e-05, | |
| "loss": 0.0623, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 2.2244938865504107, | |
| "grad_norm": 1.0241667032241821, | |
| "learning_rate": 1.8995173248163716e-05, | |
| "loss": 0.0575, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 2.2285027059530966, | |
| "grad_norm": 1.0945931673049927, | |
| "learning_rate": 1.8812483079779008e-05, | |
| "loss": 0.0617, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 2.2325115253557826, | |
| "grad_norm": 1.2159384489059448, | |
| "learning_rate": 1.863047190006375e-05, | |
| "loss": 0.0764, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 2.2365203447584685, | |
| "grad_norm": 1.3158025741577148, | |
| "learning_rate": 1.8449143671613962e-05, | |
| "loss": 0.0663, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 2.2405291641611544, | |
| "grad_norm": 1.1542671918869019, | |
| "learning_rate": 1.8268502342156918e-05, | |
| "loss": 0.064, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 2.2445379835638404, | |
| "grad_norm": 1.233852744102478, | |
| "learning_rate": 1.808855184446535e-05, | |
| "loss": 0.0708, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 2.2485468029665263, | |
| "grad_norm": 1.0461921691894531, | |
| "learning_rate": 1.7909296096271783e-05, | |
| "loss": 0.0611, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 2.2525556223692123, | |
| "grad_norm": 1.2904634475708008, | |
| "learning_rate": 1.773073900018321e-05, | |
| "loss": 0.0598, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 2.256564441771898, | |
| "grad_norm": 1.213394284248352, | |
| "learning_rate": 1.7552884443596168e-05, | |
| "loss": 0.0608, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 2.260573261174584, | |
| "grad_norm": 1.203125, | |
| "learning_rate": 1.73757362986121e-05, | |
| "loss": 0.0638, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 2.26458208057727, | |
| "grad_norm": 1.0718966722488403, | |
| "learning_rate": 1.7199298421952987e-05, | |
| "loss": 0.0628, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 2.268590899979956, | |
| "grad_norm": 1.5006955862045288, | |
| "learning_rate": 1.7023574654877482e-05, | |
| "loss": 0.0591, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 2.272599719382642, | |
| "grad_norm": 1.0694504976272583, | |
| "learning_rate": 1.684856882309729e-05, | |
| "loss": 0.0699, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 2.276608538785328, | |
| "grad_norm": 1.068630337715149, | |
| "learning_rate": 1.6674284736693713e-05, | |
| "loss": 0.0599, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 2.280617358188014, | |
| "grad_norm": 0.9531617760658264, | |
| "learning_rate": 1.6500726190034888e-05, | |
| "loss": 0.0595, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 2.2846261775906997, | |
| "grad_norm": 1.1300429105758667, | |
| "learning_rate": 1.6327896961693086e-05, | |
| "loss": 0.0704, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 2.2886349969933857, | |
| "grad_norm": 1.248582124710083, | |
| "learning_rate": 1.6155800814362475e-05, | |
| "loss": 0.0591, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 2.292643816396071, | |
| "grad_norm": 1.2277759313583374, | |
| "learning_rate": 1.598444149477718e-05, | |
| "loss": 0.0644, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 2.296652635798757, | |
| "grad_norm": 1.4432833194732666, | |
| "learning_rate": 1.5813822733629745e-05, | |
| "loss": 0.0715, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 2.300661455201443, | |
| "grad_norm": 1.1492823362350464, | |
| "learning_rate": 1.5643948245489836e-05, | |
| "loss": 0.0525, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 2.304670274604129, | |
| "grad_norm": 1.4520362615585327, | |
| "learning_rate": 1.547482172872351e-05, | |
| "loss": 0.0536, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 2.308679094006815, | |
| "grad_norm": 1.236132025718689, | |
| "learning_rate": 1.530644686541258e-05, | |
| "loss": 0.0584, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 2.312687913409501, | |
| "grad_norm": 1.4177806377410889, | |
| "learning_rate": 1.5138827321274435e-05, | |
| "loss": 0.0597, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 2.316696732812187, | |
| "grad_norm": 1.0297455787658691, | |
| "learning_rate": 1.497196674558235e-05, | |
| "loss": 0.0627, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 2.3207055522148727, | |
| "grad_norm": 1.1963504552841187, | |
| "learning_rate": 1.4805868771085946e-05, | |
| "loss": 0.0627, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 2.3247143716175587, | |
| "grad_norm": 1.5588128566741943, | |
| "learning_rate": 1.4640537013932121e-05, | |
| "loss": 0.0609, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 2.3287231910202446, | |
| "grad_norm": 1.5374112129211426, | |
| "learning_rate": 1.4475975073586345e-05, | |
| "loss": 0.0716, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 2.3327320104229305, | |
| "grad_norm": 1.6463807821273804, | |
| "learning_rate": 1.431218653275424e-05, | |
| "loss": 0.0737, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 2.3367408298256165, | |
| "grad_norm": 1.3641928434371948, | |
| "learning_rate": 1.4149174957303629e-05, | |
| "loss": 0.0672, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 2.3407496492283024, | |
| "grad_norm": 1.259701132774353, | |
| "learning_rate": 1.398694389618696e-05, | |
| "loss": 0.0759, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 2.3447584686309884, | |
| "grad_norm": 1.2060563564300537, | |
| "learning_rate": 1.3825496881363864e-05, | |
| "loss": 0.0628, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 2.348767288033674, | |
| "grad_norm": 1.3083685636520386, | |
| "learning_rate": 1.3664837427724431e-05, | |
| "loss": 0.0578, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 2.35277610743636, | |
| "grad_norm": 1.3214398622512817, | |
| "learning_rate": 1.3504969033012615e-05, | |
| "loss": 0.06, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 2.3567849268390457, | |
| "grad_norm": 1.1904963254928589, | |
| "learning_rate": 1.3345895177750094e-05, | |
| "loss": 0.0617, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 2.3607937462417317, | |
| "grad_norm": 1.1517525911331177, | |
| "learning_rate": 1.3187619325160483e-05, | |
| "loss": 0.0528, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 2.3648025656444176, | |
| "grad_norm": 1.424729824066162, | |
| "learning_rate": 1.3030144921093979e-05, | |
| "loss": 0.0652, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 2.3688113850471035, | |
| "grad_norm": 1.4582880735397339, | |
| "learning_rate": 1.2873475393952245e-05, | |
| "loss": 0.0641, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 2.3728202044497895, | |
| "grad_norm": 1.2188777923583984, | |
| "learning_rate": 1.2717614154613877e-05, | |
| "loss": 0.067, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 2.3768290238524754, | |
| "grad_norm": 1.2932417392730713, | |
| "learning_rate": 1.2562564596360144e-05, | |
| "loss": 0.0535, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 2.3808378432551613, | |
| "grad_norm": 1.2565412521362305, | |
| "learning_rate": 1.2408330094800974e-05, | |
| "loss": 0.0642, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 2.3848466626578473, | |
| "grad_norm": 1.1354554891586304, | |
| "learning_rate": 1.225491400780162e-05, | |
| "loss": 0.0518, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 2.388855482060533, | |
| "grad_norm": 1.0824904441833496, | |
| "learning_rate": 1.2102319675409491e-05, | |
| "loss": 0.0593, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 2.392864301463219, | |
| "grad_norm": 1.3248436450958252, | |
| "learning_rate": 1.1950550419781414e-05, | |
| "loss": 0.0606, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 2.396873120865905, | |
| "grad_norm": 1.3530750274658203, | |
| "learning_rate": 1.1799609545111363e-05, | |
| "loss": 0.058, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 2.400881940268591, | |
| "grad_norm": 1.5529499053955078, | |
| "learning_rate": 1.1649500337558478e-05, | |
| "loss": 0.066, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 2.404890759671277, | |
| "grad_norm": 0.9849441647529602, | |
| "learning_rate": 1.15002260651755e-05, | |
| "loss": 0.0657, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.408899579073963, | |
| "grad_norm": 1.6223032474517822, | |
| "learning_rate": 1.1351789977837696e-05, | |
| "loss": 0.0687, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 2.412908398476649, | |
| "grad_norm": 1.4085158109664917, | |
| "learning_rate": 1.1204195307172094e-05, | |
| "loss": 0.0608, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 2.416917217879335, | |
| "grad_norm": 1.329626441001892, | |
| "learning_rate": 1.1057445266487016e-05, | |
| "loss": 0.0619, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 2.4209260372820203, | |
| "grad_norm": 1.2898280620574951, | |
| "learning_rate": 1.091154305070226e-05, | |
| "loss": 0.0653, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 2.424934856684706, | |
| "grad_norm": 1.290812611579895, | |
| "learning_rate": 1.0766491836279486e-05, | |
| "loss": 0.0636, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 2.428943676087392, | |
| "grad_norm": 1.226360559463501, | |
| "learning_rate": 1.0622294781153036e-05, | |
| "loss": 0.0486, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 2.432952495490078, | |
| "grad_norm": 1.4300650358200073, | |
| "learning_rate": 1.047895502466122e-05, | |
| "loss": 0.0711, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 2.436961314892764, | |
| "grad_norm": 1.4043900966644287, | |
| "learning_rate": 1.0336475687477964e-05, | |
| "loss": 0.0625, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 2.44097013429545, | |
| "grad_norm": 1.0565260648727417, | |
| "learning_rate": 1.0194859871544831e-05, | |
| "loss": 0.0561, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 2.444978953698136, | |
| "grad_norm": 1.1184768676757812, | |
| "learning_rate": 1.0054110660003551e-05, | |
| "loss": 0.0584, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 2.448987773100822, | |
| "grad_norm": 1.2461347579956055, | |
| "learning_rate": 9.914231117128841e-06, | |
| "loss": 0.0709, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 2.4529965925035078, | |
| "grad_norm": 1.2430334091186523, | |
| "learning_rate": 9.77522428826173e-06, | |
| "loss": 0.0606, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 2.4570054119061937, | |
| "grad_norm": 1.2371201515197754, | |
| "learning_rate": 9.637093199743236e-06, | |
| "loss": 0.0627, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 2.4610142313088796, | |
| "grad_norm": 1.1069798469543457, | |
| "learning_rate": 9.499840858848497e-06, | |
| "loss": 0.0564, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 2.4650230507115656, | |
| "grad_norm": 1.2197552919387817, | |
| "learning_rate": 9.363470253721268e-06, | |
| "loss": 0.0611, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 2.4690318701142515, | |
| "grad_norm": 1.005348563194275, | |
| "learning_rate": 9.227984353308926e-06, | |
| "loss": 0.0513, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 2.4730406895169375, | |
| "grad_norm": 1.237045407295227, | |
| "learning_rate": 9.09338610729773e-06, | |
| "loss": 0.0598, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 2.477049508919623, | |
| "grad_norm": 1.4536449909210205, | |
| "learning_rate": 8.959678446048725e-06, | |
| "loss": 0.0587, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 2.481058328322309, | |
| "grad_norm": 1.0354303121566772, | |
| "learning_rate": 8.826864280533853e-06, | |
| "loss": 0.0589, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 2.485067147724995, | |
| "grad_norm": 1.427465796470642, | |
| "learning_rate": 8.694946502272628e-06, | |
| "loss": 0.0482, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 2.4890759671276808, | |
| "grad_norm": 0.8941324353218079, | |
| "learning_rate": 8.563927983269154e-06, | |
| "loss": 0.0635, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 2.4930847865303667, | |
| "grad_norm": 1.0785568952560425, | |
| "learning_rate": 8.433811575949618e-06, | |
| "loss": 0.0622, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 2.4970936059330526, | |
| "grad_norm": 1.0514103174209595, | |
| "learning_rate": 8.304600113100181e-06, | |
| "loss": 0.0566, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 2.5011024253357386, | |
| "grad_norm": 1.5485719442367554, | |
| "learning_rate": 8.1762964078053e-06, | |
| "loss": 0.051, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 2.5051112447384245, | |
| "grad_norm": 1.6941360235214233, | |
| "learning_rate": 8.048903253386515e-06, | |
| "loss": 0.0497, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 2.5091200641411104, | |
| "grad_norm": 1.2973785400390625, | |
| "learning_rate": 7.922423423341551e-06, | |
| "loss": 0.0544, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 2.5131288835437964, | |
| "grad_norm": 1.2612277269363403, | |
| "learning_rate": 7.796859671284045e-06, | |
| "loss": 0.0614, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 2.5171377029464823, | |
| "grad_norm": 1.012540340423584, | |
| "learning_rate": 7.672214730883565e-06, | |
| "loss": 0.0655, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 2.5211465223491683, | |
| "grad_norm": 1.5126792192459106, | |
| "learning_rate": 7.548491315806011e-06, | |
| "loss": 0.055, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 2.525155341751854, | |
| "grad_norm": 1.1852082014083862, | |
| "learning_rate": 7.425692119654648e-06, | |
| "loss": 0.0621, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 2.52916416115454, | |
| "grad_norm": 1.29707670211792, | |
| "learning_rate": 7.3038198159114005e-06, | |
| "loss": 0.0605, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 2.533172980557226, | |
| "grad_norm": 1.53734290599823, | |
| "learning_rate": 7.1828770578786616e-06, | |
| "loss": 0.0581, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 2.537181799959912, | |
| "grad_norm": 1.385482668876648, | |
| "learning_rate": 7.062866478621538e-06, | |
| "loss": 0.0601, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 2.541190619362598, | |
| "grad_norm": 1.2017461061477661, | |
| "learning_rate": 6.943790690910512e-06, | |
| "loss": 0.0504, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 2.545199438765284, | |
| "grad_norm": 1.387803077697754, | |
| "learning_rate": 6.825652287164541e-06, | |
| "loss": 0.0574, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 2.54920825816797, | |
| "grad_norm": 1.3186421394348145, | |
| "learning_rate": 6.708453839394657e-06, | |
| "loss": 0.0585, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 2.5532170775706553, | |
| "grad_norm": 1.398294448852539, | |
| "learning_rate": 6.592197899147984e-06, | |
| "loss": 0.0694, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 2.5572258969733412, | |
| "grad_norm": 1.121980905532837, | |
| "learning_rate": 6.476886997452092e-06, | |
| "loss": 0.0513, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 2.561234716376027, | |
| "grad_norm": 1.4234181642532349, | |
| "learning_rate": 6.362523644760016e-06, | |
| "loss": 0.0546, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 2.565243535778713, | |
| "grad_norm": 1.428277611732483, | |
| "learning_rate": 6.24911033089548e-06, | |
| "loss": 0.0598, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 2.569252355181399, | |
| "grad_norm": 1.793627381324768, | |
| "learning_rate": 6.1366495249988275e-06, | |
| "loss": 0.0624, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 2.573261174584085, | |
| "grad_norm": 1.2697495222091675, | |
| "learning_rate": 6.0251436754731495e-06, | |
| "loss": 0.058, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 2.577269993986771, | |
| "grad_norm": 1.402320384979248, | |
| "learning_rate": 5.914595209931006e-06, | |
| "loss": 0.0523, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 2.581278813389457, | |
| "grad_norm": 1.0470867156982422, | |
| "learning_rate": 5.805006535141621e-06, | |
| "loss": 0.0645, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 2.585287632792143, | |
| "grad_norm": 1.0179369449615479, | |
| "learning_rate": 5.6963800369784385e-06, | |
| "loss": 0.0579, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 2.5892964521948287, | |
| "grad_norm": 1.298664927482605, | |
| "learning_rate": 5.588718080367195e-06, | |
| "loss": 0.0596, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 2.5933052715975147, | |
| "grad_norm": 1.2758408784866333, | |
| "learning_rate": 5.4820230092344385e-06, | |
| "loss": 0.0635, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 2.5973140910002, | |
| "grad_norm": 1.3012737035751343, | |
| "learning_rate": 5.376297146456488e-06, | |
| "loss": 0.0542, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 2.601322910402886, | |
| "grad_norm": 1.1228282451629639, | |
| "learning_rate": 5.271542793808837e-06, | |
| "loss": 0.0547, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 2.605331729805572, | |
| "grad_norm": 1.2128888368606567, | |
| "learning_rate": 5.1677622319161125e-06, | |
| "loss": 0.0582, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 2.609340549208258, | |
| "grad_norm": 1.4961844682693481, | |
| "learning_rate": 5.064957720202374e-06, | |
| "loss": 0.0548, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 2.613349368610944, | |
| "grad_norm": 1.5457652807235718, | |
| "learning_rate": 4.963131496841878e-06, | |
| "loss": 0.069, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 2.61735818801363, | |
| "grad_norm": 0.805304229259491, | |
| "learning_rate": 4.862285778710462e-06, | |
| "loss": 0.0454, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 2.621367007416316, | |
| "grad_norm": 1.2888411283493042, | |
| "learning_rate": 4.762422761337182e-06, | |
| "loss": 0.0531, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 2.6253758268190017, | |
| "grad_norm": 1.2126528024673462, | |
| "learning_rate": 4.663544618856575e-06, | |
| "loss": 0.0688, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 2.6293846462216877, | |
| "grad_norm": 1.269785761833191, | |
| "learning_rate": 4.565653503961281e-06, | |
| "loss": 0.0551, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 2.6333934656243736, | |
| "grad_norm": 1.2016196250915527, | |
| "learning_rate": 4.468751547855215e-06, | |
| "loss": 0.0692, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 2.6374022850270595, | |
| "grad_norm": 1.2045531272888184, | |
| "learning_rate": 4.372840860207123e-06, | |
| "loss": 0.0468, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 2.6414111044297455, | |
| "grad_norm": 0.6648709177970886, | |
| "learning_rate": 4.2779235291047105e-06, | |
| "loss": 0.053, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 2.6454199238324314, | |
| "grad_norm": 1.4050956964492798, | |
| "learning_rate": 4.184001621009137e-06, | |
| "loss": 0.054, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 2.6494287432351173, | |
| "grad_norm": 1.4628591537475586, | |
| "learning_rate": 4.091077180710029e-06, | |
| "loss": 0.0633, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 2.6534375626378033, | |
| "grad_norm": 0.8363884091377258, | |
| "learning_rate": 3.9991522312809945e-06, | |
| "loss": 0.0523, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 2.657446382040489, | |
| "grad_norm": 1.3806829452514648, | |
| "learning_rate": 3.908228774035544e-06, | |
| "loss": 0.057, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 2.661455201443175, | |
| "grad_norm": 1.4262441396713257, | |
| "learning_rate": 3.818308788483533e-06, | |
| "loss": 0.0458, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 2.665464020845861, | |
| "grad_norm": 1.5106767416000366, | |
| "learning_rate": 3.72939423228808e-06, | |
| "loss": 0.0636, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 2.669472840248547, | |
| "grad_norm": 1.71439790725708, | |
| "learning_rate": 3.6414870412229184e-06, | |
| "loss": 0.0601, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 2.673481659651233, | |
| "grad_norm": 1.167715072631836, | |
| "learning_rate": 3.5545891291302704e-06, | |
| "loss": 0.0486, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 2.6774904790539185, | |
| "grad_norm": 1.4082682132720947, | |
| "learning_rate": 3.4687023878791857e-06, | |
| "loss": 0.0543, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 2.6814992984566044, | |
| "grad_norm": 1.1505839824676514, | |
| "learning_rate": 3.3838286873243197e-06, | |
| "loss": 0.0512, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 2.6855081178592903, | |
| "grad_norm": 1.6079833507537842, | |
| "learning_rate": 3.2999698752652685e-06, | |
| "loss": 0.0567, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 2.6895169372619763, | |
| "grad_norm": 1.8533159494400024, | |
| "learning_rate": 3.2171277774063204e-06, | |
| "loss": 0.0588, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 2.693525756664662, | |
| "grad_norm": 1.6835551261901855, | |
| "learning_rate": 3.1353041973166965e-06, | |
| "loss": 0.0619, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 2.697534576067348, | |
| "grad_norm": 1.7107609510421753, | |
| "learning_rate": 3.054500916391312e-06, | |
| "loss": 0.0581, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 2.701543395470034, | |
| "grad_norm": 1.131338119506836, | |
| "learning_rate": 2.9747196938119614e-06, | |
| "loss": 0.0501, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 2.70555221487272, | |
| "grad_norm": 1.9655126333236694, | |
| "learning_rate": 2.8959622665090338e-06, | |
| "loss": 0.049, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 2.709561034275406, | |
| "grad_norm": 1.6402628421783447, | |
| "learning_rate": 2.818230349123724e-06, | |
| "loss": 0.0671, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 2.713569853678092, | |
| "grad_norm": 1.2333426475524902, | |
| "learning_rate": 2.741525633970665e-06, | |
| "loss": 0.0526, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 2.717578673080778, | |
| "grad_norm": 1.035477638244629, | |
| "learning_rate": 2.665849791001074e-06, | |
| "loss": 0.0479, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 2.7215874924834638, | |
| "grad_norm": 1.5815109014511108, | |
| "learning_rate": 2.591204467766456e-06, | |
| "loss": 0.0659, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 2.7255963118861493, | |
| "grad_norm": 1.1497759819030762, | |
| "learning_rate": 2.517591289382676e-06, | |
| "loss": 0.0502, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 2.729605131288835, | |
| "grad_norm": 1.2210969924926758, | |
| "learning_rate": 2.4450118584946002e-06, | |
| "loss": 0.0589, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 2.733613950691521, | |
| "grad_norm": 1.4234390258789062, | |
| "learning_rate": 2.373467755241221e-06, | |
| "loss": 0.0577, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 2.737622770094207, | |
| "grad_norm": 1.0173630714416504, | |
| "learning_rate": 2.302960537221227e-06, | |
| "loss": 0.0418, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 2.741631589496893, | |
| "grad_norm": 1.2278695106506348, | |
| "learning_rate": 2.2334917394590873e-06, | |
| "loss": 0.0429, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 2.745640408899579, | |
| "grad_norm": 1.5566179752349854, | |
| "learning_rate": 2.1650628743716874e-06, | |
| "loss": 0.0504, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 2.749649228302265, | |
| "grad_norm": 1.4304202795028687, | |
| "learning_rate": 2.097675431735341e-06, | |
| "loss": 0.0547, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 2.753658047704951, | |
| "grad_norm": 1.333284854888916, | |
| "learning_rate": 2.0313308786533647e-06, | |
| "loss": 0.0629, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 2.7576668671076368, | |
| "grad_norm": 1.5218358039855957, | |
| "learning_rate": 1.966030659524182e-06, | |
| "loss": 0.0514, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 2.7616756865103227, | |
| "grad_norm": 1.650320291519165, | |
| "learning_rate": 1.9017761960098302e-06, | |
| "loss": 0.0574, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 2.7656845059130086, | |
| "grad_norm": 1.31277334690094, | |
| "learning_rate": 1.838568887005021e-06, | |
| "loss": 0.0574, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 2.7696933253156946, | |
| "grad_norm": 1.2339560985565186, | |
| "learning_rate": 1.776410108606702e-06, | |
| "loss": 0.0568, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 2.7737021447183805, | |
| "grad_norm": 1.1014797687530518, | |
| "learning_rate": 1.7153012140840808e-06, | |
| "loss": 0.0633, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 2.7777109641210664, | |
| "grad_norm": 1.3516331911087036, | |
| "learning_rate": 1.6552435338491544e-06, | |
| "loss": 0.0444, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 2.7817197835237524, | |
| "grad_norm": 1.180059552192688, | |
| "learning_rate": 1.596238375427772e-06, | |
| "loss": 0.0529, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 2.7857286029264383, | |
| "grad_norm": 1.1580662727355957, | |
| "learning_rate": 1.538287023431162e-06, | |
| "loss": 0.0567, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 2.7897374223291243, | |
| "grad_norm": 1.3882676362991333, | |
| "learning_rate": 1.4813907395279214e-06, | |
| "loss": 0.0631, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 2.79374624173181, | |
| "grad_norm": 0.8586792349815369, | |
| "learning_rate": 1.4255507624166109e-06, | |
| "loss": 0.0487, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 2.797755061134496, | |
| "grad_norm": 1.3401018381118774, | |
| "learning_rate": 1.3707683077987588e-06, | |
| "loss": 0.059, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 2.801763880537182, | |
| "grad_norm": 1.160061240196228, | |
| "learning_rate": 1.3170445683523769e-06, | |
| "loss": 0.0511, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 2.8057726999398676, | |
| "grad_norm": 1.5262839794158936, | |
| "learning_rate": 1.264380713706037e-06, | |
| "loss": 0.0571, | |
| "step": 7000 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 7482, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.328918701667516e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |