| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9996362313568571, |
| "eval_steps": 500, |
| "global_step": 1374, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0007275372862859222, |
| "grad_norm": 1.5288920783603972, |
| "learning_rate": 0.0, |
| "loss": 2.2393, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0014550745725718443, |
| "grad_norm": 1.8644806477200417, |
| "learning_rate": 2.5000000000000004e-07, |
| "loss": 2.2191, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0021826118588577663, |
| "grad_norm": 1.7784703619023348, |
| "learning_rate": 5.000000000000001e-07, |
| "loss": 2.2159, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0029101491451436886, |
| "grad_norm": 1.5806839907563581, |
| "learning_rate": 7.5e-07, |
| "loss": 2.184, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0036376864314296106, |
| "grad_norm": 1.8190865637907, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 2.212, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0043652237177155325, |
| "grad_norm": 1.6181433102216847, |
| "learning_rate": 1.25e-06, |
| "loss": 2.211, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.005092761004001455, |
| "grad_norm": 1.7927607230934282, |
| "learning_rate": 1.5e-06, |
| "loss": 2.1433, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.005820298290287377, |
| "grad_norm": 1.8873680830855888, |
| "learning_rate": 1.75e-06, |
| "loss": 2.206, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0065478355765733, |
| "grad_norm": 1.482732563101495, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 2.2254, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.007275372862859221, |
| "grad_norm": 1.4889646677667492, |
| "learning_rate": 2.25e-06, |
| "loss": 2.1792, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.008002910149145144, |
| "grad_norm": 1.4985805484052215, |
| "learning_rate": 2.5e-06, |
| "loss": 2.2684, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.008730447435431065, |
| "grad_norm": 1.5013684044916331, |
| "learning_rate": 2.7500000000000004e-06, |
| "loss": 2.1795, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.009457984721716987, |
| "grad_norm": 1.7555667532063568, |
| "learning_rate": 3e-06, |
| "loss": 2.2519, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.01018552200800291, |
| "grad_norm": 2.7293866713537134, |
| "learning_rate": 3.2500000000000002e-06, |
| "loss": 2.2681, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.010913059294288832, |
| "grad_norm": 2.200225872763007, |
| "learning_rate": 3.5e-06, |
| "loss": 2.1938, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.011640596580574755, |
| "grad_norm": 1.8348578578231278, |
| "learning_rate": 3.7500000000000005e-06, |
| "loss": 2.2048, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.012368133866860677, |
| "grad_norm": 1.8743984231151645, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 2.1631, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.0130956711531466, |
| "grad_norm": 1.8414431627872851, |
| "learning_rate": 4.25e-06, |
| "loss": 2.1406, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.013823208439432522, |
| "grad_norm": 1.4183185947241677, |
| "learning_rate": 4.5e-06, |
| "loss": 2.2254, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.014550745725718442, |
| "grad_norm": 1.9481451459518573, |
| "learning_rate": 4.75e-06, |
| "loss": 2.1679, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.015278283012004365, |
| "grad_norm": 1.5602129290453182, |
| "learning_rate": 5e-06, |
| "loss": 2.2265, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.01600582029829029, |
| "grad_norm": 1.3810432619588962, |
| "learning_rate": 5.2500000000000006e-06, |
| "loss": 2.1534, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.01673335758457621, |
| "grad_norm": 1.5968789996194426, |
| "learning_rate": 5.500000000000001e-06, |
| "loss": 2.1732, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.01746089487086213, |
| "grad_norm": 1.816533437678404, |
| "learning_rate": 5.75e-06, |
| "loss": 2.2226, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.018188432157148052, |
| "grad_norm": 1.5418652750157003, |
| "learning_rate": 6e-06, |
| "loss": 2.2728, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.018915969443433975, |
| "grad_norm": 1.4735527362492546, |
| "learning_rate": 6.25e-06, |
| "loss": 2.2163, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.019643506729719897, |
| "grad_norm": 1.4845561943479575, |
| "learning_rate": 6.5000000000000004e-06, |
| "loss": 2.1918, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.02037104401600582, |
| "grad_norm": 1.7412028945444677, |
| "learning_rate": 6.750000000000001e-06, |
| "loss": 2.2396, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.021098581302291742, |
| "grad_norm": 1.6561669558897179, |
| "learning_rate": 7e-06, |
| "loss": 2.2286, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.021826118588577664, |
| "grad_norm": 1.7109055266319328, |
| "learning_rate": 7.25e-06, |
| "loss": 2.1781, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.022553655874863587, |
| "grad_norm": 1.5704601583829316, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 2.2403, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.02328119316114951, |
| "grad_norm": 1.508006475744022, |
| "learning_rate": 7.75e-06, |
| "loss": 2.2205, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.02400873044743543, |
| "grad_norm": 1.803590041071739, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 2.2029, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.024736267733721354, |
| "grad_norm": 1.4516903950304747, |
| "learning_rate": 8.25e-06, |
| "loss": 2.2139, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.025463805020007276, |
| "grad_norm": 3.116636483017336, |
| "learning_rate": 8.5e-06, |
| "loss": 2.2323, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.0261913423062932, |
| "grad_norm": 1.623617395325745, |
| "learning_rate": 8.750000000000001e-06, |
| "loss": 2.1991, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.02691887959257912, |
| "grad_norm": 2.0677382830922686, |
| "learning_rate": 9e-06, |
| "loss": 2.2652, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.027646416878865043, |
| "grad_norm": 3.034199810473946, |
| "learning_rate": 9.250000000000001e-06, |
| "loss": 2.1865, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.028373954165150966, |
| "grad_norm": 1.7919909350659693, |
| "learning_rate": 9.5e-06, |
| "loss": 2.1912, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.029101491451436885, |
| "grad_norm": 3.239893614116414, |
| "learning_rate": 9.75e-06, |
| "loss": 2.2285, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.029829028737722807, |
| "grad_norm": 1.6482411098991196, |
| "learning_rate": 1e-05, |
| "loss": 2.1061, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.03055656602400873, |
| "grad_norm": 4.393883055041743, |
| "learning_rate": 9.999986134743949e-06, |
| "loss": 2.1837, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.03128410331029465, |
| "grad_norm": 3.640280872327228, |
| "learning_rate": 9.99994453905269e-06, |
| "loss": 2.1949, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.03201164059658058, |
| "grad_norm": 2.984560444439296, |
| "learning_rate": 9.999875213156919e-06, |
| "loss": 2.1897, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.032739177882866496, |
| "grad_norm": 3.643349152064925, |
| "learning_rate": 9.999778157441126e-06, |
| "loss": 2.2719, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.03346671516915242, |
| "grad_norm": 1.679399697575627, |
| "learning_rate": 9.99965337244359e-06, |
| "loss": 2.1504, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.03419425245543834, |
| "grad_norm": 2.9412530670758796, |
| "learning_rate": 9.999500858856382e-06, |
| "loss": 2.1945, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.03492178974172426, |
| "grad_norm": 1.7766551035232812, |
| "learning_rate": 9.999320617525356e-06, |
| "loss": 2.1419, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.035649327028010186, |
| "grad_norm": 2.7089668240918066, |
| "learning_rate": 9.999112649450154e-06, |
| "loss": 2.1765, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.036376864314296105, |
| "grad_norm": 1.4848133526126528, |
| "learning_rate": 9.998876955784183e-06, |
| "loss": 2.1953, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.03710440160058203, |
| "grad_norm": 2.799597350559271, |
| "learning_rate": 9.998613537834625e-06, |
| "loss": 2.2151, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.03783193888686795, |
| "grad_norm": 2.0026253192452117, |
| "learning_rate": 9.998322397062426e-06, |
| "loss": 2.202, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.038559476173153875, |
| "grad_norm": 2.779566768037998, |
| "learning_rate": 9.99800353508228e-06, |
| "loss": 2.2047, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.039287013459439794, |
| "grad_norm": 1.6842181907886473, |
| "learning_rate": 9.997656953662627e-06, |
| "loss": 2.1725, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.04001455074572572, |
| "grad_norm": 2.9470995907944304, |
| "learning_rate": 9.997282654725645e-06, |
| "loss": 2.22, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.04074208803201164, |
| "grad_norm": 2.209113890782605, |
| "learning_rate": 9.996880640347234e-06, |
| "loss": 2.1626, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.041469625318297565, |
| "grad_norm": 3.132001624857469, |
| "learning_rate": 9.99645091275701e-06, |
| "loss": 2.2366, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.042197162604583484, |
| "grad_norm": 2.75629026942325, |
| "learning_rate": 9.99599347433828e-06, |
| "loss": 2.2009, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.04292469989086941, |
| "grad_norm": 2.6139307213283516, |
| "learning_rate": 9.99550832762805e-06, |
| "loss": 2.249, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.04365223717715533, |
| "grad_norm": 2.7751242920969674, |
| "learning_rate": 9.99499547531699e-06, |
| "loss": 2.2184, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.044379774463441254, |
| "grad_norm": 1.871420864926908, |
| "learning_rate": 9.994454920249433e-06, |
| "loss": 2.1852, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.04510731174972717, |
| "grad_norm": 2.382349880929508, |
| "learning_rate": 9.993886665423348e-06, |
| "loss": 2.1422, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.04583484903601309, |
| "grad_norm": 2.3993230486198263, |
| "learning_rate": 9.993290713990343e-06, |
| "loss": 2.1824, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.04656238632229902, |
| "grad_norm": 2.360857901310075, |
| "learning_rate": 9.99266706925562e-06, |
| "loss": 2.2415, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.04728992360858494, |
| "grad_norm": 1.650711565941275, |
| "learning_rate": 9.992015734677979e-06, |
| "loss": 2.1521, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.04801746089487086, |
| "grad_norm": 3.3076174611401994, |
| "learning_rate": 9.991336713869785e-06, |
| "loss": 2.181, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.04874499818115678, |
| "grad_norm": 1.6769492616237789, |
| "learning_rate": 9.99063001059696e-06, |
| "loss": 2.1962, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.04947253546744271, |
| "grad_norm": 1.7599941311889593, |
| "learning_rate": 9.989895628778952e-06, |
| "loss": 2.1816, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.050200072753728626, |
| "grad_norm": 1.4988700213088457, |
| "learning_rate": 9.989133572488716e-06, |
| "loss": 2.2403, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.05092761004001455, |
| "grad_norm": 1.6353905077011426, |
| "learning_rate": 9.988343845952697e-06, |
| "loss": 2.2182, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.05165514732630047, |
| "grad_norm": 1.7111490961536142, |
| "learning_rate": 9.987526453550798e-06, |
| "loss": 2.2347, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.0523826846125864, |
| "grad_norm": 1.3600773855476789, |
| "learning_rate": 9.98668139981636e-06, |
| "loss": 2.1614, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.053110221898872316, |
| "grad_norm": 1.6698126117343595, |
| "learning_rate": 9.98580868943614e-06, |
| "loss": 2.2139, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.05383775918515824, |
| "grad_norm": 1.6245254791288735, |
| "learning_rate": 9.984908327250278e-06, |
| "loss": 2.1661, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.05456529647144416, |
| "grad_norm": 1.675160918900565, |
| "learning_rate": 9.983980318252274e-06, |
| "loss": 2.2181, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.05529283375773009, |
| "grad_norm": 1.8972214787238628, |
| "learning_rate": 9.983024667588961e-06, |
| "loss": 2.1788, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.056020371044016005, |
| "grad_norm": 1.5577986727860746, |
| "learning_rate": 9.982041380560476e-06, |
| "loss": 2.2095, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.05674790833030193, |
| "grad_norm": 1.7480153079321283, |
| "learning_rate": 9.98103046262023e-06, |
| "loss": 2.1671, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.05747544561658785, |
| "grad_norm": 1.8001870581973476, |
| "learning_rate": 9.979991919374877e-06, |
| "loss": 2.2235, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.05820298290287377, |
| "grad_norm": 1.485718877859379, |
| "learning_rate": 9.978925756584284e-06, |
| "loss": 2.1956, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.058930520189159695, |
| "grad_norm": 1.6300668294264538, |
| "learning_rate": 9.9778319801615e-06, |
| "loss": 2.223, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.059658057475445614, |
| "grad_norm": 1.359440176512661, |
| "learning_rate": 9.976710596172721e-06, |
| "loss": 2.2371, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.06038559476173154, |
| "grad_norm": 1.6330927723700759, |
| "learning_rate": 9.975561610837254e-06, |
| "loss": 2.1684, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.06111313204801746, |
| "grad_norm": 1.7561578829689188, |
| "learning_rate": 9.974385030527496e-06, |
| "loss": 2.1862, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.061840669334303384, |
| "grad_norm": 1.568293680618204, |
| "learning_rate": 9.973180861768874e-06, |
| "loss": 2.1988, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.0625682066205893, |
| "grad_norm": 1.744615695345401, |
| "learning_rate": 9.971949111239838e-06, |
| "loss": 2.1834, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.06329574390687523, |
| "grad_norm": 1.620398494899476, |
| "learning_rate": 9.970689785771798e-06, |
| "loss": 2.1635, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.06402328119316116, |
| "grad_norm": 1.9224626119840165, |
| "learning_rate": 9.969402892349105e-06, |
| "loss": 2.1485, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.06475081847944707, |
| "grad_norm": 1.7081277069472598, |
| "learning_rate": 9.968088438109002e-06, |
| "loss": 2.2314, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.06547835576573299, |
| "grad_norm": 1.409570479234312, |
| "learning_rate": 9.966746430341584e-06, |
| "loss": 2.1926, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.06620589305201892, |
| "grad_norm": 1.5601946347425566, |
| "learning_rate": 9.965376876489765e-06, |
| "loss": 2.2187, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.06693343033830484, |
| "grad_norm": 1.7961828728056013, |
| "learning_rate": 9.963979784149232e-06, |
| "loss": 2.1705, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.06766096762459076, |
| "grad_norm": 1.466420625246192, |
| "learning_rate": 9.962555161068401e-06, |
| "loss": 2.2048, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.06838850491087668, |
| "grad_norm": 1.5126227471471592, |
| "learning_rate": 9.961103015148376e-06, |
| "loss": 2.1821, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.06911604219716261, |
| "grad_norm": 1.66909649824643, |
| "learning_rate": 9.95962335444291e-06, |
| "loss": 2.1929, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.06984357948344852, |
| "grad_norm": 1.5144983190620087, |
| "learning_rate": 9.958116187158351e-06, |
| "loss": 2.1943, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.07057111676973445, |
| "grad_norm": 1.4373661097027508, |
| "learning_rate": 9.956581521653604e-06, |
| "loss": 2.2261, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.07129865405602037, |
| "grad_norm": 1.4934873826145776, |
| "learning_rate": 9.955019366440082e-06, |
| "loss": 2.1768, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.0720261913423063, |
| "grad_norm": 1.3692598853625408, |
| "learning_rate": 9.953429730181653e-06, |
| "loss": 2.2163, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.07275372862859221, |
| "grad_norm": 1.4437046319559694, |
| "learning_rate": 9.95181262169461e-06, |
| "loss": 2.1791, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.07348126591487814, |
| "grad_norm": 1.5090120476700792, |
| "learning_rate": 9.950168049947597e-06, |
| "loss": 2.2112, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.07420880320116406, |
| "grad_norm": 1.4811337559061613, |
| "learning_rate": 9.948496024061577e-06, |
| "loss": 2.1407, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.07493634048744999, |
| "grad_norm": 1.397294300464599, |
| "learning_rate": 9.94679655330978e-06, |
| "loss": 2.2462, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.0756638777737359, |
| "grad_norm": 1.665042097857969, |
| "learning_rate": 9.945069647117645e-06, |
| "loss": 2.2017, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.07639141506002183, |
| "grad_norm": 1.6880656948565003, |
| "learning_rate": 9.943315315062766e-06, |
| "loss": 2.1964, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.07711895234630775, |
| "grad_norm": 1.6904248420523655, |
| "learning_rate": 9.941533566874852e-06, |
| "loss": 2.2072, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.07784648963259368, |
| "grad_norm": 1.4904625247428034, |
| "learning_rate": 9.939724412435661e-06, |
| "loss": 2.2022, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.07857402691887959, |
| "grad_norm": 1.9029842633160232, |
| "learning_rate": 9.937887861778947e-06, |
| "loss": 2.1946, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.07930156420516551, |
| "grad_norm": 1.6199815657345118, |
| "learning_rate": 9.93602392509041e-06, |
| "loss": 2.2013, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.08002910149145144, |
| "grad_norm": 1.9241181799327212, |
| "learning_rate": 9.934132612707631e-06, |
| "loss": 2.1813, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.08075663877773735, |
| "grad_norm": 2.286463886994313, |
| "learning_rate": 9.932213935120025e-06, |
| "loss": 2.2181, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.08148417606402328, |
| "grad_norm": 1.5854169026098552, |
| "learning_rate": 9.930267902968774e-06, |
| "loss": 2.2136, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.0822117133503092, |
| "grad_norm": 1.9337265535465549, |
| "learning_rate": 9.928294527046771e-06, |
| "loss": 2.1435, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.08293925063659513, |
| "grad_norm": 1.4724469337009167, |
| "learning_rate": 9.92629381829856e-06, |
| "loss": 2.2008, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.08366678792288104, |
| "grad_norm": 1.716990512009962, |
| "learning_rate": 9.924265787820279e-06, |
| "loss": 2.2678, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.08439432520916697, |
| "grad_norm": 1.9728635256737985, |
| "learning_rate": 9.92221044685959e-06, |
| "loss": 2.2334, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.0851218624954529, |
| "grad_norm": 1.3829353726842706, |
| "learning_rate": 9.920127806815627e-06, |
| "loss": 2.2435, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.08584939978173882, |
| "grad_norm": 1.860749893286872, |
| "learning_rate": 9.918017879238922e-06, |
| "loss": 2.2438, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.08657693706802473, |
| "grad_norm": 1.5711245049229094, |
| "learning_rate": 9.915880675831352e-06, |
| "loss": 2.2131, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.08730447435431066, |
| "grad_norm": 1.7631319009945081, |
| "learning_rate": 9.913716208446067e-06, |
| "loss": 2.2532, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.08803201164059658, |
| "grad_norm": 1.7971608421907779, |
| "learning_rate": 9.91152448908742e-06, |
| "loss": 2.1021, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.08875954892688251, |
| "grad_norm": 1.6070833156452324, |
| "learning_rate": 9.909305529910917e-06, |
| "loss": 2.2237, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.08948708621316842, |
| "grad_norm": 1.5982292957619832, |
| "learning_rate": 9.907059343223129e-06, |
| "loss": 2.1703, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.09021462349945435, |
| "grad_norm": 2.318593516011183, |
| "learning_rate": 9.904785941481638e-06, |
| "loss": 2.1867, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.09094216078574027, |
| "grad_norm": 1.7613403582041198, |
| "learning_rate": 9.902485337294965e-06, |
| "loss": 2.1933, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.09166969807202618, |
| "grad_norm": 2.3386117708504752, |
| "learning_rate": 9.900157543422493e-06, |
| "loss": 2.1844, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.09239723535831211, |
| "grad_norm": 1.9798703362538914, |
| "learning_rate": 9.897802572774407e-06, |
| "loss": 2.2382, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.09312477264459804, |
| "grad_norm": 2.158022738478921, |
| "learning_rate": 9.895420438411616e-06, |
| "loss": 2.2307, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.09385230993088396, |
| "grad_norm": 1.6660240142136167, |
| "learning_rate": 9.893011153545679e-06, |
| "loss": 2.2348, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.09457984721716987, |
| "grad_norm": 1.9632305154461456, |
| "learning_rate": 9.89057473153874e-06, |
| "loss": 2.2141, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.0953073845034558, |
| "grad_norm": 1.672261371877987, |
| "learning_rate": 9.888111185903442e-06, |
| "loss": 2.1641, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.09603492178974173, |
| "grad_norm": 1.6661927373979792, |
| "learning_rate": 9.885620530302865e-06, |
| "loss": 2.1819, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.09676245907602765, |
| "grad_norm": 2.2520607322037947, |
| "learning_rate": 9.883102778550434e-06, |
| "loss": 2.1935, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.09748999636231356, |
| "grad_norm": 1.4347704266644696, |
| "learning_rate": 9.880557944609863e-06, |
| "loss": 2.2312, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.09821753364859949, |
| "grad_norm": 2.5969551378868685, |
| "learning_rate": 9.877986042595062e-06, |
| "loss": 2.2294, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.09894507093488542, |
| "grad_norm": 1.5897358074382941, |
| "learning_rate": 9.87538708677006e-06, |
| "loss": 2.2595, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.09967260822117134, |
| "grad_norm": 3.023355656547637, |
| "learning_rate": 9.872761091548933e-06, |
| "loss": 2.2195, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.10040014550745725, |
| "grad_norm": 2.5009819811646414, |
| "learning_rate": 9.870108071495721e-06, |
| "loss": 2.2027, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.10112768279374318, |
| "grad_norm": 2.649004490599995, |
| "learning_rate": 9.867428041324345e-06, |
| "loss": 2.2478, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.1018552200800291, |
| "grad_norm": 2.0035185647332976, |
| "learning_rate": 9.864721015898524e-06, |
| "loss": 2.2083, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.10258275736631503, |
| "grad_norm": 2.6207182124681823, |
| "learning_rate": 9.861987010231701e-06, |
| "loss": 2.1863, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.10331029465260094, |
| "grad_norm": 2.262610029951531, |
| "learning_rate": 9.85922603948695e-06, |
| "loss": 2.24, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.10403783193888687, |
| "grad_norm": 2.6477691672398174, |
| "learning_rate": 9.856438118976899e-06, |
| "loss": 2.2788, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.1047653692251728, |
| "grad_norm": 2.582318665211976, |
| "learning_rate": 9.853623264163638e-06, |
| "loss": 2.2197, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.1054929065114587, |
| "grad_norm": 2.3195328375845814, |
| "learning_rate": 9.850781490658643e-06, |
| "loss": 2.2181, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.10622044379774463, |
| "grad_norm": 2.370457788315632, |
| "learning_rate": 9.84791281422268e-06, |
| "loss": 2.1894, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.10694798108403056, |
| "grad_norm": 2.072643637054436, |
| "learning_rate": 9.845017250765721e-06, |
| "loss": 2.2038, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.10767551837031648, |
| "grad_norm": 1.774580372637068, |
| "learning_rate": 9.84209481634686e-06, |
| "loss": 2.1874, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.1084030556566024, |
| "grad_norm": 2.6456479441489185, |
| "learning_rate": 9.839145527174216e-06, |
| "loss": 2.2713, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.10913059294288832, |
| "grad_norm": 2.4439242680492193, |
| "learning_rate": 9.836169399604846e-06, |
| "loss": 2.2124, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.10985813022917425, |
| "grad_norm": 2.180030185422303, |
| "learning_rate": 9.833166450144665e-06, |
| "loss": 2.1744, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.11058566751546017, |
| "grad_norm": 2.3348647373216513, |
| "learning_rate": 9.830136695448334e-06, |
| "loss": 2.19, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.11131320480174609, |
| "grad_norm": 1.9656368778684938, |
| "learning_rate": 9.827080152319182e-06, |
| "loss": 2.2332, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.11204074208803201, |
| "grad_norm": 1.8480175602182294, |
| "learning_rate": 9.823996837709114e-06, |
| "loss": 2.2119, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.11276827937431794, |
| "grad_norm": 2.281426562659181, |
| "learning_rate": 9.820886768718503e-06, |
| "loss": 2.2268, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.11349581666060386, |
| "grad_norm": 1.7489041448103229, |
| "learning_rate": 9.817749962596115e-06, |
| "loss": 2.2378, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.11422335394688977, |
| "grad_norm": 2.782758570162382, |
| "learning_rate": 9.814586436738998e-06, |
| "loss": 2.2052, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.1149508912331757, |
| "grad_norm": 2.2126011643137358, |
| "learning_rate": 9.811396208692387e-06, |
| "loss": 2.1733, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.11567842851946163, |
| "grad_norm": 2.1992962836193857, |
| "learning_rate": 9.808179296149616e-06, |
| "loss": 2.1681, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.11640596580574754, |
| "grad_norm": 2.492426612704936, |
| "learning_rate": 9.804935716952011e-06, |
| "loss": 2.0941, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.11713350309203346, |
| "grad_norm": 1.8537517699892123, |
| "learning_rate": 9.801665489088795e-06, |
| "loss": 2.2002, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.11786104037831939, |
| "grad_norm": 1.7969112288772866, |
| "learning_rate": 9.798368630696984e-06, |
| "loss": 2.1987, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.11858857766460532, |
| "grad_norm": 2.5513186249381343, |
| "learning_rate": 9.795045160061295e-06, |
| "loss": 2.2223, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.11931611495089123, |
| "grad_norm": 1.9389767804192248, |
| "learning_rate": 9.791695095614036e-06, |
| "loss": 2.2047, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.12004365223717715, |
| "grad_norm": 2.0700871590965413, |
| "learning_rate": 9.788318455935008e-06, |
| "loss": 2.2499, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.12077118952346308, |
| "grad_norm": 1.6399304993048427, |
| "learning_rate": 9.7849152597514e-06, |
| "loss": 2.1931, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.121498726809749, |
| "grad_norm": 2.736143568317565, |
| "learning_rate": 9.781485525937683e-06, |
| "loss": 2.2173, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.12222626409603492, |
| "grad_norm": 2.3472764088553877, |
| "learning_rate": 9.778029273515519e-06, |
| "loss": 2.1949, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.12295380138232084, |
| "grad_norm": 2.144561428349233, |
| "learning_rate": 9.774546521653633e-06, |
| "loss": 2.2077, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.12368133866860677, |
| "grad_norm": 2.0898248402072728, |
| "learning_rate": 9.771037289667726e-06, |
| "loss": 2.218, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.1244088759548927, |
| "grad_norm": 2.303654173365009, |
| "learning_rate": 9.767501597020357e-06, |
| "loss": 2.1504, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.1251364132411786, |
| "grad_norm": 1.9710676759990309, |
| "learning_rate": 9.76393946332084e-06, |
| "loss": 2.2076, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.12586395052746452, |
| "grad_norm": 2.0524996430866356, |
| "learning_rate": 9.760350908325131e-06, |
| "loss": 2.1814, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.12659148781375046, |
| "grad_norm": 1.980263193179824, |
| "learning_rate": 9.756735951935725e-06, |
| "loss": 2.2261, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.12731902510003637, |
| "grad_norm": 2.3228604960740595, |
| "learning_rate": 9.753094614201542e-06, |
| "loss": 2.2165, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.1280465623863223, |
| "grad_norm": 2.244176450327055, |
| "learning_rate": 9.749426915317812e-06, |
| "loss": 2.1894, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.12877409967260822, |
| "grad_norm": 1.9074936064390875, |
| "learning_rate": 9.74573287562597e-06, |
| "loss": 2.231, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.12950163695889413, |
| "grad_norm": 2.1129031026943457, |
| "learning_rate": 9.742012515613536e-06, |
| "loss": 2.2113, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.13022917424518007, |
| "grad_norm": 2.1556088711700983, |
| "learning_rate": 9.738265855914014e-06, |
| "loss": 2.2041, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.13095671153146599, |
| "grad_norm": 1.8507560510757504, |
| "learning_rate": 9.734492917306754e-06, |
| "loss": 2.2195, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.1316842488177519, |
| "grad_norm": 1.8920978000890483, |
| "learning_rate": 9.730693720716866e-06, |
| "loss": 2.2247, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.13241178610403784, |
| "grad_norm": 1.825970624258127, |
| "learning_rate": 9.72686828721508e-06, |
| "loss": 2.1789, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.13313932339032375, |
| "grad_norm": 2.0662381173028095, |
| "learning_rate": 9.723016638017644e-06, |
| "loss": 2.1699, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.1338668606766097, |
| "grad_norm": 1.5517316847482627, |
| "learning_rate": 9.719138794486198e-06, |
| "loss": 2.2443, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.1345943979628956, |
| "grad_norm": 2.1385006346730355, |
| "learning_rate": 9.715234778127658e-06, |
| "loss": 2.1759, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.1353219352491815, |
| "grad_norm": 1.5551074423625981, |
| "learning_rate": 9.711304610594104e-06, |
| "loss": 2.1856, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.13604947253546745, |
| "grad_norm": 2.4865192981684436, |
| "learning_rate": 9.70734831368264e-06, |
| "loss": 2.1771, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.13677700982175336, |
| "grad_norm": 2.074018167734129, |
| "learning_rate": 9.7033659093353e-06, |
| "loss": 2.2207, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.13750454710803928, |
| "grad_norm": 2.3773220250146934, |
| "learning_rate": 9.699357419638904e-06, |
| "loss": 2.161, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.13823208439432522, |
| "grad_norm": 2.325983230196079, |
| "learning_rate": 9.695322866824948e-06, |
| "loss": 2.1559, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.13895962168061113, |
| "grad_norm": 2.1999495816652206, |
| "learning_rate": 9.691262273269472e-06, |
| "loss": 2.2153, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.13968715896689704, |
| "grad_norm": 2.013387564938728, |
| "learning_rate": 9.687175661492944e-06, |
| "loss": 2.1737, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.14041469625318298, |
| "grad_norm": 2.216898719559254, |
| "learning_rate": 9.683063054160136e-06, |
| "loss": 2.1715, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.1411422335394689, |
| "grad_norm": 1.7993920418582208, |
| "learning_rate": 9.678924474079986e-06, |
| "loss": 2.1593, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.14186977082575483, |
| "grad_norm": 2.144459012819178, |
| "learning_rate": 9.67475994420548e-06, |
| "loss": 2.1626, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.14259730811204074, |
| "grad_norm": 2.0013966050180674, |
| "learning_rate": 9.670569487633534e-06, |
| "loss": 2.2098, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.14332484539832666, |
| "grad_norm": 1.8221881690950872, |
| "learning_rate": 9.666353127604845e-06, |
| "loss": 2.1566, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.1440523826846126, |
| "grad_norm": 1.8758800889845013, |
| "learning_rate": 9.66211088750378e-06, |
| "loss": 2.1654, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.1447799199708985, |
| "grad_norm": 1.8952323085883283, |
| "learning_rate": 9.657842790858235e-06, |
| "loss": 2.2293, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.14550745725718442, |
| "grad_norm": 1.527686543636565, |
| "learning_rate": 9.65354886133951e-06, |
| "loss": 2.2333, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.14623499454347036, |
| "grad_norm": 2.097008587133337, |
| "learning_rate": 9.64922912276218e-06, |
| "loss": 2.2221, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.14696253182975627, |
| "grad_norm": 1.7030718610790336, |
| "learning_rate": 9.644883599083959e-06, |
| "loss": 2.2129, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.1476900691160422, |
| "grad_norm": 2.368365632717726, |
| "learning_rate": 9.640512314405563e-06, |
| "loss": 2.234, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.14841760640232812, |
| "grad_norm": 1.880961780226356, |
| "learning_rate": 9.636115292970587e-06, |
| "loss": 2.1554, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.14914514368861403, |
| "grad_norm": 2.3389376664065287, |
| "learning_rate": 9.63169255916536e-06, |
| "loss": 2.245, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.14987268097489997, |
| "grad_norm": 1.9804703905200902, |
| "learning_rate": 9.627244137518821e-06, |
| "loss": 2.2199, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.1506002182611859, |
| "grad_norm": 2.1750768527829174, |
| "learning_rate": 9.622770052702366e-06, |
| "loss": 2.1969, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.1513277555474718, |
| "grad_norm": 2.0242161987667577, |
| "learning_rate": 9.618270329529734e-06, |
| "loss": 2.196, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.15205529283375774, |
| "grad_norm": 2.2298649889244593, |
| "learning_rate": 9.613744992956844e-06, |
| "loss": 2.1373, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.15278283012004365, |
| "grad_norm": 2.3350565636212828, |
| "learning_rate": 9.609194068081682e-06, |
| "loss": 2.1951, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.15351036740632956, |
| "grad_norm": 1.557747949310923, |
| "learning_rate": 9.60461758014414e-06, |
| "loss": 2.139, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.1542379046926155, |
| "grad_norm": 1.7716141898834719, |
| "learning_rate": 9.60001555452589e-06, |
| "loss": 2.1823, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.1549654419789014, |
| "grad_norm": 1.462177931771256, |
| "learning_rate": 9.595388016750236e-06, |
| "loss": 2.2043, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.15569297926518735, |
| "grad_norm": 1.4834208523290262, |
| "learning_rate": 9.590734992481978e-06, |
| "loss": 2.2348, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.15642051655147327, |
| "grad_norm": 1.6233545338448112, |
| "learning_rate": 9.586056507527266e-06, |
| "loss": 2.1397, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.15714805383775918, |
| "grad_norm": 1.8871156366419548, |
| "learning_rate": 9.581352587833455e-06, |
| "loss": 2.2117, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.15787559112404512, |
| "grad_norm": 1.5564465487258499, |
| "learning_rate": 9.576623259488966e-06, |
| "loss": 2.2173, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.15860312841033103, |
| "grad_norm": 1.3672789573676138, |
| "learning_rate": 9.571868548723137e-06, |
| "loss": 2.1513, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.15933066569661694, |
| "grad_norm": 2.41665190233232, |
| "learning_rate": 9.567088481906084e-06, |
| "loss": 2.1893, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.16005820298290288, |
| "grad_norm": 1.5217704051852425, |
| "learning_rate": 9.562283085548546e-06, |
| "loss": 2.2071, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.1607857402691888, |
| "grad_norm": 1.7545928854710164, |
| "learning_rate": 9.55745238630174e-06, |
| "loss": 2.1944, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.1615132775554747, |
| "grad_norm": 1.341908696911756, |
| "learning_rate": 9.552596410957224e-06, |
| "loss": 2.2336, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.16224081484176064, |
| "grad_norm": 1.6189063753553028, |
| "learning_rate": 9.547715186446732e-06, |
| "loss": 2.1882, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.16296835212804656, |
| "grad_norm": 1.6872635637497055, |
| "learning_rate": 9.542808739842034e-06, |
| "loss": 2.2141, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.1636958894143325, |
| "grad_norm": 1.6381480046906143, |
| "learning_rate": 9.537877098354787e-06, |
| "loss": 2.2367, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.1644234267006184, |
| "grad_norm": 1.6166356354012765, |
| "learning_rate": 9.532920289336378e-06, |
| "loss": 2.1759, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.16515096398690432, |
| "grad_norm": 2.561546132872073, |
| "learning_rate": 9.52793834027778e-06, |
| "loss": 2.2114, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.16587850127319026, |
| "grad_norm": 1.4895519618659814, |
| "learning_rate": 9.522931278809393e-06, |
| "loss": 2.2076, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.16660603855947617, |
| "grad_norm": 1.4028314670269106, |
| "learning_rate": 9.517899132700889e-06, |
| "loss": 2.1887, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.16733357584576208, |
| "grad_norm": 1.7663091760320573, |
| "learning_rate": 9.512841929861069e-06, |
| "loss": 2.2005, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.16806111313204802, |
| "grad_norm": 1.6922932498061918, |
| "learning_rate": 9.507759698337698e-06, |
| "loss": 2.1554, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.16878865041833394, |
| "grad_norm": 1.6345258058405243, |
| "learning_rate": 9.50265246631735e-06, |
| "loss": 2.1903, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.16951618770461988, |
| "grad_norm": 1.515813866060611, |
| "learning_rate": 9.49752026212526e-06, |
| "loss": 2.2268, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.1702437249909058, |
| "grad_norm": 1.8331281614451886, |
| "learning_rate": 9.492363114225156e-06, |
| "loss": 2.2079, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.1709712622771917, |
| "grad_norm": 1.4423645928995934, |
| "learning_rate": 9.487181051219107e-06, |
| "loss": 2.1809, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.17169879956347764, |
| "grad_norm": 1.7178706823962022, |
| "learning_rate": 9.481974101847371e-06, |
| "loss": 2.1769, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.17242633684976355, |
| "grad_norm": 1.888610932855882, |
| "learning_rate": 9.476742294988214e-06, |
| "loss": 2.1868, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.17315387413604946, |
| "grad_norm": 1.452915043299178, |
| "learning_rate": 9.471485659657782e-06, |
| "loss": 2.2193, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.1738814114223354, |
| "grad_norm": 1.5293848443309745, |
| "learning_rate": 9.466204225009905e-06, |
| "loss": 2.1811, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.17460894870862131, |
| "grad_norm": 1.5433952042546901, |
| "learning_rate": 9.460898020335964e-06, |
| "loss": 2.2406, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.17533648599490723, |
| "grad_norm": 1.4510435811175628, |
| "learning_rate": 9.455567075064715e-06, |
| "loss": 2.2222, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.17606402328119317, |
| "grad_norm": 1.5139877812880527, |
| "learning_rate": 9.450211418762123e-06, |
| "loss": 2.195, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.17679156056747908, |
| "grad_norm": 1.376158748717973, |
| "learning_rate": 9.444831081131209e-06, |
| "loss": 2.2291, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.17751909785376502, |
| "grad_norm": 1.6584629058122018, |
| "learning_rate": 9.439426092011877e-06, |
| "loss": 2.1576, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.17824663514005093, |
| "grad_norm": 1.794121606128276, |
| "learning_rate": 9.433996481380747e-06, |
| "loss": 2.2483, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.17897417242633684, |
| "grad_norm": 1.3376769119817673, |
| "learning_rate": 9.428542279351e-06, |
| "loss": 2.1706, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.17970170971262278, |
| "grad_norm": 1.7822658414273862, |
| "learning_rate": 9.423063516172195e-06, |
| "loss": 2.2478, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.1804292469989087, |
| "grad_norm": 1.3332375488317492, |
| "learning_rate": 9.417560222230115e-06, |
| "loss": 2.1941, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.1811567842851946, |
| "grad_norm": 1.8874563547747, |
| "learning_rate": 9.412032428046594e-06, |
| "loss": 2.2222, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.18188432157148055, |
| "grad_norm": 1.4438456592645297, |
| "learning_rate": 9.40648016427934e-06, |
| "loss": 2.1695, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.18261185885776646, |
| "grad_norm": 1.53967957798226, |
| "learning_rate": 9.400903461721783e-06, |
| "loss": 2.1682, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.18333939614405237, |
| "grad_norm": 1.4287131829796078, |
| "learning_rate": 9.395302351302881e-06, |
| "loss": 2.213, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.1840669334303383, |
| "grad_norm": 1.7632259286712169, |
| "learning_rate": 9.38967686408697e-06, |
| "loss": 2.1377, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.18479447071662422, |
| "grad_norm": 1.549901098906108, |
| "learning_rate": 9.384027031273575e-06, |
| "loss": 2.1412, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.18552200800291016, |
| "grad_norm": 1.3651034612955162, |
| "learning_rate": 9.37835288419725e-06, |
| "loss": 2.1985, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.18624954528919607, |
| "grad_norm": 1.5072473907035329, |
| "learning_rate": 9.372654454327394e-06, |
| "loss": 2.1843, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.18697708257548198, |
| "grad_norm": 1.7799488412583535, |
| "learning_rate": 9.366931773268083e-06, |
| "loss": 2.1649, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.18770461986176792, |
| "grad_norm": 1.4893473914063207, |
| "learning_rate": 9.361184872757894e-06, |
| "loss": 2.2202, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.18843215714805384, |
| "grad_norm": 1.5304343937588376, |
| "learning_rate": 9.355413784669722e-06, |
| "loss": 2.1655, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.18915969443433975, |
| "grad_norm": 1.3086425612708208, |
| "learning_rate": 9.349618541010616e-06, |
| "loss": 2.1999, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.1898872317206257, |
| "grad_norm": 1.4216843105367578, |
| "learning_rate": 9.343799173921591e-06, |
| "loss": 2.1698, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.1906147690069116, |
| "grad_norm": 1.3292751193395729, |
| "learning_rate": 9.337955715677452e-06, |
| "loss": 2.1372, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.19134230629319754, |
| "grad_norm": 1.496179279166623, |
| "learning_rate": 9.332088198686618e-06, |
| "loss": 2.1554, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.19206984357948345, |
| "grad_norm": 1.474351638026655, |
| "learning_rate": 9.326196655490935e-06, |
| "loss": 2.1968, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.19279738086576936, |
| "grad_norm": 1.6409542250950013, |
| "learning_rate": 9.32028111876551e-06, |
| "loss": 2.1943, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.1935249181520553, |
| "grad_norm": 1.6888420163354685, |
| "learning_rate": 9.314341621318512e-06, |
| "loss": 2.2244, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.19425245543834121, |
| "grad_norm": 1.4494064790337373, |
| "learning_rate": 9.308378196091006e-06, |
| "loss": 2.2073, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.19497999272462713, |
| "grad_norm": 1.4538249282337905, |
| "learning_rate": 9.302390876156756e-06, |
| "loss": 2.2282, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.19570753001091307, |
| "grad_norm": 1.4843118954742103, |
| "learning_rate": 9.296379694722051e-06, |
| "loss": 2.1769, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.19643506729719898, |
| "grad_norm": 1.6020059875422126, |
| "learning_rate": 9.29034468512552e-06, |
| "loss": 2.181, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.1971626045834849, |
| "grad_norm": 1.5715088934862171, |
| "learning_rate": 9.284285880837947e-06, |
| "loss": 2.2187, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.19789014186977083, |
| "grad_norm": 1.3806795102455487, |
| "learning_rate": 9.278203315462078e-06, |
| "loss": 2.1691, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.19861767915605674, |
| "grad_norm": 1.4921891885676057, |
| "learning_rate": 9.272097022732444e-06, |
| "loss": 2.1588, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.19934521644234268, |
| "grad_norm": 1.6009588514320718, |
| "learning_rate": 9.26596703651517e-06, |
| "loss": 2.1602, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.2000727537286286, |
| "grad_norm": 1.3637408490300853, |
| "learning_rate": 9.259813390807788e-06, |
| "loss": 2.1776, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.2008002910149145, |
| "grad_norm": 1.666708428684375, |
| "learning_rate": 9.253636119739046e-06, |
| "loss": 2.1571, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.20152782830120045, |
| "grad_norm": 1.6890538727100897, |
| "learning_rate": 9.247435257568724e-06, |
| "loss": 2.1871, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.20225536558748636, |
| "grad_norm": 1.472406127157111, |
| "learning_rate": 9.241210838687438e-06, |
| "loss": 2.1659, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.20298290287377227, |
| "grad_norm": 1.99532715556326, |
| "learning_rate": 9.23496289761645e-06, |
| "loss": 2.2414, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.2037104401600582, |
| "grad_norm": 1.632025214846479, |
| "learning_rate": 9.228691469007487e-06, |
| "loss": 2.1852, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.20443797744634412, |
| "grad_norm": 1.9471642003418521, |
| "learning_rate": 9.222396587642528e-06, |
| "loss": 2.1996, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.20516551473263006, |
| "grad_norm": 1.6939593957799761, |
| "learning_rate": 9.216078288433632e-06, |
| "loss": 2.234, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.20589305201891597, |
| "grad_norm": 1.6546532687407742, |
| "learning_rate": 9.209736606422736e-06, |
| "loss": 2.2038, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.20662058930520188, |
| "grad_norm": 1.6921033877152574, |
| "learning_rate": 9.203371576781457e-06, |
| "loss": 2.1592, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.20734812659148782, |
| "grad_norm": 1.9393189311581847, |
| "learning_rate": 9.1969832348109e-06, |
| "loss": 2.1711, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.20807566387777374, |
| "grad_norm": 1.712493550054773, |
| "learning_rate": 9.190571615941462e-06, |
| "loss": 2.2575, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.20880320116405965, |
| "grad_norm": 1.3814311046647583, |
| "learning_rate": 9.18413675573264e-06, |
| "loss": 2.2285, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.2095307384503456, |
| "grad_norm": 1.898478684842769, |
| "learning_rate": 9.177678689872831e-06, |
| "loss": 2.1563, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.2102582757366315, |
| "grad_norm": 1.4946721876330928, |
| "learning_rate": 9.171197454179124e-06, |
| "loss": 2.2157, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.2109858130229174, |
| "grad_norm": 1.6336273362064742, |
| "learning_rate": 9.16469308459712e-06, |
| "loss": 2.2124, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.21171335030920335, |
| "grad_norm": 1.4908709457226583, |
| "learning_rate": 9.158165617200717e-06, |
| "loss": 2.1911, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.21244088759548926, |
| "grad_norm": 1.3493887453363558, |
| "learning_rate": 9.151615088191918e-06, |
| "loss": 2.2009, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.2131684248817752, |
| "grad_norm": 1.502238823825621, |
| "learning_rate": 9.14504153390063e-06, |
| "loss": 2.1473, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.21389596216806112, |
| "grad_norm": 1.5170516607198907, |
| "learning_rate": 9.138444990784455e-06, |
| "loss": 2.2, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.21462349945434703, |
| "grad_norm": 1.3075648105696178, |
| "learning_rate": 9.131825495428496e-06, |
| "loss": 2.2107, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.21535103674063297, |
| "grad_norm": 1.5657553421243682, |
| "learning_rate": 9.125183084545158e-06, |
| "loss": 2.179, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.21607857402691888, |
| "grad_norm": 1.513376266046179, |
| "learning_rate": 9.118517794973925e-06, |
| "loss": 2.2405, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.2168061113132048, |
| "grad_norm": 1.7142209859560977, |
| "learning_rate": 9.111829663681182e-06, |
| "loss": 2.2293, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.21753364859949073, |
| "grad_norm": 1.4146055271200115, |
| "learning_rate": 9.105118727759984e-06, |
| "loss": 2.2409, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.21826118588577664, |
| "grad_norm": 1.3274975658992578, |
| "learning_rate": 9.098385024429875e-06, |
| "loss": 2.1765, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.21898872317206255, |
| "grad_norm": 1.5492595246341485, |
| "learning_rate": 9.09162859103666e-06, |
| "loss": 2.223, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.2197162604583485, |
| "grad_norm": 1.4496566082639615, |
| "learning_rate": 9.08484946505221e-06, |
| "loss": 2.2214, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.2204437977446344, |
| "grad_norm": 1.4095738528344526, |
| "learning_rate": 9.078047684074254e-06, |
| "loss": 2.2012, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.22117133503092035, |
| "grad_norm": 1.6379763890733414, |
| "learning_rate": 9.071223285826166e-06, |
| "loss": 2.1833, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.22189887231720626, |
| "grad_norm": 1.500367513516407, |
| "learning_rate": 9.064376308156754e-06, |
| "loss": 2.1929, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.22262640960349217, |
| "grad_norm": 1.4210992503367728, |
| "learning_rate": 9.057506789040063e-06, |
| "loss": 2.189, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.2233539468897781, |
| "grad_norm": 1.3727183620185701, |
| "learning_rate": 9.050614766575147e-06, |
| "loss": 2.183, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.22408148417606402, |
| "grad_norm": 1.2406796013559476, |
| "learning_rate": 9.043700278985867e-06, |
| "loss": 2.1905, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.22480902146234993, |
| "grad_norm": 1.597577469216686, |
| "learning_rate": 9.03676336462068e-06, |
| "loss": 2.2035, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.22553655874863587, |
| "grad_norm": 1.4225372502609417, |
| "learning_rate": 9.029804061952426e-06, |
| "loss": 2.1641, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.22626409603492179, |
| "grad_norm": 1.2629130268864135, |
| "learning_rate": 9.022822409578106e-06, |
| "loss": 2.1931, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.22699163332120773, |
| "grad_norm": 1.3676865135238048, |
| "learning_rate": 9.015818446218683e-06, |
| "loss": 2.1984, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.22771917060749364, |
| "grad_norm": 1.3928409928251604, |
| "learning_rate": 9.008792210718854e-06, |
| "loss": 2.1726, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.22844670789377955, |
| "grad_norm": 1.6216502611547985, |
| "learning_rate": 9.00174374204684e-06, |
| "loss": 2.1881, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.2291742451800655, |
| "grad_norm": 4.52807672247779, |
| "learning_rate": 8.994673079294171e-06, |
| "loss": 2.2517, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.2299017824663514, |
| "grad_norm": 1.8101091064623136, |
| "learning_rate": 8.987580261675466e-06, |
| "loss": 2.1813, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.2306293197526373, |
| "grad_norm": 1.5021589664831807, |
| "learning_rate": 8.98046532852822e-06, |
| "loss": 2.1709, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.23135685703892325, |
| "grad_norm": 1.2800291466011613, |
| "learning_rate": 8.973328319312577e-06, |
| "loss": 2.1939, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.23208439432520916, |
| "grad_norm": 1.7339241475054041, |
| "learning_rate": 8.966169273611125e-06, |
| "loss": 2.1608, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.23281193161149508, |
| "grad_norm": 1.4736261516509148, |
| "learning_rate": 8.958988231128665e-06, |
| "loss": 2.1797, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.23353946889778102, |
| "grad_norm": 1.4139860896226535, |
| "learning_rate": 8.95178523169199e-06, |
| "loss": 2.2057, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.23426700618406693, |
| "grad_norm": 1.6671639145660242, |
| "learning_rate": 8.944560315249676e-06, |
| "loss": 2.1635, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.23499454347035287, |
| "grad_norm": 1.598924274653862, |
| "learning_rate": 8.937313521871846e-06, |
| "loss": 2.224, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.23572208075663878, |
| "grad_norm": 1.8671189985221208, |
| "learning_rate": 8.930044891749962e-06, |
| "loss": 2.1746, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.2364496180429247, |
| "grad_norm": 1.3561015874043998, |
| "learning_rate": 8.922754465196591e-06, |
| "loss": 2.2098, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.23717715532921063, |
| "grad_norm": 2.0307652448169926, |
| "learning_rate": 8.915442282645183e-06, |
| "loss": 2.16, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.23790469261549654, |
| "grad_norm": 1.6434640576558963, |
| "learning_rate": 8.908108384649856e-06, |
| "loss": 2.2079, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.23863222990178246, |
| "grad_norm": 1.4279872738065433, |
| "learning_rate": 8.900752811885152e-06, |
| "loss": 2.2, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.2393597671880684, |
| "grad_norm": 1.6970394563321913, |
| "learning_rate": 8.893375605145837e-06, |
| "loss": 2.1893, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.2400873044743543, |
| "grad_norm": 1.5059035467757602, |
| "learning_rate": 8.885976805346651e-06, |
| "loss": 2.1675, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.24081484176064022, |
| "grad_norm": 1.5043481771028886, |
| "learning_rate": 8.8785564535221e-06, |
| "loss": 2.1719, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.24154237904692616, |
| "grad_norm": 1.4989135816810697, |
| "learning_rate": 8.871114590826211e-06, |
| "loss": 2.1782, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.24226991633321207, |
| "grad_norm": 1.3065841727798744, |
| "learning_rate": 8.86365125853232e-06, |
| "loss": 2.2057, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.242997453619498, |
| "grad_norm": 1.8903212768191282, |
| "learning_rate": 8.85616649803283e-06, |
| "loss": 2.1798, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.24372499090578392, |
| "grad_norm": 1.419329884971465, |
| "learning_rate": 8.84866035083899e-06, |
| "loss": 2.2423, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.24445252819206983, |
| "grad_norm": 1.5054024361601408, |
| "learning_rate": 8.841132858580661e-06, |
| "loss": 2.1573, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.24518006547835577, |
| "grad_norm": 1.7230388656951334, |
| "learning_rate": 8.833584063006088e-06, |
| "loss": 2.0773, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.2459076027646417, |
| "grad_norm": 1.4222728150757113, |
| "learning_rate": 8.826014005981662e-06, |
| "loss": 2.1373, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.2466351400509276, |
| "grad_norm": 1.4823414448646695, |
| "learning_rate": 8.818422729491693e-06, |
| "loss": 2.2306, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.24736267733721354, |
| "grad_norm": 1.494452896957868, |
| "learning_rate": 8.810810275638183e-06, |
| "loss": 2.1501, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.24809021462349945, |
| "grad_norm": 1.4702282798707498, |
| "learning_rate": 8.803176686640577e-06, |
| "loss": 2.1816, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.2488177519097854, |
| "grad_norm": 1.3618536433099357, |
| "learning_rate": 8.795522004835543e-06, |
| "loss": 2.2072, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.2495452891960713, |
| "grad_norm": 1.3113305408053801, |
| "learning_rate": 8.787846272676728e-06, |
| "loss": 2.2214, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.2502728264823572, |
| "grad_norm": 1.9543942178819584, |
| "learning_rate": 8.780149532734531e-06, |
| "loss": 2.1721, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.25100036376864315, |
| "grad_norm": 1.7571304927742484, |
| "learning_rate": 8.772431827695862e-06, |
| "loss": 2.1813, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.25172790105492904, |
| "grad_norm": 2.204086860989567, |
| "learning_rate": 8.764693200363897e-06, |
| "loss": 2.1743, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.252455438341215, |
| "grad_norm": 1.5999845528923904, |
| "learning_rate": 8.756933693657863e-06, |
| "loss": 2.1814, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.2531829756275009, |
| "grad_norm": 1.8635454967928038, |
| "learning_rate": 8.749153350612774e-06, |
| "loss": 2.2211, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.25391051291378686, |
| "grad_norm": 1.4428311175390514, |
| "learning_rate": 8.74135221437921e-06, |
| "loss": 2.2052, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.25463805020007274, |
| "grad_norm": 2.106620577197869, |
| "learning_rate": 8.733530328223076e-06, |
| "loss": 2.1838, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.2553655874863587, |
| "grad_norm": 1.6014903315993732, |
| "learning_rate": 8.725687735525347e-06, |
| "loss": 2.2324, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.2560931247726446, |
| "grad_norm": 1.5943126144634416, |
| "learning_rate": 8.71782447978185e-06, |
| "loss": 2.1376, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.2568206620589305, |
| "grad_norm": 1.6101881809656868, |
| "learning_rate": 8.709940604603006e-06, |
| "loss": 2.1742, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.25754819934521644, |
| "grad_norm": 1.3439129152348759, |
| "learning_rate": 8.702036153713594e-06, |
| "loss": 2.238, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.2582757366315024, |
| "grad_norm": 1.569010680613162, |
| "learning_rate": 8.694111170952508e-06, |
| "loss": 2.2009, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.25900327391778827, |
| "grad_norm": 1.542834123668552, |
| "learning_rate": 8.686165700272513e-06, |
| "loss": 2.1963, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.2597308112040742, |
| "grad_norm": 1.8941251098493939, |
| "learning_rate": 8.678199785740003e-06, |
| "loss": 2.1238, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.26045834849036015, |
| "grad_norm": 1.497786911579719, |
| "learning_rate": 8.670213471534759e-06, |
| "loss": 2.1888, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.26118588577664603, |
| "grad_norm": 1.5438129971287569, |
| "learning_rate": 8.662206801949694e-06, |
| "loss": 2.2097, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.26191342306293197, |
| "grad_norm": 1.7304337859507464, |
| "learning_rate": 8.65417982139062e-06, |
| "loss": 2.1145, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.2626409603492179, |
| "grad_norm": 1.324711597748057, |
| "learning_rate": 8.646132574375994e-06, |
| "loss": 2.2167, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.2633684976355038, |
| "grad_norm": 1.764099789083863, |
| "learning_rate": 8.638065105536669e-06, |
| "loss": 2.2043, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.26409603492178974, |
| "grad_norm": 1.4187103713769413, |
| "learning_rate": 8.629977459615655e-06, |
| "loss": 2.1899, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.2648235722080757, |
| "grad_norm": 1.7175448008154528, |
| "learning_rate": 8.621869681467865e-06, |
| "loss": 2.2032, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.26555110949436156, |
| "grad_norm": 1.584647884308209, |
| "learning_rate": 8.613741816059867e-06, |
| "loss": 2.1902, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.2662786467806475, |
| "grad_norm": 1.496402910794325, |
| "learning_rate": 8.605593908469635e-06, |
| "loss": 2.1673, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.26700618406693344, |
| "grad_norm": 1.698578959322338, |
| "learning_rate": 8.597426003886295e-06, |
| "loss": 2.2301, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.2677337213532194, |
| "grad_norm": 1.4974994622957056, |
| "learning_rate": 8.58923814760989e-06, |
| "loss": 2.2378, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.26846125863950526, |
| "grad_norm": 1.646949899011469, |
| "learning_rate": 8.581030385051105e-06, |
| "loss": 2.2092, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.2691887959257912, |
| "grad_norm": 1.3378903417269772, |
| "learning_rate": 8.572802761731031e-06, |
| "loss": 2.1916, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.26991633321207714, |
| "grad_norm": 1.4087448945095493, |
| "learning_rate": 8.564555323280913e-06, |
| "loss": 2.2059, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.270643870498363, |
| "grad_norm": 1.5838365104583751, |
| "learning_rate": 8.556288115441887e-06, |
| "loss": 2.2018, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.27137140778464897, |
| "grad_norm": 1.5540637646051088, |
| "learning_rate": 8.548001184064733e-06, |
| "loss": 2.2461, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.2720989450709349, |
| "grad_norm": 1.7111502870886097, |
| "learning_rate": 8.539694575109626e-06, |
| "loss": 2.1165, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.2728264823572208, |
| "grad_norm": 1.5424590640718876, |
| "learning_rate": 8.531368334645865e-06, |
| "loss": 2.2229, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.27355401964350673, |
| "grad_norm": 1.812143369112376, |
| "learning_rate": 8.523022508851634e-06, |
| "loss": 2.1401, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.27428155692979267, |
| "grad_norm": 1.5062727694529647, |
| "learning_rate": 8.514657144013738e-06, |
| "loss": 2.2003, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.27500909421607855, |
| "grad_norm": 1.413424435228159, |
| "learning_rate": 8.506272286527346e-06, |
| "loss": 2.1805, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.2757366315023645, |
| "grad_norm": 1.5890140441579645, |
| "learning_rate": 8.497867982895741e-06, |
| "loss": 2.2219, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.27646416878865043, |
| "grad_norm": 1.281171640918851, |
| "learning_rate": 8.489444279730046e-06, |
| "loss": 2.1925, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.2771917060749363, |
| "grad_norm": 1.4263623146922937, |
| "learning_rate": 8.481001223748986e-06, |
| "loss": 2.1471, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.27791924336122226, |
| "grad_norm": 2.1371121352259546, |
| "learning_rate": 8.47253886177861e-06, |
| "loss": 2.1793, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.2786467806475082, |
| "grad_norm": 1.3412182076022034, |
| "learning_rate": 8.464057240752046e-06, |
| "loss": 2.2298, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.2793743179337941, |
| "grad_norm": 2.071607077435448, |
| "learning_rate": 8.455556407709235e-06, |
| "loss": 2.2543, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.28010185522008, |
| "grad_norm": 1.9534003413176857, |
| "learning_rate": 8.447036409796663e-06, |
| "loss": 2.1475, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.28082939250636596, |
| "grad_norm": 1.6769392968093242, |
| "learning_rate": 8.438497294267117e-06, |
| "loss": 2.1668, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.2815569297926519, |
| "grad_norm": 1.557532120926547, |
| "learning_rate": 8.429939108479403e-06, |
| "loss": 2.134, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.2822844670789378, |
| "grad_norm": 2.1612204380901043, |
| "learning_rate": 8.421361899898095e-06, |
| "loss": 2.1667, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.2830120043652237, |
| "grad_norm": 1.4942618996422175, |
| "learning_rate": 8.412765716093273e-06, |
| "loss": 2.1555, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.28373954165150966, |
| "grad_norm": 1.4344662772626562, |
| "learning_rate": 8.404150604740248e-06, |
| "loss": 2.2251, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.28446707893779555, |
| "grad_norm": 1.2683804314574985, |
| "learning_rate": 8.395516613619315e-06, |
| "loss": 2.1955, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.2851946162240815, |
| "grad_norm": 1.3922747189250564, |
| "learning_rate": 8.386863790615472e-06, |
| "loss": 2.2443, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.2859221535103674, |
| "grad_norm": 1.5881319507851879, |
| "learning_rate": 8.378192183718158e-06, |
| "loss": 2.1906, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.2866496907966533, |
| "grad_norm": 1.473240672349657, |
| "learning_rate": 8.369501841021e-06, |
| "loss": 2.1416, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.28737722808293925, |
| "grad_norm": 1.4687165410998146, |
| "learning_rate": 8.360792810721522e-06, |
| "loss": 2.1452, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.2881047653692252, |
| "grad_norm": 1.579771253507425, |
| "learning_rate": 8.352065141120902e-06, |
| "loss": 2.2256, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.2888323026555111, |
| "grad_norm": 1.5712685675891773, |
| "learning_rate": 8.343318880623688e-06, |
| "loss": 2.2189, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.289559839941797, |
| "grad_norm": 1.5420480636071308, |
| "learning_rate": 8.334554077737535e-06, |
| "loss": 2.2173, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.29028737722808295, |
| "grad_norm": 1.2859318999500267, |
| "learning_rate": 8.325770781072939e-06, |
| "loss": 2.2135, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.29101491451436884, |
| "grad_norm": 1.4805265877565563, |
| "learning_rate": 8.316969039342963e-06, |
| "loss": 2.2217, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.2917424518006548, |
| "grad_norm": 1.5545540894337095, |
| "learning_rate": 8.30814890136297e-06, |
| "loss": 2.1985, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.2924699890869407, |
| "grad_norm": 1.3383510734550588, |
| "learning_rate": 8.299310416050345e-06, |
| "loss": 2.1915, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.2931975263732266, |
| "grad_norm": 1.915208096416909, |
| "learning_rate": 8.290453632424236e-06, |
| "loss": 2.1534, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.29392506365951254, |
| "grad_norm": 1.664670781828968, |
| "learning_rate": 8.281578599605269e-06, |
| "loss": 2.1847, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.2946526009457985, |
| "grad_norm": 1.733732203769642, |
| "learning_rate": 8.272685366815287e-06, |
| "loss": 2.211, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.2953801382320844, |
| "grad_norm": 1.3602606814344493, |
| "learning_rate": 8.26377398337707e-06, |
| "loss": 2.1909, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.2961076755183703, |
| "grad_norm": 1.5107817144001174, |
| "learning_rate": 8.254844498714063e-06, |
| "loss": 2.1868, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.29683521280465625, |
| "grad_norm": 1.8836347431425295, |
| "learning_rate": 8.2458969623501e-06, |
| "loss": 2.2369, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.2975627500909422, |
| "grad_norm": 1.490350939718598, |
| "learning_rate": 8.23693142390914e-06, |
| "loss": 2.2138, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.29829028737722807, |
| "grad_norm": 2.1389350266540177, |
| "learning_rate": 8.227947933114971e-06, |
| "loss": 2.2343, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.299017824663514, |
| "grad_norm": 1.9494550174812935, |
| "learning_rate": 8.218946539790957e-06, |
| "loss": 2.2259, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.29974536194979995, |
| "grad_norm": 2.0558110266623886, |
| "learning_rate": 8.209927293859746e-06, |
| "loss": 2.1916, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.30047289923608583, |
| "grad_norm": 1.6565866622694128, |
| "learning_rate": 8.200890245342999e-06, |
| "loss": 2.1939, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.3012004365223718, |
| "grad_norm": 2.0124988863289284, |
| "learning_rate": 8.191835444361113e-06, |
| "loss": 2.1739, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.3019279738086577, |
| "grad_norm": 1.5885211482360033, |
| "learning_rate": 8.182762941132944e-06, |
| "loss": 2.1427, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.3026555110949436, |
| "grad_norm": 1.9064461762750247, |
| "learning_rate": 8.173672785975522e-06, |
| "loss": 2.174, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.30338304838122954, |
| "grad_norm": 1.404065532109891, |
| "learning_rate": 8.16456502930378e-06, |
| "loss": 2.1558, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.3041105856675155, |
| "grad_norm": 2.326646391688612, |
| "learning_rate": 8.155439721630265e-06, |
| "loss": 2.2072, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.30483812295380136, |
| "grad_norm": 1.7482726501120724, |
| "learning_rate": 8.146296913564872e-06, |
| "loss": 2.2442, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.3055656602400873, |
| "grad_norm": 2.337940156742116, |
| "learning_rate": 8.13713665581455e-06, |
| "loss": 2.1828, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.30629319752637324, |
| "grad_norm": 2.1322634931803286, |
| "learning_rate": 8.127958999183027e-06, |
| "loss": 2.1796, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.3070207348126591, |
| "grad_norm": 1.9097123883738019, |
| "learning_rate": 8.118763994570528e-06, |
| "loss": 2.1734, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.30774827209894506, |
| "grad_norm": 2.1980133785757396, |
| "learning_rate": 8.109551692973487e-06, |
| "loss": 2.1898, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.308475809385231, |
| "grad_norm": 1.789583137950799, |
| "learning_rate": 8.100322145484275e-06, |
| "loss": 2.1475, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.3092033466715169, |
| "grad_norm": 2.344482041800149, |
| "learning_rate": 8.091075403290905e-06, |
| "loss": 2.1686, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.3099308839578028, |
| "grad_norm": 1.8283336967274593, |
| "learning_rate": 8.081811517676759e-06, |
| "loss": 2.1467, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.31065842124408877, |
| "grad_norm": 1.8002355815592943, |
| "learning_rate": 8.072530540020294e-06, |
| "loss": 2.191, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.3113859585303747, |
| "grad_norm": 1.5927825576828603, |
| "learning_rate": 8.063232521794762e-06, |
| "loss": 2.1891, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.3121134958166606, |
| "grad_norm": 1.451643393359493, |
| "learning_rate": 8.053917514567927e-06, |
| "loss": 2.1759, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.31284103310294653, |
| "grad_norm": 1.7610648505587718, |
| "learning_rate": 8.04458557000177e-06, |
| "loss": 2.1871, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.31356857038923247, |
| "grad_norm": 1.473618048365197, |
| "learning_rate": 8.035236739852214e-06, |
| "loss": 2.1437, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.31429610767551835, |
| "grad_norm": 1.8221809772213533, |
| "learning_rate": 8.025871075968828e-06, |
| "loss": 2.2207, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.3150236449618043, |
| "grad_norm": 1.2813989706691338, |
| "learning_rate": 8.016488630294539e-06, |
| "loss": 2.2069, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.31575118224809023, |
| "grad_norm": 2.0251930508569145, |
| "learning_rate": 8.007089454865358e-06, |
| "loss": 2.156, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.3164787195343761, |
| "grad_norm": 1.4958585375734768, |
| "learning_rate": 7.997673601810071e-06, |
| "loss": 2.1811, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.31720625682066206, |
| "grad_norm": 2.1481478251388233, |
| "learning_rate": 7.988241123349965e-06, |
| "loss": 2.2135, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.317933794106948, |
| "grad_norm": 1.5434770811180365, |
| "learning_rate": 7.97879207179853e-06, |
| "loss": 2.1987, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.3186613313932339, |
| "grad_norm": 1.9931565162404845, |
| "learning_rate": 7.969326499561173e-06, |
| "loss": 2.1962, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.3193888686795198, |
| "grad_norm": 1.5486850673722312, |
| "learning_rate": 7.95984445913493e-06, |
| "loss": 2.1802, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.32011640596580576, |
| "grad_norm": 1.8117775512143597, |
| "learning_rate": 7.950346003108167e-06, |
| "loss": 2.1405, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.32084394325209165, |
| "grad_norm": 1.2724580035235387, |
| "learning_rate": 7.940831184160294e-06, |
| "loss": 2.1457, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.3215714805383776, |
| "grad_norm": 1.8000824307049166, |
| "learning_rate": 7.93130005506147e-06, |
| "loss": 2.1788, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.3222990178246635, |
| "grad_norm": 1.2920020781067392, |
| "learning_rate": 7.921752668672316e-06, |
| "loss": 2.1797, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.3230265551109494, |
| "grad_norm": 1.426393538509052, |
| "learning_rate": 7.912189077943613e-06, |
| "loss": 2.1948, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.32375409239723535, |
| "grad_norm": 1.4072389270233285, |
| "learning_rate": 7.902609335916015e-06, |
| "loss": 2.2199, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.3244816296835213, |
| "grad_norm": 1.5274959145306195, |
| "learning_rate": 7.893013495719752e-06, |
| "loss": 2.2308, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.32520916696980723, |
| "grad_norm": 1.6391686446377485, |
| "learning_rate": 7.883401610574338e-06, |
| "loss": 2.2187, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.3259367042560931, |
| "grad_norm": 1.3663896663037896, |
| "learning_rate": 7.873773733788268e-06, |
| "loss": 2.1845, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.32666424154237905, |
| "grad_norm": 1.4500864060090617, |
| "learning_rate": 7.864129918758738e-06, |
| "loss": 2.1479, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.327391778828665, |
| "grad_norm": 2.1191880844490396, |
| "learning_rate": 7.854470218971333e-06, |
| "loss": 2.1975, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.3281193161149509, |
| "grad_norm": 1.3905443689989125, |
| "learning_rate": 7.844794687999737e-06, |
| "loss": 2.2096, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.3288468534012368, |
| "grad_norm": 1.3979439713741857, |
| "learning_rate": 7.835103379505433e-06, |
| "loss": 2.1892, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.32957439068752276, |
| "grad_norm": 1.4535005426658243, |
| "learning_rate": 7.825396347237413e-06, |
| "loss": 2.232, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.33030192797380864, |
| "grad_norm": 1.5115384501349436, |
| "learning_rate": 7.815673645031871e-06, |
| "loss": 2.1301, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.3310294652600946, |
| "grad_norm": 1.3115282469872074, |
| "learning_rate": 7.805935326811913e-06, |
| "loss": 2.2099, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.3317570025463805, |
| "grad_norm": 1.5010012823097187, |
| "learning_rate": 7.796181446587244e-06, |
| "loss": 2.145, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.3324845398326664, |
| "grad_norm": 1.404187219427933, |
| "learning_rate": 7.786412058453886e-06, |
| "loss": 2.1492, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.33321207711895234, |
| "grad_norm": 1.4296996293876862, |
| "learning_rate": 7.776627216593863e-06, |
| "loss": 2.2032, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.3339396144052383, |
| "grad_norm": 1.4288606512177882, |
| "learning_rate": 7.766826975274916e-06, |
| "loss": 2.1794, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.33466715169152417, |
| "grad_norm": 1.521228766731058, |
| "learning_rate": 7.75701138885018e-06, |
| "loss": 2.1904, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.3353946889778101, |
| "grad_norm": 1.6173905994542024, |
| "learning_rate": 7.747180511757908e-06, |
| "loss": 2.1972, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.33612222626409605, |
| "grad_norm": 1.303059707860955, |
| "learning_rate": 7.737334398521149e-06, |
| "loss": 2.1815, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.33684976355038193, |
| "grad_norm": 1.6029388549317818, |
| "learning_rate": 7.727473103747456e-06, |
| "loss": 2.1548, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.33757730083666787, |
| "grad_norm": 1.4310714925242356, |
| "learning_rate": 7.717596682128578e-06, |
| "loss": 2.2169, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.3383048381229538, |
| "grad_norm": 1.5636696059731714, |
| "learning_rate": 7.707705188440165e-06, |
| "loss": 2.1424, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.33903237540923975, |
| "grad_norm": 1.5700744253848755, |
| "learning_rate": 7.697798677541448e-06, |
| "loss": 2.2076, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.33975991269552563, |
| "grad_norm": 1.4436465913463896, |
| "learning_rate": 7.687877204374957e-06, |
| "loss": 2.2143, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.3404874499818116, |
| "grad_norm": 1.765254733044937, |
| "learning_rate": 7.677940823966196e-06, |
| "loss": 2.2218, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.3412149872680975, |
| "grad_norm": 1.4691603784424878, |
| "learning_rate": 7.667989591423349e-06, |
| "loss": 2.1976, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.3419425245543834, |
| "grad_norm": 1.3603055735560683, |
| "learning_rate": 7.658023561936966e-06, |
| "loss": 2.2193, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.34267006184066934, |
| "grad_norm": 1.4465116666140336, |
| "learning_rate": 7.648042790779677e-06, |
| "loss": 2.1955, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.3433975991269553, |
| "grad_norm": 1.3063737828027806, |
| "learning_rate": 7.638047333305853e-06, |
| "loss": 2.176, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.34412513641324116, |
| "grad_norm": 1.4867284794499172, |
| "learning_rate": 7.628037244951328e-06, |
| "loss": 2.2157, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.3448526736995271, |
| "grad_norm": 1.3664879462629613, |
| "learning_rate": 7.618012581233076e-06, |
| "loss": 2.2188, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.34558021098581304, |
| "grad_norm": 1.7470519475565276, |
| "learning_rate": 7.607973397748909e-06, |
| "loss": 2.1446, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.3463077482720989, |
| "grad_norm": 1.6154246963221328, |
| "learning_rate": 7.597919750177168e-06, |
| "loss": 2.2107, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.34703528555838487, |
| "grad_norm": 1.3638455698941867, |
| "learning_rate": 7.587851694276412e-06, |
| "loss": 2.2398, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.3477628228446708, |
| "grad_norm": 1.3982722629120483, |
| "learning_rate": 7.57776928588511e-06, |
| "loss": 2.1973, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.3484903601309567, |
| "grad_norm": 1.357914570235351, |
| "learning_rate": 7.56767258092133e-06, |
| "loss": 2.1843, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.34921789741724263, |
| "grad_norm": 1.6309991413499052, |
| "learning_rate": 7.557561635382433e-06, |
| "loss": 2.1556, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.34994543470352857, |
| "grad_norm": 1.6193509733209566, |
| "learning_rate": 7.54743650534476e-06, |
| "loss": 2.1584, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.35067297198981445, |
| "grad_norm": 1.2099067965626962, |
| "learning_rate": 7.537297246963316e-06, |
| "loss": 2.2221, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.3514005092761004, |
| "grad_norm": 1.5900525315340603, |
| "learning_rate": 7.5271439164714695e-06, |
| "loss": 2.1933, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.35212804656238633, |
| "grad_norm": 1.5438986652266296, |
| "learning_rate": 7.5169765701806295e-06, |
| "loss": 2.1357, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.35285558384867227, |
| "grad_norm": 1.3208757653456373, |
| "learning_rate": 7.506795264479941e-06, |
| "loss": 2.1879, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.35358312113495816, |
| "grad_norm": 1.4301990530187678, |
| "learning_rate": 7.4966000558359675e-06, |
| "loss": 2.1783, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.3543106584212441, |
| "grad_norm": 1.4812716326736017, |
| "learning_rate": 7.486391000792379e-06, |
| "loss": 2.1626, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.35503819570753004, |
| "grad_norm": 1.417579122162795, |
| "learning_rate": 7.476168155969643e-06, |
| "loss": 2.2251, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.3557657329938159, |
| "grad_norm": 1.3774973192524718, |
| "learning_rate": 7.465931578064703e-06, |
| "loss": 2.1823, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.35649327028010186, |
| "grad_norm": 1.5296783044040714, |
| "learning_rate": 7.455681323850669e-06, |
| "loss": 2.1694, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.3572208075663878, |
| "grad_norm": 1.6575720924854538, |
| "learning_rate": 7.4454174501765e-06, |
| "loss": 2.1799, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.3579483448526737, |
| "grad_norm": 1.546046770665215, |
| "learning_rate": 7.4351400139666894e-06, |
| "loss": 2.2169, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.3586758821389596, |
| "grad_norm": 1.540389159578989, |
| "learning_rate": 7.424849072220953e-06, |
| "loss": 2.2173, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.35940341942524556, |
| "grad_norm": 1.4043240669644461, |
| "learning_rate": 7.414544682013907e-06, |
| "loss": 2.2141, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.36013095671153145, |
| "grad_norm": 1.8638300064501658, |
| "learning_rate": 7.404226900494753e-06, |
| "loss": 2.1952, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.3608584939978174, |
| "grad_norm": 1.342885944687377, |
| "learning_rate": 7.3938957848869684e-06, |
| "loss": 2.1466, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.3615860312841033, |
| "grad_norm": 1.5882952997860806, |
| "learning_rate": 7.3835513924879755e-06, |
| "loss": 2.1954, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.3623135685703892, |
| "grad_norm": 1.2373309543768045, |
| "learning_rate": 7.373193780668835e-06, |
| "loss": 2.1891, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.36304110585667515, |
| "grad_norm": 2.0443919878347168, |
| "learning_rate": 7.36282300687392e-06, |
| "loss": 2.1675, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.3637686431429611, |
| "grad_norm": 1.3887286548737836, |
| "learning_rate": 7.35243912862061e-06, |
| "loss": 2.1893, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.364496180429247, |
| "grad_norm": 2.719239252549507, |
| "learning_rate": 7.342042203498952e-06, |
| "loss": 2.2446, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.3652237177155329, |
| "grad_norm": 1.7604260800939213, |
| "learning_rate": 7.33163228917136e-06, |
| "loss": 2.19, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.36595125500181885, |
| "grad_norm": 1.8242987699374724, |
| "learning_rate": 7.321209443372284e-06, |
| "loss": 2.1801, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.36667879228810474, |
| "grad_norm": 1.7293101420517358, |
| "learning_rate": 7.310773723907895e-06, |
| "loss": 2.1508, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.3674063295743907, |
| "grad_norm": 2.0764703155554733, |
| "learning_rate": 7.300325188655762e-06, |
| "loss": 2.173, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.3681338668606766, |
| "grad_norm": 1.588249317170812, |
| "learning_rate": 7.289863895564531e-06, |
| "loss": 2.1604, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.36886140414696256, |
| "grad_norm": 2.1777553622237917, |
| "learning_rate": 7.279389902653606e-06, |
| "loss": 2.1974, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.36958894143324844, |
| "grad_norm": 1.7809060067522566, |
| "learning_rate": 7.268903268012823e-06, |
| "loss": 2.1729, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.3703164787195344, |
| "grad_norm": 1.9174174704695481, |
| "learning_rate": 7.258404049802135e-06, |
| "loss": 2.2177, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.3710440160058203, |
| "grad_norm": 2.2243845689710615, |
| "learning_rate": 7.247892306251276e-06, |
| "loss": 2.1902, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.3717715532921062, |
| "grad_norm": 1.7631413862695091, |
| "learning_rate": 7.237368095659459e-06, |
| "loss": 2.1981, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.37249909057839214, |
| "grad_norm": 1.8123427455098935, |
| "learning_rate": 7.226831476395028e-06, |
| "loss": 2.1863, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.3732266278646781, |
| "grad_norm": 1.5649100589426284, |
| "learning_rate": 7.216282506895155e-06, |
| "loss": 2.2349, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.37395416515096397, |
| "grad_norm": 1.9927093639757405, |
| "learning_rate": 7.2057212456655055e-06, |
| "loss": 2.2133, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.3746817024372499, |
| "grad_norm": 1.4433401145184241, |
| "learning_rate": 7.195147751279915e-06, |
| "loss": 2.1875, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.37540923972353585, |
| "grad_norm": 1.4702187969117708, |
| "learning_rate": 7.184562082380069e-06, |
| "loss": 2.1786, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.37613677700982173, |
| "grad_norm": 1.3663494773812568, |
| "learning_rate": 7.173964297675168e-06, |
| "loss": 2.2144, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.37686431429610767, |
| "grad_norm": 1.346520463150049, |
| "learning_rate": 7.163354455941614e-06, |
| "loss": 2.2495, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.3775918515823936, |
| "grad_norm": 1.5135411330745128, |
| "learning_rate": 7.152732616022675e-06, |
| "loss": 2.1856, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.3783193888686795, |
| "grad_norm": 1.631804860578378, |
| "learning_rate": 7.142098836828162e-06, |
| "loss": 2.1518, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.37904692615496544, |
| "grad_norm": 1.5375776438606996, |
| "learning_rate": 7.131453177334103e-06, |
| "loss": 2.2006, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.3797744634412514, |
| "grad_norm": 1.3613025917303885, |
| "learning_rate": 7.120795696582419e-06, |
| "loss": 2.2173, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.38050200072753726, |
| "grad_norm": 3.7985413414970797, |
| "learning_rate": 7.1101264536805885e-06, |
| "loss": 2.196, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.3812295380138232, |
| "grad_norm": 1.4859354434913918, |
| "learning_rate": 7.099445507801324e-06, |
| "loss": 2.1631, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.38195707530010914, |
| "grad_norm": 1.3409270310054628, |
| "learning_rate": 7.088752918182247e-06, |
| "loss": 2.2036, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.3826846125863951, |
| "grad_norm": 1.3337351630271117, |
| "learning_rate": 7.078048744125553e-06, |
| "loss": 2.188, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.38341214987268096, |
| "grad_norm": 1.4962256806518117, |
| "learning_rate": 7.067333044997689e-06, |
| "loss": 2.159, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.3841396871589669, |
| "grad_norm": 1.6612181443786198, |
| "learning_rate": 7.0566058802290196e-06, |
| "loss": 2.1896, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.38486722444525284, |
| "grad_norm": 1.4131231742415042, |
| "learning_rate": 7.045867309313499e-06, |
| "loss": 2.1807, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.3855947617315387, |
| "grad_norm": 1.3191716183997426, |
| "learning_rate": 7.035117391808341e-06, |
| "loss": 2.159, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.38632229901782467, |
| "grad_norm": 1.4960990390145708, |
| "learning_rate": 7.024356187333692e-06, |
| "loss": 2.0966, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.3870498363041106, |
| "grad_norm": 1.429956783351808, |
| "learning_rate": 7.01358375557229e-06, |
| "loss": 2.1894, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.3877773735903965, |
| "grad_norm": 1.4572049756953669, |
| "learning_rate": 7.0028001562691475e-06, |
| "loss": 2.1212, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.38850491087668243, |
| "grad_norm": 1.5966434548753567, |
| "learning_rate": 6.9920054492312086e-06, |
| "loss": 2.1895, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.38923244816296837, |
| "grad_norm": 1.33802025979571, |
| "learning_rate": 6.981199694327024e-06, |
| "loss": 2.1844, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.38995998544925425, |
| "grad_norm": 1.3802399864961623, |
| "learning_rate": 6.97038295148642e-06, |
| "loss": 2.1677, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.3906875227355402, |
| "grad_norm": 1.6091030572317013, |
| "learning_rate": 6.959555280700162e-06, |
| "loss": 2.1643, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.39141506002182613, |
| "grad_norm": 2.7837498605385913, |
| "learning_rate": 6.948716742019616e-06, |
| "loss": 2.1977, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.392142597308112, |
| "grad_norm": 1.500029077202491, |
| "learning_rate": 6.937867395556428e-06, |
| "loss": 2.1591, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.39287013459439796, |
| "grad_norm": 1.3796722880177077, |
| "learning_rate": 6.927007301482187e-06, |
| "loss": 2.1181, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.3935976718806839, |
| "grad_norm": 1.2841069999862686, |
| "learning_rate": 6.916136520028087e-06, |
| "loss": 2.1519, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.3943252091669698, |
| "grad_norm": 1.4995288048414561, |
| "learning_rate": 6.905255111484592e-06, |
| "loss": 2.2125, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.3950527464532557, |
| "grad_norm": 2.0316965689982607, |
| "learning_rate": 6.894363136201114e-06, |
| "loss": 2.1612, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.39578028373954166, |
| "grad_norm": 1.3579977814916835, |
| "learning_rate": 6.88346065458566e-06, |
| "loss": 2.2381, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.3965078210258276, |
| "grad_norm": 1.3472184874244, |
| "learning_rate": 6.8725477271045085e-06, |
| "loss": 2.1687, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.3972353583121135, |
| "grad_norm": 1.485886897353197, |
| "learning_rate": 6.861624414281875e-06, |
| "loss": 2.1931, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.3979628955983994, |
| "grad_norm": 1.4551297785369353, |
| "learning_rate": 6.850690776699574e-06, |
| "loss": 2.147, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.39869043288468536, |
| "grad_norm": 1.4380183589510607, |
| "learning_rate": 6.8397468749966735e-06, |
| "loss": 2.1952, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.39941797017097125, |
| "grad_norm": 1.5714557268967253, |
| "learning_rate": 6.8287927698691745e-06, |
| "loss": 2.1955, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.4001455074572572, |
| "grad_norm": 1.279188232189353, |
| "learning_rate": 6.8178285220696686e-06, |
| "loss": 2.1981, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.4008730447435431, |
| "grad_norm": 1.4424478452820921, |
| "learning_rate": 6.806854192406995e-06, |
| "loss": 2.1119, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.401600582029829, |
| "grad_norm": 1.3026202589943117, |
| "learning_rate": 6.795869841745912e-06, |
| "loss": 2.1805, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.40232811931611495, |
| "grad_norm": 1.409446508641355, |
| "learning_rate": 6.784875531006751e-06, |
| "loss": 2.2104, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.4030556566024009, |
| "grad_norm": 1.3443877470647068, |
| "learning_rate": 6.7738713211650885e-06, |
| "loss": 2.1898, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.4037831938886868, |
| "grad_norm": 1.6892639463777863, |
| "learning_rate": 6.762857273251396e-06, |
| "loss": 2.2186, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.4045107311749727, |
| "grad_norm": 1.5688437019416017, |
| "learning_rate": 6.751833448350713e-06, |
| "loss": 2.1855, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.40523826846125865, |
| "grad_norm": 1.603112976516178, |
| "learning_rate": 6.740799907602302e-06, |
| "loss": 2.2116, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.40596580574754454, |
| "grad_norm": 1.4321804286899071, |
| "learning_rate": 6.729756712199309e-06, |
| "loss": 2.1305, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.4066933430338305, |
| "grad_norm": 1.5339389129789849, |
| "learning_rate": 6.718703923388427e-06, |
| "loss": 2.1777, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.4074208803201164, |
| "grad_norm": 1.503291365726574, |
| "learning_rate": 6.707641602469554e-06, |
| "loss": 2.1939, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.4081484176064023, |
| "grad_norm": 1.9104614902741786, |
| "learning_rate": 6.696569810795455e-06, |
| "loss": 2.1462, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.40887595489268824, |
| "grad_norm": 1.5181941385544446, |
| "learning_rate": 6.685488609771422e-06, |
| "loss": 2.2068, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.4096034921789742, |
| "grad_norm": 1.854212603906569, |
| "learning_rate": 6.674398060854931e-06, |
| "loss": 2.1901, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.4103310294652601, |
| "grad_norm": 1.5249028077188949, |
| "learning_rate": 6.6632982255553004e-06, |
| "loss": 2.207, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.411058566751546, |
| "grad_norm": 2.0569427143628647, |
| "learning_rate": 6.652189165433356e-06, |
| "loss": 2.1998, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.41178610403783195, |
| "grad_norm": 1.7548253682980153, |
| "learning_rate": 6.64107094210108e-06, |
| "loss": 2.1028, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.4125136413241179, |
| "grad_norm": 1.641162449194734, |
| "learning_rate": 6.62994361722128e-06, |
| "loss": 2.1805, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.41324117861040377, |
| "grad_norm": 1.4868121115076225, |
| "learning_rate": 6.618807252507238e-06, |
| "loss": 2.1483, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.4139687158966897, |
| "grad_norm": 1.7597038884989522, |
| "learning_rate": 6.6076619097223735e-06, |
| "loss": 2.2063, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.41469625318297565, |
| "grad_norm": 1.5240304099422757, |
| "learning_rate": 6.5965076506799e-06, |
| "loss": 2.2156, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.41542379046926153, |
| "grad_norm": 1.564601016318413, |
| "learning_rate": 6.5853445372424805e-06, |
| "loss": 2.2211, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.4161513277555475, |
| "grad_norm": 1.480576244672552, |
| "learning_rate": 6.574172631321885e-06, |
| "loss": 2.1956, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.4168788650418334, |
| "grad_norm": 1.7476087188506109, |
| "learning_rate": 6.562991994878649e-06, |
| "loss": 2.1871, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.4176064023281193, |
| "grad_norm": 1.3207274146769286, |
| "learning_rate": 6.551802689921726e-06, |
| "loss": 2.1756, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.41833393961440524, |
| "grad_norm": 1.7068135107861524, |
| "learning_rate": 6.5406047785081485e-06, |
| "loss": 2.1858, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.4190614769006912, |
| "grad_norm": 1.4042743643084192, |
| "learning_rate": 6.529398322742677e-06, |
| "loss": 2.1722, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.41978901418697706, |
| "grad_norm": 1.9755798708905363, |
| "learning_rate": 6.518183384777468e-06, |
| "loss": 2.1576, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.420516551473263, |
| "grad_norm": 1.5994303461102262, |
| "learning_rate": 6.506960026811712e-06, |
| "loss": 2.2132, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.42124408875954894, |
| "grad_norm": 2.0649893491282114, |
| "learning_rate": 6.495728311091303e-06, |
| "loss": 2.1971, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.4219716260458348, |
| "grad_norm": 1.8098677800737781, |
| "learning_rate": 6.484488299908487e-06, |
| "loss": 2.2014, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.42269916333212076, |
| "grad_norm": 1.6214666551666128, |
| "learning_rate": 6.473240055601517e-06, |
| "loss": 2.2096, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.4234267006184067, |
| "grad_norm": 1.7878777932661953, |
| "learning_rate": 6.46198364055431e-06, |
| "loss": 2.1419, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.4241542379046926, |
| "grad_norm": 2.159509168849858, |
| "learning_rate": 6.450719117196094e-06, |
| "loss": 2.194, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.42488177519097853, |
| "grad_norm": 1.605971453076131, |
| "learning_rate": 6.439446548001069e-06, |
| "loss": 2.2016, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.42560931247726447, |
| "grad_norm": 1.2909853506916047, |
| "learning_rate": 6.4281659954880605e-06, |
| "loss": 2.1941, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.4263368497635504, |
| "grad_norm": 1.6094780157573472, |
| "learning_rate": 6.416877522220167e-06, |
| "loss": 2.1441, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.4270643870498363, |
| "grad_norm": 1.2477800378388897, |
| "learning_rate": 6.405581190804418e-06, |
| "loss": 2.1539, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.42779192433612223, |
| "grad_norm": 1.4114854405252097, |
| "learning_rate": 6.394277063891422e-06, |
| "loss": 2.1303, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.42851946162240817, |
| "grad_norm": 1.365404913537153, |
| "learning_rate": 6.382965204175027e-06, |
| "loss": 2.1426, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.42924699890869406, |
| "grad_norm": 1.6876040695944547, |
| "learning_rate": 6.371645674391967e-06, |
| "loss": 2.1633, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.42997453619498, |
| "grad_norm": 1.3968277764396695, |
| "learning_rate": 6.3603185373215105e-06, |
| "loss": 2.1505, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.43070207348126593, |
| "grad_norm": 1.4586710459080776, |
| "learning_rate": 6.348983855785122e-06, |
| "loss": 2.1557, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.4314296107675518, |
| "grad_norm": 1.369509599324066, |
| "learning_rate": 6.337641692646106e-06, |
| "loss": 2.163, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.43215714805383776, |
| "grad_norm": 1.488317021603382, |
| "learning_rate": 6.326292110809258e-06, |
| "loss": 2.2305, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.4328846853401237, |
| "grad_norm": 1.293463816991133, |
| "learning_rate": 6.314935173220524e-06, |
| "loss": 2.1733, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.4336122226264096, |
| "grad_norm": 1.2329798588148075, |
| "learning_rate": 6.303570942866643e-06, |
| "loss": 2.2361, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.4343397599126955, |
| "grad_norm": 1.274641665015929, |
| "learning_rate": 6.2921994827748e-06, |
| "loss": 2.1727, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.43506729719898146, |
| "grad_norm": 1.52350376606858, |
| "learning_rate": 6.280820856012277e-06, |
| "loss": 2.1166, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.43579483448526735, |
| "grad_norm": 1.3443923990170132, |
| "learning_rate": 6.269435125686105e-06, |
| "loss": 2.1592, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.4365223717715533, |
| "grad_norm": 1.4532761222668074, |
| "learning_rate": 6.258042354942708e-06, |
| "loss": 2.1825, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.4372499090578392, |
| "grad_norm": 1.4337559630256602, |
| "learning_rate": 6.2466426069675626e-06, |
| "loss": 2.1405, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.4379774463441251, |
| "grad_norm": 1.3646159270348335, |
| "learning_rate": 6.235235944984835e-06, |
| "loss": 2.1662, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.43870498363041105, |
| "grad_norm": 1.2228149723332558, |
| "learning_rate": 6.223822432257043e-06, |
| "loss": 2.2147, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.439432520916697, |
| "grad_norm": 1.5215015742650733, |
| "learning_rate": 6.212402132084697e-06, |
| "loss": 2.1389, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.44016005820298293, |
| "grad_norm": 1.3169917799704782, |
| "learning_rate": 6.200975107805951e-06, |
| "loss": 2.2103, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.4408875954892688, |
| "grad_norm": 1.3932661877068893, |
| "learning_rate": 6.189541422796254e-06, |
| "loss": 2.215, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.44161513277555475, |
| "grad_norm": 1.3096197602120236, |
| "learning_rate": 6.1781011404679905e-06, |
| "loss": 2.2127, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.4423426700618407, |
| "grad_norm": 1.4934737646647973, |
| "learning_rate": 6.16665432427014e-06, |
| "loss": 2.2126, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.4430702073481266, |
| "grad_norm": 1.2933269423981684, |
| "learning_rate": 6.155201037687917e-06, |
| "loss": 2.213, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.4437977446344125, |
| "grad_norm": 1.2805748576266416, |
| "learning_rate": 6.1437413442424236e-06, |
| "loss": 2.2125, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.44452528192069846, |
| "grad_norm": 1.2687373233631138, |
| "learning_rate": 6.132275307490291e-06, |
| "loss": 2.1634, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.44525281920698434, |
| "grad_norm": 1.2334743124034346, |
| "learning_rate": 6.120802991023334e-06, |
| "loss": 2.1814, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.4459803564932703, |
| "grad_norm": 1.3262631511688279, |
| "learning_rate": 6.109324458468198e-06, |
| "loss": 2.1997, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.4467078937795562, |
| "grad_norm": 1.2608029031130938, |
| "learning_rate": 6.097839773485995e-06, |
| "loss": 2.2009, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.4474354310658421, |
| "grad_norm": 1.600622551011594, |
| "learning_rate": 6.086348999771967e-06, |
| "loss": 2.1711, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.44816296835212804, |
| "grad_norm": 1.3683377787237463, |
| "learning_rate": 6.074852201055121e-06, |
| "loss": 2.1955, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.448890505638414, |
| "grad_norm": 8.048703163178985, |
| "learning_rate": 6.063349441097881e-06, |
| "loss": 2.1864, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.44961804292469987, |
| "grad_norm": 1.5300459998198346, |
| "learning_rate": 6.051840783695731e-06, |
| "loss": 2.1785, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.4503455802109858, |
| "grad_norm": 1.3019331521335327, |
| "learning_rate": 6.040326292676865e-06, |
| "loss": 2.188, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.45107311749727175, |
| "grad_norm": 1.4083628372242818, |
| "learning_rate": 6.028806031901829e-06, |
| "loss": 2.1921, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.45180065478355763, |
| "grad_norm": 1.3938607440843427, |
| "learning_rate": 6.0172800652631706e-06, |
| "loss": 2.1739, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.45252819206984357, |
| "grad_norm": 1.3388550745415642, |
| "learning_rate": 6.005748456685077e-06, |
| "loss": 2.1935, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.4532557293561295, |
| "grad_norm": 1.414747738164431, |
| "learning_rate": 5.994211270123034e-06, |
| "loss": 2.2062, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.45398326664241545, |
| "grad_norm": 1.3971151162411044, |
| "learning_rate": 5.9826685695634575e-06, |
| "loss": 2.1685, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.45471080392870133, |
| "grad_norm": 1.5318415273856, |
| "learning_rate": 5.971120419023349e-06, |
| "loss": 2.1522, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.4554383412149873, |
| "grad_norm": 1.465485425902321, |
| "learning_rate": 5.959566882549936e-06, |
| "loss": 2.2035, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.4561658785012732, |
| "grad_norm": 1.423172083365543, |
| "learning_rate": 5.948008024220311e-06, |
| "loss": 2.1863, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.4568934157875591, |
| "grad_norm": 1.4240745852160854, |
| "learning_rate": 5.936443908141088e-06, |
| "loss": 2.1878, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.45762095307384504, |
| "grad_norm": 1.3532825391471102, |
| "learning_rate": 5.924874598448038e-06, |
| "loss": 2.169, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.458348490360131, |
| "grad_norm": 1.358186857880125, |
| "learning_rate": 5.913300159305741e-06, |
| "loss": 2.1414, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.45907602764641686, |
| "grad_norm": 1.4951735213965345, |
| "learning_rate": 5.901720654907217e-06, |
| "loss": 2.1903, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.4598035649327028, |
| "grad_norm": 1.5407272148019264, |
| "learning_rate": 5.8901361494735874e-06, |
| "loss": 2.2203, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.46053110221898874, |
| "grad_norm": 1.3121352377647268, |
| "learning_rate": 5.878546707253704e-06, |
| "loss": 2.1472, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.4612586395052746, |
| "grad_norm": 1.34642713273401, |
| "learning_rate": 5.8669523925238e-06, |
| "loss": 2.2139, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.46198617679156057, |
| "grad_norm": 1.278017955157032, |
| "learning_rate": 5.855353269587134e-06, |
| "loss": 2.178, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.4627137140778465, |
| "grad_norm": 1.4552647069125098, |
| "learning_rate": 5.843749402773629e-06, |
| "loss": 2.1409, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.4634412513641324, |
| "grad_norm": 1.564092880214355, |
| "learning_rate": 5.8321408564395165e-06, |
| "loss": 2.1801, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.46416878865041833, |
| "grad_norm": 1.2442285834349986, |
| "learning_rate": 5.820527694966988e-06, |
| "loss": 2.2191, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.46489632593670427, |
| "grad_norm": 1.2375566820697201, |
| "learning_rate": 5.808909982763825e-06, |
| "loss": 2.1601, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.46562386322299015, |
| "grad_norm": 1.3882042357040267, |
| "learning_rate": 5.797287784263047e-06, |
| "loss": 2.1376, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.4663514005092761, |
| "grad_norm": 1.4087732429863524, |
| "learning_rate": 5.785661163922558e-06, |
| "loss": 2.2206, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.46707893779556203, |
| "grad_norm": 1.4816673933482334, |
| "learning_rate": 5.774030186224786e-06, |
| "loss": 2.1835, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.46780647508184797, |
| "grad_norm": 1.2407433002116413, |
| "learning_rate": 5.762394915676325e-06, |
| "loss": 2.1961, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.46853401236813386, |
| "grad_norm": 1.273642310061063, |
| "learning_rate": 5.750755416807575e-06, |
| "loss": 2.1482, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.4692615496544198, |
| "grad_norm": 1.4089871928103983, |
| "learning_rate": 5.7391117541723914e-06, |
| "loss": 2.1724, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.46998908694070574, |
| "grad_norm": 1.5125196870593247, |
| "learning_rate": 5.727463992347719e-06, |
| "loss": 2.1689, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.4707166242269916, |
| "grad_norm": 1.341889005253219, |
| "learning_rate": 5.715812195933238e-06, |
| "loss": 2.2176, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.47144416151327756, |
| "grad_norm": 1.403686348651851, |
| "learning_rate": 5.704156429551004e-06, |
| "loss": 2.1759, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.4721716987995635, |
| "grad_norm": 1.307605691657786, |
| "learning_rate": 5.692496757845092e-06, |
| "loss": 2.1926, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.4728992360858494, |
| "grad_norm": 1.4323344759039234, |
| "learning_rate": 5.680833245481234e-06, |
| "loss": 2.2126, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.4736267733721353, |
| "grad_norm": 1.5581057141318937, |
| "learning_rate": 5.6691659571464655e-06, |
| "loss": 2.2334, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.47435431065842126, |
| "grad_norm": 1.4300789953117086, |
| "learning_rate": 5.657494957548761e-06, |
| "loss": 2.1843, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.47508184794470715, |
| "grad_norm": 1.3970648098199812, |
| "learning_rate": 5.645820311416681e-06, |
| "loss": 2.2122, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.4758093852309931, |
| "grad_norm": 1.3766394494849667, |
| "learning_rate": 5.63414208349901e-06, |
| "loss": 2.1505, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.476536922517279, |
| "grad_norm": 1.2611851627806758, |
| "learning_rate": 5.622460338564393e-06, |
| "loss": 2.1835, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.4772644598035649, |
| "grad_norm": 1.2689258379576362, |
| "learning_rate": 5.610775141400986e-06, |
| "loss": 2.1851, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.47799199708985085, |
| "grad_norm": 1.2562703679535898, |
| "learning_rate": 5.599086556816089e-06, |
| "loss": 2.2069, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.4787195343761368, |
| "grad_norm": 1.373649678079621, |
| "learning_rate": 5.587394649635789e-06, |
| "loss": 2.1818, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.4794470716624227, |
| "grad_norm": 1.340447745306835, |
| "learning_rate": 5.575699484704599e-06, |
| "loss": 2.1518, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.4801746089487086, |
| "grad_norm": 1.4409063267903557, |
| "learning_rate": 5.564001126885106e-06, |
| "loss": 2.2298, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.48090214623499455, |
| "grad_norm": 1.4770825673856207, |
| "learning_rate": 5.552299641057596e-06, |
| "loss": 2.211, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.48162968352128044, |
| "grad_norm": 1.451816328888107, |
| "learning_rate": 5.540595092119709e-06, |
| "loss": 2.2002, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.4823572208075664, |
| "grad_norm": 1.237295472207834, |
| "learning_rate": 5.5288875449860745e-06, |
| "loss": 2.2193, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.4830847580938523, |
| "grad_norm": 1.4771105112520675, |
| "learning_rate": 5.517177064587945e-06, |
| "loss": 2.1932, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.48381229538013826, |
| "grad_norm": 1.3053846946139933, |
| "learning_rate": 5.505463715872846e-06, |
| "loss": 2.1545, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.48453983266642414, |
| "grad_norm": 1.3508860777646363, |
| "learning_rate": 5.493747563804211e-06, |
| "loss": 2.1694, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.4852673699527101, |
| "grad_norm": 1.2858010766706796, |
| "learning_rate": 5.482028673361015e-06, |
| "loss": 2.2014, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.485994907238996, |
| "grad_norm": 1.3616832657950724, |
| "learning_rate": 5.470307109537427e-06, |
| "loss": 2.1389, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.4867224445252819, |
| "grad_norm": 1.3973955367794273, |
| "learning_rate": 5.45858293734244e-06, |
| "loss": 2.1505, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.48744998181156785, |
| "grad_norm": 1.2914653848603326, |
| "learning_rate": 5.446856221799515e-06, |
| "loss": 2.183, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.4881775190978538, |
| "grad_norm": 1.2585865929216316, |
| "learning_rate": 5.435127027946215e-06, |
| "loss": 2.1943, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.48890505638413967, |
| "grad_norm": 1.3052683558561404, |
| "learning_rate": 5.423395420833853e-06, |
| "loss": 2.176, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.4896325936704256, |
| "grad_norm": 1.3873609358622778, |
| "learning_rate": 5.411661465527123e-06, |
| "loss": 2.1425, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.49036013095671155, |
| "grad_norm": 1.2669745889132074, |
| "learning_rate": 5.39992522710374e-06, |
| "loss": 2.1792, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.49108766824299743, |
| "grad_norm": 1.4194570179154562, |
| "learning_rate": 5.38818677065409e-06, |
| "loss": 2.2, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.4918152055292834, |
| "grad_norm": 1.2404450036792771, |
| "learning_rate": 5.376446161280851e-06, |
| "loss": 2.2025, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.4925427428155693, |
| "grad_norm": 1.3396645425218945, |
| "learning_rate": 5.364703464098645e-06, |
| "loss": 2.1413, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.4932702801018552, |
| "grad_norm": 1.2987925328919214, |
| "learning_rate": 5.352958744233673e-06, |
| "loss": 2.1667, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.49399781738814114, |
| "grad_norm": 1.7812213963044377, |
| "learning_rate": 5.341212066823356e-06, |
| "loss": 2.157, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.4947253546744271, |
| "grad_norm": 1.292537524703904, |
| "learning_rate": 5.329463497015969e-06, |
| "loss": 2.201, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.49545289196071296, |
| "grad_norm": 1.2361221322380314, |
| "learning_rate": 5.317713099970283e-06, |
| "loss": 2.1697, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.4961804292469989, |
| "grad_norm": 1.2590867014183742, |
| "learning_rate": 5.305960940855205e-06, |
| "loss": 2.1641, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.49690796653328484, |
| "grad_norm": 1.3424288308905323, |
| "learning_rate": 5.294207084849412e-06, |
| "loss": 2.1684, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.4976355038195708, |
| "grad_norm": 1.294118009811616, |
| "learning_rate": 5.282451597140994e-06, |
| "loss": 2.213, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.49836304110585666, |
| "grad_norm": 1.2748612300877948, |
| "learning_rate": 5.270694542927089e-06, |
| "loss": 2.1645, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.4990905783921426, |
| "grad_norm": 1.4758373492064474, |
| "learning_rate": 5.258935987413524e-06, |
| "loss": 2.1795, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.49981811567842854, |
| "grad_norm": 1.418081827629764, |
| "learning_rate": 5.247175995814452e-06, |
| "loss": 2.0981, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.5005456529647144, |
| "grad_norm": 1.3395470503978169, |
| "learning_rate": 5.235414633351992e-06, |
| "loss": 2.1836, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.5012731902510004, |
| "grad_norm": 1.2941659730123067, |
| "learning_rate": 5.223651965255864e-06, |
| "loss": 2.169, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.5020007275372863, |
| "grad_norm": 1.575875089973837, |
| "learning_rate": 5.211888056763029e-06, |
| "loss": 2.1769, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.5027282648235722, |
| "grad_norm": 1.3840535076365168, |
| "learning_rate": 5.20012297311733e-06, |
| "loss": 2.189, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.5034558021098581, |
| "grad_norm": 1.4124848862463264, |
| "learning_rate": 5.188356779569125e-06, |
| "loss": 2.1797, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.504183339396144, |
| "grad_norm": 1.3903900183285682, |
| "learning_rate": 5.176589541374929e-06, |
| "loss": 2.1851, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.50491087668243, |
| "grad_norm": 1.346682971941151, |
| "learning_rate": 5.164821323797051e-06, |
| "loss": 2.1561, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.5056384139687159, |
| "grad_norm": 1.2342634042885627, |
| "learning_rate": 5.1530521921032305e-06, |
| "loss": 2.1873, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.5063659512550018, |
| "grad_norm": 1.289754702836016, |
| "learning_rate": 5.141282211566276e-06, |
| "loss": 2.2453, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.5070934885412878, |
| "grad_norm": 1.2422208566945978, |
| "learning_rate": 5.129511447463705e-06, |
| "loss": 2.2103, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.5078210258275737, |
| "grad_norm": 1.3065192995628705, |
| "learning_rate": 5.117739965077382e-06, |
| "loss": 2.2236, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.5085485631138595, |
| "grad_norm": 1.3495238458160375, |
| "learning_rate": 5.105967829693155e-06, |
| "loss": 2.1476, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.5092761004001455, |
| "grad_norm": 1.2325811406693374, |
| "learning_rate": 5.0941951066004906e-06, |
| "loss": 2.1687, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.5100036376864314, |
| "grad_norm": 1.3631358419723048, |
| "learning_rate": 5.082421861092116e-06, |
| "loss": 2.1913, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.5107311749727174, |
| "grad_norm": 1.270779013966519, |
| "learning_rate": 5.0706481584636605e-06, |
| "loss": 2.2431, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.5114587122590033, |
| "grad_norm": 1.2036076608414459, |
| "learning_rate": 5.0588740640132805e-06, |
| "loss": 2.1895, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.5121862495452892, |
| "grad_norm": 1.309299489371034, |
| "learning_rate": 5.047099643041312e-06, |
| "loss": 2.2308, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.5129137868315751, |
| "grad_norm": 2.4163480824417887, |
| "learning_rate": 5.0353249608499e-06, |
| "loss": 2.2099, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.513641324117861, |
| "grad_norm": 1.3384150940350374, |
| "learning_rate": 5.023550082742637e-06, |
| "loss": 2.1809, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.514368861404147, |
| "grad_norm": 1.286056696964337, |
| "learning_rate": 5.011775074024202e-06, |
| "loss": 2.182, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.5150963986904329, |
| "grad_norm": 1.3126296066456467, |
| "learning_rate": 5e-06, |
| "loss": 2.1946, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.5158239359767188, |
| "grad_norm": 1.3270454866003318, |
| "learning_rate": 4.988224925975799e-06, |
| "loss": 2.1873, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.5165514732630048, |
| "grad_norm": 1.2104803167234541, |
| "learning_rate": 4.976449917257365e-06, |
| "loss": 2.1754, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.5172790105492906, |
| "grad_norm": 1.361700904797741, |
| "learning_rate": 4.964675039150102e-06, |
| "loss": 2.1479, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.5180065478355765, |
| "grad_norm": 1.2525166720230223, |
| "learning_rate": 4.952900356958689e-06, |
| "loss": 2.1876, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.5187340851218625, |
| "grad_norm": 1.3502833880969705, |
| "learning_rate": 4.941125935986721e-06, |
| "loss": 2.1657, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.5194616224081484, |
| "grad_norm": 1.3396266138224724, |
| "learning_rate": 4.929351841536342e-06, |
| "loss": 2.1869, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.5201891596944344, |
| "grad_norm": 1.2140015595334195, |
| "learning_rate": 4.917578138907884e-06, |
| "loss": 2.1803, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.5209166969807203, |
| "grad_norm": 1.6002438838217814, |
| "learning_rate": 4.90580489339951e-06, |
| "loss": 2.1274, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.5216442342670062, |
| "grad_norm": 1.3821865250885832, |
| "learning_rate": 4.894032170306846e-06, |
| "loss": 2.1731, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.5223717715532921, |
| "grad_norm": 1.3845708527944702, |
| "learning_rate": 4.882260034922618e-06, |
| "loss": 2.1313, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.523099308839578, |
| "grad_norm": 1.3576928213923576, |
| "learning_rate": 4.870488552536296e-06, |
| "loss": 2.1854, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.5238268461258639, |
| "grad_norm": 1.3371555624306086, |
| "learning_rate": 4.858717788433725e-06, |
| "loss": 2.1803, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.5245543834121499, |
| "grad_norm": 1.3113210170808254, |
| "learning_rate": 4.846947807896771e-06, |
| "loss": 2.1972, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.5252819206984358, |
| "grad_norm": 1.2936893601356254, |
| "learning_rate": 4.83517867620295e-06, |
| "loss": 2.1742, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.5260094579847218, |
| "grad_norm": 1.363329577036146, |
| "learning_rate": 4.823410458625072e-06, |
| "loss": 2.1583, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.5267369952710076, |
| "grad_norm": 1.2816776945909583, |
| "learning_rate": 4.811643220430877e-06, |
| "loss": 2.2167, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.5274645325572935, |
| "grad_norm": 1.4000479914079145, |
| "learning_rate": 4.7998770268826726e-06, |
| "loss": 2.1422, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.5281920698435795, |
| "grad_norm": 1.2363482944769124, |
| "learning_rate": 4.788111943236973e-06, |
| "loss": 2.1708, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.5289196071298654, |
| "grad_norm": 1.2634999042620405, |
| "learning_rate": 4.7763480347441395e-06, |
| "loss": 2.2168, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.5296471444161513, |
| "grad_norm": 1.3655777462801062, |
| "learning_rate": 4.7645853666480104e-06, |
| "loss": 2.214, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.5303746817024373, |
| "grad_norm": 1.3566078294042703, |
| "learning_rate": 4.752824004185548e-06, |
| "loss": 2.149, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.5311022189887231, |
| "grad_norm": 1.4882940760804018, |
| "learning_rate": 4.7410640125864785e-06, |
| "loss": 2.1441, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.5318297562750091, |
| "grad_norm": 1.4375750939214706, |
| "learning_rate": 4.729305457072913e-06, |
| "loss": 2.1757, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.532557293561295, |
| "grad_norm": 1.3376613224457599, |
| "learning_rate": 4.717548402859008e-06, |
| "loss": 2.1585, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.5332848308475809, |
| "grad_norm": 1.2605792646846594, |
| "learning_rate": 4.7057929151505895e-06, |
| "loss": 2.1889, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.5340123681338669, |
| "grad_norm": 1.8650621006861559, |
| "learning_rate": 4.694039059144797e-06, |
| "loss": 2.1302, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.5347399054201528, |
| "grad_norm": 1.4404601559351293, |
| "learning_rate": 4.6822869000297185e-06, |
| "loss": 2.1504, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.5354674427064388, |
| "grad_norm": 1.3347908301393572, |
| "learning_rate": 4.670536502984033e-06, |
| "loss": 2.191, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.5361949799927246, |
| "grad_norm": 1.50594842793546, |
| "learning_rate": 4.6587879331766465e-06, |
| "loss": 2.1525, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.5369225172790105, |
| "grad_norm": 1.2677367459392634, |
| "learning_rate": 4.647041255766329e-06, |
| "loss": 2.1766, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.5376500545652965, |
| "grad_norm": 1.2221286265080424, |
| "learning_rate": 4.6352965359013576e-06, |
| "loss": 2.171, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.5383775918515824, |
| "grad_norm": 1.2878264260228098, |
| "learning_rate": 4.623553838719151e-06, |
| "loss": 2.1832, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.5391051291378683, |
| "grad_norm": 1.2660416052678283, |
| "learning_rate": 4.611813229345911e-06, |
| "loss": 2.1712, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.5398326664241543, |
| "grad_norm": 1.442141691261282, |
| "learning_rate": 4.6000747728962606e-06, |
| "loss": 2.1485, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.5405602037104401, |
| "grad_norm": 1.2638028620122013, |
| "learning_rate": 4.588338534472878e-06, |
| "loss": 2.2101, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.541287740996726, |
| "grad_norm": 1.3293217690788863, |
| "learning_rate": 4.576604579166147e-06, |
| "loss": 2.2138, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.542015278283012, |
| "grad_norm": 1.3418992707760247, |
| "learning_rate": 4.564872972053786e-06, |
| "loss": 2.1954, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.5427428155692979, |
| "grad_norm": 1.383674149920183, |
| "learning_rate": 4.553143778200486e-06, |
| "loss": 2.1756, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.5434703528555839, |
| "grad_norm": 1.2522210322008165, |
| "learning_rate": 4.541417062657561e-06, |
| "loss": 2.1991, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.5441978901418698, |
| "grad_norm": 1.1950219808865283, |
| "learning_rate": 4.529692890462574e-06, |
| "loss": 2.1462, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.5449254274281556, |
| "grad_norm": 1.4821104806561844, |
| "learning_rate": 4.5179713266389866e-06, |
| "loss": 2.1622, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.5456529647144416, |
| "grad_norm": 1.2743523641653294, |
| "learning_rate": 4.50625243619579e-06, |
| "loss": 2.2167, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.5463805020007275, |
| "grad_norm": 1.3181368071817843, |
| "learning_rate": 4.494536284127155e-06, |
| "loss": 2.2139, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.5471080392870135, |
| "grad_norm": 1.2418738111745269, |
| "learning_rate": 4.4828229354120565e-06, |
| "loss": 2.2264, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.5478355765732994, |
| "grad_norm": 1.2885155053876325, |
| "learning_rate": 4.471112455013928e-06, |
| "loss": 2.2022, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.5485631138595853, |
| "grad_norm": 1.3444450947561895, |
| "learning_rate": 4.459404907880293e-06, |
| "loss": 2.1748, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.5492906511458713, |
| "grad_norm": 2.9905972047742733, |
| "learning_rate": 4.447700358942407e-06, |
| "loss": 2.1239, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.5500181884321571, |
| "grad_norm": 1.473706001074463, |
| "learning_rate": 4.435998873114895e-06, |
| "loss": 2.1655, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.550745725718443, |
| "grad_norm": 1.3501108667605346, |
| "learning_rate": 4.424300515295401e-06, |
| "loss": 2.1731, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.551473263004729, |
| "grad_norm": 1.2964111110362575, |
| "learning_rate": 4.412605350364213e-06, |
| "loss": 2.1732, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.5522008002910149, |
| "grad_norm": 1.3254861775621958, |
| "learning_rate": 4.400913443183913e-06, |
| "loss": 2.183, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.5529283375773009, |
| "grad_norm": 1.449214814716554, |
| "learning_rate": 4.389224858599015e-06, |
| "loss": 2.1766, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.5536558748635868, |
| "grad_norm": 1.3379961949853125, |
| "learning_rate": 4.377539661435608e-06, |
| "loss": 2.1253, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.5543834121498726, |
| "grad_norm": 1.302585638735648, |
| "learning_rate": 4.365857916500991e-06, |
| "loss": 2.1778, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.5551109494361586, |
| "grad_norm": 1.4822331492387226, |
| "learning_rate": 4.35417968858332e-06, |
| "loss": 2.1978, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.5558384867224445, |
| "grad_norm": 1.5506151702217899, |
| "learning_rate": 4.3425050424512405e-06, |
| "loss": 2.1304, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.5565660240087305, |
| "grad_norm": 1.3104067578648475, |
| "learning_rate": 4.330834042853537e-06, |
| "loss": 2.1714, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.5572935612950164, |
| "grad_norm": 1.3051669299320896, |
| "learning_rate": 4.319166754518768e-06, |
| "loss": 2.1609, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.5580210985813023, |
| "grad_norm": 1.229874722173343, |
| "learning_rate": 4.30750324215491e-06, |
| "loss": 2.2052, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.5587486358675882, |
| "grad_norm": 1.3217497554258093, |
| "learning_rate": 4.295843570448998e-06, |
| "loss": 2.175, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.5594761731538741, |
| "grad_norm": 1.4091315451317816, |
| "learning_rate": 4.284187804066764e-06, |
| "loss": 2.197, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.56020371044016, |
| "grad_norm": 1.9162169406144054, |
| "learning_rate": 4.272536007652281e-06, |
| "loss": 2.1093, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.560931247726446, |
| "grad_norm": 1.3754027757166483, |
| "learning_rate": 4.260888245827608e-06, |
| "loss": 2.2147, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.5616587850127319, |
| "grad_norm": 1.3743512226246963, |
| "learning_rate": 4.249244583192425e-06, |
| "loss": 2.188, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.5623863222990179, |
| "grad_norm": 1.5122563849193087, |
| "learning_rate": 4.237605084323676e-06, |
| "loss": 2.1581, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.5631138595853038, |
| "grad_norm": 1.1836737555675112, |
| "learning_rate": 4.225969813775215e-06, |
| "loss": 2.1945, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.5638413968715896, |
| "grad_norm": 1.4327467816072759, |
| "learning_rate": 4.214338836077444e-06, |
| "loss": 2.1444, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.5645689341578756, |
| "grad_norm": 1.4050707246252088, |
| "learning_rate": 4.202712215736955e-06, |
| "loss": 2.1793, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.5652964714441615, |
| "grad_norm": 1.239554329773933, |
| "learning_rate": 4.191090017236177e-06, |
| "loss": 2.2064, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.5660240087304474, |
| "grad_norm": 1.2770561521182617, |
| "learning_rate": 4.1794723050330125e-06, |
| "loss": 2.2113, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.5667515460167334, |
| "grad_norm": 1.3219438488411415, |
| "learning_rate": 4.167859143560484e-06, |
| "loss": 2.2279, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.5674790833030193, |
| "grad_norm": 1.4154758042801283, |
| "learning_rate": 4.1562505972263735e-06, |
| "loss": 2.1746, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.5682066205893052, |
| "grad_norm": 1.3740339531693826, |
| "learning_rate": 4.144646730412868e-06, |
| "loss": 2.1997, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.5689341578755911, |
| "grad_norm": 3.777402142491512, |
| "learning_rate": 4.133047607476202e-06, |
| "loss": 2.1841, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.569661695161877, |
| "grad_norm": 1.544999076264375, |
| "learning_rate": 4.121453292746297e-06, |
| "loss": 2.1451, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.570389232448163, |
| "grad_norm": 1.3279022974010046, |
| "learning_rate": 4.109863850526413e-06, |
| "loss": 2.1742, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.5711167697344489, |
| "grad_norm": 1.323190106700413, |
| "learning_rate": 4.098279345092783e-06, |
| "loss": 2.1796, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.5718443070207349, |
| "grad_norm": 1.3152250504974496, |
| "learning_rate": 4.086699840694262e-06, |
| "loss": 2.1191, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.5725718443070207, |
| "grad_norm": 1.5083030885832742, |
| "learning_rate": 4.075125401551963e-06, |
| "loss": 2.1534, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.5732993815933066, |
| "grad_norm": 1.136113143067403, |
| "learning_rate": 4.063556091858914e-06, |
| "loss": 2.1837, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.5740269188795926, |
| "grad_norm": 1.4698755700883925, |
| "learning_rate": 4.051991975779691e-06, |
| "loss": 2.1546, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.5747544561658785, |
| "grad_norm": 1.3802123832715365, |
| "learning_rate": 4.040433117450066e-06, |
| "loss": 2.1632, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.5754819934521644, |
| "grad_norm": 1.5354119147308578, |
| "learning_rate": 4.0288795809766516e-06, |
| "loss": 2.1918, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.5762095307384504, |
| "grad_norm": 1.44019389860738, |
| "learning_rate": 4.017331430436543e-06, |
| "loss": 2.188, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.5769370680247363, |
| "grad_norm": 1.3140332293999832, |
| "learning_rate": 4.005788729876968e-06, |
| "loss": 2.1619, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.5776646053110222, |
| "grad_norm": 1.3595817834123527, |
| "learning_rate": 3.994251543314925e-06, |
| "loss": 2.1158, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.5783921425973081, |
| "grad_norm": 1.3075801783310985, |
| "learning_rate": 3.982719934736832e-06, |
| "loss": 2.151, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.579119679883594, |
| "grad_norm": 1.45652881963162, |
| "learning_rate": 3.971193968098172e-06, |
| "loss": 2.1715, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.57984721716988, |
| "grad_norm": 1.1217645508627507, |
| "learning_rate": 3.959673707323135e-06, |
| "loss": 2.1998, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.5805747544561659, |
| "grad_norm": 1.2189466042014208, |
| "learning_rate": 3.948159216304269e-06, |
| "loss": 2.1857, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.5813022917424518, |
| "grad_norm": 1.3190108995893723, |
| "learning_rate": 3.93665055890212e-06, |
| "loss": 2.1988, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.5820298290287377, |
| "grad_norm": 1.2178878026121687, |
| "learning_rate": 3.92514779894488e-06, |
| "loss": 2.1617, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.5827573663150236, |
| "grad_norm": 1.3032425542197852, |
| "learning_rate": 3.9136510002280344e-06, |
| "loss": 2.2213, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.5834849036013096, |
| "grad_norm": 1.5544389666656435, |
| "learning_rate": 3.902160226514007e-06, |
| "loss": 2.1795, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.5842124408875955, |
| "grad_norm": 1.2429875863967428, |
| "learning_rate": 3.8906755415318045e-06, |
| "loss": 2.1962, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.5849399781738814, |
| "grad_norm": 1.2878993650555384, |
| "learning_rate": 3.8791970089766665e-06, |
| "loss": 2.1809, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.5856675154601674, |
| "grad_norm": 1.5981630336586339, |
| "learning_rate": 3.86772469250971e-06, |
| "loss": 2.1831, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.5863950527464532, |
| "grad_norm": 1.3398972377634621, |
| "learning_rate": 3.856258655757578e-06, |
| "loss": 2.1683, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.5871225900327391, |
| "grad_norm": 1.3902922273693485, |
| "learning_rate": 3.844798962312085e-06, |
| "loss": 2.1711, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.5878501273190251, |
| "grad_norm": 1.5076390916392564, |
| "learning_rate": 3.833345675729863e-06, |
| "loss": 2.1873, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.588577664605311, |
| "grad_norm": 1.3602095220019874, |
| "learning_rate": 3.821898859532013e-06, |
| "loss": 2.1572, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.589305201891597, |
| "grad_norm": 1.3849483328488064, |
| "learning_rate": 3.8104585772037493e-06, |
| "loss": 2.1637, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.5900327391778829, |
| "grad_norm": 1.3389495619269316, |
| "learning_rate": 3.7990248921940485e-06, |
| "loss": 2.1713, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.5907602764641688, |
| "grad_norm": 1.3880194739199263, |
| "learning_rate": 3.787597867915303e-06, |
| "loss": 2.1837, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.5914878137504547, |
| "grad_norm": 1.2861161541489807, |
| "learning_rate": 3.7761775677429567e-06, |
| "loss": 2.188, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.5922153510367406, |
| "grad_norm": 1.2318992307646213, |
| "learning_rate": 3.7647640550151666e-06, |
| "loss": 2.1662, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.5929428883230266, |
| "grad_norm": 1.1972675066127456, |
| "learning_rate": 3.7533573930324395e-06, |
| "loss": 2.2122, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.5936704256093125, |
| "grad_norm": 1.341853588828462, |
| "learning_rate": 3.7419576450572924e-06, |
| "loss": 2.1221, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.5943979628955984, |
| "grad_norm": 1.3121356522727252, |
| "learning_rate": 3.7305648743138966e-06, |
| "loss": 2.1702, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.5951255001818844, |
| "grad_norm": 1.193623744711227, |
| "learning_rate": 3.7191791439877236e-06, |
| "loss": 2.1873, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.5958530374681702, |
| "grad_norm": 2.1904797654328534, |
| "learning_rate": 3.7078005172252015e-06, |
| "loss": 2.2182, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.5965805747544561, |
| "grad_norm": 1.2499680138753542, |
| "learning_rate": 3.6964290571333583e-06, |
| "loss": 2.1625, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.5973081120407421, |
| "grad_norm": 1.452278370561498, |
| "learning_rate": 3.6850648267794776e-06, |
| "loss": 2.0981, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.598035649327028, |
| "grad_norm": 1.3248797969544868, |
| "learning_rate": 3.673707889190744e-06, |
| "loss": 2.1693, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.598763186613314, |
| "grad_norm": 1.3935529244008626, |
| "learning_rate": 3.662358307353897e-06, |
| "loss": 2.1635, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.5994907238995999, |
| "grad_norm": 1.2660094629060932, |
| "learning_rate": 3.6510161442148783e-06, |
| "loss": 2.0612, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.6002182611858857, |
| "grad_norm": 1.1899096410575125, |
| "learning_rate": 3.63968146267849e-06, |
| "loss": 2.1582, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.6009457984721717, |
| "grad_norm": 1.4707524646381498, |
| "learning_rate": 3.6283543256080334e-06, |
| "loss": 2.1861, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.6016733357584576, |
| "grad_norm": 2.2990139344746043, |
| "learning_rate": 3.6170347958249728e-06, |
| "loss": 2.1556, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.6024008730447435, |
| "grad_norm": 1.2985950672345654, |
| "learning_rate": 3.605722936108579e-06, |
| "loss": 2.1848, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.6031284103310295, |
| "grad_norm": 1.2578946342605386, |
| "learning_rate": 3.5944188091955843e-06, |
| "loss": 2.1714, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.6038559476173154, |
| "grad_norm": 1.2748819645637657, |
| "learning_rate": 3.5831224777798346e-06, |
| "loss": 2.1336, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.6045834849036014, |
| "grad_norm": 1.416638405798943, |
| "learning_rate": 3.5718340045119416e-06, |
| "loss": 2.1863, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.6053110221898872, |
| "grad_norm": 1.2823866800966135, |
| "learning_rate": 3.5605534519989327e-06, |
| "loss": 2.1921, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.6060385594761731, |
| "grad_norm": 1.261745846438069, |
| "learning_rate": 3.5492808828039083e-06, |
| "loss": 2.1546, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.6067660967624591, |
| "grad_norm": 1.2333351495616436, |
| "learning_rate": 3.538016359445692e-06, |
| "loss": 2.0962, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.607493634048745, |
| "grad_norm": 1.241100958102387, |
| "learning_rate": 3.5267599443984848e-06, |
| "loss": 2.1731, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.608221171335031, |
| "grad_norm": 1.4719975452264942, |
| "learning_rate": 3.5155117000915153e-06, |
| "loss": 2.1652, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.6089487086213169, |
| "grad_norm": 1.2062716693873263, |
| "learning_rate": 3.5042716889086998e-06, |
| "loss": 2.1553, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.6096762459076027, |
| "grad_norm": 1.3613990068473611, |
| "learning_rate": 3.493039973188289e-06, |
| "loss": 2.1603, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.6104037831938887, |
| "grad_norm": 1.2656018128816646, |
| "learning_rate": 3.481816615222533e-06, |
| "loss": 2.1789, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.6111313204801746, |
| "grad_norm": 1.4240615172025373, |
| "learning_rate": 3.470601677257323e-06, |
| "loss": 2.1792, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.6118588577664605, |
| "grad_norm": 1.4177889412892386, |
| "learning_rate": 3.459395221491853e-06, |
| "loss": 2.1296, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.6125863950527465, |
| "grad_norm": 1.2845162857537746, |
| "learning_rate": 3.4481973100782756e-06, |
| "loss": 2.1477, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.6133139323390324, |
| "grad_norm": 1.1999612596610867, |
| "learning_rate": 3.4370080051213527e-06, |
| "loss": 2.1877, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.6140414696253182, |
| "grad_norm": 1.173270406358778, |
| "learning_rate": 3.4258273686781156e-06, |
| "loss": 2.1612, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.6147690069116042, |
| "grad_norm": 1.3127766990292529, |
| "learning_rate": 3.4146554627575207e-06, |
| "loss": 2.1637, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.6154965441978901, |
| "grad_norm": 1.3033434445301304, |
| "learning_rate": 3.403492349320101e-06, |
| "loss": 2.152, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.6162240814841761, |
| "grad_norm": 1.2129577650821965, |
| "learning_rate": 3.392338090277628e-06, |
| "loss": 2.1982, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.616951618770462, |
| "grad_norm": 1.2402068492200884, |
| "learning_rate": 3.3811927474927644e-06, |
| "loss": 2.2277, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.617679156056748, |
| "grad_norm": 1.2610330081583359, |
| "learning_rate": 3.3700563827787224e-06, |
| "loss": 2.1312, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.6184066933430338, |
| "grad_norm": 1.2221232125577248, |
| "learning_rate": 3.358929057898922e-06, |
| "loss": 2.1795, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.6191342306293197, |
| "grad_norm": 1.367240054820803, |
| "learning_rate": 3.3478108345666456e-06, |
| "loss": 2.1599, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.6198617679156057, |
| "grad_norm": 1.2533884095301935, |
| "learning_rate": 3.3367017744446995e-06, |
| "loss": 2.1798, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.6205893052018916, |
| "grad_norm": 1.445873029531831, |
| "learning_rate": 3.3256019391450696e-06, |
| "loss": 2.189, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.6213168424881775, |
| "grad_norm": 1.5308918859452214, |
| "learning_rate": 3.314511390228578e-06, |
| "loss": 2.1167, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.6220443797744635, |
| "grad_norm": 1.1935640999794672, |
| "learning_rate": 3.303430189204545e-06, |
| "loss": 2.1835, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.6227719170607494, |
| "grad_norm": 1.2758745534483429, |
| "learning_rate": 3.2923583975304474e-06, |
| "loss": 2.189, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.6234994543470352, |
| "grad_norm": 1.2812582557317376, |
| "learning_rate": 3.2812960766115747e-06, |
| "loss": 2.1764, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.6242269916333212, |
| "grad_norm": 1.3052844302185176, |
| "learning_rate": 3.270243287800693e-06, |
| "loss": 2.1596, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.6249545289196071, |
| "grad_norm": 1.1735756944101055, |
| "learning_rate": 3.2592000923976997e-06, |
| "loss": 2.1914, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.6256820662058931, |
| "grad_norm": 1.3820135946650651, |
| "learning_rate": 3.2481665516492876e-06, |
| "loss": 2.1671, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.626409603492179, |
| "grad_norm": 1.2951883382314702, |
| "learning_rate": 3.2371427267486044e-06, |
| "loss": 2.1767, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.6271371407784649, |
| "grad_norm": 1.6538268336475825, |
| "learning_rate": 3.2261286788349127e-06, |
| "loss": 2.1366, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.6278646780647508, |
| "grad_norm": 1.2576633846796257, |
| "learning_rate": 3.2151244689932505e-06, |
| "loss": 2.1383, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.6285922153510367, |
| "grad_norm": 1.2026846661371544, |
| "learning_rate": 3.2041301582540903e-06, |
| "loss": 2.1615, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.6293197526373226, |
| "grad_norm": 1.9677840709401988, |
| "learning_rate": 3.1931458075930046e-06, |
| "loss": 2.1896, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.6300472899236086, |
| "grad_norm": 1.3114255471930778, |
| "learning_rate": 3.182171477930332e-06, |
| "loss": 2.2036, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.6307748272098945, |
| "grad_norm": 1.5993419978853645, |
| "learning_rate": 3.171207230130826e-06, |
| "loss": 2.175, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.6315023644961805, |
| "grad_norm": 1.3379682301899738, |
| "learning_rate": 3.1602531250033286e-06, |
| "loss": 2.1345, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.6322299017824663, |
| "grad_norm": 1.1605864226425355, |
| "learning_rate": 3.149309223300428e-06, |
| "loss": 2.1998, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.6329574390687522, |
| "grad_norm": 1.3863307023900937, |
| "learning_rate": 3.1383755857181253e-06, |
| "loss": 2.1669, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.6336849763550382, |
| "grad_norm": 1.253318814192694, |
| "learning_rate": 3.1274522728954928e-06, |
| "loss": 2.1333, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.6344125136413241, |
| "grad_norm": 1.2058026824056487, |
| "learning_rate": 3.1165393454143423e-06, |
| "loss": 2.1651, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.6351400509276101, |
| "grad_norm": 1.1644040700745641, |
| "learning_rate": 3.1056368637988876e-06, |
| "loss": 2.0699, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.635867588213896, |
| "grad_norm": 1.20675303117915, |
| "learning_rate": 3.0947448885154085e-06, |
| "loss": 2.1484, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.6365951255001819, |
| "grad_norm": 1.2391348218319218, |
| "learning_rate": 3.0838634799719157e-06, |
| "loss": 2.2138, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.6373226627864678, |
| "grad_norm": 1.2114878066432457, |
| "learning_rate": 3.072992698517815e-06, |
| "loss": 2.2021, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.6380502000727537, |
| "grad_norm": 1.3283135525126404, |
| "learning_rate": 3.0621326044435738e-06, |
| "loss": 2.1344, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.6387777373590396, |
| "grad_norm": 1.3236221007892095, |
| "learning_rate": 3.0512832579803873e-06, |
| "loss": 2.1644, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.6395052746453256, |
| "grad_norm": 1.1667265916431366, |
| "learning_rate": 3.0404447192998398e-06, |
| "loss": 2.182, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.6402328119316115, |
| "grad_norm": 1.204222459042279, |
| "learning_rate": 3.029617048513579e-06, |
| "loss": 2.1453, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.6409603492178975, |
| "grad_norm": 1.3308367139766337, |
| "learning_rate": 3.0188003056729752e-06, |
| "loss": 2.1893, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.6416878865041833, |
| "grad_norm": 1.1960192121658078, |
| "learning_rate": 3.007994550768793e-06, |
| "loss": 2.1942, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.6424154237904692, |
| "grad_norm": 1.145892797210482, |
| "learning_rate": 2.9971998437308546e-06, |
| "loss": 2.1857, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.6431429610767552, |
| "grad_norm": 1.535073515408013, |
| "learning_rate": 2.9864162444277118e-06, |
| "loss": 2.154, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.6438704983630411, |
| "grad_norm": 1.19226809686383, |
| "learning_rate": 2.97564381266631e-06, |
| "loss": 2.1522, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.644598035649327, |
| "grad_norm": 1.283458540798743, |
| "learning_rate": 2.964882608191659e-06, |
| "loss": 2.1653, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.645325572935613, |
| "grad_norm": 1.1444227382316488, |
| "learning_rate": 2.954132690686502e-06, |
| "loss": 2.1985, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.6460531102218988, |
| "grad_norm": 1.3133662020784518, |
| "learning_rate": 2.9433941197709813e-06, |
| "loss": 2.0988, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.6467806475081848, |
| "grad_norm": 9.045196136906558, |
| "learning_rate": 2.9326669550023124e-06, |
| "loss": 2.1569, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.6475081847944707, |
| "grad_norm": 1.318633177919385, |
| "learning_rate": 2.921951255874449e-06, |
| "loss": 2.1826, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.6482357220807566, |
| "grad_norm": 1.2574589759053059, |
| "learning_rate": 2.9112470818177563e-06, |
| "loss": 2.2294, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.6489632593670426, |
| "grad_norm": 1.5320788443566247, |
| "learning_rate": 2.9005544921986774e-06, |
| "loss": 2.1152, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.6496907966533285, |
| "grad_norm": 1.3393258246398236, |
| "learning_rate": 2.8898735463194128e-06, |
| "loss": 2.1454, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.6504183339396145, |
| "grad_norm": 1.2099327172531733, |
| "learning_rate": 2.8792043034175817e-06, |
| "loss": 2.1981, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.6511458712259003, |
| "grad_norm": 1.4423610907708257, |
| "learning_rate": 2.8685468226658974e-06, |
| "loss": 2.2454, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.6518734085121862, |
| "grad_norm": 1.50482757871637, |
| "learning_rate": 2.85790116317184e-06, |
| "loss": 2.2105, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.6526009457984722, |
| "grad_norm": 1.2444683591992163, |
| "learning_rate": 2.8472673839773267e-06, |
| "loss": 2.1572, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.6533284830847581, |
| "grad_norm": 1.2427782247532162, |
| "learning_rate": 2.8366455440583874e-06, |
| "loss": 2.1902, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.654056020371044, |
| "grad_norm": 1.162919929796855, |
| "learning_rate": 2.8260357023248323e-06, |
| "loss": 2.1376, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.65478355765733, |
| "grad_norm": 1.2691029843654758, |
| "learning_rate": 2.815437917619932e-06, |
| "loss": 2.2438, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.6555110949436158, |
| "grad_norm": 1.3335556247541136, |
| "learning_rate": 2.804852248720085e-06, |
| "loss": 2.179, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.6562386322299018, |
| "grad_norm": 1.6825656470777264, |
| "learning_rate": 2.7942787543344957e-06, |
| "loss": 2.2047, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.6569661695161877, |
| "grad_norm": 1.2506157526312551, |
| "learning_rate": 2.783717493104846e-06, |
| "loss": 2.1322, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.6576937068024736, |
| "grad_norm": 1.2507417772023046, |
| "learning_rate": 2.7731685236049745e-06, |
| "loss": 2.2024, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.6584212440887596, |
| "grad_norm": 1.2322326595801811, |
| "learning_rate": 2.762631904340546e-06, |
| "loss": 2.1625, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.6591487813750455, |
| "grad_norm": 1.4118755276611434, |
| "learning_rate": 2.7521076937487248e-06, |
| "loss": 2.1307, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.6598763186613313, |
| "grad_norm": 1.4424042343111918, |
| "learning_rate": 2.7415959501978674e-06, |
| "loss": 2.1514, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.6606038559476173, |
| "grad_norm": 1.2750452237109262, |
| "learning_rate": 2.731096731987177e-06, |
| "loss": 2.1527, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.6613313932339032, |
| "grad_norm": 1.1683736874051467, |
| "learning_rate": 2.7206100973463958e-06, |
| "loss": 2.1582, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.6620589305201892, |
| "grad_norm": 1.2299439828061258, |
| "learning_rate": 2.71013610443547e-06, |
| "loss": 2.128, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.6627864678064751, |
| "grad_norm": 1.6816303465805231, |
| "learning_rate": 2.6996748113442397e-06, |
| "loss": 2.1404, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.663514005092761, |
| "grad_norm": 1.2618180205618854, |
| "learning_rate": 2.689226276092107e-06, |
| "loss": 2.1719, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.664241542379047, |
| "grad_norm": 1.2228966712350884, |
| "learning_rate": 2.6787905566277185e-06, |
| "loss": 2.12, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.6649690796653328, |
| "grad_norm": 1.2443352246586448, |
| "learning_rate": 2.6683677108286423e-06, |
| "loss": 2.1936, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.6656966169516187, |
| "grad_norm": 1.3458440849116249, |
| "learning_rate": 2.65795779650105e-06, |
| "loss": 2.1508, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.6664241542379047, |
| "grad_norm": 1.2361743789072361, |
| "learning_rate": 2.6475608713793923e-06, |
| "loss": 2.1627, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.6671516915241906, |
| "grad_norm": 1.2303373889513625, |
| "learning_rate": 2.6371769931260806e-06, |
| "loss": 2.1704, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.6678792288104766, |
| "grad_norm": 1.2708199715941912, |
| "learning_rate": 2.6268062193311672e-06, |
| "loss": 2.1186, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.6686067660967625, |
| "grad_norm": 1.2294325949492098, |
| "learning_rate": 2.6164486075120245e-06, |
| "loss": 2.108, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.6693343033830483, |
| "grad_norm": 1.1475392443431833, |
| "learning_rate": 2.606104215113033e-06, |
| "loss": 2.1766, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.6700618406693343, |
| "grad_norm": 1.1625492019817114, |
| "learning_rate": 2.5957730995052477e-06, |
| "loss": 2.1743, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.6707893779556202, |
| "grad_norm": 1.2367499609432473, |
| "learning_rate": 2.585455317986095e-06, |
| "loss": 2.1893, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.6715169152419062, |
| "grad_norm": 1.2430383905991158, |
| "learning_rate": 2.5751509277790487e-06, |
| "loss": 2.2044, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.6722444525281921, |
| "grad_norm": 1.5168046892041702, |
| "learning_rate": 2.5648599860333122e-06, |
| "loss": 2.1738, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.672971989814478, |
| "grad_norm": 1.2063096926991026, |
| "learning_rate": 2.554582549823502e-06, |
| "loss": 2.1727, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.6736995271007639, |
| "grad_norm": 1.2758063180302197, |
| "learning_rate": 2.5443186761493327e-06, |
| "loss": 2.1567, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.6744270643870498, |
| "grad_norm": 1.1824740849643813, |
| "learning_rate": 2.5340684219352977e-06, |
| "loss": 2.1925, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.6751546016733357, |
| "grad_norm": 1.1643463010411315, |
| "learning_rate": 2.523831844030358e-06, |
| "loss": 2.1241, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.6758821389596217, |
| "grad_norm": 1.1286757655919661, |
| "learning_rate": 2.513608999207622e-06, |
| "loss": 2.1638, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.6766096762459076, |
| "grad_norm": 1.5947207232710128, |
| "learning_rate": 2.503399944164035e-06, |
| "loss": 2.1467, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.6773372135321936, |
| "grad_norm": 1.5508691613737382, |
| "learning_rate": 2.4932047355200613e-06, |
| "loss": 2.1744, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.6780647508184795, |
| "grad_norm": 1.1974290750839893, |
| "learning_rate": 2.483023429819372e-06, |
| "loss": 2.1832, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.6787922881047653, |
| "grad_norm": 1.2618835690498464, |
| "learning_rate": 2.472856083528531e-06, |
| "loss": 2.1972, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.6795198253910513, |
| "grad_norm": 1.2782330367865011, |
| "learning_rate": 2.4627027530366836e-06, |
| "loss": 2.1451, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.6802473626773372, |
| "grad_norm": 1.436706243744654, |
| "learning_rate": 2.4525634946552405e-06, |
| "loss": 2.2135, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.6809748999636231, |
| "grad_norm": 1.411133966277898, |
| "learning_rate": 2.442438364617567e-06, |
| "loss": 2.1562, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.6817024372499091, |
| "grad_norm": 1.2169700146486773, |
| "learning_rate": 2.4323274190786703e-06, |
| "loss": 2.2271, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.682429974536195, |
| "grad_norm": 1.3255427285044914, |
| "learning_rate": 2.422230714114891e-06, |
| "loss": 2.0765, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.6831575118224809, |
| "grad_norm": 1.2230803856218881, |
| "learning_rate": 2.4121483057235884e-06, |
| "loss": 2.1744, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.6838850491087668, |
| "grad_norm": 1.1731216113786644, |
| "learning_rate": 2.4020802498228333e-06, |
| "loss": 2.1297, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.6846125863950527, |
| "grad_norm": 1.2488772581044372, |
| "learning_rate": 2.392026602251093e-06, |
| "loss": 2.1954, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.6853401236813387, |
| "grad_norm": 1.2800008196295536, |
| "learning_rate": 2.3819874187669266e-06, |
| "loss": 2.1727, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.6860676609676246, |
| "grad_norm": 1.2834051636026926, |
| "learning_rate": 2.371962755048675e-06, |
| "loss": 2.1916, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.6867951982539106, |
| "grad_norm": 1.1954226450289407, |
| "learning_rate": 2.36195266669415e-06, |
| "loss": 2.1212, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.6875227355401964, |
| "grad_norm": 1.57141550563432, |
| "learning_rate": 2.351957209220326e-06, |
| "loss": 2.1853, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.6882502728264823, |
| "grad_norm": 1.2182134837350904, |
| "learning_rate": 2.341976438063035e-06, |
| "loss": 2.146, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.6889778101127683, |
| "grad_norm": 1.2384002744557308, |
| "learning_rate": 2.332010408576653e-06, |
| "loss": 2.1524, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.6897053473990542, |
| "grad_norm": 1.1238339859507342, |
| "learning_rate": 2.3220591760338046e-06, |
| "loss": 2.1538, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.6904328846853401, |
| "grad_norm": 1.287927410530698, |
| "learning_rate": 2.3121227956250435e-06, |
| "loss": 2.1496, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.6911604219716261, |
| "grad_norm": 1.2592066618116187, |
| "learning_rate": 2.302201322458552e-06, |
| "loss": 2.121, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.691887959257912, |
| "grad_norm": 1.2131741302198513, |
| "learning_rate": 2.292294811559837e-06, |
| "loss": 2.2045, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.6926154965441979, |
| "grad_norm": 1.4467989027206576, |
| "learning_rate": 2.282403317871422e-06, |
| "loss": 2.1203, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.6933430338304838, |
| "grad_norm": 1.4648976666718256, |
| "learning_rate": 2.2725268962525454e-06, |
| "loss": 2.1879, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.6940705711167697, |
| "grad_norm": 1.501178435831737, |
| "learning_rate": 2.262665601478852e-06, |
| "loss": 2.2198, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.6947981084030557, |
| "grad_norm": 1.2540967608340094, |
| "learning_rate": 2.252819488242093e-06, |
| "loss": 2.2109, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.6955256456893416, |
| "grad_norm": 1.3412153228190293, |
| "learning_rate": 2.24298861114982e-06, |
| "loss": 2.1885, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.6962531829756275, |
| "grad_norm": 1.1811258333483128, |
| "learning_rate": 2.2331730247250857e-06, |
| "loss": 2.1559, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.6969807202619134, |
| "grad_norm": 1.3587367408455937, |
| "learning_rate": 2.223372783406137e-06, |
| "loss": 2.1716, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.6977082575481993, |
| "grad_norm": 1.2034426964114804, |
| "learning_rate": 2.2135879415461152e-06, |
| "loss": 2.0977, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.6984357948344853, |
| "grad_norm": 1.2127358120385474, |
| "learning_rate": 2.203818553412757e-06, |
| "loss": 2.1677, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.6991633321207712, |
| "grad_norm": 1.0992839512542802, |
| "learning_rate": 2.1940646731880887e-06, |
| "loss": 2.1886, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.6998908694070571, |
| "grad_norm": 1.2635019786683503, |
| "learning_rate": 2.1843263549681287e-06, |
| "loss": 2.177, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.7006184066933431, |
| "grad_norm": 1.1771389092011315, |
| "learning_rate": 2.174603652762588e-06, |
| "loss": 2.1845, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.7013459439796289, |
| "grad_norm": 1.232918716717663, |
| "learning_rate": 2.164896620494569e-06, |
| "loss": 2.1865, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.7020734812659148, |
| "grad_norm": 1.2162200340200031, |
| "learning_rate": 2.1552053120002655e-06, |
| "loss": 2.2218, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.7028010185522008, |
| "grad_norm": 1.210396704267297, |
| "learning_rate": 2.145529781028668e-06, |
| "loss": 2.2157, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.7035285558384867, |
| "grad_norm": 1.174279238426189, |
| "learning_rate": 2.1358700812412625e-06, |
| "loss": 2.1556, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.7042560931247727, |
| "grad_norm": 1.2262375990076664, |
| "learning_rate": 2.1262262662117327e-06, |
| "loss": 2.1585, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.7049836304110586, |
| "grad_norm": 1.2564233079440728, |
| "learning_rate": 2.1165983894256647e-06, |
| "loss": 2.1624, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.7057111676973445, |
| "grad_norm": 2.68545550948343, |
| "learning_rate": 2.1069865042802502e-06, |
| "loss": 2.1878, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.7064387049836304, |
| "grad_norm": 1.586940340896605, |
| "learning_rate": 2.0973906640839867e-06, |
| "loss": 2.1582, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.7071662422699163, |
| "grad_norm": 1.2373753331763884, |
| "learning_rate": 2.0878109220563884e-06, |
| "loss": 2.1438, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.7078937795562023, |
| "grad_norm": 1.2072718942816432, |
| "learning_rate": 2.078247331327685e-06, |
| "loss": 2.1775, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.7086213168424882, |
| "grad_norm": 2.0527719084538005, |
| "learning_rate": 2.0686999449385286e-06, |
| "loss": 2.1157, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.7093488541287741, |
| "grad_norm": 1.4912010984015167, |
| "learning_rate": 2.0591688158397054e-06, |
| "loss": 2.1851, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.7100763914150601, |
| "grad_norm": 1.1603288071139832, |
| "learning_rate": 2.0496539968918342e-06, |
| "loss": 2.2072, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.7108039287013459, |
| "grad_norm": 1.2710873811707057, |
| "learning_rate": 2.0401555408650714e-06, |
| "loss": 2.1385, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.7115314659876318, |
| "grad_norm": 1.1827296569891017, |
| "learning_rate": 2.030673500438828e-06, |
| "loss": 2.1932, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.7122590032739178, |
| "grad_norm": 1.2118848171068402, |
| "learning_rate": 2.0212079282014725e-06, |
| "loss": 2.1849, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.7129865405602037, |
| "grad_norm": 1.1819279466854862, |
| "learning_rate": 2.0117588766500375e-06, |
| "loss": 2.1489, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.7137140778464897, |
| "grad_norm": 1.2566646302176254, |
| "learning_rate": 2.002326398189931e-06, |
| "loss": 2.1535, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.7144416151327756, |
| "grad_norm": 1.1881989349766866, |
| "learning_rate": 1.9929105451346436e-06, |
| "loss": 2.1631, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.7151691524190614, |
| "grad_norm": 1.1577242634231006, |
| "learning_rate": 1.983511369705462e-06, |
| "loss": 2.173, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.7158966897053474, |
| "grad_norm": 1.4225822847082645, |
| "learning_rate": 1.9741289240311757e-06, |
| "loss": 2.1893, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.7166242269916333, |
| "grad_norm": 1.2226780041724277, |
| "learning_rate": 1.9647632601477877e-06, |
| "loss": 2.1584, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.7173517642779192, |
| "grad_norm": 1.2859995557359045, |
| "learning_rate": 1.9554144299982314e-06, |
| "loss": 2.1452, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.7180793015642052, |
| "grad_norm": 1.2099058922003745, |
| "learning_rate": 1.9460824854320755e-06, |
| "loss": 2.1644, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.7188068388504911, |
| "grad_norm": 1.1443095152375087, |
| "learning_rate": 1.9367674782052376e-06, |
| "loss": 2.1687, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.7195343761367771, |
| "grad_norm": 1.2280151800014574, |
| "learning_rate": 1.9274694599797067e-06, |
| "loss": 2.204, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.7202619134230629, |
| "grad_norm": 1.2141865430162755, |
| "learning_rate": 1.918188482323242e-06, |
| "loss": 2.1757, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.7209894507093488, |
| "grad_norm": 1.382072714954584, |
| "learning_rate": 1.9089245967090952e-06, |
| "loss": 2.171, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.7217169879956348, |
| "grad_norm": 1.2705212986077308, |
| "learning_rate": 1.8996778545157263e-06, |
| "loss": 2.137, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.7224445252819207, |
| "grad_norm": 1.2381608303569365, |
| "learning_rate": 1.8904483070265133e-06, |
| "loss": 2.1668, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.7231720625682067, |
| "grad_norm": 1.2340508189273616, |
| "learning_rate": 1.8812360054294725e-06, |
| "loss": 2.1137, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.7238995998544926, |
| "grad_norm": 1.3012424535662288, |
| "learning_rate": 1.8720410008169727e-06, |
| "loss": 2.1833, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.7246271371407784, |
| "grad_norm": 1.282191032046726, |
| "learning_rate": 1.8628633441854515e-06, |
| "loss": 2.1927, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.7253546744270644, |
| "grad_norm": 1.1817189594536317, |
| "learning_rate": 1.8537030864351303e-06, |
| "loss": 2.2164, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.7260822117133503, |
| "grad_norm": 1.3280972391871648, |
| "learning_rate": 1.8445602783697375e-06, |
| "loss": 2.1622, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.7268097489996362, |
| "grad_norm": 1.6079264986426691, |
| "learning_rate": 1.8354349706962243e-06, |
| "loss": 2.1486, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.7275372862859222, |
| "grad_norm": 1.2135689219158894, |
| "learning_rate": 1.8263272140244803e-06, |
| "loss": 2.1321, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.7282648235722081, |
| "grad_norm": 1.2135204905494617, |
| "learning_rate": 1.8172370588670563e-06, |
| "loss": 2.1563, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.728992360858494, |
| "grad_norm": 1.1202605701009725, |
| "learning_rate": 1.8081645556388866e-06, |
| "loss": 2.1528, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.7297198981447799, |
| "grad_norm": 1.191518625719572, |
| "learning_rate": 1.7991097546570018e-06, |
| "loss": 2.1803, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.7304474354310658, |
| "grad_norm": 1.2602921711662611, |
| "learning_rate": 1.7900727061402556e-06, |
| "loss": 2.1558, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.7311749727173518, |
| "grad_norm": 1.2059662767470918, |
| "learning_rate": 1.7810534602090445e-06, |
| "loss": 2.2084, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.7319025100036377, |
| "grad_norm": 1.3067306072967486, |
| "learning_rate": 1.77205206688503e-06, |
| "loss": 2.1738, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.7326300472899236, |
| "grad_norm": 1.1891951117979749, |
| "learning_rate": 1.7630685760908623e-06, |
| "loss": 2.193, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.7333575845762095, |
| "grad_norm": 1.192631934374658, |
| "learning_rate": 1.7541030376499002e-06, |
| "loss": 2.1612, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.7340851218624954, |
| "grad_norm": 1.3750072249969223, |
| "learning_rate": 1.745155501285939e-06, |
| "loss": 2.1168, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.7348126591487814, |
| "grad_norm": 1.2232130975728073, |
| "learning_rate": 1.736226016622931e-06, |
| "loss": 2.1883, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.7355401964350673, |
| "grad_norm": 1.1914477767091434, |
| "learning_rate": 1.727314633184714e-06, |
| "loss": 2.1502, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.7362677337213532, |
| "grad_norm": 1.210870912995155, |
| "learning_rate": 1.718421400394732e-06, |
| "loss": 2.2149, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.7369952710076392, |
| "grad_norm": 1.1574223183585124, |
| "learning_rate": 1.7095463675757656e-06, |
| "loss": 2.2031, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.7377228082939251, |
| "grad_norm": 1.1965731255050562, |
| "learning_rate": 1.7006895839496557e-06, |
| "loss": 2.1607, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.7384503455802109, |
| "grad_norm": 1.3292826460968283, |
| "learning_rate": 1.6918510986370312e-06, |
| "loss": 2.1709, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.7391778828664969, |
| "grad_norm": 1.2612651230081413, |
| "learning_rate": 1.6830309606570372e-06, |
| "loss": 2.1685, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.7399054201527828, |
| "grad_norm": 1.2382345176070437, |
| "learning_rate": 1.674229218927062e-06, |
| "loss": 2.169, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.7406329574390688, |
| "grad_norm": 1.1520915560133695, |
| "learning_rate": 1.665445922262467e-06, |
| "loss": 2.2348, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.7413604947253547, |
| "grad_norm": 1.336700749645762, |
| "learning_rate": 1.6566811193763149e-06, |
| "loss": 2.1717, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.7420880320116406, |
| "grad_norm": 1.1549274584397604, |
| "learning_rate": 1.6479348588791e-06, |
| "loss": 2.1904, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.7428155692979265, |
| "grad_norm": 1.1893843208664543, |
| "learning_rate": 1.6392071892784789e-06, |
| "loss": 2.144, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.7435431065842124, |
| "grad_norm": 1.2381224137576587, |
| "learning_rate": 1.6304981589790015e-06, |
| "loss": 2.1631, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.7442706438704983, |
| "grad_norm": 1.1142566545158097, |
| "learning_rate": 1.6218078162818418e-06, |
| "loss": 2.1496, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.7449981811567843, |
| "grad_norm": 1.1146629784867004, |
| "learning_rate": 1.6131362093845299e-06, |
| "loss": 2.107, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.7457257184430702, |
| "grad_norm": 1.1632211803162529, |
| "learning_rate": 1.6044833863806864e-06, |
| "loss": 2.2039, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.7464532557293562, |
| "grad_norm": 1.195542241551238, |
| "learning_rate": 1.5958493952597536e-06, |
| "loss": 2.1514, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.747180793015642, |
| "grad_norm": 1.21840951959182, |
| "learning_rate": 1.5872342839067305e-06, |
| "loss": 2.139, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.7479083303019279, |
| "grad_norm": 1.130530825309705, |
| "learning_rate": 1.5786381001019052e-06, |
| "loss": 2.2141, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.7486358675882139, |
| "grad_norm": 1.182720116319556, |
| "learning_rate": 1.5700608915205978e-06, |
| "loss": 2.178, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.7493634048744998, |
| "grad_norm": 1.295581677262678, |
| "learning_rate": 1.561502705732883e-06, |
| "loss": 2.1421, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.7500909421607858, |
| "grad_norm": 1.2466355218601581, |
| "learning_rate": 1.5529635902033358e-06, |
| "loss": 2.1752, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.7508184794470717, |
| "grad_norm": 1.2314124456003142, |
| "learning_rate": 1.5444435922907669e-06, |
| "loss": 2.148, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.7515460167333576, |
| "grad_norm": 1.2138658987017406, |
| "learning_rate": 1.5359427592479553e-06, |
| "loss": 2.1426, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.7522735540196435, |
| "grad_norm": 1.1764184047644508, |
| "learning_rate": 1.5274611382213922e-06, |
| "loss": 2.1451, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.7530010913059294, |
| "grad_norm": 1.1915391500641057, |
| "learning_rate": 1.5189987762510167e-06, |
| "loss": 2.239, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.7537286285922153, |
| "grad_norm": 1.3393307034725943, |
| "learning_rate": 1.510555720269955e-06, |
| "loss": 2.1776, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.7544561658785013, |
| "grad_norm": 1.1074497181939627, |
| "learning_rate": 1.5021320171042608e-06, |
| "loss": 2.1814, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.7551837031647872, |
| "grad_norm": 1.2294477772308599, |
| "learning_rate": 1.4937277134726542e-06, |
| "loss": 2.1771, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.7559112404510732, |
| "grad_norm": 1.3646434494534407, |
| "learning_rate": 1.4853428559862637e-06, |
| "loss": 2.1932, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.756638777737359, |
| "grad_norm": 1.3978038925374128, |
| "learning_rate": 1.4769774911483686e-06, |
| "loss": 2.1953, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.7573663150236449, |
| "grad_norm": 1.2943852265946576, |
| "learning_rate": 1.4686316653541377e-06, |
| "loss": 2.171, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.7580938523099309, |
| "grad_norm": 1.087441192480477, |
| "learning_rate": 1.4603054248903752e-06, |
| "loss": 2.1768, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.7588213895962168, |
| "grad_norm": 1.5143127314406655, |
| "learning_rate": 1.4519988159352665e-06, |
| "loss": 2.1381, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.7595489268825028, |
| "grad_norm": 1.2488966257426735, |
| "learning_rate": 1.4437118845581138e-06, |
| "loss": 2.1914, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.7602764641687887, |
| "grad_norm": 1.2273958325384626, |
| "learning_rate": 1.4354446767190873e-06, |
| "loss": 2.1348, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.7610040014550745, |
| "grad_norm": 1.150082229138482, |
| "learning_rate": 1.4271972382689685e-06, |
| "loss": 2.1541, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.7617315387413605, |
| "grad_norm": 1.1934279136181158, |
| "learning_rate": 1.4189696149488956e-06, |
| "loss": 2.1576, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.7624590760276464, |
| "grad_norm": 1.2353996087944772, |
| "learning_rate": 1.4107618523901101e-06, |
| "loss": 2.151, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.7631866133139323, |
| "grad_norm": 1.1608215953352758, |
| "learning_rate": 1.4025739961137043e-06, |
| "loss": 2.1742, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.7639141506002183, |
| "grad_norm": 1.2788688900480345, |
| "learning_rate": 1.394406091530367e-06, |
| "loss": 2.1227, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.7646416878865042, |
| "grad_norm": 1.1579121493761417, |
| "learning_rate": 1.3862581839401346e-06, |
| "loss": 2.1749, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.7653692251727902, |
| "grad_norm": 1.9174641245246467, |
| "learning_rate": 1.3781303185321377e-06, |
| "loss": 2.2115, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.766096762459076, |
| "grad_norm": 1.3522708249259594, |
| "learning_rate": 1.370022540384347e-06, |
| "loss": 2.1995, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.7668242997453619, |
| "grad_norm": 1.603800827528043, |
| "learning_rate": 1.3619348944633331e-06, |
| "loss": 2.1682, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.7675518370316479, |
| "grad_norm": 1.1171227555477896, |
| "learning_rate": 1.3538674256240087e-06, |
| "loss": 2.1833, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.7682793743179338, |
| "grad_norm": 1.1622054710202911, |
| "learning_rate": 1.3458201786093795e-06, |
| "loss": 2.174, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.7690069116042197, |
| "grad_norm": 1.2372871882629508, |
| "learning_rate": 1.3377931980503055e-06, |
| "loss": 2.1894, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.7697344488905057, |
| "grad_norm": 1.220747336103902, |
| "learning_rate": 1.3297865284652417e-06, |
| "loss": 2.1416, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.7704619861767915, |
| "grad_norm": 1.2380603823828646, |
| "learning_rate": 1.3218002142599973e-06, |
| "loss": 2.1695, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.7711895234630775, |
| "grad_norm": 1.3966519763419472, |
| "learning_rate": 1.3138342997274883e-06, |
| "loss": 2.1459, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.7719170607493634, |
| "grad_norm": 1.3223384039164157, |
| "learning_rate": 1.3058888290474937e-06, |
| "loss": 2.1687, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.7726445980356493, |
| "grad_norm": 1.3021294267966386, |
| "learning_rate": 1.2979638462864069e-06, |
| "loss": 2.1213, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.7733721353219353, |
| "grad_norm": 1.223581952451272, |
| "learning_rate": 1.2900593953969947e-06, |
| "loss": 2.1693, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.7740996726082212, |
| "grad_norm": 1.5875289558290289, |
| "learning_rate": 1.2821755202181503e-06, |
| "loss": 2.1336, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.774827209894507, |
| "grad_norm": 1.2649833712441083, |
| "learning_rate": 1.2743122644746536e-06, |
| "loss": 2.1759, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.775554747180793, |
| "grad_norm": 1.2123555937456751, |
| "learning_rate": 1.266469671776926e-06, |
| "loss": 2.1812, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.7762822844670789, |
| "grad_norm": 1.2276927993970268, |
| "learning_rate": 1.2586477856207902e-06, |
| "loss": 2.1468, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.7770098217533649, |
| "grad_norm": 1.447520161103287, |
| "learning_rate": 1.2508466493872273e-06, |
| "loss": 2.1762, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.7777373590396508, |
| "grad_norm": 1.1531459103202673, |
| "learning_rate": 1.2430663063421388e-06, |
| "loss": 2.1864, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.7784648963259367, |
| "grad_norm": 1.400241485477495, |
| "learning_rate": 1.2353067996361034e-06, |
| "loss": 2.1957, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.7791924336122227, |
| "grad_norm": 1.2819279866788011, |
| "learning_rate": 1.2275681723041406e-06, |
| "loss": 2.1548, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.7799199708985085, |
| "grad_norm": 1.2699916767435784, |
| "learning_rate": 1.2198504672654694e-06, |
| "loss": 2.167, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.7806475081847944, |
| "grad_norm": 1.316288970557494, |
| "learning_rate": 1.212153727323273e-06, |
| "loss": 2.2055, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.7813750454710804, |
| "grad_norm": 1.1597273100326448, |
| "learning_rate": 1.2044779951644586e-06, |
| "loss": 2.1858, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.7821025827573663, |
| "grad_norm": 1.1638383463111301, |
| "learning_rate": 1.1968233133594243e-06, |
| "loss": 2.1741, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.7828301200436523, |
| "grad_norm": 1.2672830420894228, |
| "learning_rate": 1.1891897243618184e-06, |
| "loss": 2.1857, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.7835576573299382, |
| "grad_norm": 1.1682442183180266, |
| "learning_rate": 1.1815772705083072e-06, |
| "loss": 2.1882, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.784285194616224, |
| "grad_norm": 1.1495622151549003, |
| "learning_rate": 1.17398599401834e-06, |
| "loss": 2.2104, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.78501273190251, |
| "grad_norm": 2.4743403838723483, |
| "learning_rate": 1.1664159369939137e-06, |
| "loss": 2.1847, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.7857402691887959, |
| "grad_norm": 1.1427902163296222, |
| "learning_rate": 1.1588671414193397e-06, |
| "loss": 2.1397, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.7864678064750819, |
| "grad_norm": 1.2419446136578443, |
| "learning_rate": 1.1513396491610113e-06, |
| "loss": 2.113, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.7871953437613678, |
| "grad_norm": 1.19356149405073, |
| "learning_rate": 1.1438335019671715e-06, |
| "loss": 2.1734, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.7879228810476537, |
| "grad_norm": 1.4092480507548693, |
| "learning_rate": 1.1363487414676805e-06, |
| "loss": 2.1451, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.7886504183339396, |
| "grad_norm": 1.33971569627605, |
| "learning_rate": 1.128885409173789e-06, |
| "loss": 2.1734, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.7893779556202255, |
| "grad_norm": 1.0948443491354019, |
| "learning_rate": 1.1214435464779006e-06, |
| "loss": 2.186, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.7901054929065114, |
| "grad_norm": 1.1451593409287064, |
| "learning_rate": 1.1140231946533486e-06, |
| "loss": 2.2262, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.7908330301927974, |
| "grad_norm": 1.1592036822391414, |
| "learning_rate": 1.1066243948541638e-06, |
| "loss": 2.1421, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.7915605674790833, |
| "grad_norm": 1.3931059781118835, |
| "learning_rate": 1.0992471881148497e-06, |
| "loss": 2.1791, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.7922881047653693, |
| "grad_norm": 1.1756390362792626, |
| "learning_rate": 1.091891615350147e-06, |
| "loss": 2.1748, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.7930156420516552, |
| "grad_norm": 1.5667835817360032, |
| "learning_rate": 1.0845577173548172e-06, |
| "loss": 2.1871, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.793743179337941, |
| "grad_norm": 1.1723123928342023, |
| "learning_rate": 1.07724553480341e-06, |
| "loss": 2.1973, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.794470716624227, |
| "grad_norm": 1.1741596660871914, |
| "learning_rate": 1.0699551082500387e-06, |
| "loss": 2.1675, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.7951982539105129, |
| "grad_norm": 1.5792380614214159, |
| "learning_rate": 1.0626864781281553e-06, |
| "loss": 2.1785, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.7959257911967988, |
| "grad_norm": 1.3496288299574075, |
| "learning_rate": 1.0554396847503272e-06, |
| "loss": 2.1754, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.7966533284830848, |
| "grad_norm": 1.209717075090295, |
| "learning_rate": 1.0482147683080125e-06, |
| "loss": 2.1536, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.7973808657693707, |
| "grad_norm": 1.1606881621010003, |
| "learning_rate": 1.0410117688713366e-06, |
| "loss": 2.1714, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.7981084030556566, |
| "grad_norm": 1.311535089956001, |
| "learning_rate": 1.0338307263888748e-06, |
| "loss": 2.2004, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.7988359403419425, |
| "grad_norm": 1.0940623937123624, |
| "learning_rate": 1.0266716806874227e-06, |
| "loss": 2.184, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.7995634776282284, |
| "grad_norm": 1.5075293827537422, |
| "learning_rate": 1.0195346714717813e-06, |
| "loss": 2.2194, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.8002910149145144, |
| "grad_norm": 1.219694046957049, |
| "learning_rate": 1.0124197383245344e-06, |
| "loss": 2.1548, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.8010185522008003, |
| "grad_norm": 1.2551974465446878, |
| "learning_rate": 1.0053269207058298e-06, |
| "loss": 2.151, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.8017460894870863, |
| "grad_norm": 1.237030767097611, |
| "learning_rate": 9.982562579531607e-07, |
| "loss": 2.1479, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.8024736267733721, |
| "grad_norm": 1.1308995295036655, |
| "learning_rate": 9.912077892811473e-07, |
| "loss": 2.2096, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.803201164059658, |
| "grad_norm": 1.1208141811054002, |
| "learning_rate": 9.841815537813177e-07, |
| "loss": 2.1537, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.803928701345944, |
| "grad_norm": 1.4560765495831982, |
| "learning_rate": 9.77177590421895e-07, |
| "loss": 2.1327, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.8046562386322299, |
| "grad_norm": 1.1404979922710892, |
| "learning_rate": 9.70195938047576e-07, |
| "loss": 2.1422, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.8053837759185158, |
| "grad_norm": 4.06610644983023, |
| "learning_rate": 9.63236635379321e-07, |
| "loss": 2.1989, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.8061113132048018, |
| "grad_norm": 1.1852343159799084, |
| "learning_rate": 9.562997210141355e-07, |
| "loss": 2.1391, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.8068388504910877, |
| "grad_norm": 1.0865192405071906, |
| "learning_rate": 9.49385233424856e-07, |
| "loss": 2.101, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.8075663877773736, |
| "grad_norm": 1.2112784868475805, |
| "learning_rate": 9.424932109599372e-07, |
| "loss": 2.1838, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.8082939250636595, |
| "grad_norm": 1.1375103883075794, |
| "learning_rate": 9.356236918432454e-07, |
| "loss": 2.1604, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.8090214623499454, |
| "grad_norm": 1.2333713888207174, |
| "learning_rate": 9.287767141738352e-07, |
| "loss": 2.1123, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.8097489996362314, |
| "grad_norm": 1.1611512866888798, |
| "learning_rate": 9.21952315925746e-07, |
| "loss": 2.1285, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.8104765369225173, |
| "grad_norm": 1.169065058306762, |
| "learning_rate": 9.151505349477901e-07, |
| "loss": 2.1505, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.8112040742088032, |
| "grad_norm": 1.1785587576365077, |
| "learning_rate": 9.08371408963341e-07, |
| "loss": 2.1385, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.8119316114950891, |
| "grad_norm": 1.1577165996672336, |
| "learning_rate": 9.016149755701259e-07, |
| "loss": 2.1415, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.812659148781375, |
| "grad_norm": 1.2593173534941327, |
| "learning_rate": 8.948812722400157e-07, |
| "loss": 2.1681, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.813386686067661, |
| "grad_norm": 1.219466614222021, |
| "learning_rate": 8.881703363188199e-07, |
| "loss": 2.1802, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.8141142233539469, |
| "grad_norm": 1.121801024097721, |
| "learning_rate": 8.814822050260758e-07, |
| "loss": 2.2099, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.8148417606402328, |
| "grad_norm": 1.167478440289016, |
| "learning_rate": 8.748169154548448e-07, |
| "loss": 2.1474, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.8155692979265188, |
| "grad_norm": 1.1164752843710233, |
| "learning_rate": 8.681745045715045e-07, |
| "loss": 2.168, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.8162968352128046, |
| "grad_norm": 1.1218689018085943, |
| "learning_rate": 8.615550092155478e-07, |
| "loss": 2.2039, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.8170243724990905, |
| "grad_norm": 1.1738239638703962, |
| "learning_rate": 8.549584660993726e-07, |
| "loss": 2.194, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.8177519097853765, |
| "grad_norm": 1.1883715644243047, |
| "learning_rate": 8.483849118080828e-07, |
| "loss": 2.1823, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.8184794470716624, |
| "grad_norm": 1.2887090928750042, |
| "learning_rate": 8.418343827992842e-07, |
| "loss": 2.1005, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.8192069843579484, |
| "grad_norm": 1.2560094931923382, |
| "learning_rate": 8.353069154028814e-07, |
| "loss": 2.1619, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.8199345216442343, |
| "grad_norm": 1.1674958296283056, |
| "learning_rate": 8.28802545820877e-07, |
| "loss": 2.162, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.8206620589305202, |
| "grad_norm": 1.124133546214429, |
| "learning_rate": 8.223213101271709e-07, |
| "loss": 2.1873, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.8213895962168061, |
| "grad_norm": 1.3855733066248885, |
| "learning_rate": 8.158632442673603e-07, |
| "loss": 2.1377, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.822117133503092, |
| "grad_norm": 1.1974445286333184, |
| "learning_rate": 8.094283840585398e-07, |
| "loss": 2.1868, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.822844670789378, |
| "grad_norm": 1.3903170573362245, |
| "learning_rate": 8.03016765189103e-07, |
| "loss": 2.2213, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.8235722080756639, |
| "grad_norm": 1.1331591969369272, |
| "learning_rate": 7.966284232185451e-07, |
| "loss": 2.242, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.8242997453619498, |
| "grad_norm": 1.1468355972644708, |
| "learning_rate": 7.902633935772647e-07, |
| "loss": 2.2077, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.8250272826482358, |
| "grad_norm": 1.1686608768216766, |
| "learning_rate": 7.839217115663683e-07, |
| "loss": 2.1383, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.8257548199345216, |
| "grad_norm": 1.3068539015030758, |
| "learning_rate": 7.776034123574738e-07, |
| "loss": 2.1846, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.8264823572208075, |
| "grad_norm": 1.523560986226689, |
| "learning_rate": 7.713085309925156e-07, |
| "loss": 2.1513, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.8272098945070935, |
| "grad_norm": 1.1007712070990663, |
| "learning_rate": 7.650371023835495e-07, |
| "loss": 2.227, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.8279374317933794, |
| "grad_norm": 1.1507259497696056, |
| "learning_rate": 7.587891613125631e-07, |
| "loss": 2.1392, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.8286649690796654, |
| "grad_norm": 1.1372034569945393, |
| "learning_rate": 7.525647424312766e-07, |
| "loss": 2.1327, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.8293925063659513, |
| "grad_norm": 1.2185998796300685, |
| "learning_rate": 7.46363880260954e-07, |
| "loss": 2.1443, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.8301200436522371, |
| "grad_norm": 1.431473939970321, |
| "learning_rate": 7.401866091922133e-07, |
| "loss": 2.1697, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.8308475809385231, |
| "grad_norm": 1.168251404055778, |
| "learning_rate": 7.340329634848309e-07, |
| "loss": 2.1866, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.831575118224809, |
| "grad_norm": 1.7243666262254045, |
| "learning_rate": 7.279029772675572e-07, |
| "loss": 2.2235, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.832302655511095, |
| "grad_norm": 1.1198817161954957, |
| "learning_rate": 7.217966845379243e-07, |
| "loss": 2.1741, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.8330301927973809, |
| "grad_norm": 1.1495553472618867, |
| "learning_rate": 7.157141191620548e-07, |
| "loss": 2.1228, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.8337577300836668, |
| "grad_norm": 1.2264294129806552, |
| "learning_rate": 7.096553148744806e-07, |
| "loss": 2.209, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.8344852673699527, |
| "grad_norm": 1.1189699810078966, |
| "learning_rate": 7.036203052779506e-07, |
| "loss": 2.1608, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.8352128046562386, |
| "grad_norm": 1.1167070479223586, |
| "learning_rate": 6.97609123843247e-07, |
| "loss": 2.1324, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.8359403419425245, |
| "grad_norm": 1.2407016203598813, |
| "learning_rate": 6.916218039089961e-07, |
| "loss": 2.2016, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.8366678792288105, |
| "grad_norm": 1.0891946755118704, |
| "learning_rate": 6.856583786814891e-07, |
| "loss": 2.1565, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.8373954165150964, |
| "grad_norm": 1.121834586640259, |
| "learning_rate": 6.797188812344907e-07, |
| "loss": 2.1688, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.8381229538013824, |
| "grad_norm": 1.1952722361624357, |
| "learning_rate": 6.738033445090653e-07, |
| "loss": 2.1958, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.8388504910876683, |
| "grad_norm": 1.256213852552062, |
| "learning_rate": 6.67911801313384e-07, |
| "loss": 2.1381, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.8395780283739541, |
| "grad_norm": 1.8449125450101216, |
| "learning_rate": 6.620442843225483e-07, |
| "loss": 2.1822, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.8403055656602401, |
| "grad_norm": 1.222232102615464, |
| "learning_rate": 6.562008260784092e-07, |
| "loss": 2.2062, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.841033102946526, |
| "grad_norm": 1.0938211772059208, |
| "learning_rate": 6.503814589893836e-07, |
| "loss": 2.1551, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.8417606402328119, |
| "grad_norm": 1.1976326698603619, |
| "learning_rate": 6.445862153302784e-07, |
| "loss": 2.198, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.8424881775190979, |
| "grad_norm": 1.145599285934117, |
| "learning_rate": 6.388151272421078e-07, |
| "loss": 2.152, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.8432157148053838, |
| "grad_norm": 1.1584030834020036, |
| "learning_rate": 6.330682267319177e-07, |
| "loss": 2.1492, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.8439432520916696, |
| "grad_norm": 1.1891679423413797, |
| "learning_rate": 6.273455456726074e-07, |
| "loss": 2.1861, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.8446707893779556, |
| "grad_norm": 1.240134539666397, |
| "learning_rate": 6.216471158027515e-07, |
| "loss": 2.2224, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.8453983266642415, |
| "grad_norm": 1.3441384078153296, |
| "learning_rate": 6.159729687264254e-07, |
| "loss": 2.1399, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.8461258639505275, |
| "grad_norm": 1.3621665509768819, |
| "learning_rate": 6.103231359130308e-07, |
| "loss": 2.1678, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.8468534012368134, |
| "grad_norm": 1.1509024526416052, |
| "learning_rate": 6.046976486971201e-07, |
| "loss": 2.2213, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.8475809385230993, |
| "grad_norm": 1.135048732397783, |
| "learning_rate": 5.990965382782177e-07, |
| "loss": 2.1534, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.8483084758093852, |
| "grad_norm": 1.150383037307535, |
| "learning_rate": 5.935198357206595e-07, |
| "loss": 2.2025, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.8490360130956711, |
| "grad_norm": 1.1090708990139237, |
| "learning_rate": 5.879675719534078e-07, |
| "loss": 2.1869, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.8497635503819571, |
| "grad_norm": 1.2017907661883667, |
| "learning_rate": 5.824397777698859e-07, |
| "loss": 2.1598, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.850491087668243, |
| "grad_norm": 1.2122822792948795, |
| "learning_rate": 5.769364838278063e-07, |
| "loss": 2.1669, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.8512186249545289, |
| "grad_norm": 1.2593026892043682, |
| "learning_rate": 5.714577206490018e-07, |
| "loss": 2.2036, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.8519461622408149, |
| "grad_norm": 1.1026371974965643, |
| "learning_rate": 5.660035186192531e-07, |
| "loss": 2.1641, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.8526736995271008, |
| "grad_norm": 1.261771709071015, |
| "learning_rate": 5.60573907988124e-07, |
| "loss": 2.1476, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.8534012368133866, |
| "grad_norm": 1.207021289017894, |
| "learning_rate": 5.551689188687909e-07, |
| "loss": 2.1635, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.8541287740996726, |
| "grad_norm": 1.2578218076463634, |
| "learning_rate": 5.497885812378772e-07, |
| "loss": 2.1802, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.8548563113859585, |
| "grad_norm": 1.1413457564662952, |
| "learning_rate": 5.444329249352859e-07, |
| "loss": 2.2131, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.8555838486722445, |
| "grad_norm": 1.292402549873486, |
| "learning_rate": 5.391019796640362e-07, |
| "loss": 2.1774, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.8563113859585304, |
| "grad_norm": 1.14093076222506, |
| "learning_rate": 5.337957749900958e-07, |
| "loss": 2.235, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.8570389232448163, |
| "grad_norm": 1.236479670217711, |
| "learning_rate": 5.285143403422188e-07, |
| "loss": 2.141, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.8577664605311022, |
| "grad_norm": 1.30310549862543, |
| "learning_rate": 5.23257705011786e-07, |
| "loss": 2.1661, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.8584939978173881, |
| "grad_norm": 1.2079208652252142, |
| "learning_rate": 5.18025898152631e-07, |
| "loss": 2.1353, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.859221535103674, |
| "grad_norm": 1.4069011415665924, |
| "learning_rate": 5.128189487808927e-07, |
| "loss": 2.1496, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.85994907238996, |
| "grad_norm": 1.161746521014043, |
| "learning_rate": 5.076368857748454e-07, |
| "loss": 2.1444, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.8606766096762459, |
| "grad_norm": 1.332538033508701, |
| "learning_rate": 5.024797378747414e-07, |
| "loss": 2.164, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.8614041469625319, |
| "grad_norm": 1.0993636240534148, |
| "learning_rate": 4.973475336826506e-07, |
| "loss": 2.1636, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.8621316842488177, |
| "grad_norm": 1.1356208007011255, |
| "learning_rate": 4.922403016623034e-07, |
| "loss": 2.1201, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.8628592215351036, |
| "grad_norm": 1.1305387251029828, |
| "learning_rate": 4.871580701389316e-07, |
| "loss": 2.2007, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.8635867588213896, |
| "grad_norm": 1.0574829907568835, |
| "learning_rate": 4.821008672991118e-07, |
| "loss": 2.1973, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.8643142961076755, |
| "grad_norm": 1.190650921826717, |
| "learning_rate": 4.770687211906089e-07, |
| "loss": 2.1696, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.8650418333939615, |
| "grad_norm": 1.1306022951349342, |
| "learning_rate": 4.720616597222205e-07, |
| "loss": 2.1633, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.8657693706802474, |
| "grad_norm": 1.196362267443237, |
| "learning_rate": 4.6707971066362324e-07, |
| "loss": 2.1598, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.8664969079665333, |
| "grad_norm": 1.1567776943227535, |
| "learning_rate": 4.6212290164521554e-07, |
| "loss": 2.1812, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.8672244452528192, |
| "grad_norm": 1.1723671166995326, |
| "learning_rate": 4.5719126015796757e-07, |
| "loss": 2.1705, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.8679519825391051, |
| "grad_norm": 1.0810496584272096, |
| "learning_rate": 4.522848135532698e-07, |
| "loss": 2.1261, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.868679519825391, |
| "grad_norm": 1.243126377593316, |
| "learning_rate": 4.474035890427769e-07, |
| "loss": 2.1473, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.869407057111677, |
| "grad_norm": 1.0895467499897105, |
| "learning_rate": 4.4254761369825984e-07, |
| "loss": 2.1511, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.8701345943979629, |
| "grad_norm": 1.1230163941665576, |
| "learning_rate": 4.377169144514554e-07, |
| "loss": 2.1951, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.8708621316842489, |
| "grad_norm": 1.1565737076422473, |
| "learning_rate": 4.329115180939164e-07, |
| "loss": 2.1575, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.8715896689705347, |
| "grad_norm": 1.0696169727753786, |
| "learning_rate": 4.281314512768625e-07, |
| "loss": 2.181, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.8723172062568206, |
| "grad_norm": 1.3615594210380526, |
| "learning_rate": 4.2337674051103504e-07, |
| "loss": 2.1395, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.8730447435431066, |
| "grad_norm": 1.2378958475002244, |
| "learning_rate": 4.186474121665468e-07, |
| "loss": 2.1419, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.8737722808293925, |
| "grad_norm": 1.1629911120291947, |
| "learning_rate": 4.139434924727359e-07, |
| "loss": 2.2328, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.8744998181156785, |
| "grad_norm": 1.2389585671969723, |
| "learning_rate": 4.092650075180232e-07, |
| "loss": 2.1682, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.8752273554019644, |
| "grad_norm": 1.3514109314913836, |
| "learning_rate": 4.046119832497658e-07, |
| "loss": 2.1164, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.8759548926882502, |
| "grad_norm": 1.1144370754598174, |
| "learning_rate": 3.9998444547411255e-07, |
| "loss": 2.2024, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.8766824299745362, |
| "grad_norm": 1.1147888063902611, |
| "learning_rate": 3.9538241985586144e-07, |
| "loss": 2.183, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.8774099672608221, |
| "grad_norm": 1.1141131177134063, |
| "learning_rate": 3.908059319183194e-07, |
| "loss": 2.1748, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.878137504547108, |
| "grad_norm": 1.1547417917481566, |
| "learning_rate": 3.8625500704315645e-07, |
| "loss": 2.2025, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.878865041833394, |
| "grad_norm": 1.1701183558695587, |
| "learning_rate": 3.8172967047026834e-07, |
| "loss": 2.131, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.8795925791196799, |
| "grad_norm": 1.0882106659044095, |
| "learning_rate": 3.7722994729763427e-07, |
| "loss": 2.192, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.8803201164059659, |
| "grad_norm": 1.0736286473742218, |
| "learning_rate": 3.7275586248118114e-07, |
| "loss": 2.1932, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.8810476536922517, |
| "grad_norm": 1.0875477501520778, |
| "learning_rate": 3.683074408346404e-07, |
| "loss": 2.1797, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.8817751909785376, |
| "grad_norm": 1.0912855242341808, |
| "learning_rate": 3.6388470702941436e-07, |
| "loss": 2.1876, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.8825027282648236, |
| "grad_norm": 1.1768993340036018, |
| "learning_rate": 3.594876855944385e-07, |
| "loss": 2.1852, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.8832302655511095, |
| "grad_norm": 1.151821081597769, |
| "learning_rate": 3.5511640091604293e-07, |
| "loss": 2.193, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.8839578028373954, |
| "grad_norm": 1.0904648278786118, |
| "learning_rate": 3.50770877237821e-07, |
| "loss": 2.1804, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.8846853401236814, |
| "grad_norm": 1.3744133620598458, |
| "learning_rate": 3.4645113866049187e-07, |
| "loss": 2.1703, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.8854128774099672, |
| "grad_norm": 1.2488307712877746, |
| "learning_rate": 3.42157209141768e-07, |
| "loss": 2.1049, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.8861404146962532, |
| "grad_norm": 1.3738389697166, |
| "learning_rate": 3.3788911249622194e-07, |
| "loss": 2.1679, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.8868679519825391, |
| "grad_norm": 1.0926436743467096, |
| "learning_rate": 3.336468723951558e-07, |
| "loss": 2.1589, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.887595489268825, |
| "grad_norm": 1.233863721599574, |
| "learning_rate": 3.294305123664665e-07, |
| "loss": 2.1621, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.888323026555111, |
| "grad_norm": 1.1963007037275617, |
| "learning_rate": 3.2524005579452014e-07, |
| "loss": 2.1802, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.8890505638413969, |
| "grad_norm": 1.1338121772814955, |
| "learning_rate": 3.2107552592001657e-07, |
| "loss": 2.1652, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.8897781011276827, |
| "grad_norm": 1.0673131775790383, |
| "learning_rate": 3.169369458398652e-07, |
| "loss": 2.1752, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.8905056384139687, |
| "grad_norm": 1.1289416481728582, |
| "learning_rate": 3.128243385070562e-07, |
| "loss": 2.1889, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.8912331757002546, |
| "grad_norm": 1.2096782575370533, |
| "learning_rate": 3.087377267305297e-07, |
| "loss": 2.1454, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.8919607129865406, |
| "grad_norm": 1.1219019564743515, |
| "learning_rate": 3.0467713317505363e-07, |
| "loss": 2.1647, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.8926882502728265, |
| "grad_norm": 1.1840262988207078, |
| "learning_rate": 3.006425803610963e-07, |
| "loss": 2.1608, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.8934157875591124, |
| "grad_norm": 1.1775780165121978, |
| "learning_rate": 2.9663409066470025e-07, |
| "loss": 2.1721, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.8941433248453984, |
| "grad_norm": 1.1423731089255424, |
| "learning_rate": 2.9265168631736005e-07, |
| "loss": 2.1577, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.8948708621316842, |
| "grad_norm": 1.832353369781614, |
| "learning_rate": 2.88695389405898e-07, |
| "loss": 2.1378, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.8955983994179701, |
| "grad_norm": 1.1648125598165715, |
| "learning_rate": 2.8476522187234177e-07, |
| "loss": 2.1383, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.8963259367042561, |
| "grad_norm": 1.1276493156309477, |
| "learning_rate": 2.808612055138038e-07, |
| "loss": 2.1563, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.897053473990542, |
| "grad_norm": 1.2036308042804933, |
| "learning_rate": 2.76983361982357e-07, |
| "loss": 2.1653, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.897781011276828, |
| "grad_norm": 1.1166716350605161, |
| "learning_rate": 2.731317127849209e-07, |
| "loss": 2.1954, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.8985085485631139, |
| "grad_norm": 1.0644548721799072, |
| "learning_rate": 2.693062792831358e-07, |
| "loss": 2.1614, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.8992360858493997, |
| "grad_norm": 1.0984178351424647, |
| "learning_rate": 2.655070826932471e-07, |
| "loss": 2.1572, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.8999636231356857, |
| "grad_norm": 1.101303527800766, |
| "learning_rate": 2.617341440859883e-07, |
| "loss": 2.138, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.9006911604219716, |
| "grad_norm": 1.1208887662712228, |
| "learning_rate": 2.5798748438646326e-07, |
| "loss": 2.1561, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.9014186977082576, |
| "grad_norm": 1.100598661105781, |
| "learning_rate": 2.5426712437403134e-07, |
| "loss": 2.1581, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.9021462349945435, |
| "grad_norm": 1.1703700133267052, |
| "learning_rate": 2.5057308468218913e-07, |
| "loss": 2.1473, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.9028737722808294, |
| "grad_norm": 1.1567399217761656, |
| "learning_rate": 2.4690538579845933e-07, |
| "loss": 2.1112, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.9036013095671153, |
| "grad_norm": 1.115099441313513, |
| "learning_rate": 2.432640480642756e-07, |
| "loss": 2.1854, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.9043288468534012, |
| "grad_norm": 1.3913584825821503, |
| "learning_rate": 2.396490916748706e-07, |
| "loss": 2.1817, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.9050563841396871, |
| "grad_norm": 1.1314527530267833, |
| "learning_rate": 2.360605366791624e-07, |
| "loss": 2.1848, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.9057839214259731, |
| "grad_norm": 1.1318209517298516, |
| "learning_rate": 2.32498402979644e-07, |
| "loss": 2.1345, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.906511458712259, |
| "grad_norm": 1.0985737775074131, |
| "learning_rate": 2.2896271033227392e-07, |
| "loss": 2.158, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.907238995998545, |
| "grad_norm": 1.2655865839503828, |
| "learning_rate": 2.2545347834636632e-07, |
| "loss": 2.1542, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.9079665332848309, |
| "grad_norm": 1.4823682609114033, |
| "learning_rate": 2.219707264844806e-07, |
| "loss": 2.1673, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.9086940705711167, |
| "grad_norm": 1.2066786328395243, |
| "learning_rate": 2.1851447406231573e-07, |
| "loss": 2.13, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.9094216078574027, |
| "grad_norm": 1.0771985903083936, |
| "learning_rate": 2.1508474024860171e-07, |
| "loss": 2.1457, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.9101491451436886, |
| "grad_norm": 1.1095865446255258, |
| "learning_rate": 2.1168154406499275e-07, |
| "loss": 2.2382, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.9108766824299745, |
| "grad_norm": 1.3103891317698702, |
| "learning_rate": 2.0830490438596418e-07, |
| "loss": 2.1959, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.9116042197162605, |
| "grad_norm": 1.2306939222199123, |
| "learning_rate": 2.0495483993870578e-07, |
| "loss": 2.1139, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.9123317570025464, |
| "grad_norm": 2.967994937097106, |
| "learning_rate": 2.0163136930301696e-07, |
| "loss": 2.0948, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.9130592942888323, |
| "grad_norm": 1.080126789220765, |
| "learning_rate": 1.9833451091120727e-07, |
| "loss": 2.1823, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.9137868315751182, |
| "grad_norm": 1.0892792590150484, |
| "learning_rate": 1.9506428304799095e-07, |
| "loss": 2.1579, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.9145143688614041, |
| "grad_norm": 1.1192972960003569, |
| "learning_rate": 1.9182070385038555e-07, |
| "loss": 2.2095, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.9152419061476901, |
| "grad_norm": 1.3083562274897766, |
| "learning_rate": 1.886037913076144e-07, |
| "loss": 2.1905, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.915969443433976, |
| "grad_norm": 1.105616909797537, |
| "learning_rate": 1.8541356326100436e-07, |
| "loss": 2.1699, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.916696980720262, |
| "grad_norm": 1.5218961639183424, |
| "learning_rate": 1.8225003740388546e-07, |
| "loss": 2.1513, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.9174245180065478, |
| "grad_norm": 1.0592820677755947, |
| "learning_rate": 1.791132312814975e-07, |
| "loss": 2.1357, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.9181520552928337, |
| "grad_norm": 1.240605202233977, |
| "learning_rate": 1.760031622908881e-07, |
| "loss": 2.2205, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.9188795925791197, |
| "grad_norm": 1.8555531064869182, |
| "learning_rate": 1.729198476808186e-07, |
| "loss": 2.2314, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.9196071298654056, |
| "grad_norm": 1.228934084019767, |
| "learning_rate": 1.6986330455166733e-07, |
| "loss": 2.1364, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.9203346671516915, |
| "grad_norm": 1.1859552322747644, |
| "learning_rate": 1.6683354985533583e-07, |
| "loss": 2.1482, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.9210622044379775, |
| "grad_norm": 1.6560873234086992, |
| "learning_rate": 1.6383060039515343e-07, |
| "loss": 2.1396, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.9217897417242634, |
| "grad_norm": 1.5232709345600017, |
| "learning_rate": 1.6085447282578548e-07, |
| "loss": 2.1474, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.9225172790105493, |
| "grad_norm": 1.0928289545918428, |
| "learning_rate": 1.579051836531409e-07, |
| "loss": 2.139, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.9232448162968352, |
| "grad_norm": 1.1214465184167164, |
| "learning_rate": 1.5498274923427925e-07, |
| "loss": 2.2277, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.9239723535831211, |
| "grad_norm": 1.1315512124069351, |
| "learning_rate": 1.5208718577732096e-07, |
| "loss": 2.1498, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.9246998908694071, |
| "grad_norm": 1.107067470667357, |
| "learning_rate": 1.4921850934135785e-07, |
| "loss": 2.178, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.925427428155693, |
| "grad_norm": 1.0921667144280345, |
| "learning_rate": 1.463767358363627e-07, |
| "loss": 2.1863, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.926154965441979, |
| "grad_norm": 1.1131197425175732, |
| "learning_rate": 1.4356188102310266e-07, |
| "loss": 2.1662, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.9268825027282648, |
| "grad_norm": 1.1701178383938489, |
| "learning_rate": 1.4077396051305093e-07, |
| "loss": 2.1882, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.9276100400145507, |
| "grad_norm": 1.1838245677231893, |
| "learning_rate": 1.3801298976830025e-07, |
| "loss": 2.1209, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.9283375773008367, |
| "grad_norm": 1.1933829830491318, |
| "learning_rate": 1.3527898410147677e-07, |
| "loss": 2.1649, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.9290651145871226, |
| "grad_norm": 1.145526098399752, |
| "learning_rate": 1.325719586756563e-07, |
| "loss": 2.1817, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.9297926518734085, |
| "grad_norm": 1.0749155606673366, |
| "learning_rate": 1.2989192850427933e-07, |
| "loss": 2.1557, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.9305201891596945, |
| "grad_norm": 1.3783023050622016, |
| "learning_rate": 1.2723890845106723e-07, |
| "loss": 2.1287, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.9312477264459803, |
| "grad_norm": 1.6006748713293288, |
| "learning_rate": 1.2461291322994118e-07, |
| "loss": 2.085, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.9319752637322662, |
| "grad_norm": 1.059269234572768, |
| "learning_rate": 1.2201395740493948e-07, |
| "loss": 2.1708, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.9327028010185522, |
| "grad_norm": 1.105289296728809, |
| "learning_rate": 1.1944205539013708e-07, |
| "loss": 2.1864, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.9334303383048381, |
| "grad_norm": 1.193801817088734, |
| "learning_rate": 1.1689722144956672e-07, |
| "loss": 2.1534, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.9341578755911241, |
| "grad_norm": 1.2624183501841544, |
| "learning_rate": 1.1437946969713731e-07, |
| "loss": 2.2187, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.93488541287741, |
| "grad_norm": 1.461535614887746, |
| "learning_rate": 1.1188881409655849e-07, |
| "loss": 2.1274, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.9356129501636959, |
| "grad_norm": 1.617278091319289, |
| "learning_rate": 1.0942526846126122e-07, |
| "loss": 2.1088, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.9363404874499818, |
| "grad_norm": 1.0932395005042113, |
| "learning_rate": 1.0698884645432117e-07, |
| "loss": 2.1984, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.9370680247362677, |
| "grad_norm": 1.1107685198802046, |
| "learning_rate": 1.0457956158838545e-07, |
| "loss": 2.1502, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.9377955620225537, |
| "grad_norm": 1.1432608826273079, |
| "learning_rate": 1.0219742722559433e-07, |
| "loss": 2.2091, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.9385230993088396, |
| "grad_norm": 1.889492536398189, |
| "learning_rate": 9.984245657750857e-08, |
| "loss": 2.1273, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.9392506365951255, |
| "grad_norm": 1.2709195270172968, |
| "learning_rate": 9.751466270503718e-08, |
| "loss": 2.1281, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.9399781738814115, |
| "grad_norm": 1.0799994138588318, |
| "learning_rate": 9.521405851836252e-08, |
| "loss": 2.2231, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.9407057111676973, |
| "grad_norm": 1.525855749830862, |
| "learning_rate": 9.294065677687202e-08, |
| "loss": 2.1466, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.9414332484539832, |
| "grad_norm": 1.1761906693535622, |
| "learning_rate": 9.069447008908383e-08, |
| "loss": 2.1912, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.9421607857402692, |
| "grad_norm": 1.1312463149407157, |
| "learning_rate": 8.847551091257956e-08, |
| "loss": 2.1097, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.9428883230265551, |
| "grad_norm": 1.6796709095897908, |
| "learning_rate": 8.62837915539344e-08, |
| "loss": 2.1561, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.9436158603128411, |
| "grad_norm": 1.0297370910963033, |
| "learning_rate": 8.411932416864832e-08, |
| "loss": 2.1631, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.944343397599127, |
| "grad_norm": 1.4116528470571312, |
| "learning_rate": 8.198212076107881e-08, |
| "loss": 2.1428, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.9450709348854128, |
| "grad_norm": 1.098962010036319, |
| "learning_rate": 7.987219318437489e-08, |
| "loss": 2.2117, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.9457984721716988, |
| "grad_norm": 1.2540943557186013, |
| "learning_rate": 7.778955314041103e-08, |
| "loss": 2.1657, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.9465260094579847, |
| "grad_norm": 1.2400620556554858, |
| "learning_rate": 7.573421217972222e-08, |
| "loss": 2.1536, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.9472535467442706, |
| "grad_norm": 1.151462311267039, |
| "learning_rate": 7.370618170144062e-08, |
| "loss": 2.1253, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.9479810840305566, |
| "grad_norm": 1.1976131413618134, |
| "learning_rate": 7.170547295323016e-08, |
| "loss": 2.087, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.9487086213168425, |
| "grad_norm": 1.4258177957311036, |
| "learning_rate": 6.973209703122652e-08, |
| "loss": 2.1011, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.9494361586031284, |
| "grad_norm": 1.3415584018629254, |
| "learning_rate": 6.778606487997496e-08, |
| "loss": 2.1651, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.9501636958894143, |
| "grad_norm": 1.099049032462197, |
| "learning_rate": 6.58673872923693e-08, |
| "loss": 2.1783, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.9508912331757002, |
| "grad_norm": 1.0740685526691962, |
| "learning_rate": 6.397607490959134e-08, |
| "loss": 2.1613, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.9516187704619862, |
| "grad_norm": 1.2432577438660946, |
| "learning_rate": 6.211213822105378e-08, |
| "loss": 2.1624, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.9523463077482721, |
| "grad_norm": 1.1213774439829132, |
| "learning_rate": 6.027558756434015e-08, |
| "loss": 2.1513, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.953073845034558, |
| "grad_norm": 1.1012965217884594, |
| "learning_rate": 5.846643312514888e-08, |
| "loss": 2.1826, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.953801382320844, |
| "grad_norm": 1.109684951398295, |
| "learning_rate": 5.668468493723489e-08, |
| "loss": 2.1705, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.9545289196071298, |
| "grad_norm": 1.0970545980570312, |
| "learning_rate": 5.4930352882357486e-08, |
| "loss": 2.1471, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.9552564568934158, |
| "grad_norm": 1.1020402645803815, |
| "learning_rate": 5.3203446690220374e-08, |
| "loss": 2.1642, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.9559839941797017, |
| "grad_norm": 1.2667183522454073, |
| "learning_rate": 5.1503975938422824e-08, |
| "loss": 2.1417, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.9567115314659876, |
| "grad_norm": 1.2915533164915878, |
| "learning_rate": 4.983195005240415e-08, |
| "loss": 2.1976, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.9574390687522736, |
| "grad_norm": 1.1076129486018869, |
| "learning_rate": 4.8187378305390994e-08, |
| "loss": 2.1808, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.9581666060385595, |
| "grad_norm": 1.1387016009937267, |
| "learning_rate": 4.657026981834623e-08, |
| "loss": 2.1806, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.9588941433248453, |
| "grad_norm": 1.1899678063694281, |
| "learning_rate": 4.498063355991955e-08, |
| "loss": 2.1713, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.9596216806111313, |
| "grad_norm": 1.24744884870978, |
| "learning_rate": 4.341847834639645e-08, |
| "loss": 2.1992, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.9603492178974172, |
| "grad_norm": 1.146131261626508, |
| "learning_rate": 4.188381284164933e-08, |
| "loss": 2.1161, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.9610767551837032, |
| "grad_norm": 1.1277234081016827, |
| "learning_rate": 4.0376645557090864e-08, |
| "loss": 2.2013, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.9618042924699891, |
| "grad_norm": 1.1559215314132825, |
| "learning_rate": 3.889698485162463e-08, |
| "loss": 2.16, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.962531829756275, |
| "grad_norm": 1.1381481425895759, |
| "learning_rate": 3.744483893160067e-08, |
| "loss": 2.2413, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.9632593670425609, |
| "grad_norm": 1.415309549759452, |
| "learning_rate": 3.602021585076942e-08, |
| "loss": 2.1665, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.9639869043288468, |
| "grad_norm": 1.4440616690572, |
| "learning_rate": 3.462312351023567e-08, |
| "loss": 2.1889, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.9647144416151328, |
| "grad_norm": 1.9538836520889915, |
| "learning_rate": 3.325356965841686e-08, |
| "loss": 2.1754, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.9654419789014187, |
| "grad_norm": 1.1248225603912345, |
| "learning_rate": 3.191156189099931e-08, |
| "loss": 2.1724, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.9661695161877046, |
| "grad_norm": 1.2614599283450048, |
| "learning_rate": 3.0597107650894855e-08, |
| "loss": 2.1464, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.9668970534739906, |
| "grad_norm": 1.2011934411088816, |
| "learning_rate": 2.9310214228202016e-08, |
| "loss": 2.1381, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.9676245907602765, |
| "grad_norm": 1.2998812947548053, |
| "learning_rate": 2.8050888760163265e-08, |
| "loss": 2.1581, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.9683521280465623, |
| "grad_norm": 1.1140672894015613, |
| "learning_rate": 2.6819138231126695e-08, |
| "loss": 2.1749, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.9690796653328483, |
| "grad_norm": 1.1216126535021602, |
| "learning_rate": 2.5614969472506634e-08, |
| "loss": 2.1792, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.9698072026191342, |
| "grad_norm": 1.25322513583671, |
| "learning_rate": 2.4438389162746434e-08, |
| "loss": 2.1696, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.9705347399054202, |
| "grad_norm": 1.1131994577649031, |
| "learning_rate": 2.3289403827281287e-08, |
| "loss": 2.1516, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.9712622771917061, |
| "grad_norm": 1.149788902199477, |
| "learning_rate": 2.2168019838501032e-08, |
| "loss": 2.1206, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.971989814477992, |
| "grad_norm": 1.2320932338097785, |
| "learning_rate": 2.1074243415716288e-08, |
| "loss": 2.1656, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.9727173517642779, |
| "grad_norm": 1.1657936554154817, |
| "learning_rate": 2.0008080625124048e-08, |
| "loss": 2.1708, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.9734448890505638, |
| "grad_norm": 1.1894175601351693, |
| "learning_rate": 1.896953737977103e-08, |
| "loss": 2.1455, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.9741724263368498, |
| "grad_norm": 1.2162504154036424, |
| "learning_rate": 1.7958619439524817e-08, |
| "loss": 2.153, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.9748999636231357, |
| "grad_norm": 1.0695427579088523, |
| "learning_rate": 1.6975332411040547e-08, |
| "loss": 2.1903, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.9756275009094216, |
| "grad_norm": 1.128006404235128, |
| "learning_rate": 1.601968174772761e-08, |
| "loss": 2.1733, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.9763550381957076, |
| "grad_norm": 1.078617608471699, |
| "learning_rate": 1.5091672749723564e-08, |
| "loss": 2.1835, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.9770825754819934, |
| "grad_norm": 1.870175223923746, |
| "learning_rate": 1.4191310563860806e-08, |
| "loss": 2.0813, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.9778101127682793, |
| "grad_norm": 1.6310544307639132, |
| "learning_rate": 1.331860018363995e-08, |
| "loss": 2.1312, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.9785376500545653, |
| "grad_norm": 1.1349574618732923, |
| "learning_rate": 1.2473546449203178e-08, |
| "loss": 2.2078, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.9792651873408512, |
| "grad_norm": 1.2159937263760292, |
| "learning_rate": 1.1656154047303691e-08, |
| "loss": 2.1796, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.9799927246271372, |
| "grad_norm": 1.14281219680189, |
| "learning_rate": 1.0866427511285194e-08, |
| "loss": 2.1934, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.9807202619134231, |
| "grad_norm": 1.1929972027287525, |
| "learning_rate": 1.0104371221050236e-08, |
| "loss": 2.0814, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.981447799199709, |
| "grad_norm": 1.0720512824500246, |
| "learning_rate": 9.369989403041347e-09, |
| "loss": 2.1718, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.9821753364859949, |
| "grad_norm": 1.1678936203643433, |
| "learning_rate": 8.663286130216608e-09, |
| "loss": 2.1682, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.9829028737722808, |
| "grad_norm": 1.245453145807709, |
| "learning_rate": 7.984265322023011e-09, |
| "loss": 2.158, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.9836304110585667, |
| "grad_norm": 1.1492193587944532, |
| "learning_rate": 7.332930744380906e-09, |
| "loss": 2.1758, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.9843579483448527, |
| "grad_norm": 1.2612149321084427, |
| "learning_rate": 6.709286009657368e-09, |
| "loss": 2.1567, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.9850854856311386, |
| "grad_norm": 1.3115967794340748, |
| "learning_rate": 6.1133345766511975e-09, |
| "loss": 2.1383, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.9858130229174246, |
| "grad_norm": 1.2352405406930702, |
| "learning_rate": 5.5450797505690605e-09, |
| "loss": 2.1914, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.9865405602037104, |
| "grad_norm": 1.2123024417780415, |
| "learning_rate": 5.004524683011048e-09, |
| "loss": 2.2086, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.9872680974899963, |
| "grad_norm": 1.0605306568096313, |
| "learning_rate": 4.491672371950695e-09, |
| "loss": 2.1677, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.9879956347762823, |
| "grad_norm": 1.159183588970115, |
| "learning_rate": 4.0065256617199954e-09, |
| "loss": 2.1543, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.9887231720625682, |
| "grad_norm": 1.1587983090969554, |
| "learning_rate": 3.5490872429910784e-09, |
| "loss": 2.1265, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.9894507093488542, |
| "grad_norm": 1.260819419569731, |
| "learning_rate": 3.119359652765108e-09, |
| "loss": 2.1756, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.9901782466351401, |
| "grad_norm": 1.1705707883099417, |
| "learning_rate": 2.7173452743550767e-09, |
| "loss": 2.1663, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.9909057839214259, |
| "grad_norm": 1.3422853083589237, |
| "learning_rate": 2.343046337374144e-09, |
| "loss": 2.1805, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.9916333212077119, |
| "grad_norm": 1.2168506130447845, |
| "learning_rate": 1.9964649177223184e-09, |
| "loss": 2.1607, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.9923608584939978, |
| "grad_norm": 1.1771922885619566, |
| "learning_rate": 1.6776029375759062e-09, |
| "loss": 2.1397, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.9930883957802837, |
| "grad_norm": 1.1042541645524246, |
| "learning_rate": 1.386462165375857e-09, |
| "loss": 2.1772, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.9938159330665697, |
| "grad_norm": 1.2703508857723977, |
| "learning_rate": 1.1230442158188804e-09, |
| "loss": 2.1632, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.9945434703528556, |
| "grad_norm": 1.108751259348474, |
| "learning_rate": 8.873505498474544e-10, |
| "loss": 2.1881, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.9952710076391416, |
| "grad_norm": 1.1228251721868356, |
| "learning_rate": 6.793824746437194e-10, |
| "loss": 2.175, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.9959985449254274, |
| "grad_norm": 1.1248446425450205, |
| "learning_rate": 4.991411436189308e-10, |
| "loss": 2.1946, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.9967260822117133, |
| "grad_norm": 1.0751502038617766, |
| "learning_rate": 3.466275564101285e-10, |
| "loss": 2.1715, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.9974536194979993, |
| "grad_norm": 1.1002050846433344, |
| "learning_rate": 2.2184255887403028e-10, |
| "loss": 2.1891, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.9981811567842852, |
| "grad_norm": 1.350470042755439, |
| "learning_rate": 1.2478684308037115e-10, |
| "loss": 2.1012, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.9989086940705711, |
| "grad_norm": 1.0908546681209645, |
| "learning_rate": 5.5460947310237435e-11, |
| "loss": 2.1487, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.9996362313568571, |
| "grad_norm": 1.1465345153912363, |
| "learning_rate": 1.3865256052181252e-11, |
| "loss": 2.202, |
| "step": 1374 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1374, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 687, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.4520916805419008e+18, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|