{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9998704383502484, "eval_steps": 500, "global_step": 2894, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.006909954653422587, "grad_norm": 2.7603113651275635, "learning_rate": 8.673533304426543e-06, "loss": 3.6901, "step": 20 }, { "epoch": 0.013819909306845174, "grad_norm": 2.2209839820861816, "learning_rate": 1.0680399942186417e-05, "loss": 3.4962, "step": 40 }, { "epoch": 0.020729863960267762, "grad_norm": 2.200331687927246, "learning_rate": 1.1854341669224292e-05, "loss": 3.4824, "step": 60 }, { "epoch": 0.02763981861369035, "grad_norm": 2.167349100112915, "learning_rate": 1.2687266579946291e-05, "loss": 3.4759, "step": 80 }, { "epoch": 0.034549773267112935, "grad_norm": 2.0614750385284424, "learning_rate": 1.3333333333333337e-05, "loss": 3.4502, "step": 100 }, { "epoch": 0.041459727920535525, "grad_norm": 2.170149087905884, "learning_rate": 1.3861208306984167e-05, "loss": 3.4189, "step": 120 }, { "epoch": 0.04836968257395811, "grad_norm": 2.2909820079803467, "learning_rate": 1.4307520237854922e-05, "loss": 3.4281, "step": 140 }, { "epoch": 0.0552796372273807, "grad_norm": 2.159343719482422, "learning_rate": 1.4694133217706166e-05, "loss": 3.414, "step": 160 }, { "epoch": 0.06218959188080328, "grad_norm": 2.2137129306793213, "learning_rate": 1.5035150034022042e-05, "loss": 3.4163, "step": 180 }, { "epoch": 0.06909954653422587, "grad_norm": 1.9851800203323364, "learning_rate": 1.5340199971093208e-05, "loss": 3.4075, "step": 200 }, { "epoch": 0.07600950118764846, "grad_norm": 2.3020198345184326, "learning_rate": 1.5616151205481378e-05, "loss": 3.3834, "step": 220 }, { "epoch": 0.08291945584107105, "grad_norm": 2.0768489837646484, "learning_rate": 1.586807494474404e-05, "loss": 3.3947, "step": 240 }, { "epoch": 0.08982941049449363, "grad_norm": 2.0693891048431396, "learning_rate": 1.6099822319805453e-05, "loss": 3.4267, "step": 260 }, { "epoch": 0.09673936514791621, "grad_norm": 1.994624137878418, "learning_rate": 1.6314386875614796e-05, "loss": 3.402, "step": 280 }, { "epoch": 0.1036493198013388, "grad_norm": 2.0815863609313965, "learning_rate": 1.6514141698131085e-05, "loss": 3.4122, "step": 300 }, { "epoch": 0.1105592744547614, "grad_norm": 2.1544482707977295, "learning_rate": 1.670099985546604e-05, "loss": 3.4049, "step": 320 }, { "epoch": 0.11746922910818398, "grad_norm": 2.2229878902435303, "learning_rate": 1.6876526113615038e-05, "loss": 3.4034, "step": 340 }, { "epoch": 0.12437918376160656, "grad_norm": 2.2661125659942627, "learning_rate": 1.704201667178192e-05, "loss": 3.4268, "step": 360 }, { "epoch": 0.13128913841502915, "grad_norm": 2.1003775596618652, "learning_rate": 1.7198557310778737e-05, "loss": 3.3937, "step": 380 }, { "epoch": 0.13819909306845174, "grad_norm": 2.0601627826690674, "learning_rate": 1.7347066608853085e-05, "loss": 3.3605, "step": 400 }, { "epoch": 0.14510904772187433, "grad_norm": 1.9609519243240356, "learning_rate": 1.748832860265267e-05, "loss": 3.3977, "step": 420 }, { "epoch": 0.15201900237529692, "grad_norm": 2.147367000579834, "learning_rate": 1.762301784324125e-05, "loss": 3.407, "step": 440 }, { "epoch": 0.1589289570287195, "grad_norm": 2.011277675628662, "learning_rate": 1.7751718877877165e-05, "loss": 3.3841, "step": 460 }, { "epoch": 0.1658389116821421, "grad_norm": 2.076007127761841, "learning_rate": 1.7874941582503917e-05, "loss": 3.3784, "step": 480 }, { "epoch": 0.17274886633556466, "grad_norm": 2.187765121459961, "learning_rate": 1.7993133362240127e-05, "loss": 3.3801, "step": 500 }, { "epoch": 0.17965882098898725, "grad_norm": 2.0247693061828613, "learning_rate": 1.810668895756533e-05, "loss": 3.3751, "step": 520 }, { "epoch": 0.18656877564240984, "grad_norm": 2.131352424621582, "learning_rate": 1.8215958398819793e-05, "loss": 3.3631, "step": 540 }, { "epoch": 0.19347873029583243, "grad_norm": 2.178847312927246, "learning_rate": 1.832125351337467e-05, "loss": 3.3764, "step": 560 }, { "epoch": 0.20038868494925502, "grad_norm": 2.2057948112487793, "learning_rate": 1.8422853290419585e-05, "loss": 3.3686, "step": 580 }, { "epoch": 0.2072986396026776, "grad_norm": 2.5370497703552246, "learning_rate": 1.852100833589096e-05, "loss": 3.377, "step": 600 }, { "epoch": 0.2142085942561002, "grad_norm": 2.284680128097534, "learning_rate": 1.861594459665503e-05, "loss": 3.3723, "step": 620 }, { "epoch": 0.2211185489095228, "grad_norm": 2.1865601539611816, "learning_rate": 1.8707866493225918e-05, "loss": 3.3216, "step": 640 }, { "epoch": 0.22802850356294538, "grad_norm": 2.0960466861724854, "learning_rate": 1.879695957027913e-05, "loss": 3.3597, "step": 660 }, { "epoch": 0.23493845821636797, "grad_norm": 2.184783935546875, "learning_rate": 1.888339275137491e-05, "loss": 3.3432, "step": 680 }, { "epoch": 0.24184841286979053, "grad_norm": 2.191296100616455, "learning_rate": 1.8967320266761712e-05, "loss": 3.3668, "step": 700 }, { "epoch": 0.24875836752321312, "grad_norm": 2.0871424674987793, "learning_rate": 1.9048883309541794e-05, "loss": 3.3539, "step": 720 }, { "epoch": 0.2556683221766357, "grad_norm": 2.1541998386383057, "learning_rate": 1.9128211464873177e-05, "loss": 3.3594, "step": 740 }, { "epoch": 0.2625782768300583, "grad_norm": 2.0716910362243652, "learning_rate": 1.920542394853861e-05, "loss": 3.3173, "step": 760 }, { "epoch": 0.2694882314834809, "grad_norm": 2.270064353942871, "learning_rate": 1.9280630684603204e-05, "loss": 3.3289, "step": 780 }, { "epoch": 0.2763981861369035, "grad_norm": 2.1515491008758545, "learning_rate": 1.935393324661296e-05, "loss": 3.3316, "step": 800 }, { "epoch": 0.28330814079032607, "grad_norm": 2.1054258346557617, "learning_rate": 1.9425425682558113e-05, "loss": 3.3063, "step": 820 }, { "epoch": 0.29021809544374866, "grad_norm": 2.2717254161834717, "learning_rate": 1.9495195240412547e-05, "loss": 3.3388, "step": 840 }, { "epoch": 0.29712805009717125, "grad_norm": 2.2322545051574707, "learning_rate": 1.9563323008290453e-05, "loss": 3.3187, "step": 860 }, { "epoch": 0.30403800475059384, "grad_norm": 2.102282762527466, "learning_rate": 1.9629884481001123e-05, "loss": 3.2985, "step": 880 }, { "epoch": 0.3109479594040164, "grad_norm": 2.2856414318084717, "learning_rate": 1.9694950062928836e-05, "loss": 3.2925, "step": 900 }, { "epoch": 0.317857914057439, "grad_norm": 2.143099308013916, "learning_rate": 1.975858551563704e-05, "loss": 3.3045, "step": 920 }, { "epoch": 0.3247678687108616, "grad_norm": 2.1752912998199463, "learning_rate": 1.982085235733133e-05, "loss": 3.3158, "step": 940 }, { "epoch": 0.3316778233642842, "grad_norm": 2.247835159301758, "learning_rate": 1.988180822026379e-05, "loss": 3.3155, "step": 960 }, { "epoch": 0.3385877780177068, "grad_norm": 2.0297396183013916, "learning_rate": 1.99415071712833e-05, "loss": 3.3113, "step": 980 }, { "epoch": 0.3454977326711293, "grad_norm": 2.0661704540252686, "learning_rate": 2e-05, "loss": 3.2767, "step": 1000 }, { "epoch": 0.3524076873245519, "grad_norm": 2.0839781761169434, "learning_rate": 1.9799366420274552e-05, "loss": 3.2869, "step": 1020 }, { "epoch": 0.3593176419779745, "grad_norm": 2.1040310859680176, "learning_rate": 1.958817317845829e-05, "loss": 3.2935, "step": 1040 }, { "epoch": 0.3662275966313971, "grad_norm": 2.2290778160095215, "learning_rate": 1.937697993664203e-05, "loss": 3.2958, "step": 1060 }, { "epoch": 0.3731375512848197, "grad_norm": 2.12402081489563, "learning_rate": 1.916578669482577e-05, "loss": 3.2603, "step": 1080 }, { "epoch": 0.38004750593824227, "grad_norm": 2.0378010272979736, "learning_rate": 1.8954593453009504e-05, "loss": 3.2961, "step": 1100 }, { "epoch": 0.38695746059166486, "grad_norm": 2.16814923286438, "learning_rate": 1.8743400211193243e-05, "loss": 3.2748, "step": 1120 }, { "epoch": 0.39386741524508745, "grad_norm": 2.180172920227051, "learning_rate": 1.8532206969376982e-05, "loss": 3.2758, "step": 1140 }, { "epoch": 0.40077736989851004, "grad_norm": 2.0336861610412598, "learning_rate": 1.832101372756072e-05, "loss": 3.2804, "step": 1160 }, { "epoch": 0.40768732455193263, "grad_norm": 2.077043056488037, "learning_rate": 1.810982048574446e-05, "loss": 3.2668, "step": 1180 }, { "epoch": 0.4145972792053552, "grad_norm": 2.0104854106903076, "learning_rate": 1.7898627243928195e-05, "loss": 3.2707, "step": 1200 }, { "epoch": 0.4215072338587778, "grad_norm": 2.3837151527404785, "learning_rate": 1.7687434002111933e-05, "loss": 3.2416, "step": 1220 }, { "epoch": 0.4284171885122004, "grad_norm": 2.058412551879883, "learning_rate": 1.7476240760295672e-05, "loss": 3.2403, "step": 1240 }, { "epoch": 0.435327143165623, "grad_norm": 2.1999926567077637, "learning_rate": 1.726504751847941e-05, "loss": 3.2177, "step": 1260 }, { "epoch": 0.4422370978190456, "grad_norm": 2.096730947494507, "learning_rate": 1.705385427666315e-05, "loss": 3.2254, "step": 1280 }, { "epoch": 0.44914705247246817, "grad_norm": 2.0574615001678467, "learning_rate": 1.6842661034846885e-05, "loss": 3.2167, "step": 1300 }, { "epoch": 0.45605700712589076, "grad_norm": 2.133613348007202, "learning_rate": 1.6631467793030624e-05, "loss": 3.1911, "step": 1320 }, { "epoch": 0.46296696177931335, "grad_norm": 2.3095550537109375, "learning_rate": 1.6420274551214363e-05, "loss": 3.2139, "step": 1340 }, { "epoch": 0.46987691643273594, "grad_norm": 2.173490285873413, "learning_rate": 1.62090813093981e-05, "loss": 3.1964, "step": 1360 }, { "epoch": 0.47678687108615847, "grad_norm": 2.0556015968322754, "learning_rate": 1.599788806758184e-05, "loss": 3.1824, "step": 1380 }, { "epoch": 0.48369682573958106, "grad_norm": 2.140432357788086, "learning_rate": 1.5786694825765576e-05, "loss": 3.1972, "step": 1400 }, { "epoch": 0.49060678039300365, "grad_norm": 2.209411859512329, "learning_rate": 1.5575501583949314e-05, "loss": 3.1942, "step": 1420 }, { "epoch": 0.49751673504642624, "grad_norm": 2.1304755210876465, "learning_rate": 1.5364308342133053e-05, "loss": 3.1723, "step": 1440 }, { "epoch": 0.5044266896998488, "grad_norm": 2.2864432334899902, "learning_rate": 1.515311510031679e-05, "loss": 3.1777, "step": 1460 }, { "epoch": 0.5113366443532714, "grad_norm": 2.17474365234375, "learning_rate": 1.4941921858500529e-05, "loss": 3.1849, "step": 1480 }, { "epoch": 0.518246599006694, "grad_norm": 2.0795702934265137, "learning_rate": 1.4730728616684266e-05, "loss": 3.1643, "step": 1500 }, { "epoch": 0.5251565536601166, "grad_norm": 2.2711730003356934, "learning_rate": 1.4519535374868005e-05, "loss": 3.1572, "step": 1520 }, { "epoch": 0.5320665083135392, "grad_norm": 2.0966429710388184, "learning_rate": 1.4308342133051742e-05, "loss": 3.1984, "step": 1540 }, { "epoch": 0.5389764629669618, "grad_norm": 2.2726895809173584, "learning_rate": 1.409714889123548e-05, "loss": 3.1593, "step": 1560 }, { "epoch": 0.5458864176203844, "grad_norm": 2.102625608444214, "learning_rate": 1.3885955649419221e-05, "loss": 3.1733, "step": 1580 }, { "epoch": 0.552796372273807, "grad_norm": 2.2264513969421387, "learning_rate": 1.3674762407602957e-05, "loss": 3.1159, "step": 1600 }, { "epoch": 0.5597063269272295, "grad_norm": 2.2396631240844727, "learning_rate": 1.3463569165786697e-05, "loss": 3.1444, "step": 1620 }, { "epoch": 0.5666162815806521, "grad_norm": 2.4287407398223877, "learning_rate": 1.3252375923970432e-05, "loss": 3.1341, "step": 1640 }, { "epoch": 0.5735262362340747, "grad_norm": 2.253844976425171, "learning_rate": 1.3041182682154171e-05, "loss": 3.114, "step": 1660 }, { "epoch": 0.5804361908874973, "grad_norm": 2.08655047416687, "learning_rate": 1.2829989440337912e-05, "loss": 3.1117, "step": 1680 }, { "epoch": 0.5873461455409199, "grad_norm": 2.4364819526672363, "learning_rate": 1.2618796198521647e-05, "loss": 3.1262, "step": 1700 }, { "epoch": 0.5942561001943425, "grad_norm": 2.2320666313171387, "learning_rate": 1.2407602956705388e-05, "loss": 3.0833, "step": 1720 }, { "epoch": 0.6011660548477651, "grad_norm": 2.156684160232544, "learning_rate": 1.2196409714889123e-05, "loss": 3.1382, "step": 1740 }, { "epoch": 0.6080760095011877, "grad_norm": 2.1044089794158936, "learning_rate": 1.1985216473072863e-05, "loss": 3.1222, "step": 1760 }, { "epoch": 0.6149859641546103, "grad_norm": 2.1616947650909424, "learning_rate": 1.1774023231256602e-05, "loss": 3.1213, "step": 1780 }, { "epoch": 0.6218959188080329, "grad_norm": 2.161734104156494, "learning_rate": 1.1562829989440338e-05, "loss": 3.0898, "step": 1800 }, { "epoch": 0.6288058734614554, "grad_norm": 2.0919594764709473, "learning_rate": 1.1351636747624078e-05, "loss": 3.0756, "step": 1820 }, { "epoch": 0.635715828114878, "grad_norm": 2.4278299808502197, "learning_rate": 1.1140443505807813e-05, "loss": 3.0654, "step": 1840 }, { "epoch": 0.6426257827683006, "grad_norm": 2.1781225204467773, "learning_rate": 1.0929250263991554e-05, "loss": 3.0855, "step": 1860 }, { "epoch": 0.6495357374217232, "grad_norm": 2.1798110008239746, "learning_rate": 1.0718057022175293e-05, "loss": 3.0628, "step": 1880 }, { "epoch": 0.6564456920751458, "grad_norm": 2.3901069164276123, "learning_rate": 1.050686378035903e-05, "loss": 3.0388, "step": 1900 }, { "epoch": 0.6633556467285684, "grad_norm": 2.2091197967529297, "learning_rate": 1.0295670538542769e-05, "loss": 3.0418, "step": 1920 }, { "epoch": 0.670265601381991, "grad_norm": 2.403480052947998, "learning_rate": 1.0084477296726504e-05, "loss": 3.063, "step": 1940 }, { "epoch": 0.6771755560354136, "grad_norm": 2.185926914215088, "learning_rate": 9.873284054910244e-06, "loss": 3.044, "step": 1960 }, { "epoch": 0.684085510688836, "grad_norm": 2.147468328475952, "learning_rate": 9.662090813093982e-06, "loss": 3.0328, "step": 1980 }, { "epoch": 0.6909954653422586, "grad_norm": 2.153027057647705, "learning_rate": 9.45089757127772e-06, "loss": 3.0437, "step": 2000 }, { "epoch": 0.6979054199956812, "grad_norm": 2.172102451324463, "learning_rate": 9.239704329461457e-06, "loss": 3.0516, "step": 2020 }, { "epoch": 0.7048153746491038, "grad_norm": 2.4078903198242188, "learning_rate": 9.028511087645196e-06, "loss": 3.0235, "step": 2040 }, { "epoch": 0.7117253293025264, "grad_norm": 2.3446578979492188, "learning_rate": 8.817317845828935e-06, "loss": 3.0051, "step": 2060 }, { "epoch": 0.718635283955949, "grad_norm": 2.2547926902770996, "learning_rate": 8.606124604012672e-06, "loss": 3.0165, "step": 2080 }, { "epoch": 0.7255452386093716, "grad_norm": 2.216155767440796, "learning_rate": 8.39493136219641e-06, "loss": 3.0411, "step": 2100 }, { "epoch": 0.7324551932627942, "grad_norm": 2.2646193504333496, "learning_rate": 8.183738120380148e-06, "loss": 2.9969, "step": 2120 }, { "epoch": 0.7393651479162168, "grad_norm": 2.208395004272461, "learning_rate": 7.972544878563887e-06, "loss": 2.9966, "step": 2140 }, { "epoch": 0.7462751025696394, "grad_norm": 2.2717556953430176, "learning_rate": 7.761351636747625e-06, "loss": 2.9973, "step": 2160 }, { "epoch": 0.753185057223062, "grad_norm": 2.19114351272583, "learning_rate": 7.5501583949313625e-06, "loss": 2.9948, "step": 2180 }, { "epoch": 0.7600950118764845, "grad_norm": 2.1782710552215576, "learning_rate": 7.3389651531151e-06, "loss": 3.0065, "step": 2200 }, { "epoch": 0.7670049665299071, "grad_norm": 2.2360358238220215, "learning_rate": 7.127771911298838e-06, "loss": 2.9865, "step": 2220 }, { "epoch": 0.7739149211833297, "grad_norm": 2.233429193496704, "learning_rate": 6.916578669482578e-06, "loss": 2.9858, "step": 2240 }, { "epoch": 0.7808248758367523, "grad_norm": 2.2624831199645996, "learning_rate": 6.705385427666316e-06, "loss": 2.9851, "step": 2260 }, { "epoch": 0.7877348304901749, "grad_norm": 2.1775155067443848, "learning_rate": 6.494192185850053e-06, "loss": 2.9961, "step": 2280 }, { "epoch": 0.7946447851435975, "grad_norm": 2.2491140365600586, "learning_rate": 6.282998944033791e-06, "loss": 2.9476, "step": 2300 }, { "epoch": 0.8015547397970201, "grad_norm": 2.2396609783172607, "learning_rate": 6.071805702217529e-06, "loss": 2.9736, "step": 2320 }, { "epoch": 0.8084646944504427, "grad_norm": 2.3773739337921143, "learning_rate": 5.8606124604012685e-06, "loss": 2.9598, "step": 2340 }, { "epoch": 0.8153746491038653, "grad_norm": 2.2326018810272217, "learning_rate": 5.649419218585006e-06, "loss": 2.9475, "step": 2360 }, { "epoch": 0.8222846037572878, "grad_norm": 2.466160535812378, "learning_rate": 5.438225976768744e-06, "loss": 2.9475, "step": 2380 }, { "epoch": 0.8291945584107104, "grad_norm": 2.3290674686431885, "learning_rate": 5.227032734952482e-06, "loss": 2.9525, "step": 2400 }, { "epoch": 0.836104513064133, "grad_norm": 2.1915531158447266, "learning_rate": 5.01583949313622e-06, "loss": 2.935, "step": 2420 }, { "epoch": 0.8430144677175556, "grad_norm": 2.295179605484009, "learning_rate": 4.804646251319958e-06, "loss": 2.9198, "step": 2440 }, { "epoch": 0.8499244223709782, "grad_norm": 2.103804111480713, "learning_rate": 4.593453009503696e-06, "loss": 2.9274, "step": 2460 }, { "epoch": 0.8568343770244008, "grad_norm": 2.2191033363342285, "learning_rate": 4.382259767687435e-06, "loss": 2.9178, "step": 2480 }, { "epoch": 0.8637443316778234, "grad_norm": 2.251919746398926, "learning_rate": 4.171066525871173e-06, "loss": 2.9324, "step": 2500 }, { "epoch": 0.870654286331246, "grad_norm": 2.2608258724212646, "learning_rate": 3.959873284054911e-06, "loss": 2.912, "step": 2520 }, { "epoch": 0.8775642409846686, "grad_norm": 2.294982433319092, "learning_rate": 3.7486800422386486e-06, "loss": 2.9023, "step": 2540 }, { "epoch": 0.8844741956380912, "grad_norm": 2.280447006225586, "learning_rate": 3.5374868004223865e-06, "loss": 2.9203, "step": 2560 }, { "epoch": 0.8913841502915137, "grad_norm": 2.182582378387451, "learning_rate": 3.3262935586061253e-06, "loss": 2.9146, "step": 2580 }, { "epoch": 0.8982941049449363, "grad_norm": 2.1298370361328125, "learning_rate": 3.1151003167898632e-06, "loss": 2.875, "step": 2600 }, { "epoch": 0.9052040595983589, "grad_norm": 2.147123336791992, "learning_rate": 2.9039070749736007e-06, "loss": 2.9039, "step": 2620 }, { "epoch": 0.9121140142517815, "grad_norm": 2.381716728210449, "learning_rate": 2.6927138331573395e-06, "loss": 2.8772, "step": 2640 }, { "epoch": 0.9190239689052041, "grad_norm": 2.1160385608673096, "learning_rate": 2.4815205913410774e-06, "loss": 2.8996, "step": 2660 }, { "epoch": 0.9259339235586267, "grad_norm": 2.1785471439361572, "learning_rate": 2.2703273495248154e-06, "loss": 2.8793, "step": 2680 }, { "epoch": 0.9328438782120493, "grad_norm": 2.3042492866516113, "learning_rate": 2.0591341077085537e-06, "loss": 2.8936, "step": 2700 }, { "epoch": 0.9397538328654719, "grad_norm": 2.329930543899536, "learning_rate": 1.8479408658922914e-06, "loss": 2.8752, "step": 2720 }, { "epoch": 0.9466637875188945, "grad_norm": 2.297677993774414, "learning_rate": 1.6367476240760296e-06, "loss": 2.8898, "step": 2740 }, { "epoch": 0.9535737421723169, "grad_norm": 2.117172956466675, "learning_rate": 1.425554382259768e-06, "loss": 2.9107, "step": 2760 }, { "epoch": 0.9604836968257395, "grad_norm": 2.2045278549194336, "learning_rate": 1.2143611404435059e-06, "loss": 2.8748, "step": 2780 }, { "epoch": 0.9673936514791621, "grad_norm": 2.441049098968506, "learning_rate": 1.003167898627244e-06, "loss": 2.876, "step": 2800 }, { "epoch": 0.9743036061325847, "grad_norm": 2.242483615875244, "learning_rate": 7.91974656810982e-07, "loss": 2.8553, "step": 2820 }, { "epoch": 0.9812135607860073, "grad_norm": 2.046325206756592, "learning_rate": 5.807814149947203e-07, "loss": 2.8689, "step": 2840 }, { "epoch": 0.9881235154394299, "grad_norm": 2.166343927383423, "learning_rate": 3.6958817317845836e-07, "loss": 2.8678, "step": 2860 }, { "epoch": 0.9950334700928525, "grad_norm": 2.2947232723236084, "learning_rate": 1.583949313621964e-07, "loss": 2.8587, "step": 2880 }, { "epoch": 0.9998704383502484, "step": 2894, "total_flos": 4.190944749311492e+18, "train_loss": 3.1699299486077237, "train_runtime": 39562.1085, "train_samples_per_second": 9.364, "train_steps_per_second": 0.073 } ], "logging_steps": 20, "max_steps": 2894, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.190944749311492e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }