| { |
| "best_global_step": 138718, |
| "best_metric": 0.9915470627263667, |
| "best_model_checkpoint": "/home/skwon01/scratch/sibal/finetuned_models/serengeti_camera_ready/checkpoint-138718", |
| "epoch": 2.0, |
| "eval_steps": 1000.0, |
| "global_step": 138718, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.007208869793393791, |
| "grad_norm": 2.880587577819824, |
| "learning_rate": 1.9985611095892387e-05, |
| "loss": 3.675, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.014417739586787583, |
| "grad_norm": 3.1965484619140625, |
| "learning_rate": 1.99711933563056e-05, |
| "loss": 1.3703, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.021626609380181374, |
| "grad_norm": 3.587383270263672, |
| "learning_rate": 1.9956775616718814e-05, |
| "loss": 0.7317, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.028835479173575165, |
| "grad_norm": 2.73246169090271, |
| "learning_rate": 1.9942357877132026e-05, |
| "loss": 0.4764, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.03604434896696896, |
| "grad_norm": 3.9599311351776123, |
| "learning_rate": 1.9927940137545237e-05, |
| "loss": 0.3488, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.04325321876036275, |
| "grad_norm": 3.690446138381958, |
| "learning_rate": 1.991352239795845e-05, |
| "loss": 0.2729, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.05046208855375654, |
| "grad_norm": 3.0428125858306885, |
| "learning_rate": 1.989910465837166e-05, |
| "loss": 0.2249, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.05767095834715033, |
| "grad_norm": 2.6362786293029785, |
| "learning_rate": 1.9884686918784876e-05, |
| "loss": 0.1907, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.06487982814054413, |
| "grad_norm": 4.072872161865234, |
| "learning_rate": 1.9870269179198087e-05, |
| "loss": 0.1695, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.07208869793393792, |
| "grad_norm": 2.4177143573760986, |
| "learning_rate": 1.98558514396113e-05, |
| "loss": 0.1535, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.07929756772733171, |
| "grad_norm": 2.4438438415527344, |
| "learning_rate": 1.9841433700024514e-05, |
| "loss": 0.1429, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.0865064375207255, |
| "grad_norm": 1.9982225894927979, |
| "learning_rate": 1.9827015960437722e-05, |
| "loss": 0.1348, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.0937153073141193, |
| "grad_norm": 2.988769769668579, |
| "learning_rate": 1.9812598220850938e-05, |
| "loss": 0.1226, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.10092417710751309, |
| "grad_norm": 2.386380672454834, |
| "learning_rate": 1.979818048126415e-05, |
| "loss": 0.1168, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.10813304690090687, |
| "grad_norm": 1.9924527406692505, |
| "learning_rate": 1.978376274167736e-05, |
| "loss": 0.1082, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.11534191669430066, |
| "grad_norm": 1.9020510911941528, |
| "learning_rate": 1.9769345002090573e-05, |
| "loss": 0.1064, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.12255078648769446, |
| "grad_norm": 2.333510160446167, |
| "learning_rate": 1.9754927262503788e-05, |
| "loss": 0.1029, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.12975965628108826, |
| "grad_norm": 2.677407741546631, |
| "learning_rate": 1.9740509522917e-05, |
| "loss": 0.0995, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.13696852607448204, |
| "grad_norm": 1.5480279922485352, |
| "learning_rate": 1.972609178333021e-05, |
| "loss": 0.0948, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.14417739586787584, |
| "grad_norm": 2.630037546157837, |
| "learning_rate": 1.9711674043743423e-05, |
| "loss": 0.0937, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.15138626566126961, |
| "grad_norm": 2.267946243286133, |
| "learning_rate": 1.9697256304156634e-05, |
| "loss": 0.0909, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.15859513545466342, |
| "grad_norm": 2.932375907897949, |
| "learning_rate": 1.968283856456985e-05, |
| "loss": 0.0889, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.16580400524805722, |
| "grad_norm": 2.69350528717041, |
| "learning_rate": 1.966842082498306e-05, |
| "loss": 0.086, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.173012875041451, |
| "grad_norm": 2.1316378116607666, |
| "learning_rate": 1.9654003085396273e-05, |
| "loss": 0.0843, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.1802217448348448, |
| "grad_norm": 2.52103853225708, |
| "learning_rate": 1.9639585345809488e-05, |
| "loss": 0.0828, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.1874306146282386, |
| "grad_norm": 1.939334511756897, |
| "learning_rate": 1.9625167606222696e-05, |
| "loss": 0.0795, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.19463948442163237, |
| "grad_norm": 2.3057949542999268, |
| "learning_rate": 1.961074986663591e-05, |
| "loss": 0.0786, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.20184835421502617, |
| "grad_norm": 2.0021777153015137, |
| "learning_rate": 1.9596332127049123e-05, |
| "loss": 0.0773, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.20905722400841997, |
| "grad_norm": 2.276421546936035, |
| "learning_rate": 1.9581914387462335e-05, |
| "loss": 0.0772, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.21626609380181375, |
| "grad_norm": 2.426966428756714, |
| "learning_rate": 1.9567496647875546e-05, |
| "loss": 0.0746, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.22347496359520755, |
| "grad_norm": 1.984330415725708, |
| "learning_rate": 1.955307890828876e-05, |
| "loss": 0.074, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.23068383338860132, |
| "grad_norm": 2.1131157875061035, |
| "learning_rate": 1.9538661168701973e-05, |
| "loss": 0.0754, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.23789270318199512, |
| "grad_norm": 2.672717332839966, |
| "learning_rate": 1.9524243429115185e-05, |
| "loss": 0.0719, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.24510157297538893, |
| "grad_norm": 1.4720840454101562, |
| "learning_rate": 1.9509825689528396e-05, |
| "loss": 0.0689, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.25231044276878273, |
| "grad_norm": 1.7824233770370483, |
| "learning_rate": 1.9495407949941608e-05, |
| "loss": 0.0711, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.25951931256217653, |
| "grad_norm": 1.7139828205108643, |
| "learning_rate": 1.9480990210354823e-05, |
| "loss": 0.067, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.2667281823555703, |
| "grad_norm": 2.2731082439422607, |
| "learning_rate": 1.9466572470768035e-05, |
| "loss": 0.0678, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.2739370521489641, |
| "grad_norm": 2.2537448406219482, |
| "learning_rate": 1.9452154731181247e-05, |
| "loss": 0.0657, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.2811459219423579, |
| "grad_norm": 3.0216615200042725, |
| "learning_rate": 1.943773699159446e-05, |
| "loss": 0.0656, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.2883547917357517, |
| "grad_norm": 1.4544578790664673, |
| "learning_rate": 1.942331925200767e-05, |
| "loss": 0.0658, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.2955636615291455, |
| "grad_norm": 2.4549198150634766, |
| "learning_rate": 1.9408901512420885e-05, |
| "loss": 0.0641, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.30277253132253923, |
| "grad_norm": 1.514060616493225, |
| "learning_rate": 1.9394483772834097e-05, |
| "loss": 0.0633, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.30998140111593303, |
| "grad_norm": 2.4346635341644287, |
| "learning_rate": 1.9380066033247308e-05, |
| "loss": 0.0627, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.31719027090932683, |
| "grad_norm": 1.432133436203003, |
| "learning_rate": 1.9365648293660523e-05, |
| "loss": 0.0616, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.32439914070272063, |
| "grad_norm": 1.2359411716461182, |
| "learning_rate": 1.9351230554073735e-05, |
| "loss": 0.0628, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.33160801049611444, |
| "grad_norm": 2.1902575492858887, |
| "learning_rate": 1.9336812814486947e-05, |
| "loss": 0.0628, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.33881688028950824, |
| "grad_norm": 1.7415978908538818, |
| "learning_rate": 1.932239507490016e-05, |
| "loss": 0.0616, |
| "step": 23500 |
| }, |
| { |
| "epoch": 0.346025750082902, |
| "grad_norm": 1.401383399963379, |
| "learning_rate": 1.930797733531337e-05, |
| "loss": 0.0589, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.3532346198762958, |
| "grad_norm": 1.5828105211257935, |
| "learning_rate": 1.9293559595726582e-05, |
| "loss": 0.0604, |
| "step": 24500 |
| }, |
| { |
| "epoch": 0.3604434896696896, |
| "grad_norm": 0.8541142344474792, |
| "learning_rate": 1.9279141856139797e-05, |
| "loss": 0.0599, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.3676523594630834, |
| "grad_norm": 2.8157145977020264, |
| "learning_rate": 1.926472411655301e-05, |
| "loss": 0.0593, |
| "step": 25500 |
| }, |
| { |
| "epoch": 0.3748612292564772, |
| "grad_norm": 2.129725217819214, |
| "learning_rate": 1.925030637696622e-05, |
| "loss": 0.0578, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.38207009904987094, |
| "grad_norm": 2.5838279724121094, |
| "learning_rate": 1.9235888637379435e-05, |
| "loss": 0.0574, |
| "step": 26500 |
| }, |
| { |
| "epoch": 0.38927896884326474, |
| "grad_norm": 1.7000998258590698, |
| "learning_rate": 1.9221470897792647e-05, |
| "loss": 0.0553, |
| "step": 27000 |
| }, |
| { |
| "epoch": 0.39648783863665854, |
| "grad_norm": 1.2641727924346924, |
| "learning_rate": 1.920705315820586e-05, |
| "loss": 0.0549, |
| "step": 27500 |
| }, |
| { |
| "epoch": 0.40369670843005234, |
| "grad_norm": 1.7529101371765137, |
| "learning_rate": 1.919263541861907e-05, |
| "loss": 0.0562, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.41090557822344614, |
| "grad_norm": 1.4027022123336792, |
| "learning_rate": 1.9178217679032282e-05, |
| "loss": 0.0552, |
| "step": 28500 |
| }, |
| { |
| "epoch": 0.41811444801683995, |
| "grad_norm": 1.6767141819000244, |
| "learning_rate": 1.9163799939445497e-05, |
| "loss": 0.0572, |
| "step": 29000 |
| }, |
| { |
| "epoch": 0.4253233178102337, |
| "grad_norm": 0.8946545720100403, |
| "learning_rate": 1.914938219985871e-05, |
| "loss": 0.0556, |
| "step": 29500 |
| }, |
| { |
| "epoch": 0.4325321876036275, |
| "grad_norm": 2.469862937927246, |
| "learning_rate": 1.913496446027192e-05, |
| "loss": 0.0546, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.4397410573970213, |
| "grad_norm": 3.368171215057373, |
| "learning_rate": 1.9120546720685132e-05, |
| "loss": 0.0527, |
| "step": 30500 |
| }, |
| { |
| "epoch": 0.4469499271904151, |
| "grad_norm": 2.107477903366089, |
| "learning_rate": 1.9106128981098344e-05, |
| "loss": 0.0538, |
| "step": 31000 |
| }, |
| { |
| "epoch": 0.4541587969838089, |
| "grad_norm": 1.8676276206970215, |
| "learning_rate": 1.9091711241511555e-05, |
| "loss": 0.0529, |
| "step": 31500 |
| }, |
| { |
| "epoch": 0.46136766677720265, |
| "grad_norm": 1.8789501190185547, |
| "learning_rate": 1.907729350192477e-05, |
| "loss": 0.0525, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.46857653657059645, |
| "grad_norm": 1.8588016033172607, |
| "learning_rate": 1.9062875762337982e-05, |
| "loss": 0.0519, |
| "step": 32500 |
| }, |
| { |
| "epoch": 0.47578540636399025, |
| "grad_norm": 1.6721725463867188, |
| "learning_rate": 1.9048458022751194e-05, |
| "loss": 0.0508, |
| "step": 33000 |
| }, |
| { |
| "epoch": 0.48299427615738405, |
| "grad_norm": 1.9724555015563965, |
| "learning_rate": 1.903404028316441e-05, |
| "loss": 0.0502, |
| "step": 33500 |
| }, |
| { |
| "epoch": 0.49020314595077785, |
| "grad_norm": 1.9921311140060425, |
| "learning_rate": 1.901962254357762e-05, |
| "loss": 0.051, |
| "step": 34000 |
| }, |
| { |
| "epoch": 0.49741201574417165, |
| "grad_norm": 2.889782190322876, |
| "learning_rate": 1.9005204803990832e-05, |
| "loss": 0.0518, |
| "step": 34500 |
| }, |
| { |
| "epoch": 0.5046208855375655, |
| "grad_norm": 1.7622694969177246, |
| "learning_rate": 1.8990787064404044e-05, |
| "loss": 0.0494, |
| "step": 35000 |
| }, |
| { |
| "epoch": 0.5118297553309592, |
| "grad_norm": 1.713699460029602, |
| "learning_rate": 1.8976369324817256e-05, |
| "loss": 0.0493, |
| "step": 35500 |
| }, |
| { |
| "epoch": 0.5190386251243531, |
| "grad_norm": 1.262862205505371, |
| "learning_rate": 1.896195158523047e-05, |
| "loss": 0.0496, |
| "step": 36000 |
| }, |
| { |
| "epoch": 0.5262474949177468, |
| "grad_norm": 2.085010051727295, |
| "learning_rate": 1.8947533845643682e-05, |
| "loss": 0.0509, |
| "step": 36500 |
| }, |
| { |
| "epoch": 0.5334563647111406, |
| "grad_norm": 1.6257765293121338, |
| "learning_rate": 1.8933116106056894e-05, |
| "loss": 0.0498, |
| "step": 37000 |
| }, |
| { |
| "epoch": 0.5406652345045344, |
| "grad_norm": 0.6558777093887329, |
| "learning_rate": 1.8918698366470106e-05, |
| "loss": 0.0484, |
| "step": 37500 |
| }, |
| { |
| "epoch": 0.5478741042979282, |
| "grad_norm": 1.7351698875427246, |
| "learning_rate": 1.8904280626883318e-05, |
| "loss": 0.0496, |
| "step": 38000 |
| }, |
| { |
| "epoch": 0.555082974091322, |
| "grad_norm": 0.915392279624939, |
| "learning_rate": 1.888986288729653e-05, |
| "loss": 0.0467, |
| "step": 38500 |
| }, |
| { |
| "epoch": 0.5622918438847158, |
| "grad_norm": 0.9719710350036621, |
| "learning_rate": 1.8875445147709744e-05, |
| "loss": 0.0491, |
| "step": 39000 |
| }, |
| { |
| "epoch": 0.5695007136781095, |
| "grad_norm": 0.4347970485687256, |
| "learning_rate": 1.8861027408122956e-05, |
| "loss": 0.0478, |
| "step": 39500 |
| }, |
| { |
| "epoch": 0.5767095834715034, |
| "grad_norm": 1.4013206958770752, |
| "learning_rate": 1.8846609668536168e-05, |
| "loss": 0.0482, |
| "step": 40000 |
| }, |
| { |
| "epoch": 0.5839184532648971, |
| "grad_norm": 1.6916135549545288, |
| "learning_rate": 1.8832191928949383e-05, |
| "loss": 0.0487, |
| "step": 40500 |
| }, |
| { |
| "epoch": 0.591127323058291, |
| "grad_norm": 1.1497479677200317, |
| "learning_rate": 1.8817774189362594e-05, |
| "loss": 0.0473, |
| "step": 41000 |
| }, |
| { |
| "epoch": 0.5983361928516847, |
| "grad_norm": 2.1202707290649414, |
| "learning_rate": 1.8803356449775806e-05, |
| "loss": 0.046, |
| "step": 41500 |
| }, |
| { |
| "epoch": 0.6055450626450785, |
| "grad_norm": 1.8288294076919556, |
| "learning_rate": 1.8788938710189018e-05, |
| "loss": 0.0473, |
| "step": 42000 |
| }, |
| { |
| "epoch": 0.6127539324384723, |
| "grad_norm": 0.8600142598152161, |
| "learning_rate": 1.877452097060223e-05, |
| "loss": 0.0452, |
| "step": 42500 |
| }, |
| { |
| "epoch": 0.6199628022318661, |
| "grad_norm": 2.8069839477539062, |
| "learning_rate": 1.8760103231015445e-05, |
| "loss": 0.048, |
| "step": 43000 |
| }, |
| { |
| "epoch": 0.6271716720252599, |
| "grad_norm": 0.8850429058074951, |
| "learning_rate": 1.8745685491428656e-05, |
| "loss": 0.0474, |
| "step": 43500 |
| }, |
| { |
| "epoch": 0.6343805418186537, |
| "grad_norm": 1.063219666481018, |
| "learning_rate": 1.8731267751841868e-05, |
| "loss": 0.0446, |
| "step": 44000 |
| }, |
| { |
| "epoch": 0.6415894116120474, |
| "grad_norm": 1.3925724029541016, |
| "learning_rate": 1.871685001225508e-05, |
| "loss": 0.0468, |
| "step": 44500 |
| }, |
| { |
| "epoch": 0.6487982814054413, |
| "grad_norm": 0.9575428366661072, |
| "learning_rate": 1.870243227266829e-05, |
| "loss": 0.0447, |
| "step": 45000 |
| }, |
| { |
| "epoch": 0.656007151198835, |
| "grad_norm": 2.547752618789673, |
| "learning_rate": 1.8688014533081503e-05, |
| "loss": 0.0456, |
| "step": 45500 |
| }, |
| { |
| "epoch": 0.6632160209922289, |
| "grad_norm": 0.6029974222183228, |
| "learning_rate": 1.8673596793494718e-05, |
| "loss": 0.0464, |
| "step": 46000 |
| }, |
| { |
| "epoch": 0.6704248907856226, |
| "grad_norm": 0.27106812596321106, |
| "learning_rate": 1.865917905390793e-05, |
| "loss": 0.0437, |
| "step": 46500 |
| }, |
| { |
| "epoch": 0.6776337605790165, |
| "grad_norm": 1.3233801126480103, |
| "learning_rate": 1.864476131432114e-05, |
| "loss": 0.0447, |
| "step": 47000 |
| }, |
| { |
| "epoch": 0.6848426303724102, |
| "grad_norm": 0.38903898000717163, |
| "learning_rate": 1.8630343574734356e-05, |
| "loss": 0.0455, |
| "step": 47500 |
| }, |
| { |
| "epoch": 0.692051500165804, |
| "grad_norm": 1.247036337852478, |
| "learning_rate": 1.8615925835147568e-05, |
| "loss": 0.044, |
| "step": 48000 |
| }, |
| { |
| "epoch": 0.6992603699591978, |
| "grad_norm": 0.9771102666854858, |
| "learning_rate": 1.860150809556078e-05, |
| "loss": 0.0446, |
| "step": 48500 |
| }, |
| { |
| "epoch": 0.7064692397525916, |
| "grad_norm": 1.6191680431365967, |
| "learning_rate": 1.858709035597399e-05, |
| "loss": 0.0455, |
| "step": 49000 |
| }, |
| { |
| "epoch": 0.7136781095459854, |
| "grad_norm": 0.9542379975318909, |
| "learning_rate": 1.8572672616387203e-05, |
| "loss": 0.0426, |
| "step": 49500 |
| }, |
| { |
| "epoch": 0.7208869793393792, |
| "grad_norm": 1.6160619258880615, |
| "learning_rate": 1.8558254876800418e-05, |
| "loss": 0.0433, |
| "step": 50000 |
| }, |
| { |
| "epoch": 0.7280958491327729, |
| "grad_norm": 1.1810977458953857, |
| "learning_rate": 1.854383713721363e-05, |
| "loss": 0.0443, |
| "step": 50500 |
| }, |
| { |
| "epoch": 0.7353047189261668, |
| "grad_norm": 1.4848960638046265, |
| "learning_rate": 1.852941939762684e-05, |
| "loss": 0.0442, |
| "step": 51000 |
| }, |
| { |
| "epoch": 0.7425135887195605, |
| "grad_norm": 1.2140188217163086, |
| "learning_rate": 1.8515001658040053e-05, |
| "loss": 0.0436, |
| "step": 51500 |
| }, |
| { |
| "epoch": 0.7497224585129544, |
| "grad_norm": 0.6803346276283264, |
| "learning_rate": 1.8500583918453265e-05, |
| "loss": 0.0416, |
| "step": 52000 |
| }, |
| { |
| "epoch": 0.7569313283063481, |
| "grad_norm": 2.847879409790039, |
| "learning_rate": 1.8486166178866477e-05, |
| "loss": 0.0401, |
| "step": 52500 |
| }, |
| { |
| "epoch": 0.7641401980997419, |
| "grad_norm": 1.3574286699295044, |
| "learning_rate": 1.8471748439279692e-05, |
| "loss": 0.0426, |
| "step": 53000 |
| }, |
| { |
| "epoch": 0.7713490678931357, |
| "grad_norm": 1.5763428211212158, |
| "learning_rate": 1.8457330699692903e-05, |
| "loss": 0.0416, |
| "step": 53500 |
| }, |
| { |
| "epoch": 0.7785579376865295, |
| "grad_norm": 2.006143808364868, |
| "learning_rate": 1.8442912960106115e-05, |
| "loss": 0.0423, |
| "step": 54000 |
| }, |
| { |
| "epoch": 0.7857668074799233, |
| "grad_norm": 2.0041260719299316, |
| "learning_rate": 1.842849522051933e-05, |
| "loss": 0.043, |
| "step": 54500 |
| }, |
| { |
| "epoch": 0.7929756772733171, |
| "grad_norm": 1.0083436965942383, |
| "learning_rate": 1.8414077480932542e-05, |
| "loss": 0.0428, |
| "step": 55000 |
| }, |
| { |
| "epoch": 0.8001845470667108, |
| "grad_norm": 1.2364863157272339, |
| "learning_rate": 1.8399659741345754e-05, |
| "loss": 0.0431, |
| "step": 55500 |
| }, |
| { |
| "epoch": 0.8073934168601047, |
| "grad_norm": 1.1397020816802979, |
| "learning_rate": 1.8385242001758965e-05, |
| "loss": 0.0408, |
| "step": 56000 |
| }, |
| { |
| "epoch": 0.8146022866534984, |
| "grad_norm": 1.046647071838379, |
| "learning_rate": 1.8370824262172177e-05, |
| "loss": 0.0424, |
| "step": 56500 |
| }, |
| { |
| "epoch": 0.8218111564468923, |
| "grad_norm": 0.7180289626121521, |
| "learning_rate": 1.8356406522585392e-05, |
| "loss": 0.0417, |
| "step": 57000 |
| }, |
| { |
| "epoch": 0.829020026240286, |
| "grad_norm": 1.866095781326294, |
| "learning_rate": 1.8341988782998604e-05, |
| "loss": 0.0406, |
| "step": 57500 |
| }, |
| { |
| "epoch": 0.8362288960336799, |
| "grad_norm": 1.7192025184631348, |
| "learning_rate": 1.8327571043411815e-05, |
| "loss": 0.042, |
| "step": 58000 |
| }, |
| { |
| "epoch": 0.8434377658270736, |
| "grad_norm": 1.3043447732925415, |
| "learning_rate": 1.8313153303825027e-05, |
| "loss": 0.0419, |
| "step": 58500 |
| }, |
| { |
| "epoch": 0.8506466356204674, |
| "grad_norm": 2.372190237045288, |
| "learning_rate": 1.829873556423824e-05, |
| "loss": 0.0421, |
| "step": 59000 |
| }, |
| { |
| "epoch": 0.8578555054138612, |
| "grad_norm": 0.9028930068016052, |
| "learning_rate": 1.828431782465145e-05, |
| "loss": 0.0396, |
| "step": 59500 |
| }, |
| { |
| "epoch": 0.865064375207255, |
| "grad_norm": 1.2869058847427368, |
| "learning_rate": 1.8269900085064665e-05, |
| "loss": 0.0401, |
| "step": 60000 |
| }, |
| { |
| "epoch": 0.8722732450006488, |
| "grad_norm": 2.214855670928955, |
| "learning_rate": 1.8255482345477877e-05, |
| "loss": 0.04, |
| "step": 60500 |
| }, |
| { |
| "epoch": 0.8794821147940426, |
| "grad_norm": 0.9826574325561523, |
| "learning_rate": 1.824106460589109e-05, |
| "loss": 0.0397, |
| "step": 61000 |
| }, |
| { |
| "epoch": 0.8866909845874363, |
| "grad_norm": 0.7741074562072754, |
| "learning_rate": 1.8226646866304304e-05, |
| "loss": 0.0397, |
| "step": 61500 |
| }, |
| { |
| "epoch": 0.8938998543808302, |
| "grad_norm": 1.2778081893920898, |
| "learning_rate": 1.8212229126717516e-05, |
| "loss": 0.0396, |
| "step": 62000 |
| }, |
| { |
| "epoch": 0.9011087241742239, |
| "grad_norm": 0.7415226697921753, |
| "learning_rate": 1.8197811387130727e-05, |
| "loss": 0.0398, |
| "step": 62500 |
| }, |
| { |
| "epoch": 0.9083175939676178, |
| "grad_norm": 2.152737617492676, |
| "learning_rate": 1.818339364754394e-05, |
| "loss": 0.0395, |
| "step": 63000 |
| }, |
| { |
| "epoch": 0.9155264637610115, |
| "grad_norm": 0.9719590544700623, |
| "learning_rate": 1.816897590795715e-05, |
| "loss": 0.0387, |
| "step": 63500 |
| }, |
| { |
| "epoch": 0.9227353335544053, |
| "grad_norm": 1.4587551355361938, |
| "learning_rate": 1.8154558168370366e-05, |
| "loss": 0.0395, |
| "step": 64000 |
| }, |
| { |
| "epoch": 0.9299442033477991, |
| "grad_norm": 1.4218809604644775, |
| "learning_rate": 1.8140140428783577e-05, |
| "loss": 0.0375, |
| "step": 64500 |
| }, |
| { |
| "epoch": 0.9371530731411929, |
| "grad_norm": 1.8009737730026245, |
| "learning_rate": 1.812572268919679e-05, |
| "loss": 0.0387, |
| "step": 65000 |
| }, |
| { |
| "epoch": 0.9443619429345868, |
| "grad_norm": 1.2379016876220703, |
| "learning_rate": 1.811130494961e-05, |
| "loss": 0.0386, |
| "step": 65500 |
| }, |
| { |
| "epoch": 0.9515708127279805, |
| "grad_norm": 1.1901589632034302, |
| "learning_rate": 1.8096887210023216e-05, |
| "loss": 0.0381, |
| "step": 66000 |
| }, |
| { |
| "epoch": 0.9587796825213742, |
| "grad_norm": 1.0341569185256958, |
| "learning_rate": 1.8082469470436424e-05, |
| "loss": 0.0402, |
| "step": 66500 |
| }, |
| { |
| "epoch": 0.9659885523147681, |
| "grad_norm": 1.4235957860946655, |
| "learning_rate": 1.806805173084964e-05, |
| "loss": 0.0382, |
| "step": 67000 |
| }, |
| { |
| "epoch": 0.9731974221081618, |
| "grad_norm": 1.095893383026123, |
| "learning_rate": 1.805363399126285e-05, |
| "loss": 0.0396, |
| "step": 67500 |
| }, |
| { |
| "epoch": 0.9804062919015557, |
| "grad_norm": 1.8859561681747437, |
| "learning_rate": 1.8039216251676063e-05, |
| "loss": 0.038, |
| "step": 68000 |
| }, |
| { |
| "epoch": 0.9876151616949495, |
| "grad_norm": 1.8770360946655273, |
| "learning_rate": 1.8024798512089278e-05, |
| "loss": 0.039, |
| "step": 68500 |
| }, |
| { |
| "epoch": 0.9948240314883433, |
| "grad_norm": 1.870827555656433, |
| "learning_rate": 1.801038077250249e-05, |
| "loss": 0.038, |
| "step": 69000 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_f1": 0.9895049158009324, |
| "eval_loss": 0.034001659601926804, |
| "eval_runtime": 683.1241, |
| "eval_samples_per_second": 1528.989, |
| "eval_steps_per_second": 47.782, |
| "step": 69359 |
| }, |
| { |
| "epoch": 1.002032901281737, |
| "grad_norm": 0.4856395125389099, |
| "learning_rate": 1.79959630329157e-05, |
| "loss": 0.0352, |
| "step": 69500 |
| }, |
| { |
| "epoch": 1.009241771075131, |
| "grad_norm": 1.8835086822509766, |
| "learning_rate": 1.7981545293328913e-05, |
| "loss": 0.0287, |
| "step": 70000 |
| }, |
| { |
| "epoch": 1.0164506408685245, |
| "grad_norm": 1.941490888595581, |
| "learning_rate": 1.7967127553742124e-05, |
| "loss": 0.0307, |
| "step": 70500 |
| }, |
| { |
| "epoch": 1.0236595106619184, |
| "grad_norm": 1.525707483291626, |
| "learning_rate": 1.795270981415534e-05, |
| "loss": 0.03, |
| "step": 71000 |
| }, |
| { |
| "epoch": 1.0308683804553123, |
| "grad_norm": 0.6174446940422058, |
| "learning_rate": 1.793829207456855e-05, |
| "loss": 0.029, |
| "step": 71500 |
| }, |
| { |
| "epoch": 1.0380772502487061, |
| "grad_norm": 1.043771505355835, |
| "learning_rate": 1.7923874334981763e-05, |
| "loss": 0.0311, |
| "step": 72000 |
| }, |
| { |
| "epoch": 1.0452861200420998, |
| "grad_norm": 0.28765255212783813, |
| "learning_rate": 1.7909456595394978e-05, |
| "loss": 0.0291, |
| "step": 72500 |
| }, |
| { |
| "epoch": 1.0524949898354936, |
| "grad_norm": 0.8367669582366943, |
| "learning_rate": 1.789503885580819e-05, |
| "loss": 0.0307, |
| "step": 73000 |
| }, |
| { |
| "epoch": 1.0597038596288875, |
| "grad_norm": 0.8930952548980713, |
| "learning_rate": 1.7880621116221398e-05, |
| "loss": 0.0297, |
| "step": 73500 |
| }, |
| { |
| "epoch": 1.066912729422281, |
| "grad_norm": 1.0413399934768677, |
| "learning_rate": 1.7866203376634613e-05, |
| "loss": 0.03, |
| "step": 74000 |
| }, |
| { |
| "epoch": 1.074121599215675, |
| "grad_norm": 1.1929751634597778, |
| "learning_rate": 1.7851785637047825e-05, |
| "loss": 0.0287, |
| "step": 74500 |
| }, |
| { |
| "epoch": 1.0813304690090688, |
| "grad_norm": 0.8676954507827759, |
| "learning_rate": 1.7837367897461036e-05, |
| "loss": 0.0307, |
| "step": 75000 |
| }, |
| { |
| "epoch": 1.0885393388024625, |
| "grad_norm": 0.733383059501648, |
| "learning_rate": 1.782295015787425e-05, |
| "loss": 0.029, |
| "step": 75500 |
| }, |
| { |
| "epoch": 1.0957482085958563, |
| "grad_norm": 1.005913257598877, |
| "learning_rate": 1.7808532418287463e-05, |
| "loss": 0.0288, |
| "step": 76000 |
| }, |
| { |
| "epoch": 1.1029570783892502, |
| "grad_norm": 1.4946510791778564, |
| "learning_rate": 1.7794114678700675e-05, |
| "loss": 0.0294, |
| "step": 76500 |
| }, |
| { |
| "epoch": 1.110165948182644, |
| "grad_norm": 0.966665506362915, |
| "learning_rate": 1.7779696939113886e-05, |
| "loss": 0.0311, |
| "step": 77000 |
| }, |
| { |
| "epoch": 1.1173748179760377, |
| "grad_norm": 0.8129379749298096, |
| "learning_rate": 1.7765279199527098e-05, |
| "loss": 0.0301, |
| "step": 77500 |
| }, |
| { |
| "epoch": 1.1245836877694315, |
| "grad_norm": 1.1672717332839966, |
| "learning_rate": 1.7750861459940313e-05, |
| "loss": 0.0297, |
| "step": 78000 |
| }, |
| { |
| "epoch": 1.1317925575628254, |
| "grad_norm": 1.0149409770965576, |
| "learning_rate": 1.7736443720353525e-05, |
| "loss": 0.031, |
| "step": 78500 |
| }, |
| { |
| "epoch": 1.139001427356219, |
| "grad_norm": 1.3319754600524902, |
| "learning_rate": 1.7722025980766736e-05, |
| "loss": 0.0294, |
| "step": 79000 |
| }, |
| { |
| "epoch": 1.1462102971496129, |
| "grad_norm": 3.036787509918213, |
| "learning_rate": 1.770760824117995e-05, |
| "loss": 0.0294, |
| "step": 79500 |
| }, |
| { |
| "epoch": 1.1534191669430067, |
| "grad_norm": 0.6281238198280334, |
| "learning_rate": 1.7693190501593163e-05, |
| "loss": 0.0312, |
| "step": 80000 |
| }, |
| { |
| "epoch": 1.1606280367364006, |
| "grad_norm": 1.39284086227417, |
| "learning_rate": 1.767877276200637e-05, |
| "loss": 0.0299, |
| "step": 80500 |
| }, |
| { |
| "epoch": 1.1678369065297942, |
| "grad_norm": 2.4636764526367188, |
| "learning_rate": 1.7664355022419587e-05, |
| "loss": 0.0304, |
| "step": 81000 |
| }, |
| { |
| "epoch": 1.175045776323188, |
| "grad_norm": 1.0513309240341187, |
| "learning_rate": 1.7649937282832798e-05, |
| "loss": 0.0293, |
| "step": 81500 |
| }, |
| { |
| "epoch": 1.182254646116582, |
| "grad_norm": 0.739205539226532, |
| "learning_rate": 1.763551954324601e-05, |
| "loss": 0.0297, |
| "step": 82000 |
| }, |
| { |
| "epoch": 1.1894635159099756, |
| "grad_norm": 1.1646817922592163, |
| "learning_rate": 1.7621101803659225e-05, |
| "loss": 0.0281, |
| "step": 82500 |
| }, |
| { |
| "epoch": 1.1966723857033694, |
| "grad_norm": 1.6882481575012207, |
| "learning_rate": 1.7606684064072437e-05, |
| "loss": 0.0308, |
| "step": 83000 |
| }, |
| { |
| "epoch": 1.2038812554967633, |
| "grad_norm": 2.1905980110168457, |
| "learning_rate": 1.759226632448565e-05, |
| "loss": 0.0301, |
| "step": 83500 |
| }, |
| { |
| "epoch": 1.211090125290157, |
| "grad_norm": 0.4102253317832947, |
| "learning_rate": 1.757784858489886e-05, |
| "loss": 0.0296, |
| "step": 84000 |
| }, |
| { |
| "epoch": 1.2182989950835508, |
| "grad_norm": 1.5355827808380127, |
| "learning_rate": 1.7563430845312072e-05, |
| "loss": 0.031, |
| "step": 84500 |
| }, |
| { |
| "epoch": 1.2255078648769446, |
| "grad_norm": 0.4144400954246521, |
| "learning_rate": 1.7549013105725287e-05, |
| "loss": 0.0303, |
| "step": 85000 |
| }, |
| { |
| "epoch": 1.2327167346703383, |
| "grad_norm": 0.5286178588867188, |
| "learning_rate": 1.75345953661385e-05, |
| "loss": 0.0311, |
| "step": 85500 |
| }, |
| { |
| "epoch": 1.2399256044637321, |
| "grad_norm": 1.3401720523834229, |
| "learning_rate": 1.752017762655171e-05, |
| "loss": 0.0303, |
| "step": 86000 |
| }, |
| { |
| "epoch": 1.247134474257126, |
| "grad_norm": 1.5546993017196655, |
| "learning_rate": 1.7505759886964925e-05, |
| "loss": 0.0296, |
| "step": 86500 |
| }, |
| { |
| "epoch": 1.2543433440505198, |
| "grad_norm": 1.7993361949920654, |
| "learning_rate": 1.7491342147378137e-05, |
| "loss": 0.03, |
| "step": 87000 |
| }, |
| { |
| "epoch": 1.2615522138439135, |
| "grad_norm": 1.058311939239502, |
| "learning_rate": 1.7476924407791345e-05, |
| "loss": 0.0283, |
| "step": 87500 |
| }, |
| { |
| "epoch": 1.2687610836373073, |
| "grad_norm": 1.1616915464401245, |
| "learning_rate": 1.746250666820456e-05, |
| "loss": 0.0306, |
| "step": 88000 |
| }, |
| { |
| "epoch": 1.2759699534307012, |
| "grad_norm": 1.5120762586593628, |
| "learning_rate": 1.7448088928617772e-05, |
| "loss": 0.0296, |
| "step": 88500 |
| }, |
| { |
| "epoch": 1.283178823224095, |
| "grad_norm": 1.033087134361267, |
| "learning_rate": 1.7433671189030984e-05, |
| "loss": 0.0296, |
| "step": 89000 |
| }, |
| { |
| "epoch": 1.2903876930174887, |
| "grad_norm": 0.9456692337989807, |
| "learning_rate": 1.74192534494442e-05, |
| "loss": 0.0293, |
| "step": 89500 |
| }, |
| { |
| "epoch": 1.2975965628108825, |
| "grad_norm": 0.4252309799194336, |
| "learning_rate": 1.740483570985741e-05, |
| "loss": 0.0287, |
| "step": 90000 |
| }, |
| { |
| "epoch": 1.3048054326042764, |
| "grad_norm": 1.4315825700759888, |
| "learning_rate": 1.7390417970270622e-05, |
| "loss": 0.0314, |
| "step": 90500 |
| }, |
| { |
| "epoch": 1.31201430239767, |
| "grad_norm": 0.9023242592811584, |
| "learning_rate": 1.7376000230683834e-05, |
| "loss": 0.0296, |
| "step": 91000 |
| }, |
| { |
| "epoch": 1.3192231721910639, |
| "grad_norm": 1.8055963516235352, |
| "learning_rate": 1.7361582491097045e-05, |
| "loss": 0.0289, |
| "step": 91500 |
| }, |
| { |
| "epoch": 1.3264320419844577, |
| "grad_norm": 1.2063618898391724, |
| "learning_rate": 1.734716475151026e-05, |
| "loss": 0.03, |
| "step": 92000 |
| }, |
| { |
| "epoch": 1.3336409117778514, |
| "grad_norm": 2.5645272731781006, |
| "learning_rate": 1.7332747011923472e-05, |
| "loss": 0.0289, |
| "step": 92500 |
| }, |
| { |
| "epoch": 1.3408497815712452, |
| "grad_norm": 1.9335203170776367, |
| "learning_rate": 1.7318329272336684e-05, |
| "loss": 0.0285, |
| "step": 93000 |
| }, |
| { |
| "epoch": 1.348058651364639, |
| "grad_norm": 0.8842147588729858, |
| "learning_rate": 1.73039115327499e-05, |
| "loss": 0.0287, |
| "step": 93500 |
| }, |
| { |
| "epoch": 1.3552675211580327, |
| "grad_norm": 1.2006937265396118, |
| "learning_rate": 1.728949379316311e-05, |
| "loss": 0.0288, |
| "step": 94000 |
| }, |
| { |
| "epoch": 1.3624763909514266, |
| "grad_norm": 1.1261006593704224, |
| "learning_rate": 1.7275076053576322e-05, |
| "loss": 0.0293, |
| "step": 94500 |
| }, |
| { |
| "epoch": 1.3696852607448204, |
| "grad_norm": 1.2065215110778809, |
| "learning_rate": 1.7260658313989534e-05, |
| "loss": 0.0282, |
| "step": 95000 |
| }, |
| { |
| "epoch": 1.3768941305382143, |
| "grad_norm": 1.8486534357070923, |
| "learning_rate": 1.7246240574402746e-05, |
| "loss": 0.029, |
| "step": 95500 |
| }, |
| { |
| "epoch": 1.384103000331608, |
| "grad_norm": 0.8908069729804993, |
| "learning_rate": 1.7231822834815957e-05, |
| "loss": 0.0294, |
| "step": 96000 |
| }, |
| { |
| "epoch": 1.3913118701250018, |
| "grad_norm": 0.6375325918197632, |
| "learning_rate": 1.7217405095229172e-05, |
| "loss": 0.0287, |
| "step": 96500 |
| }, |
| { |
| "epoch": 1.3985207399183957, |
| "grad_norm": 1.9673434495925903, |
| "learning_rate": 1.7202987355642384e-05, |
| "loss": 0.0282, |
| "step": 97000 |
| }, |
| { |
| "epoch": 1.4057296097117895, |
| "grad_norm": 1.1606006622314453, |
| "learning_rate": 1.7188569616055596e-05, |
| "loss": 0.0284, |
| "step": 97500 |
| }, |
| { |
| "epoch": 1.4129384795051831, |
| "grad_norm": 1.003493309020996, |
| "learning_rate": 1.7174151876468807e-05, |
| "loss": 0.0283, |
| "step": 98000 |
| }, |
| { |
| "epoch": 1.420147349298577, |
| "grad_norm": 0.9186868071556091, |
| "learning_rate": 1.715973413688202e-05, |
| "loss": 0.0277, |
| "step": 98500 |
| }, |
| { |
| "epoch": 1.4273562190919709, |
| "grad_norm": 1.3305683135986328, |
| "learning_rate": 1.7145316397295234e-05, |
| "loss": 0.0292, |
| "step": 99000 |
| }, |
| { |
| "epoch": 1.4345650888853645, |
| "grad_norm": 1.3776835203170776, |
| "learning_rate": 1.7130898657708446e-05, |
| "loss": 0.0286, |
| "step": 99500 |
| }, |
| { |
| "epoch": 1.4417739586787583, |
| "grad_norm": 1.6687921285629272, |
| "learning_rate": 1.7116480918121658e-05, |
| "loss": 0.029, |
| "step": 100000 |
| }, |
| { |
| "epoch": 1.4489828284721522, |
| "grad_norm": 1.9249308109283447, |
| "learning_rate": 1.7102063178534873e-05, |
| "loss": 0.0262, |
| "step": 100500 |
| }, |
| { |
| "epoch": 1.4561916982655458, |
| "grad_norm": 1.1834752559661865, |
| "learning_rate": 1.7087645438948084e-05, |
| "loss": 0.0294, |
| "step": 101000 |
| }, |
| { |
| "epoch": 1.4634005680589397, |
| "grad_norm": 2.1350696086883545, |
| "learning_rate": 1.7073227699361296e-05, |
| "loss": 0.0276, |
| "step": 101500 |
| }, |
| { |
| "epoch": 1.4706094378523336, |
| "grad_norm": 2.563725709915161, |
| "learning_rate": 1.7058809959774508e-05, |
| "loss": 0.0276, |
| "step": 102000 |
| }, |
| { |
| "epoch": 1.4778183076457272, |
| "grad_norm": 0.9226647019386292, |
| "learning_rate": 1.704439222018772e-05, |
| "loss": 0.0284, |
| "step": 102500 |
| }, |
| { |
| "epoch": 1.485027177439121, |
| "grad_norm": 0.34231990575790405, |
| "learning_rate": 1.702997448060093e-05, |
| "loss": 0.0281, |
| "step": 103000 |
| }, |
| { |
| "epoch": 1.492236047232515, |
| "grad_norm": 2.339191436767578, |
| "learning_rate": 1.7015556741014146e-05, |
| "loss": 0.029, |
| "step": 103500 |
| }, |
| { |
| "epoch": 1.4994449170259085, |
| "grad_norm": 1.7756520509719849, |
| "learning_rate": 1.7001139001427358e-05, |
| "loss": 0.0288, |
| "step": 104000 |
| }, |
| { |
| "epoch": 1.5066537868193026, |
| "grad_norm": 2.0807387828826904, |
| "learning_rate": 1.698672126184057e-05, |
| "loss": 0.0281, |
| "step": 104500 |
| }, |
| { |
| "epoch": 1.5138626566126963, |
| "grad_norm": 1.4787542819976807, |
| "learning_rate": 1.6972303522253785e-05, |
| "loss": 0.0284, |
| "step": 105000 |
| }, |
| { |
| "epoch": 1.52107152640609, |
| "grad_norm": 1.719581961631775, |
| "learning_rate": 1.6957885782666993e-05, |
| "loss": 0.0287, |
| "step": 105500 |
| }, |
| { |
| "epoch": 1.528280396199484, |
| "grad_norm": 0.8158332109451294, |
| "learning_rate": 1.6943468043080208e-05, |
| "loss": 0.029, |
| "step": 106000 |
| }, |
| { |
| "epoch": 1.5354892659928776, |
| "grad_norm": 0.10212863981723785, |
| "learning_rate": 1.692905030349342e-05, |
| "loss": 0.0275, |
| "step": 106500 |
| }, |
| { |
| "epoch": 1.5426981357862715, |
| "grad_norm": 1.0970171689987183, |
| "learning_rate": 1.691463256390663e-05, |
| "loss": 0.0282, |
| "step": 107000 |
| }, |
| { |
| "epoch": 1.5499070055796653, |
| "grad_norm": 0.4221758246421814, |
| "learning_rate": 1.6900214824319846e-05, |
| "loss": 0.0285, |
| "step": 107500 |
| }, |
| { |
| "epoch": 1.557115875373059, |
| "grad_norm": 1.5400525331497192, |
| "learning_rate": 1.6885797084733058e-05, |
| "loss": 0.0282, |
| "step": 108000 |
| }, |
| { |
| "epoch": 1.5643247451664528, |
| "grad_norm": 1.6638318300247192, |
| "learning_rate": 1.687137934514627e-05, |
| "loss": 0.0301, |
| "step": 108500 |
| }, |
| { |
| "epoch": 1.5715336149598467, |
| "grad_norm": 1.3407906293869019, |
| "learning_rate": 1.685696160555948e-05, |
| "loss": 0.0276, |
| "step": 109000 |
| }, |
| { |
| "epoch": 1.5787424847532403, |
| "grad_norm": 0.8864063024520874, |
| "learning_rate": 1.6842543865972693e-05, |
| "loss": 0.0273, |
| "step": 109500 |
| }, |
| { |
| "epoch": 1.5859513545466342, |
| "grad_norm": 1.5699615478515625, |
| "learning_rate": 1.6828126126385905e-05, |
| "loss": 0.0267, |
| "step": 110000 |
| }, |
| { |
| "epoch": 1.593160224340028, |
| "grad_norm": 0.20337066054344177, |
| "learning_rate": 1.681370838679912e-05, |
| "loss": 0.0285, |
| "step": 110500 |
| }, |
| { |
| "epoch": 1.6003690941334217, |
| "grad_norm": 0.7260587811470032, |
| "learning_rate": 1.679929064721233e-05, |
| "loss": 0.028, |
| "step": 111000 |
| }, |
| { |
| "epoch": 1.6075779639268155, |
| "grad_norm": 0.434865266084671, |
| "learning_rate": 1.6784872907625543e-05, |
| "loss": 0.027, |
| "step": 111500 |
| }, |
| { |
| "epoch": 1.6147868337202094, |
| "grad_norm": 1.0067859888076782, |
| "learning_rate": 1.677045516803876e-05, |
| "loss": 0.0276, |
| "step": 112000 |
| }, |
| { |
| "epoch": 1.621995703513603, |
| "grad_norm": 1.7014882564544678, |
| "learning_rate": 1.6756037428451967e-05, |
| "loss": 0.0276, |
| "step": 112500 |
| }, |
| { |
| "epoch": 1.629204573306997, |
| "grad_norm": 1.2809230089187622, |
| "learning_rate": 1.674161968886518e-05, |
| "loss": 0.0276, |
| "step": 113000 |
| }, |
| { |
| "epoch": 1.6364134431003907, |
| "grad_norm": 1.2574232816696167, |
| "learning_rate": 1.6727201949278393e-05, |
| "loss": 0.0284, |
| "step": 113500 |
| }, |
| { |
| "epoch": 1.6436223128937844, |
| "grad_norm": 1.3797274827957153, |
| "learning_rate": 1.6712784209691605e-05, |
| "loss": 0.0282, |
| "step": 114000 |
| }, |
| { |
| "epoch": 1.6508311826871784, |
| "grad_norm": 0.32101693749427795, |
| "learning_rate": 1.669836647010482e-05, |
| "loss": 0.0274, |
| "step": 114500 |
| }, |
| { |
| "epoch": 1.658040052480572, |
| "grad_norm": 0.41121360659599304, |
| "learning_rate": 1.6683948730518032e-05, |
| "loss": 0.0286, |
| "step": 115000 |
| }, |
| { |
| "epoch": 1.665248922273966, |
| "grad_norm": 0.5161770582199097, |
| "learning_rate": 1.6669530990931243e-05, |
| "loss": 0.0271, |
| "step": 115500 |
| }, |
| { |
| "epoch": 1.6724577920673598, |
| "grad_norm": 1.153785228729248, |
| "learning_rate": 1.6655113251344455e-05, |
| "loss": 0.0264, |
| "step": 116000 |
| }, |
| { |
| "epoch": 1.6796666618607534, |
| "grad_norm": 1.5621336698532104, |
| "learning_rate": 1.6640695511757667e-05, |
| "loss": 0.0272, |
| "step": 116500 |
| }, |
| { |
| "epoch": 1.6868755316541473, |
| "grad_norm": 2.4250948429107666, |
| "learning_rate": 1.662627777217088e-05, |
| "loss": 0.0282, |
| "step": 117000 |
| }, |
| { |
| "epoch": 1.6940844014475411, |
| "grad_norm": 0.24833956360816956, |
| "learning_rate": 1.6611860032584094e-05, |
| "loss": 0.0279, |
| "step": 117500 |
| }, |
| { |
| "epoch": 1.7012932712409348, |
| "grad_norm": 2.7739059925079346, |
| "learning_rate": 1.6597442292997305e-05, |
| "loss": 0.0283, |
| "step": 118000 |
| }, |
| { |
| "epoch": 1.7085021410343286, |
| "grad_norm": 0.29604852199554443, |
| "learning_rate": 1.6583024553410517e-05, |
| "loss": 0.0271, |
| "step": 118500 |
| }, |
| { |
| "epoch": 1.7157110108277225, |
| "grad_norm": 1.0948668718338013, |
| "learning_rate": 1.6568606813823732e-05, |
| "loss": 0.0269, |
| "step": 119000 |
| }, |
| { |
| "epoch": 1.7229198806211161, |
| "grad_norm": 0.20236891508102417, |
| "learning_rate": 1.655418907423694e-05, |
| "loss": 0.0264, |
| "step": 119500 |
| }, |
| { |
| "epoch": 1.73012875041451, |
| "grad_norm": 0.9090920090675354, |
| "learning_rate": 1.6539771334650155e-05, |
| "loss": 0.0282, |
| "step": 120000 |
| }, |
| { |
| "epoch": 1.7373376202079038, |
| "grad_norm": 2.128474473953247, |
| "learning_rate": 1.6525353595063367e-05, |
| "loss": 0.0283, |
| "step": 120500 |
| }, |
| { |
| "epoch": 1.7445464900012975, |
| "grad_norm": 1.6552634239196777, |
| "learning_rate": 1.651093585547658e-05, |
| "loss": 0.0272, |
| "step": 121000 |
| }, |
| { |
| "epoch": 1.7517553597946915, |
| "grad_norm": 0.7921839356422424, |
| "learning_rate": 1.6496518115889794e-05, |
| "loss": 0.0301, |
| "step": 121500 |
| }, |
| { |
| "epoch": 1.7589642295880852, |
| "grad_norm": 0.8467416763305664, |
| "learning_rate": 1.6482100376303006e-05, |
| "loss": 0.0266, |
| "step": 122000 |
| }, |
| { |
| "epoch": 1.7661730993814788, |
| "grad_norm": 1.4604544639587402, |
| "learning_rate": 1.6467682636716217e-05, |
| "loss": 0.0253, |
| "step": 122500 |
| }, |
| { |
| "epoch": 1.773381969174873, |
| "grad_norm": 0.677890956401825, |
| "learning_rate": 1.645326489712943e-05, |
| "loss": 0.0266, |
| "step": 123000 |
| }, |
| { |
| "epoch": 1.7805908389682665, |
| "grad_norm": 0.2728472352027893, |
| "learning_rate": 1.643884715754264e-05, |
| "loss": 0.027, |
| "step": 123500 |
| }, |
| { |
| "epoch": 1.7877997087616604, |
| "grad_norm": 1.2005136013031006, |
| "learning_rate": 1.6424429417955852e-05, |
| "loss": 0.0265, |
| "step": 124000 |
| }, |
| { |
| "epoch": 1.7950085785550542, |
| "grad_norm": 2.1395583152770996, |
| "learning_rate": 1.6410011678369067e-05, |
| "loss": 0.0285, |
| "step": 124500 |
| }, |
| { |
| "epoch": 1.8022174483484479, |
| "grad_norm": 1.5524953603744507, |
| "learning_rate": 1.639559393878228e-05, |
| "loss": 0.026, |
| "step": 125000 |
| }, |
| { |
| "epoch": 1.8094263181418417, |
| "grad_norm": 1.5434062480926514, |
| "learning_rate": 1.638117619919549e-05, |
| "loss": 0.0272, |
| "step": 125500 |
| }, |
| { |
| "epoch": 1.8166351879352356, |
| "grad_norm": 1.4732664823532104, |
| "learning_rate": 1.6366758459608706e-05, |
| "loss": 0.0264, |
| "step": 126000 |
| }, |
| { |
| "epoch": 1.8238440577286292, |
| "grad_norm": 0.5316962599754333, |
| "learning_rate": 1.6352340720021914e-05, |
| "loss": 0.0262, |
| "step": 126500 |
| }, |
| { |
| "epoch": 1.831052927522023, |
| "grad_norm": 0.09009312838315964, |
| "learning_rate": 1.633792298043513e-05, |
| "loss": 0.0272, |
| "step": 127000 |
| }, |
| { |
| "epoch": 1.838261797315417, |
| "grad_norm": 1.211990475654602, |
| "learning_rate": 1.632350524084834e-05, |
| "loss": 0.0272, |
| "step": 127500 |
| }, |
| { |
| "epoch": 1.8454706671088106, |
| "grad_norm": 1.1306172609329224, |
| "learning_rate": 1.6309087501261552e-05, |
| "loss": 0.0268, |
| "step": 128000 |
| }, |
| { |
| "epoch": 1.8526795369022044, |
| "grad_norm": 1.8232672214508057, |
| "learning_rate": 1.6294669761674768e-05, |
| "loss": 0.0282, |
| "step": 128500 |
| }, |
| { |
| "epoch": 1.8598884066955983, |
| "grad_norm": 2.736703395843506, |
| "learning_rate": 1.628025202208798e-05, |
| "loss": 0.0271, |
| "step": 129000 |
| }, |
| { |
| "epoch": 1.867097276488992, |
| "grad_norm": 2.2017531394958496, |
| "learning_rate": 1.626583428250119e-05, |
| "loss": 0.0264, |
| "step": 129500 |
| }, |
| { |
| "epoch": 1.874306146282386, |
| "grad_norm": 0.6630580425262451, |
| "learning_rate": 1.6251416542914403e-05, |
| "loss": 0.0268, |
| "step": 130000 |
| }, |
| { |
| "epoch": 1.8815150160757796, |
| "grad_norm": 0.2576875388622284, |
| "learning_rate": 1.6236998803327614e-05, |
| "loss": 0.0275, |
| "step": 130500 |
| }, |
| { |
| "epoch": 1.8887238858691733, |
| "grad_norm": 0.625859260559082, |
| "learning_rate": 1.6222581063740826e-05, |
| "loss": 0.0263, |
| "step": 131000 |
| }, |
| { |
| "epoch": 1.8959327556625674, |
| "grad_norm": 2.3079171180725098, |
| "learning_rate": 1.620816332415404e-05, |
| "loss": 0.0266, |
| "step": 131500 |
| }, |
| { |
| "epoch": 1.903141625455961, |
| "grad_norm": 0.8551648259162903, |
| "learning_rate": 1.6193745584567253e-05, |
| "loss": 0.0268, |
| "step": 132000 |
| }, |
| { |
| "epoch": 1.9103504952493549, |
| "grad_norm": 1.2068754434585571, |
| "learning_rate": 1.6179327844980464e-05, |
| "loss": 0.0276, |
| "step": 132500 |
| }, |
| { |
| "epoch": 1.9175593650427487, |
| "grad_norm": 0.4594031274318695, |
| "learning_rate": 1.616491010539368e-05, |
| "loss": 0.0271, |
| "step": 133000 |
| }, |
| { |
| "epoch": 1.9247682348361423, |
| "grad_norm": 0.5821360945701599, |
| "learning_rate": 1.6150492365806888e-05, |
| "loss": 0.0267, |
| "step": 133500 |
| }, |
| { |
| "epoch": 1.9319771046295362, |
| "grad_norm": 0.5188286304473877, |
| "learning_rate": 1.6136074626220103e-05, |
| "loss": 0.027, |
| "step": 134000 |
| }, |
| { |
| "epoch": 1.93918597442293, |
| "grad_norm": 1.6506882905960083, |
| "learning_rate": 1.6121656886633315e-05, |
| "loss": 0.026, |
| "step": 134500 |
| }, |
| { |
| "epoch": 1.9463948442163237, |
| "grad_norm": 1.5678963661193848, |
| "learning_rate": 1.6107239147046526e-05, |
| "loss": 0.0264, |
| "step": 135000 |
| }, |
| { |
| "epoch": 1.9536037140097176, |
| "grad_norm": 0.3626735210418701, |
| "learning_rate": 1.609282140745974e-05, |
| "loss": 0.0264, |
| "step": 135500 |
| }, |
| { |
| "epoch": 1.9608125838031114, |
| "grad_norm": 0.48542195558547974, |
| "learning_rate": 1.6078403667872953e-05, |
| "loss": 0.0257, |
| "step": 136000 |
| }, |
| { |
| "epoch": 1.968021453596505, |
| "grad_norm": 0.93156498670578, |
| "learning_rate": 1.6063985928286165e-05, |
| "loss": 0.0274, |
| "step": 136500 |
| }, |
| { |
| "epoch": 1.975230323389899, |
| "grad_norm": 0.6599089503288269, |
| "learning_rate": 1.6049568188699376e-05, |
| "loss": 0.0253, |
| "step": 137000 |
| }, |
| { |
| "epoch": 1.9824391931832928, |
| "grad_norm": 2.511162519454956, |
| "learning_rate": 1.6035150449112588e-05, |
| "loss": 0.0264, |
| "step": 137500 |
| }, |
| { |
| "epoch": 1.9896480629766864, |
| "grad_norm": 0.7365297675132751, |
| "learning_rate": 1.6020732709525803e-05, |
| "loss": 0.0263, |
| "step": 138000 |
| }, |
| { |
| "epoch": 1.9968569327700805, |
| "grad_norm": 0.9106433391571045, |
| "learning_rate": 1.6006314969939015e-05, |
| "loss": 0.027, |
| "step": 138500 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_f1": 0.9915470627263667, |
| "eval_loss": 0.02749801054596901, |
| "eval_runtime": 1640.2112, |
| "eval_samples_per_second": 636.802, |
| "eval_steps_per_second": 19.9, |
| "step": 138718 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 693590, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 1000.0, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 5, |
| "early_stopping_threshold": 0.005 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 1 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.35149119187216e+18, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |